Diffstat (limited to 'compiler/GHC')
60 files changed, 19029 insertions, 112 deletions
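The hunks below introduce the new GHC.Cmm.* module hierarchy (GHC.Cmm, GHC.Cmm.BlockId, GHC.Cmm.CLabel, and the GHC.Cmm.Dataflow.* modules they import). As a quick orientation sketch, not part of the patch itself, compiler code would now reach the Cmm types through the module paths created here:

  -- Orientation only; the modules and names are the ones added below.
  import GHC.Cmm (CmmGroup, GenCmmDecl (..), CmmGraph)
  import GHC.Cmm.BlockId (BlockId, newBlockId)
  import GHC.Cmm.CLabel (CLabel, mkLocalBlockLabel)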
diff --git a/compiler/GHC/Cmm.hs b/compiler/GHC/Cmm.hs new file mode 100644 index 0000000000..5efecdc534 --- /dev/null +++ b/compiler/GHC/Cmm.hs @@ -0,0 +1,231 @@ +-- Cmm representations using Hoopl's Graph CmmNode e x. +{-# LANGUAGE GADTs #-} + +module GHC.Cmm ( + -- * Cmm top-level datatypes + CmmProgram, CmmGroup, GenCmmGroup, + CmmDecl, GenCmmDecl(..), + CmmGraph, GenCmmGraph(..), + CmmBlock, + RawCmmDecl, RawCmmGroup, + Section(..), SectionType(..), CmmStatics(..), CmmStatic(..), + isSecConstant, + + -- ** Blocks containing lists + GenBasicBlock(..), blockId, + ListGraph(..), pprBBlock, + + -- * Info Tables + CmmTopInfo(..), CmmStackInfo(..), CmmInfoTable(..), topInfoTable, + ClosureTypeInfo(..), + ProfilingInfo(..), ConstrDescription, + + -- * Statements, expressions and types + module GHC.Cmm.Node, + module GHC.Cmm.Expr, + ) where + +import GhcPrelude + +import Id +import CostCentre +import GHC.Cmm.CLabel +import GHC.Cmm.BlockId +import GHC.Cmm.Node +import GHC.Runtime.Layout +import GHC.Cmm.Expr +import GHC.Cmm.Dataflow.Block +import GHC.Cmm.Dataflow.Collections +import GHC.Cmm.Dataflow.Graph +import GHC.Cmm.Dataflow.Label +import Outputable +import Data.ByteString (ByteString) + +----------------------------------------------------------------------------- +-- Cmm, GenCmm +----------------------------------------------------------------------------- + +-- A CmmProgram is a list of CmmGroups +-- A CmmGroup is a list of top-level declarations + +-- When object-splitting is on, each group is compiled into a separate +-- .o file. So typically we put closely related stuff in a CmmGroup. +-- Section-splitting follows suit and makes one .text subsection for each +-- CmmGroup. + +type CmmProgram = [CmmGroup] + +type GenCmmGroup d h g = [GenCmmDecl d h g] +type CmmGroup = GenCmmGroup CmmStatics CmmTopInfo CmmGraph +type RawCmmGroup = GenCmmGroup CmmStatics (LabelMap CmmStatics) CmmGraph + +----------------------------------------------------------------------------- +-- CmmDecl, GenCmmDecl +----------------------------------------------------------------------------- + +-- GenCmmDecl is abstracted over +-- d, the type of static data elements in CmmData +-- h, the static info preceding the code of a CmmProc +-- g, the control-flow graph of a CmmProc +-- +-- We expect there to be two main instances of this type: +-- (a) C--, i.e. populated with various C-- constructs +-- (b) Native code, populated with data/instructions + +-- | A top-level chunk, abstracted over the type of the contents of +-- the basic blocks (Cmm or instructions are the likely instantiations). +data GenCmmDecl d h g + = CmmProc -- A procedure + h -- Extra header such as the info table + CLabel -- Entry label + [GlobalReg] -- Registers live on entry. Note that the set of live + -- registers will be correct in generated C-- code, but + -- not in hand-written C-- code. However, + -- splitAtProcPoints calculates correct liveness + -- information for CmmProcs. 
+ g -- Control-flow graph for the procedure's code + + | CmmData -- Static data + Section + d + +type CmmDecl = GenCmmDecl CmmStatics CmmTopInfo CmmGraph + +type RawCmmDecl + = GenCmmDecl + CmmStatics + (LabelMap CmmStatics) + CmmGraph + +----------------------------------------------------------------------------- +-- Graphs +----------------------------------------------------------------------------- + +type CmmGraph = GenCmmGraph CmmNode +data GenCmmGraph n = CmmGraph { g_entry :: BlockId, g_graph :: Graph n C C } +type CmmBlock = Block CmmNode C C + +----------------------------------------------------------------------------- +-- Info Tables +----------------------------------------------------------------------------- + +-- | CmmTopInfo is attached to each CmmDecl (see defn of CmmGroup), and contains +-- the extra info (beyond the executable code) that belongs to that CmmDecl. +data CmmTopInfo = TopInfo { info_tbls :: LabelMap CmmInfoTable + , stack_info :: CmmStackInfo } + +topInfoTable :: GenCmmDecl a CmmTopInfo (GenCmmGraph n) -> Maybe CmmInfoTable +topInfoTable (CmmProc infos _ _ g) = mapLookup (g_entry g) (info_tbls infos) +topInfoTable _ = Nothing + +data CmmStackInfo + = StackInfo { + arg_space :: ByteOff, + -- number of bytes of arguments on the stack on entry to + -- the proc. This is filled in by GHC.StgToCmm.codeGen, and + -- used by the stack allocator later. + updfr_space :: Maybe ByteOff, + -- XXX: this never contains anything useful, but it should. + -- See comment in GHC.Cmm.LayoutStack. + do_layout :: Bool + -- Do automatic stack layout for this proc. This is + -- True for all code generated by the code generator, + -- but is occasionally False for hand-written Cmm where + -- we want to do the stack manipulation manually. + } + +-- | Info table as a Haskell data type +data CmmInfoTable + = CmmInfoTable { + cit_lbl :: CLabel, -- Info table label + cit_rep :: SMRep, + cit_prof :: ProfilingInfo, + cit_srt :: Maybe CLabel, -- empty, or a closure address + cit_clo :: Maybe (Id, CostCentreStack) + -- Just (id,ccs) <=> build a static closure later + -- Nothing <=> don't build a static closure + -- + -- Static closures for FUNs and THUNKs are *not* generated by + -- the code generator, because we might want to add SRT + -- entries to them later (for FUNs at least; THUNKs are + -- treated the same for consistency). See Note [SRTs] in + -- GHC.Cmm.Info.Build, in particular the [FUN] optimisation. + -- + -- This is strictly speaking not a part of the info table that + -- will be finally generated, but it's the only convenient + -- place to convey this information from the code generator to + -- where we build the static closures in + -- GHC.Cmm.Info.Build.doSRTs.
+ } + +data ProfilingInfo + = NoProfilingInfo + | ProfilingInfo ByteString ByteString -- closure_type, closure_desc + +----------------------------------------------------------------------------- +-- Static Data +----------------------------------------------------------------------------- + +data SectionType + = Text + | Data + | ReadOnlyData + | RelocatableReadOnlyData + | UninitialisedData + | ReadOnlyData16 -- .rodata.cst16 on x86_64, 16-byte aligned + | CString + | OtherSection String + deriving (Show) + +-- | Should data in this section be considered constant? +isSecConstant :: Section -> Bool +isSecConstant (Section t _) = case t of + Text -> True + ReadOnlyData -> True + RelocatableReadOnlyData -> True + ReadOnlyData16 -> True + CString -> True + Data -> False + UninitialisedData -> False + (OtherSection _) -> False + +data Section = Section SectionType CLabel + +data CmmStatic + = CmmStaticLit CmmLit + -- a literal value, size given by cmmLitRep of the literal. + | CmmUninitialised Int + -- uninitialised data, N bytes long + | CmmString ByteString + -- string of 8-bit values only, not zero terminated. + +data CmmStatics + = Statics + CLabel -- Label of statics + [CmmStatic] -- The static data itself + +-- ----------------------------------------------------------------------------- +-- Basic blocks consisting of lists + +-- These are used by the LLVM and NCG backends, when populating Cmm +-- with lists of instructions. + +data GenBasicBlock i = BasicBlock BlockId [i] + +-- | The branch block id is that of the first block in +-- the branch, which is that branch's entry point +blockId :: GenBasicBlock i -> BlockId +blockId (BasicBlock blk_id _ ) = blk_id + +newtype ListGraph i = ListGraph [GenBasicBlock i] + +instance Outputable instr => Outputable (ListGraph instr) where + ppr (ListGraph blocks) = vcat (map ppr blocks) + +instance Outputable instr => Outputable (GenBasicBlock instr) where + ppr = pprBBlock + +pprBBlock :: Outputable stmt => GenBasicBlock stmt -> SDoc +pprBBlock (BasicBlock ident stmts) = + hang (ppr ident <> colon) 4 (vcat (map ppr stmts)) + diff --git a/compiler/GHC/Cmm/BlockId.hs b/compiler/GHC/Cmm/BlockId.hs new file mode 100644 index 0000000000..f7f369551b --- /dev/null +++ b/compiler/GHC/Cmm/BlockId.hs @@ -0,0 +1,46 @@ +{-# LANGUAGE TypeSynonymInstances #-} +{-# OPTIONS_GHC -fno-warn-orphans #-} + +{- BlockId module should probably go away completely, being superseded by Label -} +module GHC.Cmm.BlockId + ( BlockId, mkBlockId -- ToDo: BlockId should be abstract, but it isn't yet + , newBlockId + , blockLbl, infoTblLbl + ) where + +import GhcPrelude + +import GHC.Cmm.CLabel +import IdInfo +import Name +import Unique +import UniqSupply + +import GHC.Cmm.Dataflow.Label (Label, mkHooplLabel) + +---------------------------------------------------------------- +--- Block Ids, their environments, and their sets + +{- Note [Unique BlockId] +~~~~~~~~~~~~~~~~~~~~~~~~ +Although a 'BlockId' is a local label, for reasons of implementation, +'BlockId's must be unique within an entire compilation unit. The reason +is that each local label is mapped to an assembly-language label, and in +most assembly languages a label is visible throughout the entire +compilation unit in which it appears.
+-} + +type BlockId = Label + +mkBlockId :: Unique -> BlockId +mkBlockId unique = mkHooplLabel $ getKey unique + +newBlockId :: MonadUnique m => m BlockId +newBlockId = mkBlockId <$> getUniqueM + +blockLbl :: BlockId -> CLabel +blockLbl label = mkLocalBlockLabel (getUnique label) + +infoTblLbl :: BlockId -> CLabel +infoTblLbl label + = mkBlockInfoTableLabel (mkFCallName (getUnique label) "block") NoCafRefs diff --git a/compiler/GHC/Cmm/BlockId.hs-boot b/compiler/GHC/Cmm/BlockId.hs-boot new file mode 100644 index 0000000000..76fd6180a9 --- /dev/null +++ b/compiler/GHC/Cmm/BlockId.hs-boot @@ -0,0 +1,8 @@ +module GHC.Cmm.BlockId (BlockId, mkBlockId) where + +import GHC.Cmm.Dataflow.Label (Label) +import Unique (Unique) + +type BlockId = Label + +mkBlockId :: Unique -> BlockId diff --git a/compiler/GHC/Cmm/CLabel.hs b/compiler/GHC/Cmm/CLabel.hs new file mode 100644 index 0000000000..e84278bf65 --- /dev/null +++ b/compiler/GHC/Cmm/CLabel.hs @@ -0,0 +1,1571 @@ +----------------------------------------------------------------------------- +-- +-- Object-file symbols (called CLabel for histerical raisins). +-- +-- (c) The University of Glasgow 2004-2006 +-- +----------------------------------------------------------------------------- + +{-# LANGUAGE CPP #-} + +module GHC.Cmm.CLabel ( + CLabel, -- abstract type + ForeignLabelSource(..), + pprDebugCLabel, + + mkClosureLabel, + mkSRTLabel, + mkInfoTableLabel, + mkEntryLabel, + mkRednCountsLabel, + mkConInfoTableLabel, + mkApEntryLabel, + mkApInfoTableLabel, + mkClosureTableLabel, + mkBytesLabel, + + mkLocalBlockLabel, + mkLocalClosureLabel, + mkLocalInfoTableLabel, + mkLocalClosureTableLabel, + + mkBlockInfoTableLabel, + + mkBitmapLabel, + mkStringLitLabel, + + mkAsmTempLabel, + mkAsmTempDerivedLabel, + mkAsmTempEndLabel, + mkAsmTempDieLabel, + + mkDirty_MUT_VAR_Label, + mkNonmovingWriteBarrierEnabledLabel, + mkUpdInfoLabel, + mkBHUpdInfoLabel, + mkIndStaticInfoLabel, + mkMainCapabilityLabel, + mkMAP_FROZEN_CLEAN_infoLabel, + mkMAP_FROZEN_DIRTY_infoLabel, + mkMAP_DIRTY_infoLabel, + mkSMAP_FROZEN_CLEAN_infoLabel, + mkSMAP_FROZEN_DIRTY_infoLabel, + mkSMAP_DIRTY_infoLabel, + mkBadAlignmentLabel, + mkArrWords_infoLabel, + mkSRTInfoLabel, + + mkTopTickyCtrLabel, + mkCAFBlackHoleInfoTableLabel, + mkRtsPrimOpLabel, + mkRtsSlowFastTickyCtrLabel, + + mkSelectorInfoLabel, + mkSelectorEntryLabel, + + mkCmmInfoLabel, + mkCmmEntryLabel, + mkCmmRetInfoLabel, + mkCmmRetLabel, + mkCmmCodeLabel, + mkCmmDataLabel, + mkCmmClosureLabel, + + mkRtsApFastLabel, + + mkPrimCallLabel, + + mkForeignLabel, + addLabelSize, + + foreignLabelStdcallInfo, + isBytesLabel, + isForeignLabel, + isSomeRODataLabel, + isStaticClosureLabel, + mkCCLabel, mkCCSLabel, + + DynamicLinkerLabelInfo(..), + mkDynamicLinkerLabel, + dynamicLinkerLabelInfo, + + mkPicBaseLabel, + mkDeadStripPreventer, + + mkHpcTicksLabel, + + -- * Predicates + hasCAF, + needsCDecl, maybeLocalBlockLabel, externallyVisibleCLabel, + isMathFun, + isCFunctionLabel, isGcPtrLabel, labelDynamic, + isLocalCLabel, mayRedirectTo, + + -- * Conversions + toClosureLbl, toSlowEntryLbl, toEntryLbl, toInfoLbl, hasHaskellName, + + pprCLabel, + isInfoTableLabel, + isConInfoTableLabel + ) where + +#include "HsVersions.h" + +import GhcPrelude + +import IdInfo +import BasicTypes +import {-# SOURCE #-} GHC.Cmm.BlockId (BlockId, mkBlockId) +import Packages +import Module +import Name +import Unique +import PrimOp +import CostCentre +import Outputable +import FastString +import DynFlags +import GHC.Platform +import UniqSet +import Util +import 
PprCore ( {- instances -} ) + +-- ----------------------------------------------------------------------------- +-- The CLabel type + +{- | + 'CLabel' is an abstract type that supports the following operations: + + - Pretty printing + + - In a C file, does it need to be declared before use? (i.e. is it + guaranteed to be already in scope in the places we need to refer to it?) + + - If it needs to be declared, what type (code or data) should it be + declared to have? + + - Is it visible outside this object file or not? + + - Is it "dynamic" (see details below) + + - Eq and Ord, so that we can make sets of CLabels (currently only + used in outputting C as far as I can tell, to avoid generating + more than one declaration for any given label). + + - Converting an info table label into an entry label. + + CLabel usage is a bit messy in GHC as they are used in a number of different + contexts: + + - By the C-- AST to identify labels + + - By the unregisterised C code generator ("PprC") for naming functions (hence + the name 'CLabel') + + - By the native and LLVM code generators to identify labels + + For extra fun, each of these uses a slightly different subset of constructors + (e.g. 'AsmTempLabel' and 'AsmTempDerivedLabel' are used only in the NCG and + LLVM backends). + + In general, we use 'IdLabel' to represent Haskell things early in the + pipeline. However, later optimization passes will often represent blocks they + create with 'LocalBlockLabel' where there is no obvious 'Name' to hang off the + label. +-} + +data CLabel + = -- | A label related to the definition of a particular Id or Con in a .hs file. + IdLabel + Name + CafInfo + IdLabelInfo -- encodes the suffix of the label + + -- | A label from a .cmm file that is not associated with a .hs level Id. + | CmmLabel + UnitId -- what package the label belongs to. + FastString -- identifier giving the prefix of the label + CmmLabelInfo -- encodes the suffix of the label + + -- | A label with a baked-in \/ algorithmically generated name that definitely + -- comes from the RTS. The code for it must compile into libHSrts.a \/ libHSrts.so + -- If it doesn't have an algorithmically generated name then use a CmmLabel + -- instead and give it an appropriate UnitId argument. + | RtsLabel + RtsLabelInfo + + -- | A label associated with a block. These aren't visible outside of the + -- compilation unit in which they are defined. These are generally used to + -- name blocks produced by Cmm-to-Cmm passes and the native code generator, + -- where we don't have a 'Name' to associate the label to and therefore can't + -- use 'IdLabel'. + | LocalBlockLabel + {-# UNPACK #-} !Unique + + -- | A 'C' (or otherwise foreign) label. + -- + | ForeignLabel + FastString -- name of the imported label. + + (Maybe Int) -- possible '@n' suffix for stdcall functions + -- When generating C, the '@n' suffix is omitted, but when + -- generating assembler we must add it to the label. + + ForeignLabelSource -- what package the foreign label is in. + + FunctionOrData + + -- | Local temporary label used for native (or LLVM) code generation; must not + -- appear outside of these contexts. Use primarily for debug information + | AsmTempLabel + {-# UNPACK #-} !Unique + + -- | A label \"derived\" from another 'CLabel' by the addition of a suffix. + -- Must not occur outside of the NCG or LLVM code generators. 
+ | AsmTempDerivedLabel + CLabel + FastString -- suffix + + | StringLitLabel + {-# UNPACK #-} !Unique + + | CC_Label CostCentre + | CCS_Label CostCentreStack + + + -- | These labels are generated and used inside the NCG only. + -- They are special variants of a label used for dynamic linking + -- see module PositionIndependentCode for details. + | DynamicLinkerLabel DynamicLinkerLabelInfo CLabel + + -- | This label is generated and used inside the NCG only. + -- It is used as a base for PIC calculations on some platforms. + -- It takes the form of a local numeric assembler label '1'; and + -- is pretty-printed as 1b, referring to the previous definition + -- of 1: in the assembler source file. + | PicBaseLabel + + -- | A label before an info table to prevent excessive dead-stripping on darwin + | DeadStripPreventer CLabel + + + -- | Per-module table of tick locations + | HpcTicksLabel Module + + -- | Static reference table + | SRTLabel + {-# UNPACK #-} !Unique + + -- | A bitmap (function or case return) + | LargeBitmapLabel + {-# UNPACK #-} !Unique + + deriving Eq + +-- This is laborious, but necessary. We can't derive Ord because +-- Unique doesn't have an Ord instance. Note nonDetCmpUnique in the +-- implementation. See Note [No Ord for Unique] +-- This is non-deterministic but we do not currently support deterministic +-- code-generation. See Note [Unique Determinism and code generation] +instance Ord CLabel where + compare (IdLabel a1 b1 c1) (IdLabel a2 b2 c2) = + compare a1 a2 `thenCmp` + compare b1 b2 `thenCmp` + compare c1 c2 + compare (CmmLabel a1 b1 c1) (CmmLabel a2 b2 c2) = + compare a1 a2 `thenCmp` + compare b1 b2 `thenCmp` + compare c1 c2 + compare (RtsLabel a1) (RtsLabel a2) = compare a1 a2 + compare (LocalBlockLabel u1) (LocalBlockLabel u2) = nonDetCmpUnique u1 u2 + compare (ForeignLabel a1 b1 c1 d1) (ForeignLabel a2 b2 c2 d2) = + compare a1 a2 `thenCmp` + compare b1 b2 `thenCmp` + compare c1 c2 `thenCmp` + compare d1 d2 + compare (AsmTempLabel u1) (AsmTempLabel u2) = nonDetCmpUnique u1 u2 + compare (AsmTempDerivedLabel a1 b1) (AsmTempDerivedLabel a2 b2) = + compare a1 a2 `thenCmp` + compare b1 b2 + compare (StringLitLabel u1) (StringLitLabel u2) = + nonDetCmpUnique u1 u2 + compare (CC_Label a1) (CC_Label a2) = + compare a1 a2 + compare (CCS_Label a1) (CCS_Label a2) = + compare a1 a2 + compare (DynamicLinkerLabel a1 b1) (DynamicLinkerLabel a2 b2) = + compare a1 a2 `thenCmp` + compare b1 b2 + compare PicBaseLabel PicBaseLabel = EQ + compare (DeadStripPreventer a1) (DeadStripPreventer a2) = + compare a1 a2 + compare (HpcTicksLabel a1) (HpcTicksLabel a2) = + compare a1 a2 + compare (SRTLabel u1) (SRTLabel u2) = + nonDetCmpUnique u1 u2 + compare (LargeBitmapLabel u1) (LargeBitmapLabel u2) = + nonDetCmpUnique u1 u2 + compare IdLabel{} _ = LT + compare _ IdLabel{} = GT + compare CmmLabel{} _ = LT + compare _ CmmLabel{} = GT + compare RtsLabel{} _ = LT + compare _ RtsLabel{} = GT + compare LocalBlockLabel{} _ = LT + compare _ LocalBlockLabel{} = GT + compare ForeignLabel{} _ = LT + compare _ ForeignLabel{} = GT + compare AsmTempLabel{} _ = LT + compare _ AsmTempLabel{} = GT + compare AsmTempDerivedLabel{} _ = LT + compare _ AsmTempDerivedLabel{} = GT + compare StringLitLabel{} _ = LT + compare _ StringLitLabel{} = GT + compare CC_Label{} _ = LT + compare _ CC_Label{} = GT + compare CCS_Label{} _ = LT + compare _ CCS_Label{} = GT + compare DynamicLinkerLabel{} _ = LT + compare _ DynamicLinkerLabel{} = GT + compare PicBaseLabel{} _ = LT + compare _ PicBaseLabel{} = GT + compare 
DeadStripPreventer{} _ = LT + compare _ DeadStripPreventer{} = GT + compare HpcTicksLabel{} _ = LT + compare _ HpcTicksLabel{} = GT + compare SRTLabel{} _ = LT + compare _ SRTLabel{} = GT + +-- | Record where a foreign label is stored. +data ForeignLabelSource + + -- | Label is in a named package + = ForeignLabelInPackage UnitId + + -- | Label is in some external, system package that doesn't also + -- contain compiled Haskell code, and is not associated with any .hi files. + -- We don't have to worry about Haskell code being inlined from + -- external packages. It is safe to treat the RTS package as "external". + | ForeignLabelInExternalPackage + + -- | Label is in the package currently being compiled. + -- This is only used for creating hacky tmp labels during code generation. + -- Don't use it in any code that might be inlined across a package boundary + -- (ie, core code) else the information will be wrong relative to the + -- destination module. + | ForeignLabelInThisPackage + + deriving (Eq, Ord) + + +-- | For debugging problems with the CLabel representation. +-- We can't make a Show instance for CLabel because lots of its components don't have instances. +-- The regular Outputable instance only shows the label name, and not its other info. +-- +pprDebugCLabel :: CLabel -> SDoc +pprDebugCLabel lbl + = case lbl of + IdLabel _ _ info-> ppr lbl <> (parens $ text "IdLabel" + <> whenPprDebug (text ":" <> text (show info))) + CmmLabel pkg _name _info + -> ppr lbl <> (parens $ text "CmmLabel" <+> ppr pkg) + + RtsLabel{} -> ppr lbl <> (parens $ text "RtsLabel") + + ForeignLabel _name mSuffix src funOrData + -> ppr lbl <> (parens $ text "ForeignLabel" + <+> ppr mSuffix + <+> ppr src + <+> ppr funOrData) + + _ -> ppr lbl <> (parens $ text "other CLabel") + + +data IdLabelInfo + = Closure -- ^ Label for closure + | InfoTable -- ^ Info tables for closures; always read-only + | Entry -- ^ Entry point + | Slow -- ^ Slow entry point + + | LocalInfoTable -- ^ Like InfoTable but not externally visible + | LocalEntry -- ^ Like Entry but not externally visible + + | RednCounts -- ^ Label of place to keep Ticky-ticky info for this Id + + | ConEntry -- ^ Constructor entry point + | ConInfoTable -- ^ Corresponding info table + + | ClosureTable -- ^ Table of closures for Enum tycons + + | Bytes -- ^ Content of a string literal. See + -- Note [Bytes label]. + | BlockInfoTable -- ^ Like LocalInfoTable but for a proc-point block + -- instead of a closure entry-point. + -- See Note [Proc-point local block entry-point]. + + deriving (Eq, Ord, Show) + + +data RtsLabelInfo + = RtsSelectorInfoTable Bool{-updatable-} Int{-offset-} -- ^ Selector thunks + | RtsSelectorEntry Bool{-updatable-} Int{-offset-} + + | RtsApInfoTable Bool{-updatable-} Int{-arity-} -- ^ AP thunks + | RtsApEntry Bool{-updatable-} Int{-arity-} + + | RtsPrimOp PrimOp + | RtsApFast FastString -- ^ _fast versions of generic apply + | RtsSlowFastTickyCtr String + + deriving (Eq, Ord) + -- NOTE: Eq on PtrString compares the pointer only, so this isn't + -- a real equality. + + +-- | What type of Cmm label we're dealing with. +-- Determines the suffix appended to the name when a CLabel.CmmLabel +-- is pretty printed. 
+data CmmLabelInfo + = CmmInfo -- ^ misc rts info tables, suffix _info + | CmmEntry -- ^ misc rts entry points, suffix _entry + | CmmRetInfo -- ^ misc rts ret info tables, suffix _info + | CmmRet -- ^ misc rts return points, suffix _ret + | CmmData -- ^ misc rts data bits, eg CHARLIKE_closure + | CmmCode -- ^ misc rts code + | CmmClosure -- ^ closures eg CHARLIKE_closure + | CmmPrimCall -- ^ a prim call to some hand written Cmm code + deriving (Eq, Ord) + +data DynamicLinkerLabelInfo + = CodeStub -- MachO: Lfoo$stub, ELF: foo@plt + | SymbolPtr -- MachO: Lfoo$non_lazy_ptr, Windows: __imp_foo + | GotSymbolPtr -- ELF: foo@got + | GotSymbolOffset -- ELF: foo@gotoff + + deriving (Eq, Ord) + + +-- ----------------------------------------------------------------------------- +-- Constructing CLabels +-- ----------------------------------------------------------------------------- + +-- Constructing IdLabels +-- These are always local: + +mkSRTLabel :: Unique -> CLabel +mkSRTLabel u = SRTLabel u + +mkRednCountsLabel :: Name -> CLabel +mkRednCountsLabel name = + IdLabel name NoCafRefs RednCounts -- Note [ticky for LNE] + +-- These have local & (possibly) external variants: +mkLocalClosureLabel :: Name -> CafInfo -> CLabel +mkLocalInfoTableLabel :: Name -> CafInfo -> CLabel +mkLocalClosureTableLabel :: Name -> CafInfo -> CLabel +mkLocalClosureLabel name c = IdLabel name c Closure +mkLocalInfoTableLabel name c = IdLabel name c LocalInfoTable +mkLocalClosureTableLabel name c = IdLabel name c ClosureTable + +mkClosureLabel :: Name -> CafInfo -> CLabel +mkInfoTableLabel :: Name -> CafInfo -> CLabel +mkEntryLabel :: Name -> CafInfo -> CLabel +mkClosureTableLabel :: Name -> CafInfo -> CLabel +mkConInfoTableLabel :: Name -> CafInfo -> CLabel +mkBytesLabel :: Name -> CLabel +mkClosureLabel name c = IdLabel name c Closure +mkInfoTableLabel name c = IdLabel name c InfoTable +mkEntryLabel name c = IdLabel name c Entry +mkClosureTableLabel name c = IdLabel name c ClosureTable +mkConInfoTableLabel name c = IdLabel name c ConInfoTable +mkBytesLabel name = IdLabel name NoCafRefs Bytes + +mkBlockInfoTableLabel :: Name -> CafInfo -> CLabel +mkBlockInfoTableLabel name c = IdLabel name c BlockInfoTable + -- See Note [Proc-point local block entry-point]. 
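The IdLabel constructors above give a single Haskell Name a whole family of symbols that differ only in their IdLabelInfo suffix. A small illustrative sketch, not part of the patch, assuming some n :: Name and caf :: CafInfo in scope; the pretty-printed suffixes and the conversion law follow from ppIdFlavor and toEntryLbl, defined further down in this file:

  -- Illustrative only: the label family attached to one Name.
  labelFamily :: Name -> CafInfo -> [CLabel]
  labelFamily n caf =
    [ mkClosureLabel n caf     -- pretty-prints as <name>_closure
    , mkInfoTableLabel n caf   -- <name>_info
    , mkEntryLabel n caf       -- <name>_entry
    ]

  -- and, by the conversion rules below:
  -- toEntryLbl (mkInfoTableLabel n caf) == mkEntryLabel n caf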
+ +-- Constructing Cmm Labels +mkDirty_MUT_VAR_Label, + mkNonmovingWriteBarrierEnabledLabel, + mkUpdInfoLabel, + mkBHUpdInfoLabel, mkIndStaticInfoLabel, mkMainCapabilityLabel, + mkMAP_FROZEN_CLEAN_infoLabel, mkMAP_FROZEN_DIRTY_infoLabel, + mkMAP_DIRTY_infoLabel, + mkArrWords_infoLabel, + mkTopTickyCtrLabel, + mkCAFBlackHoleInfoTableLabel, + mkSMAP_FROZEN_CLEAN_infoLabel, mkSMAP_FROZEN_DIRTY_infoLabel, + mkSMAP_DIRTY_infoLabel, mkBadAlignmentLabel :: CLabel +mkDirty_MUT_VAR_Label = mkForeignLabel (fsLit "dirty_MUT_VAR") Nothing ForeignLabelInExternalPackage IsFunction +mkNonmovingWriteBarrierEnabledLabel + = CmmLabel rtsUnitId (fsLit "nonmoving_write_barrier_enabled") CmmData +mkUpdInfoLabel = CmmLabel rtsUnitId (fsLit "stg_upd_frame") CmmInfo +mkBHUpdInfoLabel = CmmLabel rtsUnitId (fsLit "stg_bh_upd_frame" ) CmmInfo +mkIndStaticInfoLabel = CmmLabel rtsUnitId (fsLit "stg_IND_STATIC") CmmInfo +mkMainCapabilityLabel = CmmLabel rtsUnitId (fsLit "MainCapability") CmmData +mkMAP_FROZEN_CLEAN_infoLabel = CmmLabel rtsUnitId (fsLit "stg_MUT_ARR_PTRS_FROZEN_CLEAN") CmmInfo +mkMAP_FROZEN_DIRTY_infoLabel = CmmLabel rtsUnitId (fsLit "stg_MUT_ARR_PTRS_FROZEN_DIRTY") CmmInfo +mkMAP_DIRTY_infoLabel = CmmLabel rtsUnitId (fsLit "stg_MUT_ARR_PTRS_DIRTY") CmmInfo +mkTopTickyCtrLabel = CmmLabel rtsUnitId (fsLit "top_ct") CmmData +mkCAFBlackHoleInfoTableLabel = CmmLabel rtsUnitId (fsLit "stg_CAF_BLACKHOLE") CmmInfo +mkArrWords_infoLabel = CmmLabel rtsUnitId (fsLit "stg_ARR_WORDS") CmmInfo +mkSMAP_FROZEN_CLEAN_infoLabel = CmmLabel rtsUnitId (fsLit "stg_SMALL_MUT_ARR_PTRS_FROZEN_CLEAN") CmmInfo +mkSMAP_FROZEN_DIRTY_infoLabel = CmmLabel rtsUnitId (fsLit "stg_SMALL_MUT_ARR_PTRS_FROZEN_DIRTY") CmmInfo +mkSMAP_DIRTY_infoLabel = CmmLabel rtsUnitId (fsLit "stg_SMALL_MUT_ARR_PTRS_DIRTY") CmmInfo +mkBadAlignmentLabel = CmmLabel rtsUnitId (fsLit "stg_badAlignment") CmmEntry + +mkSRTInfoLabel :: Int -> CLabel +mkSRTInfoLabel n = CmmLabel rtsUnitId lbl CmmInfo + where + lbl = + case n of + 1 -> fsLit "stg_SRT_1" + 2 -> fsLit "stg_SRT_2" + 3 -> fsLit "stg_SRT_3" + 4 -> fsLit "stg_SRT_4" + 5 -> fsLit "stg_SRT_5" + 6 -> fsLit "stg_SRT_6" + 7 -> fsLit "stg_SRT_7" + 8 -> fsLit "stg_SRT_8" + 9 -> fsLit "stg_SRT_9" + 10 -> fsLit "stg_SRT_10" + 11 -> fsLit "stg_SRT_11" + 12 -> fsLit "stg_SRT_12" + 13 -> fsLit "stg_SRT_13" + 14 -> fsLit "stg_SRT_14" + 15 -> fsLit "stg_SRT_15" + 16 -> fsLit "stg_SRT_16" + _ -> panic "mkSRTInfoLabel" + +----- +mkCmmInfoLabel, mkCmmEntryLabel, mkCmmRetInfoLabel, mkCmmRetLabel, + mkCmmCodeLabel, mkCmmDataLabel, mkCmmClosureLabel + :: UnitId -> FastString -> CLabel + +mkCmmInfoLabel pkg str = CmmLabel pkg str CmmInfo +mkCmmEntryLabel pkg str = CmmLabel pkg str CmmEntry +mkCmmRetInfoLabel pkg str = CmmLabel pkg str CmmRetInfo +mkCmmRetLabel pkg str = CmmLabel pkg str CmmRet +mkCmmCodeLabel pkg str = CmmLabel pkg str CmmCode +mkCmmDataLabel pkg str = CmmLabel pkg str CmmData +mkCmmClosureLabel pkg str = CmmLabel pkg str CmmClosure + +mkLocalBlockLabel :: Unique -> CLabel +mkLocalBlockLabel u = LocalBlockLabel u + +-- Constructing RtsLabels +mkRtsPrimOpLabel :: PrimOp -> CLabel +mkRtsPrimOpLabel primop = RtsLabel (RtsPrimOp primop) + +mkSelectorInfoLabel :: Bool -> Int -> CLabel +mkSelectorEntryLabel :: Bool -> Int -> CLabel +mkSelectorInfoLabel upd off = RtsLabel (RtsSelectorInfoTable upd off) +mkSelectorEntryLabel upd off = RtsLabel (RtsSelectorEntry upd off) + +mkApInfoTableLabel :: Bool -> Int -> CLabel +mkApEntryLabel :: Bool -> Int -> CLabel +mkApInfoTableLabel upd off = RtsLabel (RtsApInfoTable upd 
off) +mkApEntryLabel upd off = RtsLabel (RtsApEntry upd off) + + +-- A call to some primitive hand-written Cmm code +mkPrimCallLabel :: PrimCall -> CLabel +mkPrimCallLabel (PrimCall str pkg) + = CmmLabel pkg str CmmPrimCall + + +-- Constructing ForeignLabels + +-- | Make a foreign label +mkForeignLabel + :: FastString -- name + -> Maybe Int -- size prefix + -> ForeignLabelSource -- what package it's in + -> FunctionOrData + -> CLabel + +mkForeignLabel = ForeignLabel + + +-- | Update the label size field in a ForeignLabel +addLabelSize :: CLabel -> Int -> CLabel +addLabelSize (ForeignLabel str _ src fod) sz + = ForeignLabel str (Just sz) src fod +addLabelSize label _ + = label + +-- | Whether label is a top-level string literal +isBytesLabel :: CLabel -> Bool +isBytesLabel (IdLabel _ _ Bytes) = True +isBytesLabel _lbl = False + +-- | Whether label is a non-haskell label (defined in C code) +isForeignLabel :: CLabel -> Bool +isForeignLabel (ForeignLabel _ _ _ _) = True +isForeignLabel _lbl = False + +-- | Whether label is a static closure label (can come from haskell or cmm) +isStaticClosureLabel :: CLabel -> Bool +-- Closure defined in haskell (.hs) +isStaticClosureLabel (IdLabel _ _ Closure) = True +-- Closure defined in cmm +isStaticClosureLabel (CmmLabel _ _ CmmClosure) = True +isStaticClosureLabel _lbl = False + +-- | Whether label is a .rodata label +isSomeRODataLabel :: CLabel -> Bool +-- info table defined in haskell (.hs) +isSomeRODataLabel (IdLabel _ _ ClosureTable) = True +isSomeRODataLabel (IdLabel _ _ ConInfoTable) = True +isSomeRODataLabel (IdLabel _ _ InfoTable) = True +isSomeRODataLabel (IdLabel _ _ LocalInfoTable) = True +isSomeRODataLabel (IdLabel _ _ BlockInfoTable) = True +-- info table defined in cmm (.cmm) +isSomeRODataLabel (CmmLabel _ _ CmmInfo) = True +isSomeRODataLabel _lbl = False + +-- | Whether the label points to some kind of info table +isInfoTableLabel :: CLabel -> Bool +isInfoTableLabel (IdLabel _ _ InfoTable) = True +isInfoTableLabel (IdLabel _ _ LocalInfoTable) = True +isInfoTableLabel (IdLabel _ _ ConInfoTable) = True +isInfoTableLabel (IdLabel _ _ BlockInfoTable) = True +isInfoTableLabel _ = False + +-- | Whether the label points to a constructor info table +isConInfoTableLabel :: CLabel -> Bool +isConInfoTableLabel (IdLabel _ _ ConInfoTable) = True +isConInfoTableLabel _ = False + +-- | Get the label size field from a ForeignLabel +foreignLabelStdcallInfo :: CLabel -> Maybe Int +foreignLabelStdcallInfo (ForeignLabel _ info _ _) = info +foreignLabelStdcallInfo _lbl = Nothing + + +-- Constructing Large*Labels +mkBitmapLabel :: Unique -> CLabel +mkBitmapLabel uniq = LargeBitmapLabel uniq + +-- Constructing Cost Center Labels +mkCCLabel :: CostCentre -> CLabel +mkCCSLabel :: CostCentreStack -> CLabel +mkCCLabel cc = CC_Label cc +mkCCSLabel ccs = CCS_Label ccs + +mkRtsApFastLabel :: FastString -> CLabel +mkRtsApFastLabel str = RtsLabel (RtsApFast str) + +mkRtsSlowFastTickyCtrLabel :: String -> CLabel +mkRtsSlowFastTickyCtrLabel pat = RtsLabel (RtsSlowFastTickyCtr pat) + + +-- Constructing Code Coverage Labels +mkHpcTicksLabel :: Module -> CLabel +mkHpcTicksLabel = HpcTicksLabel + + +-- Constructing labels used for dynamic linking +mkDynamicLinkerLabel :: DynamicLinkerLabelInfo -> CLabel -> CLabel +mkDynamicLinkerLabel = DynamicLinkerLabel + +dynamicLinkerLabelInfo :: CLabel -> Maybe (DynamicLinkerLabelInfo, CLabel) +dynamicLinkerLabelInfo (DynamicLinkerLabel info lbl) = Just (info, lbl) +dynamicLinkerLabelInfo _ = Nothing + +mkPicBaseLabel :: CLabel
+mkPicBaseLabel = PicBaseLabel + + +-- Constructing miscellaneous other labels +mkDeadStripPreventer :: CLabel -> CLabel +mkDeadStripPreventer lbl = DeadStripPreventer lbl + +mkStringLitLabel :: Unique -> CLabel +mkStringLitLabel = StringLitLabel + +mkAsmTempLabel :: Uniquable a => a -> CLabel +mkAsmTempLabel a = AsmTempLabel (getUnique a) + +mkAsmTempDerivedLabel :: CLabel -> FastString -> CLabel +mkAsmTempDerivedLabel = AsmTempDerivedLabel + +mkAsmTempEndLabel :: CLabel -> CLabel +mkAsmTempEndLabel l = mkAsmTempDerivedLabel l (fsLit "_end") + +-- | Construct a label for a DWARF Debug Information Entity (DIE) +-- describing another symbol. +mkAsmTempDieLabel :: CLabel -> CLabel +mkAsmTempDieLabel l = mkAsmTempDerivedLabel l (fsLit "_die") + +-- ----------------------------------------------------------------------------- +-- Convert between different kinds of label + +toClosureLbl :: CLabel -> CLabel +toClosureLbl (IdLabel n c _) = IdLabel n c Closure +toClosureLbl (CmmLabel m str _) = CmmLabel m str CmmClosure +toClosureLbl l = pprPanic "toClosureLbl" (ppr l) + +toSlowEntryLbl :: CLabel -> CLabel +toSlowEntryLbl (IdLabel n _ BlockInfoTable) + = pprPanic "toSlowEntryLbl" (ppr n) +toSlowEntryLbl (IdLabel n c _) = IdLabel n c Slow +toSlowEntryLbl l = pprPanic "toSlowEntryLbl" (ppr l) + +toEntryLbl :: CLabel -> CLabel +toEntryLbl (IdLabel n c LocalInfoTable) = IdLabel n c LocalEntry +toEntryLbl (IdLabel n c ConInfoTable) = IdLabel n c ConEntry +toEntryLbl (IdLabel n _ BlockInfoTable) = mkLocalBlockLabel (nameUnique n) + -- See Note [Proc-point local block entry-point]. +toEntryLbl (IdLabel n c _) = IdLabel n c Entry +toEntryLbl (CmmLabel m str CmmInfo) = CmmLabel m str CmmEntry +toEntryLbl (CmmLabel m str CmmRetInfo) = CmmLabel m str CmmRet +toEntryLbl l = pprPanic "toEntryLbl" (ppr l) + +toInfoLbl :: CLabel -> CLabel +toInfoLbl (IdLabel n c LocalEntry) = IdLabel n c LocalInfoTable +toInfoLbl (IdLabel n c ConEntry) = IdLabel n c ConInfoTable +toInfoLbl (IdLabel n c _) = IdLabel n c InfoTable +toInfoLbl (CmmLabel m str CmmEntry) = CmmLabel m str CmmInfo +toInfoLbl (CmmLabel m str CmmRet) = CmmLabel m str CmmRetInfo +toInfoLbl l = pprPanic "CLabel.toInfoLbl" (ppr l) + +hasHaskellName :: CLabel -> Maybe Name +hasHaskellName (IdLabel n _ _) = Just n +hasHaskellName _ = Nothing + +-- ----------------------------------------------------------------------------- +-- Does a CLabel's referent itself refer to a CAF? +hasCAF :: CLabel -> Bool +hasCAF (IdLabel _ _ RednCounts) = False -- Note [ticky for LNE] +hasCAF (IdLabel _ MayHaveCafRefs _) = True +hasCAF _ = False + +-- Note [ticky for LNE] +-- ~~~~~~~~~~~~~~~~~~~~~ + +-- Until 14 Feb 2013, every ticky counter was associated with a +-- closure. Thus, ticky labels used IdLabel. It is odd that +-- GHC.Cmm.Info.Build.cafTransfers would consider such a ticky label +-- reason to add the name to the CAFEnv (and thus eventually the SRT), +-- but it was harmless because the ticky was only used if the closure +-- was also. +-- +-- Since we now have ticky counters for LNEs, it is no longer the case +-- that every ticky counter has an actual closure. So I changed the +-- generation of ticky counters' CLabels to not result in their +-- associated id ending up in the SRT. +-- +-- NB IdLabel is still appropriate for ticky ids (as opposed to +-- CmmLabel) because the LNE's counter is still related to an .hs Id, +-- that Id just isn't for a proper closure. 
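So hasCAF is driven entirely by the CafInfo baked into an IdLabel, with ticky counters explicitly opted out. A hedged sketch of the resulting behaviour, stated as expected equalities rather than tested code (assuming some n :: Name and u :: Unique in scope):

  -- hasCAF (mkClosureLabel n MayHaveCafRefs) == True
  -- hasCAF (mkClosureLabel n NoCafRefs)      == False
  -- hasCAF (mkRednCountsLabel n)             == False  -- ticky; see Note [ticky for LNE]
  -- hasCAF (mkLocalBlockLabel u)             == False  -- not an IdLabel at all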
+ +-- ----------------------------------------------------------------------------- +-- Does a CLabel need declaring before use or not? +-- +-- See wiki:commentary/compiler/backends/ppr-c#prototypes + +needsCDecl :: CLabel -> Bool + -- False <=> it's pre-declared; don't bother + -- don't bother declaring Bitmap labels, we always make sure + -- they are defined before use. +needsCDecl (SRTLabel _) = True +needsCDecl (LargeBitmapLabel _) = False +needsCDecl (IdLabel _ _ _) = True +needsCDecl (LocalBlockLabel _) = True + +needsCDecl (StringLitLabel _) = False +needsCDecl (AsmTempLabel _) = False +needsCDecl (AsmTempDerivedLabel _ _) = False +needsCDecl (RtsLabel _) = False + +needsCDecl (CmmLabel pkgId _ _) + -- Prototypes for labels defined in the runtime system are imported + -- into HC files via includes/Stg.h. + | pkgId == rtsUnitId = False + + -- For other labels we inline one into the HC file directly. + | otherwise = True + +needsCDecl l@(ForeignLabel{}) = not (isMathFun l) +needsCDecl (CC_Label _) = True +needsCDecl (CCS_Label _) = True +needsCDecl (HpcTicksLabel _) = True +needsCDecl (DynamicLinkerLabel {}) = panic "needsCDecl DynamicLinkerLabel" +needsCDecl PicBaseLabel = panic "needsCDecl PicBaseLabel" +needsCDecl (DeadStripPreventer {}) = panic "needsCDecl DeadStripPreventer" + +-- | If a label is a local block label then return just its 'BlockId', otherwise +-- 'Nothing'. +maybeLocalBlockLabel :: CLabel -> Maybe BlockId +maybeLocalBlockLabel (LocalBlockLabel uq) = Just $ mkBlockId uq +maybeLocalBlockLabel _ = Nothing + + +-- | Check whether a label corresponds to a C function that has +-- a prototype in a system header somewhere, or is built-in +-- to the C compiler. For these labels we avoid generating our +-- own C prototypes. +isMathFun :: CLabel -> Bool +isMathFun (ForeignLabel fs _ _ _) = fs `elementOfUniqSet` math_funs +isMathFun _ = False + +math_funs :: UniqSet FastString +math_funs = mkUniqSet [ + -- _ISOC99_SOURCE + (fsLit "acos"), (fsLit "acosf"), (fsLit "acosh"), + (fsLit "acoshf"), (fsLit "acoshl"), (fsLit "acosl"), + (fsLit "asin"), (fsLit "asinf"), (fsLit "asinl"), + (fsLit "asinh"), (fsLit "asinhf"), (fsLit "asinhl"), + (fsLit "atan"), (fsLit "atanf"), (fsLit "atanl"), + (fsLit "atan2"), (fsLit "atan2f"), (fsLit "atan2l"), + (fsLit "atanh"), (fsLit "atanhf"), (fsLit "atanhl"), + (fsLit "cbrt"), (fsLit "cbrtf"), (fsLit "cbrtl"), + (fsLit "ceil"), (fsLit "ceilf"), (fsLit "ceill"), + (fsLit "copysign"), (fsLit "copysignf"), (fsLit "copysignl"), + (fsLit "cos"), (fsLit "cosf"), (fsLit "cosl"), + (fsLit "cosh"), (fsLit "coshf"), (fsLit "coshl"), + (fsLit "erf"), (fsLit "erff"), (fsLit "erfl"), + (fsLit "erfc"), (fsLit "erfcf"), (fsLit "erfcl"), + (fsLit "exp"), (fsLit "expf"), (fsLit "expl"), + (fsLit "exp2"), (fsLit "exp2f"), (fsLit "exp2l"), + (fsLit "expm1"), (fsLit "expm1f"), (fsLit "expm1l"), + (fsLit "fabs"), (fsLit "fabsf"), (fsLit "fabsl"), + (fsLit "fdim"), (fsLit "fdimf"), (fsLit "fdiml"), + (fsLit "floor"), (fsLit "floorf"), (fsLit "floorl"), + (fsLit "fma"), (fsLit "fmaf"), (fsLit "fmal"), + (fsLit "fmax"), (fsLit "fmaxf"), (fsLit "fmaxl"), + (fsLit "fmin"), (fsLit "fminf"), (fsLit "fminl"), + (fsLit "fmod"), (fsLit "fmodf"), (fsLit "fmodl"), + (fsLit "frexp"), (fsLit "frexpf"), (fsLit "frexpl"), + (fsLit "hypot"), (fsLit "hypotf"), (fsLit "hypotl"), + (fsLit "ilogb"), (fsLit "ilogbf"), (fsLit "ilogbl"), + (fsLit "ldexp"), (fsLit "ldexpf"), (fsLit "ldexpl"), + (fsLit "lgamma"), (fsLit "lgammaf"), (fsLit "lgammal"), + (fsLit "llrint"), (fsLit "llrintf"), 
(fsLit "llrintl"), + (fsLit "llround"), (fsLit "llroundf"), (fsLit "llroundl"), + (fsLit "log"), (fsLit "logf"), (fsLit "logl"), + (fsLit "log10l"), (fsLit "log10"), (fsLit "log10f"), + (fsLit "log1pl"), (fsLit "log1p"), (fsLit "log1pf"), + (fsLit "log2"), (fsLit "log2f"), (fsLit "log2l"), + (fsLit "logb"), (fsLit "logbf"), (fsLit "logbl"), + (fsLit "lrint"), (fsLit "lrintf"), (fsLit "lrintl"), + (fsLit "lround"), (fsLit "lroundf"), (fsLit "lroundl"), + (fsLit "modf"), (fsLit "modff"), (fsLit "modfl"), + (fsLit "nan"), (fsLit "nanf"), (fsLit "nanl"), + (fsLit "nearbyint"), (fsLit "nearbyintf"), (fsLit "nearbyintl"), + (fsLit "nextafter"), (fsLit "nextafterf"), (fsLit "nextafterl"), + (fsLit "nexttoward"), (fsLit "nexttowardf"), (fsLit "nexttowardl"), + (fsLit "pow"), (fsLit "powf"), (fsLit "powl"), + (fsLit "remainder"), (fsLit "remainderf"), (fsLit "remainderl"), + (fsLit "remquo"), (fsLit "remquof"), (fsLit "remquol"), + (fsLit "rint"), (fsLit "rintf"), (fsLit "rintl"), + (fsLit "round"), (fsLit "roundf"), (fsLit "roundl"), + (fsLit "scalbln"), (fsLit "scalblnf"), (fsLit "scalblnl"), + (fsLit "scalbn"), (fsLit "scalbnf"), (fsLit "scalbnl"), + (fsLit "sin"), (fsLit "sinf"), (fsLit "sinl"), + (fsLit "sinh"), (fsLit "sinhf"), (fsLit "sinhl"), + (fsLit "sqrt"), (fsLit "sqrtf"), (fsLit "sqrtl"), + (fsLit "tan"), (fsLit "tanf"), (fsLit "tanl"), + (fsLit "tanh"), (fsLit "tanhf"), (fsLit "tanhl"), + (fsLit "tgamma"), (fsLit "tgammaf"), (fsLit "tgammal"), + (fsLit "trunc"), (fsLit "truncf"), (fsLit "truncl"), + -- ISO C 99 also defines these function-like macros in math.h: + -- fpclassify, isfinite, isinf, isnormal, signbit, isgreater, + -- isgreaterequal, isless, islessequal, islessgreater, isunordered + + -- additional symbols from _BSD_SOURCE + (fsLit "drem"), (fsLit "dremf"), (fsLit "dreml"), + (fsLit "finite"), (fsLit "finitef"), (fsLit "finitel"), + (fsLit "gamma"), (fsLit "gammaf"), (fsLit "gammal"), + (fsLit "isinf"), (fsLit "isinff"), (fsLit "isinfl"), + (fsLit "isnan"), (fsLit "isnanf"), (fsLit "isnanl"), + (fsLit "j0"), (fsLit "j0f"), (fsLit "j0l"), + (fsLit "j1"), (fsLit "j1f"), (fsLit "j1l"), + (fsLit "jn"), (fsLit "jnf"), (fsLit "jnl"), + (fsLit "lgamma_r"), (fsLit "lgammaf_r"), (fsLit "lgammal_r"), + (fsLit "scalb"), (fsLit "scalbf"), (fsLit "scalbl"), + (fsLit "significand"), (fsLit "significandf"), (fsLit "significandl"), + (fsLit "y0"), (fsLit "y0f"), (fsLit "y0l"), + (fsLit "y1"), (fsLit "y1f"), (fsLit "y1l"), + (fsLit "yn"), (fsLit "ynf"), (fsLit "ynl"), + + -- These functions are described in IEEE Std 754-2008 - + -- Standard for Floating-Point Arithmetic and ISO/IEC TS 18661 + (fsLit "nextup"), (fsLit "nextupf"), (fsLit "nextupl"), + (fsLit "nextdown"), (fsLit "nextdownf"), (fsLit "nextdownl") + ] + +-- ----------------------------------------------------------------------------- +-- | Is a CLabel visible outside this object file or not? +-- From the point of view of the code generator, a name is +-- externally visible if it has to be declared as exported +-- in the .o file's symbol table; that is, made non-static. 
+externallyVisibleCLabel :: CLabel -> Bool -- not C "static" +externallyVisibleCLabel (StringLitLabel _) = False +externallyVisibleCLabel (AsmTempLabel _) = False +externallyVisibleCLabel (AsmTempDerivedLabel _ _)= False +externallyVisibleCLabel (RtsLabel _) = True +externallyVisibleCLabel (LocalBlockLabel _) = False +externallyVisibleCLabel (CmmLabel _ _ _) = True +externallyVisibleCLabel (ForeignLabel{}) = True +externallyVisibleCLabel (IdLabel name _ info) = isExternalName name && externallyVisibleIdLabel info +externallyVisibleCLabel (CC_Label _) = True +externallyVisibleCLabel (CCS_Label _) = True +externallyVisibleCLabel (DynamicLinkerLabel _ _) = False +externallyVisibleCLabel (HpcTicksLabel _) = True +externallyVisibleCLabel (LargeBitmapLabel _) = False +externallyVisibleCLabel (SRTLabel _) = False +externallyVisibleCLabel (PicBaseLabel {}) = panic "externallyVisibleCLabel PicBaseLabel" +externallyVisibleCLabel (DeadStripPreventer {}) = panic "externallyVisibleCLabel DeadStripPreventer" + +externallyVisibleIdLabel :: IdLabelInfo -> Bool +externallyVisibleIdLabel LocalInfoTable = False +externallyVisibleIdLabel LocalEntry = False +externallyVisibleIdLabel BlockInfoTable = False +externallyVisibleIdLabel _ = True + +-- ----------------------------------------------------------------------------- +-- Finding the "type" of a CLabel + +-- For generating correct types in label declarations: + +data CLabelType + = CodeLabel -- Address of some executable instructions + | DataLabel -- Address of data, not a GC ptr + | GcPtrLabel -- Address of a (presumably static) GC object + +isCFunctionLabel :: CLabel -> Bool +isCFunctionLabel lbl = case labelType lbl of + CodeLabel -> True + _other -> False + +isGcPtrLabel :: CLabel -> Bool +isGcPtrLabel lbl = case labelType lbl of + GcPtrLabel -> True + _other -> False + + +-- | Work out the general type of data at the address of this label +-- whether it be code, data, or static GC object. +labelType :: CLabel -> CLabelType +labelType (IdLabel _ _ info) = idInfoLabelType info +labelType (CmmLabel _ _ CmmData) = DataLabel +labelType (CmmLabel _ _ CmmClosure) = GcPtrLabel +labelType (CmmLabel _ _ CmmCode) = CodeLabel +labelType (CmmLabel _ _ CmmInfo) = DataLabel +labelType (CmmLabel _ _ CmmEntry) = CodeLabel +labelType (CmmLabel _ _ CmmPrimCall) = CodeLabel +labelType (CmmLabel _ _ CmmRetInfo) = DataLabel +labelType (CmmLabel _ _ CmmRet) = CodeLabel +labelType (RtsLabel (RtsSelectorInfoTable _ _)) = DataLabel +labelType (RtsLabel (RtsApInfoTable _ _)) = DataLabel +labelType (RtsLabel (RtsApFast _)) = CodeLabel +labelType (RtsLabel _) = DataLabel +labelType (LocalBlockLabel _) = CodeLabel +labelType (SRTLabel _) = DataLabel +labelType (ForeignLabel _ _ _ IsFunction) = CodeLabel +labelType (ForeignLabel _ _ _ IsData) = DataLabel +labelType (AsmTempLabel _) = panic "labelType(AsmTempLabel)" +labelType (AsmTempDerivedLabel _ _) = panic "labelType(AsmTempDerivedLabel)" +labelType (StringLitLabel _) = DataLabel +labelType (CC_Label _) = DataLabel +labelType (CCS_Label _) = DataLabel +labelType (DynamicLinkerLabel _ _) = DataLabel -- Is this right? 
+labelType PicBaseLabel = DataLabel +labelType (DeadStripPreventer _) = DataLabel +labelType (HpcTicksLabel _) = DataLabel +labelType (LargeBitmapLabel _) = DataLabel + +idInfoLabelType :: IdLabelInfo -> CLabelType +idInfoLabelType info = + case info of + InfoTable -> DataLabel + LocalInfoTable -> DataLabel + BlockInfoTable -> DataLabel + Closure -> GcPtrLabel + ConInfoTable -> DataLabel + ClosureTable -> DataLabel + RednCounts -> DataLabel + Bytes -> DataLabel + _ -> CodeLabel + + +-- ----------------------------------------------------------------------------- + +-- | Is a 'CLabel' defined in the current module being compiled? +-- +-- Sometimes we can optimise references within a compilation unit in ways that +-- we couldn't for inter-module references. This provides a conservative +-- estimate of whether a 'CLabel' lives in the current module. +isLocalCLabel :: Module -> CLabel -> Bool +isLocalCLabel this_mod lbl = + case lbl of + IdLabel name _ _ + | isInternalName name -> True + | otherwise -> nameModule name == this_mod + LocalBlockLabel _ -> True + _ -> False + +-- ----------------------------------------------------------------------------- + +-- | Does a 'CLabel' need dynamic linkage? +-- +-- When referring to data in code, we need to know whether +-- that data resides in a DLL or not. [Win32 only.] +-- @labelDynamic@ returns @True@ if the label is located +-- in a DLL, be it a data reference or not. +labelDynamic :: DynFlags -> Module -> CLabel -> Bool +labelDynamic dflags this_mod lbl = + case lbl of + -- is the RTS in a DLL or not? + RtsLabel _ -> + externalDynamicRefs && (this_pkg /= rtsUnitId) + + IdLabel n _ _ -> + isDllName dflags this_mod n + + -- When compiling in the "dyn" way, each package is to be linked into + -- its own shared library. + CmmLabel pkg _ _ + | os == OSMinGW32 -> + externalDynamicRefs && (this_pkg /= pkg) + | otherwise -> + gopt Opt_ExternalDynamicRefs dflags + + LocalBlockLabel _ -> False + + ForeignLabel _ _ source _ -> + if os == OSMinGW32 + then case source of + -- Foreign label is in some un-named foreign package (or DLL). + ForeignLabelInExternalPackage -> True + + -- Foreign label is linked into the same package as the + -- source file currently being compiled. + ForeignLabelInThisPackage -> False + + -- Foreign label is in some named package. + -- When compiling in the "dyn" way, each package is to be + -- linked into its own DLL. + ForeignLabelInPackage pkgId -> + externalDynamicRefs && (this_pkg /= pkgId) + + else -- On Mac OS X and on ELF platforms, false positives are OK, + -- so we claim that all foreign imports come from dynamic + -- libraries + True + + CC_Label cc -> + externalDynamicRefs && not (ccFromThisModule cc this_mod) + + -- CCS_Label always contains a CostCentre defined in the current module + CCS_Label _ -> False + + HpcTicksLabel m -> + externalDynamicRefs && this_mod /= m + + -- Note that DynamicLinkerLabels do NOT require dynamic linking themselves. + _ -> False + where + externalDynamicRefs = gopt Opt_ExternalDynamicRefs dflags + os = platformOS (targetPlatform dflags) + this_pkg = moduleUnitId this_mod + + +----------------------------------------------------------------------------- +-- Printing out CLabels. + +{- +Convention: + + <name>_<type> + +where <name> is <Module>_<name> for external names and <unique> for +internal names. 
<type> is one of the following: + + info Info table + srt Static reference table + entry Entry code (function, closure) + slow Slow entry code (if any) + ret Direct return address + vtbl Vector table + <n>_alt Case alternative (tag n) + dflt Default case alternative + btm Large bitmap vector + closure Static closure + con_entry Dynamic Constructor entry code + con_info Dynamic Constructor info table + static_entry Static Constructor entry code + static_info Static Constructor info table + sel_info Selector info table + sel_entry Selector entry code + cc Cost centre + ccs Cost centre stack + +Many of these distinctions are only for documentation reasons. For +example, _ret is only distinguished from _entry to make it easy to +tell whether a code fragment is a return point or a closure/function +entry. + +Note [Closure and info labels] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +For a function 'foo', we have: + foo_info : Points to the info table describing foo's closure + (and entry code for foo with tables next to code) + foo_closure : Static (no-free-var) closure only: + points to the statically-allocated closure + +For a data constructor (such as Just or Nothing), we have: + Just_con_info: Info table for the data constructor itself + the first word of a heap-allocated Just + Just_info: Info table for the *worker function*, an + ordinary Haskell function of arity 1 that + allocates a (Just x) box: + Just = \x -> Just x + Just_closure: The closure for this worker + + Nothing_closure: a statically allocated closure for Nothing + Nothing_static_info: info table for Nothing_closure + +All these must be exported symbols, EXCEPT Just_info. We don't need to +export this because in other modules we either have + * A reference to 'Just'; use Just_closure + * A saturated call 'Just x'; allocate using Just_con_info +Not exporting these Just_info labels reduces the number of symbols +somewhat. + +Note [Bytes label] +~~~~~~~~~~~~~~~~~~ +For a top-level string literal 'foo', we have just one symbol 'foo_bytes', which +points to a static data block containing the content of the literal. + +Note [Proc-point local block entry-points] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +A label for a proc-point local block entry-point has no "_entry" suffix. With +`infoTblLbl` we derive an info table label from a proc-point block ID. If +we convert such an info table label into an entry label we must produce +the label without an "_entry" suffix. So an info table label records +the fact that it was derived from a block ID in `IdLabelInfo` as +`BlockInfoTable`. + +The info table label and the local block label are both local labels +and are not externally visible.
+-} + +instance Outputable CLabel where + ppr c = sdocWithDynFlags $ \dynFlags -> pprCLabel dynFlags c + +pprCLabel :: DynFlags -> CLabel -> SDoc + +pprCLabel _ (LocalBlockLabel u) + = tempLabelPrefixOrUnderscore <> pprUniqueAlways u + +pprCLabel dynFlags (AsmTempLabel u) + | not (platformUnregisterised $ targetPlatform dynFlags) + = tempLabelPrefixOrUnderscore <> pprUniqueAlways u + +pprCLabel dynFlags (AsmTempDerivedLabel l suf) + | platformMisc_ghcWithNativeCodeGen $ platformMisc dynFlags + = ptext (asmTempLabelPrefix $ targetPlatform dynFlags) + <> case l of AsmTempLabel u -> pprUniqueAlways u + LocalBlockLabel u -> pprUniqueAlways u + _other -> pprCLabel dynFlags l + <> ftext suf + +pprCLabel dynFlags (DynamicLinkerLabel info lbl) + | platformMisc_ghcWithNativeCodeGen $ platformMisc dynFlags + = pprDynamicLinkerAsmLabel (targetPlatform dynFlags) info lbl + +pprCLabel dynFlags PicBaseLabel + | platformMisc_ghcWithNativeCodeGen $ platformMisc dynFlags + = text "1b" + +pprCLabel dynFlags (DeadStripPreventer lbl) + | platformMisc_ghcWithNativeCodeGen $ platformMisc dynFlags + = + {- + `lbl` can be a temp one but we need to ensure that dsp label will stay + in the final binary so we prepend non-temp prefix ("dsp_") and + optional `_` (underscore) because this is how you mark non-temp symbols + on some platforms (Darwin) + -} + maybe_underscore dynFlags $ text "dsp_" + <> pprCLabel dynFlags lbl <> text "_dsp" + +pprCLabel dynFlags (StringLitLabel u) + | platformMisc_ghcWithNativeCodeGen $ platformMisc dynFlags + = pprUniqueAlways u <> ptext (sLit "_str") + +pprCLabel dynFlags lbl + = getPprStyle $ \ sty -> + if platformMisc_ghcWithNativeCodeGen (platformMisc dynFlags) && asmStyle sty + then maybe_underscore dynFlags $ pprAsmCLbl (targetPlatform dynFlags) lbl + else pprCLbl lbl + +maybe_underscore :: DynFlags -> SDoc -> SDoc +maybe_underscore dynFlags doc = + if platformMisc_leadingUnderscore $ platformMisc dynFlags + then pp_cSEP <> doc + else doc + +pprAsmCLbl :: Platform -> CLabel -> SDoc +pprAsmCLbl platform (ForeignLabel fs (Just sz) _ _) + | platformOS platform == OSMinGW32 + -- In asm mode, we need to put the suffix on a stdcall ForeignLabel. + -- (The C compiler does this itself). + = ftext fs <> char '@' <> int sz +pprAsmCLbl _ lbl + = pprCLbl lbl + +pprCLbl :: CLabel -> SDoc +pprCLbl (StringLitLabel u) + = pprUniqueAlways u <> text "_str" + +pprCLbl (SRTLabel u) + = tempLabelPrefixOrUnderscore <> pprUniqueAlways u <> pp_cSEP <> text "srt" + +pprCLbl (LargeBitmapLabel u) = + tempLabelPrefixOrUnderscore + <> char 'b' <> pprUniqueAlways u <> pp_cSEP <> text "btm" +-- Some bitmaps for tuple constructors have a numeric tag (e.g. '7') +-- until that gets resolved we'll just force them to start +-- with a letter so the label will be legal assembly code.
+ + +pprCLbl (CmmLabel _ str CmmCode) = ftext str +pprCLbl (CmmLabel _ str CmmData) = ftext str +pprCLbl (CmmLabel _ str CmmPrimCall) = ftext str + +pprCLbl (LocalBlockLabel u) = + tempLabelPrefixOrUnderscore <> text "blk_" <> pprUniqueAlways u + +pprCLbl (RtsLabel (RtsApFast str)) = ftext str <> text "_fast" + +pprCLbl (RtsLabel (RtsSelectorInfoTable upd_reqd offset)) + = sdocWithDynFlags $ \dflags -> + ASSERT(offset >= 0 && offset <= mAX_SPEC_SELECTEE_SIZE dflags) + hcat [text "stg_sel_", text (show offset), + ptext (if upd_reqd + then (sLit "_upd_info") + else (sLit "_noupd_info")) + ] + +pprCLbl (RtsLabel (RtsSelectorEntry upd_reqd offset)) + = sdocWithDynFlags $ \dflags -> + ASSERT(offset >= 0 && offset <= mAX_SPEC_SELECTEE_SIZE dflags) + hcat [text "stg_sel_", text (show offset), + ptext (if upd_reqd + then (sLit "_upd_entry") + else (sLit "_noupd_entry")) + ] + +pprCLbl (RtsLabel (RtsApInfoTable upd_reqd arity)) + = sdocWithDynFlags $ \dflags -> + ASSERT(arity > 0 && arity <= mAX_SPEC_AP_SIZE dflags) + hcat [text "stg_ap_", text (show arity), + ptext (if upd_reqd + then (sLit "_upd_info") + else (sLit "_noupd_info")) + ] + +pprCLbl (RtsLabel (RtsApEntry upd_reqd arity)) + = sdocWithDynFlags $ \dflags -> + ASSERT(arity > 0 && arity <= mAX_SPEC_AP_SIZE dflags) + hcat [text "stg_ap_", text (show arity), + ptext (if upd_reqd + then (sLit "_upd_entry") + else (sLit "_noupd_entry")) + ] + +pprCLbl (CmmLabel _ fs CmmInfo) + = ftext fs <> text "_info" + +pprCLbl (CmmLabel _ fs CmmEntry) + = ftext fs <> text "_entry" + +pprCLbl (CmmLabel _ fs CmmRetInfo) + = ftext fs <> text "_info" + +pprCLbl (CmmLabel _ fs CmmRet) + = ftext fs <> text "_ret" + +pprCLbl (CmmLabel _ fs CmmClosure) + = ftext fs <> text "_closure" + +pprCLbl (RtsLabel (RtsPrimOp primop)) + = text "stg_" <> ppr primop + +pprCLbl (RtsLabel (RtsSlowFastTickyCtr pat)) + = text "SLOW_CALL_fast_" <> text pat <> ptext (sLit "_ctr") + +pprCLbl (ForeignLabel str _ _ _) + = ftext str + +pprCLbl (IdLabel name _cafs flavor) = + internalNamePrefix name <> ppr name <> ppIdFlavor flavor + +pprCLbl (CC_Label cc) = ppr cc +pprCLbl (CCS_Label ccs) = ppr ccs + +pprCLbl (HpcTicksLabel mod) + = text "_hpc_tickboxes_" <> ppr mod <> ptext (sLit "_hpc") + +pprCLbl (AsmTempLabel {}) = panic "pprCLbl AsmTempLabel" +pprCLbl (AsmTempDerivedLabel {})= panic "pprCLbl AsmTempDerivedLabel" +pprCLbl (DynamicLinkerLabel {}) = panic "pprCLbl DynamicLinkerLabel" +pprCLbl (PicBaseLabel {}) = panic "pprCLbl PicBaseLabel" +pprCLbl (DeadStripPreventer {}) = panic "pprCLbl DeadStripPreventer" + +ppIdFlavor :: IdLabelInfo -> SDoc +ppIdFlavor x = pp_cSEP <> text + (case x of + Closure -> "closure" + InfoTable -> "info" + LocalInfoTable -> "info" + Entry -> "entry" + LocalEntry -> "entry" + Slow -> "slow" + RednCounts -> "ct" + ConEntry -> "con_entry" + ConInfoTable -> "con_info" + ClosureTable -> "closure_tbl" + Bytes -> "bytes" + BlockInfoTable -> "info" + ) + + +pp_cSEP :: SDoc +pp_cSEP = char '_' + + +instance Outputable ForeignLabelSource where + ppr fs + = case fs of + ForeignLabelInPackage pkgId -> parens $ text "package: " <> ppr pkgId + ForeignLabelInThisPackage -> parens $ text "this package" + ForeignLabelInExternalPackage -> parens $ text "external package" + +internalNamePrefix :: Name -> SDoc +internalNamePrefix name = getPprStyle $ \ sty -> + if asmStyle sty && isRandomGenerated then + sdocWithPlatform $ \platform -> + ptext (asmTempLabelPrefix platform) + else + empty + where + isRandomGenerated = not $ isExternalName name + +tempLabelPrefixOrUnderscore 
:: SDoc +tempLabelPrefixOrUnderscore = sdocWithPlatform $ \platform -> + getPprStyle $ \ sty -> + if asmStyle sty then + ptext (asmTempLabelPrefix platform) + else + char '_' + +-- ----------------------------------------------------------------------------- +-- Machine-dependent knowledge about labels. + +asmTempLabelPrefix :: Platform -> PtrString -- for formatting labels +asmTempLabelPrefix platform = case platformOS platform of + OSDarwin -> sLit "L" + OSAIX -> sLit "__L" -- follow IBM XL C's convention + _ -> sLit ".L" + +pprDynamicLinkerAsmLabel :: Platform -> DynamicLinkerLabelInfo -> CLabel -> SDoc +pprDynamicLinkerAsmLabel platform dllInfo lbl = + case platformOS platform of + OSDarwin + | platformArch platform == ArchX86_64 -> + case dllInfo of + CodeStub -> char 'L' <> ppr lbl <> text "$stub" + SymbolPtr -> char 'L' <> ppr lbl <> text "$non_lazy_ptr" + GotSymbolPtr -> ppr lbl <> text "@GOTPCREL" + GotSymbolOffset -> ppr lbl + | otherwise -> + case dllInfo of + CodeStub -> char 'L' <> ppr lbl <> text "$stub" + SymbolPtr -> char 'L' <> ppr lbl <> text "$non_lazy_ptr" + _ -> panic "pprDynamicLinkerAsmLabel" + + OSAIX -> + case dllInfo of + SymbolPtr -> text "LC.." <> ppr lbl -- GCC's naming convention + _ -> panic "pprDynamicLinkerAsmLabel" + + _ | osElfTarget (platformOS platform) -> elfLabel + + OSMinGW32 -> + case dllInfo of + SymbolPtr -> text "__imp_" <> ppr lbl + _ -> panic "pprDynamicLinkerAsmLabel" + + _ -> panic "pprDynamicLinkerAsmLabel" + where + elfLabel + | platformArch platform == ArchPPC + = case dllInfo of + CodeStub -> -- See Note [.LCTOC1 in PPC PIC code] + ppr lbl <> text "+32768@plt" + SymbolPtr -> text ".LC_" <> ppr lbl + _ -> panic "pprDynamicLinkerAsmLabel" + + | platformArch platform == ArchX86_64 + = case dllInfo of + CodeStub -> ppr lbl <> text "@plt" + GotSymbolPtr -> ppr lbl <> text "@gotpcrel" + GotSymbolOffset -> ppr lbl + SymbolPtr -> text ".LC_" <> ppr lbl + + | platformArch platform == ArchPPC_64 ELF_V1 + || platformArch platform == ArchPPC_64 ELF_V2 + = case dllInfo of + GotSymbolPtr -> text ".LC_" <> ppr lbl + <> text "@toc" + GotSymbolOffset -> ppr lbl + SymbolPtr -> text ".LC_" <> ppr lbl + _ -> panic "pprDynamicLinkerAsmLabel" + + | otherwise + = case dllInfo of + CodeStub -> ppr lbl <> text "@plt" + SymbolPtr -> text ".LC_" <> ppr lbl + GotSymbolPtr -> ppr lbl <> text "@got" + GotSymbolOffset -> ppr lbl <> text "@gotoff" + +-- Figure out whether `symbol` may serve as an alias +-- to `target` within one compilation unit. +-- +-- This is true if any of these holds: +-- * `target` is a module-internal haskell name. +-- * `target` is an exported name, but comes from the same +-- module as `symbol` +-- +-- These are sufficient conditions for establishing e.g. a +-- GNU assembly alias ('.equiv' directive). Sadly, there is +-- no such thing as an alias to an imported symbol (conf. +-- http://blog.omega-prime.co.uk/2011/07/06/the-sad-state-of-symbol-aliases/) +-- See note [emit-time elimination of static indirections]. +-- +-- Precondition is that both labels represent the +-- same semantic value. 
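+--
+-- An illustrative sketch (not part of the original patch; 'expName' and
+-- 'localName' are hypothetical Names from the same module, the latter
+-- internal):
+--
+-- > mkClosureLabel expName NoCafRefs
+-- >   `mayRedirectTo` mkClosureLabel localName NoCafRefs   -- True
+--
+-- whereas the predicate is False as soon as the target's Name comes from
+-- another module.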
+ +mayRedirectTo :: CLabel -> CLabel -> Bool +mayRedirectTo symbol target + | Just nam <- haskellName + , staticClosureLabel + , isExternalName nam + , Just mod <- nameModule_maybe nam + , Just anam <- hasHaskellName symbol + , Just amod <- nameModule_maybe anam + = amod == mod + + | Just nam <- haskellName + , staticClosureLabel + , isInternalName nam + = True + + | otherwise = False + where staticClosureLabel = isStaticClosureLabel target + haskellName = hasHaskellName target + + +{- +Note [emit-time elimination of static indirections] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +As described in #15155, certain static values are representationally +equivalent, e.g. 'cast'ed values (when created by 'newtype' wrappers). + + newtype A = A Int + {-# NOINLINE a #-} + a = A 42 + +a1_rYB :: Int +[GblId, Caf=NoCafRefs, Unf=OtherCon []] +a1_rYB = GHC.Types.I# 42# + +a [InlPrag=NOINLINE] :: A +[GblId, Unf=OtherCon []] +a = a1_rYB `cast` (Sym (T15155.N:A[0]) :: Int ~R# A) + +Formerly we created static indirections for these (IND_STATIC), which +consist of a statically allocated forwarding closure that contains +the (possibly tagged) indirectee. (See CMM/assembly below.) +This approach is suboptimal for two reasons: + (a) they occupy extra space, + (b) they need to be entered in order to obtain the indirectee, + thus they cannot be tagged. + +Fortunately there is a common case where static indirections can be +eliminated while emitting assembly (native or LLVM), viz. when the +indirectee is in the same module (object file) as the symbol that +points to it. In this case an assembly-level identification can +be created ('.equiv' directive), and as such the same object will +be assigned two names in the symbol table. Any of the identified +symbols can be referenced by a tagged pointer. + +Currently the 'mayRedirectTo' predicate will +give a clue whether a label can be equated with another, already +emitted, label (which can in turn be an alias). The general mechanics +is that we identify data (IND_STATIC closures) that are amenable +to aliasing while pretty-printing of assembly output, and emit the +'.equiv' directive instead of static data in such a case. + +Here is a sketch how the output is massaged: + + Consider +newtype A = A Int +{-# NOINLINE a #-} +a = A 42 -- I# 42# is the indirectee + -- 'a' is exported + + results in STG + +a1_rXq :: GHC.Types.Int +[GblId, Caf=NoCafRefs, Unf=OtherCon []] = + CCS_DONT_CARE GHC.Types.I#! [42#]; + +T15155.a [InlPrag=NOINLINE] :: T15155.A +[GblId, Unf=OtherCon []] = + CAF_ccs \ u [] a1_rXq; + + and CMM + +[section ""data" . a1_rXq_closure" { + a1_rXq_closure: + const GHC.Types.I#_con_info; + const 42; + }] + +[section ""data" . T15155.a_closure" { + T15155.a_closure: + const stg_IND_STATIC_info; + const a1_rXq_closure+1; + const 0; + const 0; + }] + +The emitted assembly is + +#### INDIRECTEE +a1_rXq_closure: -- module local haskell value + .quad GHC.Types.I#_con_info -- an Int + .quad 42 + +#### BEFORE +.globl T15155.a_closure -- exported newtype wrapped value +T15155.a_closure: + .quad stg_IND_STATIC_info -- the closure info + .quad a1_rXq_closure+1 -- indirectee ('+1' being the tag) + .quad 0 + .quad 0 + +#### AFTER +.globl T15155.a_closure -- exported newtype wrapped value +.equiv a1_rXq_closure,T15155.a_closure -- both are shared + +The transformation is performed because + T15155.a_closure `mayRedirectTo` a1_rXq_closure+1 +returns True. 
+-} diff --git a/compiler/GHC/Cmm/CallConv.hs b/compiler/GHC/Cmm/CallConv.hs new file mode 100644 index 0000000000..9200daec57 --- /dev/null +++ b/compiler/GHC/Cmm/CallConv.hs @@ -0,0 +1,212 @@ +module GHC.Cmm.CallConv ( + ParamLocation(..), + assignArgumentsPos, + assignStack, + realArgRegsCover +) where + +import GhcPrelude + +import GHC.Cmm.Expr +import GHC.Runtime.Layout +import GHC.Cmm (Convention(..)) +import GHC.Cmm.Ppr () -- For Outputable instances + +import DynFlags +import GHC.Platform +import Outputable + +-- Calculate the 'GlobalReg' or stack locations for function call +-- parameters as used by the Cmm calling convention. + +data ParamLocation + = RegisterParam GlobalReg + | StackParam ByteOff + +instance Outputable ParamLocation where + ppr (RegisterParam g) = ppr g + ppr (StackParam p) = ppr p + +-- | +-- Given a list of arguments, and a function that tells their types, +-- return a list showing where each argument is passed +-- +assignArgumentsPos :: DynFlags + -> ByteOff -- stack offset to start with + -> Convention + -> (a -> CmmType) -- how to get a type from an arg + -> [a] -- args + -> ( + ByteOff -- bytes of stack args + , [(a, ParamLocation)] -- args and locations + ) + +assignArgumentsPos dflags off conv arg_ty reps = (stk_off, assignments) + where + regs = case (reps, conv) of + (_, NativeNodeCall) -> getRegsWithNode dflags + (_, NativeDirectCall) -> getRegsWithoutNode dflags + ([_], NativeReturn) -> allRegs dflags + (_, NativeReturn) -> getRegsWithNode dflags + -- GC calling convention *must* put values in registers + (_, GC) -> allRegs dflags + (_, Slow) -> nodeOnly + -- The calling conventions first assign arguments to registers, + -- then switch to the stack when we first run out of registers + -- (even if there are still available registers for args of a + -- different type). When returning an unboxed tuple, we also + -- separate the stack arguments by pointerhood. 
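+      -- For intuition (an illustrative sketch, not from the original patch):
+      -- with NativeDirectCall and plenty of free registers, word-sized
+      -- arguments all land in vanilla registers and no stack space is used,
+      -- e.g.
+      --
+      -- > assignArgumentsPos dflags 0 NativeDirectCall (cmmExprType dflags) args
+      -- >   -- ~ (0, [(a1, RegisterParam (VanillaReg 1 VGcPtr)), ...])
+      --
+      -- Once a register class is exhausted, the remaining arguments fall
+      -- through to assignStack below.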
+ (reg_assts, stk_args) = assign_regs [] reps regs + (stk_off, stk_assts) = assignStack dflags off arg_ty stk_args + assignments = reg_assts ++ stk_assts + + assign_regs assts [] _ = (assts, []) + assign_regs assts (r:rs) regs | isVecType ty = vec + | isFloatType ty = float + | otherwise = int + where vec = case (w, regs) of + (W128, (vs, fs, ds, ls, s:ss)) + | passVectorInReg W128 dflags -> k (RegisterParam (XmmReg s), (vs, fs, ds, ls, ss)) + (W256, (vs, fs, ds, ls, s:ss)) + | passVectorInReg W256 dflags -> k (RegisterParam (YmmReg s), (vs, fs, ds, ls, ss)) + (W512, (vs, fs, ds, ls, s:ss)) + | passVectorInReg W512 dflags -> k (RegisterParam (ZmmReg s), (vs, fs, ds, ls, ss)) + _ -> (assts, (r:rs)) + float = case (w, regs) of + (W32, (vs, fs, ds, ls, s:ss)) + | passFloatInXmm -> k (RegisterParam (FloatReg s), (vs, fs, ds, ls, ss)) + (W32, (vs, f:fs, ds, ls, ss)) + | not passFloatInXmm -> k (RegisterParam f, (vs, fs, ds, ls, ss)) + (W64, (vs, fs, ds, ls, s:ss)) + | passFloatInXmm -> k (RegisterParam (DoubleReg s), (vs, fs, ds, ls, ss)) + (W64, (vs, fs, d:ds, ls, ss)) + | not passFloatInXmm -> k (RegisterParam d, (vs, fs, ds, ls, ss)) + _ -> (assts, (r:rs)) + int = case (w, regs) of + (W128, _) -> panic "W128 unsupported register type" + (_, (v:vs, fs, ds, ls, ss)) | widthInBits w <= widthInBits (wordWidth dflags) + -> k (RegisterParam (v gcp), (vs, fs, ds, ls, ss)) + (_, (vs, fs, ds, l:ls, ss)) | widthInBits w > widthInBits (wordWidth dflags) + -> k (RegisterParam l, (vs, fs, ds, ls, ss)) + _ -> (assts, (r:rs)) + k (asst, regs') = assign_regs ((r, asst) : assts) rs regs' + ty = arg_ty r + w = typeWidth ty + gcp | isGcPtrType ty = VGcPtr + | otherwise = VNonGcPtr + passFloatInXmm = passFloatArgsInXmm dflags + +passFloatArgsInXmm :: DynFlags -> Bool +passFloatArgsInXmm dflags = case platformArch (targetPlatform dflags) of + ArchX86_64 -> True + ArchX86 -> False + _ -> False + +-- We used to spill vector registers to the stack since the LLVM backend didn't +-- support vector registers in its calling convention. However, this has now +-- been fixed. This function remains only as a convenient way to re-enable +-- spilling when debugging code generation. +passVectorInReg :: Width -> DynFlags -> Bool +passVectorInReg _ _ = True + +assignStack :: DynFlags -> ByteOff -> (a -> CmmType) -> [a] + -> ( + ByteOff -- bytes of stack args + , [(a, ParamLocation)] -- args and locations + ) +assignStack dflags offset arg_ty args = assign_stk offset [] (reverse args) + where + assign_stk offset assts [] = (offset, assts) + assign_stk offset assts (r:rs) + = assign_stk off' ((r, StackParam off') : assts) rs + where w = typeWidth (arg_ty r) + off' = offset + size + -- Stack arguments always take a whole number of words, we never + -- pack them unlike constructor fields. + size = roundUpToWords dflags (widthInBytes w) + +----------------------------------------------------------------------------- +-- Local information about the registers available + +type AvailRegs = ( [VGcPtr -> GlobalReg] -- available vanilla regs. + , [GlobalReg] -- floats + , [GlobalReg] -- doubles + , [GlobalReg] -- longs (int64 and word64) + , [Int] -- XMM (floats and doubles) + ) + +-- Vanilla registers can contain pointers, Ints, Chars. +-- Floats and doubles have separate register supplies. +-- +-- We take these register supplies from the *real* registers, i.e. those +-- that are guaranteed to map to machine registers. 
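+--
+-- A sketch of the tuple's shape (illustrative; the actual contents depend on
+-- DynFlags and the target):
+--
+-- > -- ( [VanillaReg 1, VanillaReg 2, ...]  -- vanilla (VGcPtr -> GlobalReg)
+-- > -- , [FloatReg 1, ...]                  -- floats
+-- > -- , [DoubleReg 1, ...]                 -- doubles
+-- > -- , [LongReg 1, ...]                   -- longs
+-- > -- , [1, 2, ...] )                      -- XMM register numbers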
+ +getRegsWithoutNode, getRegsWithNode :: DynFlags -> AvailRegs +getRegsWithoutNode dflags = + ( filter (\r -> r VGcPtr /= node) (realVanillaRegs dflags) + , realFloatRegs dflags + , realDoubleRegs dflags + , realLongRegs dflags + , realXmmRegNos dflags) + +-- getRegsWithNode uses R1/node even if it isn't a register +getRegsWithNode dflags = + ( if null (realVanillaRegs dflags) + then [VanillaReg 1] + else realVanillaRegs dflags + , realFloatRegs dflags + , realDoubleRegs dflags + , realLongRegs dflags + , realXmmRegNos dflags) + +allFloatRegs, allDoubleRegs, allLongRegs :: DynFlags -> [GlobalReg] +allVanillaRegs :: DynFlags -> [VGcPtr -> GlobalReg] +allXmmRegs :: DynFlags -> [Int] + +allVanillaRegs dflags = map VanillaReg $ regList (mAX_Vanilla_REG dflags) +allFloatRegs dflags = map FloatReg $ regList (mAX_Float_REG dflags) +allDoubleRegs dflags = map DoubleReg $ regList (mAX_Double_REG dflags) +allLongRegs dflags = map LongReg $ regList (mAX_Long_REG dflags) +allXmmRegs dflags = regList (mAX_XMM_REG dflags) + +realFloatRegs, realDoubleRegs, realLongRegs :: DynFlags -> [GlobalReg] +realVanillaRegs :: DynFlags -> [VGcPtr -> GlobalReg] +realXmmRegNos :: DynFlags -> [Int] + +realVanillaRegs dflags = map VanillaReg $ regList (mAX_Real_Vanilla_REG dflags) +realFloatRegs dflags = map FloatReg $ regList (mAX_Real_Float_REG dflags) +realDoubleRegs dflags = map DoubleReg $ regList (mAX_Real_Double_REG dflags) +realLongRegs dflags = map LongReg $ regList (mAX_Real_Long_REG dflags) + +realXmmRegNos dflags + | isSse2Enabled dflags = regList (mAX_Real_XMM_REG dflags) + | otherwise = [] + +regList :: Int -> [Int] +regList n = [1 .. n] + +allRegs :: DynFlags -> AvailRegs +allRegs dflags = (allVanillaRegs dflags, + allFloatRegs dflags, + allDoubleRegs dflags, + allLongRegs dflags, + allXmmRegs dflags) + +nodeOnly :: AvailRegs +nodeOnly = ([VanillaReg 1], [], [], [], []) + +-- This returns the set of global registers that *cover* the machine registers +-- used for argument passing. On platforms where registers can overlap---right +-- now just x86-64, where Float and Double registers overlap---passing this set +-- of registers is guaranteed to preserve the contents of all live registers. We +-- only use this functionality in hand-written C-- code in the RTS. 
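+--
+-- For example (a hedged sketch; the exact contents depend on DynFlags): on
+-- x86-64, where passFloatArgsInXmm holds, the cover set is the vanilla and
+-- long registers plus an XmmReg for every real XMM register number, since
+-- the XmmRegs subsume the overlapping Float/Double registers:
+--
+-- > realArgRegsCover dflags
+-- >   -- ~ [R1 .. R6] ++ longs ++ [XmmReg 1 .. XmmReg 6]   (illustrative)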
+realArgRegsCover :: DynFlags -> [GlobalReg] +realArgRegsCover dflags + | passFloatArgsInXmm dflags = map ($VGcPtr) (realVanillaRegs dflags) ++ + realLongRegs dflags ++ + map XmmReg (realXmmRegNos dflags) + | otherwise = map ($VGcPtr) (realVanillaRegs dflags) ++ + realFloatRegs dflags ++ + realDoubleRegs dflags ++ + realLongRegs dflags ++ + map XmmReg (realXmmRegNos dflags) diff --git a/compiler/GHC/Cmm/CommonBlockElim.hs b/compiler/GHC/Cmm/CommonBlockElim.hs new file mode 100644 index 0000000000..86ea0e94e2 --- /dev/null +++ b/compiler/GHC/Cmm/CommonBlockElim.hs @@ -0,0 +1,320 @@ +{-# LANGUAGE GADTs, BangPatterns, ScopedTypeVariables #-} + +module GHC.Cmm.CommonBlockElim + ( elimCommonBlocks + ) +where + + +import GhcPrelude hiding (iterate, succ, unzip, zip) + +import GHC.Cmm.BlockId +import GHC.Cmm +import GHC.Cmm.Utils +import GHC.Cmm.Switch (eqSwitchTargetWith) +import GHC.Cmm.ContFlowOpt + +import GHC.Cmm.Dataflow.Block +import GHC.Cmm.Dataflow.Graph +import GHC.Cmm.Dataflow.Label +import GHC.Cmm.Dataflow.Collections +import Data.Bits +import Data.Maybe (mapMaybe) +import qualified Data.List as List +import Data.Word +import qualified Data.Map as M +import Outputable +import qualified TrieMap as TM +import UniqFM +import Unique +import Control.Arrow (first, second) + +-- ----------------------------------------------------------------------------- +-- Eliminate common blocks + +-- If two blocks are identical except for the label on the first node, +-- then we can eliminate one of the blocks. To ensure that the semantics +-- of the program are preserved, we have to rewrite each predecessor of the +-- eliminated block to proceed with the block we keep. + +-- The algorithm iterates over the blocks in the graph, +-- checking whether it has seen another block that is equal modulo labels. +-- If so, then it adds an entry in a map indicating that the new block +-- is made redundant by the old block. +-- Otherwise, it is added to the useful blocks. + +-- To avoid comparing every block with every other block repeatedly, we group +-- them by +-- * a hash of the block, ignoring labels (explained below) +-- * the list of outgoing labels +-- The hash is invariant under relabeling, so we only ever compare within +-- the same group of blocks. +-- +-- The list of outgoing labels is updated as we merge blocks (that is why they +-- are not included in the hash, which we want to calculate only once). +-- +-- All in all, two blocks should never be compared if they have different +-- hashes, and at most once otherwise. Previously, we were slower, and people +-- rightfully complained: #10397 + +-- TODO: Use optimization fuel +elimCommonBlocks :: CmmGraph -> CmmGraph +elimCommonBlocks g = replaceLabels env $ copyTicks env g + where + env = iterate mapEmpty blocks_with_key + -- The order of blocks doesn't matter here. While we could use + -- revPostorder which drops unreachable blocks this is done in + -- ContFlowOpt already which runs before this pass. So we use + -- toBlockList since it is faster. + groups = groupByInt hash_block (toBlockList g) :: [[CmmBlock]] + blocks_with_key = [ [ (successors b, [b]) | b <- bs] | bs <- groups] + +-- Invariant: The blocks in the list are pairwise distinct +-- (so avoid comparing them again) +type DistinctBlocks = [CmmBlock] +type Key = [Label] +type Subst = LabelMap BlockId + +-- The outer list groups by hash. We retain this grouping throughout. 
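+--
+-- Shape of the input, illustratively (hypothetical labels):
+--
+-- > [ [ ([l3], [bs1]), ([l4], [bs2]) ]   -- one hash bucket, two keys
+-- > , [ ([l3], [bs3]) ] ]                -- another hash bucket
+--
+-- Blocks are only ever compared within one bucket, and only when their
+-- outgoing-label keys coincide (after applying the current substitution).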
+iterate :: Subst -> [[(Key, DistinctBlocks)]] -> Subst +iterate subst blocks + | mapNull new_substs = subst + | otherwise = iterate subst' updated_blocks + where + grouped_blocks :: [[(Key, [DistinctBlocks])]] + grouped_blocks = map groupByLabel blocks + + merged_blocks :: [[(Key, DistinctBlocks)]] + (new_substs, merged_blocks) = List.mapAccumL (List.mapAccumL go) mapEmpty grouped_blocks + where + go !new_subst1 (k,dbs) = (new_subst1 `mapUnion` new_subst2, (k,db)) + where + (new_subst2, db) = mergeBlockList subst dbs + + subst' = subst `mapUnion` new_substs + updated_blocks = map (map (first (map (lookupBid subst')))) merged_blocks + +-- Combine two lists of blocks. +-- While they are internally distinct they can still share common blocks. +mergeBlocks :: Subst -> DistinctBlocks -> DistinctBlocks -> (Subst, DistinctBlocks) +mergeBlocks subst existing new = go new + where + go [] = (mapEmpty, existing) + go (b:bs) = case List.find (eqBlockBodyWith (eqBid subst) b) existing of + -- This block is a duplicate. Drop it, and add it to the substitution + Just b' -> first (mapInsert (entryLabel b) (entryLabel b')) $ go bs + -- This block is not a duplicate, keep it. + Nothing -> second (b:) $ go bs + +mergeBlockList :: Subst -> [DistinctBlocks] -> (Subst, DistinctBlocks) +mergeBlockList _ [] = pprPanic "mergeBlockList" empty +mergeBlockList subst (b:bs) = go mapEmpty b bs + where + go !new_subst1 b [] = (new_subst1, b) + go !new_subst1 b1 (b2:bs) = go new_subst b bs + where + (new_subst2, b) = mergeBlocks subst b1 b2 + new_subst = new_subst1 `mapUnion` new_subst2 + + +-- ----------------------------------------------------------------------------- +-- Hashing and equality on blocks + +-- Below here is mostly boilerplate: hashing blocks ignoring labels, +-- and comparing blocks modulo a label mapping. + +-- To speed up comparisons, we hash each basic block modulo jump labels. +-- The hashing is a bit arbitrary (the numbers are completely arbitrary), +-- but it should be fast and good enough. + +-- We want to get as many small buckets as possible, as comparing blocks is +-- expensive. So include as much as possible in the hash. Ideally everything +-- that is compared with (==) in eqBlockBodyWith. + +type HashCode = Int + +hash_block :: CmmBlock -> HashCode +hash_block block = + fromIntegral (foldBlockNodesB3 (hash_fst, hash_mid, hash_lst) block (0 :: Word32) .&. (0x7fffffff :: Word32)) + -- UniqFM doesn't like negative Ints + where hash_fst _ h = h + hash_mid m h = hash_node m + h `shiftL` 1 + hash_lst m h = hash_node m + h `shiftL` 1 + + hash_node :: CmmNode O x -> Word32 + hash_node n | dont_care n = 0 -- don't care + hash_node (CmmAssign r e) = hash_reg r + hash_e e + hash_node (CmmStore e e') = hash_e e + hash_e e' + hash_node (CmmUnsafeForeignCall t _ as) = hash_tgt t + hash_list hash_e as + hash_node (CmmBranch _) = 23 -- NB. ignore the label + hash_node (CmmCondBranch p _ _ _) = hash_e p + hash_node (CmmCall e _ _ _ _ _) = hash_e e + hash_node (CmmForeignCall t _ _ _ _ _ _) = hash_tgt t + hash_node (CmmSwitch e _) = hash_e e + hash_node _ = error "hash_node: unknown Cmm node!" 
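+
+        -- Illustration (not from the original patch): jump targets never
+        -- feed the hash, so for any two labels the branch nodes hash alike:
+        --
+        -- > hash_node (CmmBranch l1) == hash_node (CmmBranch l2)  -- both 23
+        --
+        -- Only the later eqBlockBodyWith comparison distinguishes the
+        -- targets (modulo the current substitution).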
+ + hash_reg :: CmmReg -> Word32 + hash_reg (CmmLocal localReg) = hash_unique localReg -- important for performance, see #10397 + hash_reg (CmmGlobal _) = 19 + + hash_e :: CmmExpr -> Word32 + hash_e (CmmLit l) = hash_lit l + hash_e (CmmLoad e _) = 67 + hash_e e + hash_e (CmmReg r) = hash_reg r + hash_e (CmmMachOp _ es) = hash_list hash_e es -- pessimal - no operator check + hash_e (CmmRegOff r i) = hash_reg r + cvt i + hash_e (CmmStackSlot _ _) = 13 + + hash_lit :: CmmLit -> Word32 + hash_lit (CmmInt i _) = fromInteger i + hash_lit (CmmFloat r _) = truncate r + hash_lit (CmmVec ls) = hash_list hash_lit ls + hash_lit (CmmLabel _) = 119 -- ugh + hash_lit (CmmLabelOff _ i) = cvt $ 199 + i + hash_lit (CmmLabelDiffOff _ _ i _) = cvt $ 299 + i + hash_lit (CmmBlock _) = 191 -- ugh + hash_lit (CmmHighStackMark) = cvt 313 + + hash_tgt (ForeignTarget e _) = hash_e e + hash_tgt (PrimTarget _) = 31 -- lots of these + + hash_list f = foldl' (\z x -> f x + z) (0::Word32) + + cvt = fromInteger . toInteger + + hash_unique :: Uniquable a => a -> Word32 + hash_unique = cvt . getKey . getUnique + +-- | Ignore these node types for equality +dont_care :: CmmNode O x -> Bool +dont_care CmmComment {} = True +dont_care CmmTick {} = True +dont_care CmmUnwind {} = True +dont_care _other = False + +-- Utilities: equality and substitution on the graph. + +-- Given a map ``subst'' from BlockID -> BlockID, we define equality. +eqBid :: LabelMap BlockId -> BlockId -> BlockId -> Bool +eqBid subst bid bid' = lookupBid subst bid == lookupBid subst bid' +lookupBid :: LabelMap BlockId -> BlockId -> BlockId +lookupBid subst bid = case mapLookup bid subst of + Just bid -> lookupBid subst bid + Nothing -> bid + +-- Middle nodes and expressions can contain BlockIds, in particular in +-- CmmStackSlot and CmmBlock, so we have to use a special equality for +-- these. +-- +eqMiddleWith :: (BlockId -> BlockId -> Bool) + -> CmmNode O O -> CmmNode O O -> Bool +eqMiddleWith eqBid (CmmAssign r1 e1) (CmmAssign r2 e2) + = r1 == r2 && eqExprWith eqBid e1 e2 +eqMiddleWith eqBid (CmmStore l1 r1) (CmmStore l2 r2) + = eqExprWith eqBid l1 l2 && eqExprWith eqBid r1 r2 +eqMiddleWith eqBid (CmmUnsafeForeignCall t1 r1 a1) + (CmmUnsafeForeignCall t2 r2 a2) + = t1 == t2 && r1 == r2 && eqListWith (eqExprWith eqBid) a1 a2 +eqMiddleWith _ _ _ = False + +eqExprWith :: (BlockId -> BlockId -> Bool) + -> CmmExpr -> CmmExpr -> Bool +eqExprWith eqBid = eq + where + CmmLit l1 `eq` CmmLit l2 = eqLit l1 l2 + CmmLoad e1 _ `eq` CmmLoad e2 _ = e1 `eq` e2 + CmmReg r1 `eq` CmmReg r2 = r1==r2 + CmmRegOff r1 i1 `eq` CmmRegOff r2 i2 = r1==r2 && i1==i2 + CmmMachOp op1 es1 `eq` CmmMachOp op2 es2 = op1==op2 && es1 `eqs` es2 + CmmStackSlot a1 i1 `eq` CmmStackSlot a2 i2 = eqArea a1 a2 && i1==i2 + _e1 `eq` _e2 = False + + xs `eqs` ys = eqListWith eq xs ys + + eqLit (CmmBlock id1) (CmmBlock id2) = eqBid id1 id2 + eqLit l1 l2 = l1 == l2 + + eqArea Old Old = True + eqArea (Young id1) (Young id2) = eqBid id1 id2 + eqArea _ _ = False + +-- Equality on the body of a block, modulo a function mapping block +-- IDs to block IDs. +eqBlockBodyWith :: (BlockId -> BlockId -> Bool) -> CmmBlock -> CmmBlock -> Bool +eqBlockBodyWith eqBid block block' + {- + | equal = pprTrace "equal" (vcat [ppr block, ppr block']) True + | otherwise = pprTrace "not equal" (vcat [ppr block, ppr block']) False + -} + = equal + where (_,m,l) = blockSplit block + nodes = filter (not . dont_care) (blockToList m) + (_,m',l') = blockSplit block' + nodes' = filter (not . 
dont_care) (blockToList m') + + equal = eqListWith (eqMiddleWith eqBid) nodes nodes' && + eqLastWith eqBid l l' + + +eqLastWith :: (BlockId -> BlockId -> Bool) -> CmmNode O C -> CmmNode O C -> Bool +eqLastWith eqBid (CmmBranch bid1) (CmmBranch bid2) = eqBid bid1 bid2 +eqLastWith eqBid (CmmCondBranch c1 t1 f1 l1) (CmmCondBranch c2 t2 f2 l2) = + c1 == c2 && l1 == l2 && eqBid t1 t2 && eqBid f1 f2 +eqLastWith eqBid (CmmCall t1 c1 g1 a1 r1 u1) (CmmCall t2 c2 g2 a2 r2 u2) = + t1 == t2 && eqMaybeWith eqBid c1 c2 && a1 == a2 && r1 == r2 && u1 == u2 && g1 == g2 +eqLastWith eqBid (CmmSwitch e1 ids1) (CmmSwitch e2 ids2) = + e1 == e2 && eqSwitchTargetWith eqBid ids1 ids2 +eqLastWith _ _ _ = False + +eqMaybeWith :: (a -> b -> Bool) -> Maybe a -> Maybe b -> Bool +eqMaybeWith eltEq (Just e) (Just e') = eltEq e e' +eqMaybeWith _ Nothing Nothing = True +eqMaybeWith _ _ _ = False + +eqListWith :: (a -> b -> Bool) -> [a] -> [b] -> Bool +eqListWith f (a : as) (b : bs) = f a b && eqListWith f as bs +eqListWith _ [] [] = True +eqListWith _ _ _ = False + +-- | Given a block map, ensure that all "target" blocks are covered by +-- the same ticks as the respective "source" blocks. This not only +-- means copying ticks, but also adjusting tick scopes where +-- necessary. +copyTicks :: LabelMap BlockId -> CmmGraph -> CmmGraph +copyTicks env g + | mapNull env = g + | otherwise = ofBlockMap (g_entry g) $ mapMap copyTo blockMap + where -- Reverse block merge map + blockMap = toBlockMap g + revEnv = mapFoldlWithKey insertRev M.empty env + insertRev m k x = M.insertWith (const (k:)) x [k] m + -- Copy ticks and scopes into the given block + copyTo block = case M.lookup (entryLabel block) revEnv of + Nothing -> block + Just ls -> foldr copy block $ mapMaybe (flip mapLookup blockMap) ls + copy from to = + let ticks = blockTicks from + CmmEntry _ scp0 = firstNode from + (CmmEntry lbl scp1, code) = blockSplitHead to + in CmmEntry lbl (combineTickScopes scp0 scp1) `blockJoinHead` + foldr blockCons code (map CmmTick ticks) + +-- Group by [Label] +-- See Note [Compressed TrieMap] in coreSyn/TrieMap about the usage of GenMap. +groupByLabel :: [(Key, DistinctBlocks)] -> [(Key, [DistinctBlocks])] +groupByLabel = + go (TM.emptyTM :: TM.ListMap (TM.GenMap LabelMap) (Key, [DistinctBlocks])) + where + go !m [] = TM.foldTM (:) m [] + go !m ((k,v) : entries) = go (TM.alterTM k adjust m) entries + where --k' = map (getKey . getUnique) k + adjust Nothing = Just (k,[v]) + adjust (Just (_,vs)) = Just (k,v:vs) + +groupByInt :: (a -> Int) -> [a] -> [[a]] +groupByInt f xs = nonDetEltsUFM $ List.foldl' go emptyUFM xs + -- See Note [Unique Determinism and code generation] + where + go m x = alterUFM addEntry m (f x) + where + addEntry xs = Just $! 
maybe [x] (x:) xs
diff --git a/compiler/GHC/Cmm/ContFlowOpt.hs b/compiler/GHC/Cmm/ContFlowOpt.hs
new file mode 100644
index 0000000000..7765972d02
--- /dev/null
+++ b/compiler/GHC/Cmm/ContFlowOpt.hs
@@ -0,0 +1,451 @@
+{-# LANGUAGE GADTs #-}
+{-# LANGUAGE BangPatterns #-}
+{-# OPTIONS_GHC -fno-warn-incomplete-patterns #-}
+module GHC.Cmm.ContFlowOpt
+    ( cmmCfgOpts
+    , cmmCfgOptsProc
+    , removeUnreachableBlocksProc
+    , replaceLabels
+    )
+where
+
+import GhcPrelude hiding (succ, unzip, zip)
+
+import GHC.Cmm.Dataflow.Block
+import GHC.Cmm.Dataflow.Collections
+import GHC.Cmm.Dataflow.Graph
+import GHC.Cmm.Dataflow.Label
+import GHC.Cmm.BlockId
+import GHC.Cmm
+import GHC.Cmm.Utils
+import GHC.Cmm.Switch (mapSwitchTargets, switchTargetsToList)
+import Maybes
+import Panic
+import Util
+
+import Control.Monad
+
+
+-- Note [What is shortcutting]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~
+--
+-- Consider this Cmm code:
+--
+-- L1: ...
+--     goto L2;
+-- L2: goto L3;
+-- L3: ...
+--
+-- Here L2 is an empty block and contains only an unconditional branch
+-- to L3. In this situation any block that jumps to L2 can jump
+-- directly to L3:
+--
+-- L1: ...
+--     goto L3;
+-- L2: goto L3;
+-- L3: ...
+--
+-- In this situation we say that we shortcut L2 to L3. One of the
+-- consequences of shortcutting is that some blocks of code may become
+-- unreachable (in the example above this is true for L2).
+
+
+-- Note [Control-flow optimisations]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+--
+-- This optimisation does three things:
+--
+--   - If a block finishes in an unconditional branch to another block
+--     and that is the only jump to that block, we concatenate the
+--     destination block at the end of the current one.
+--
+--   - If a block finishes in a call whose continuation block is a
+--     goto, then we can shortcut the destination, making the
+--     continuation block the destination of the goto - but see Note
+--     [Shortcut call returns].
+--
+--   - For any block that is not a call we try to shortcut the
+--     destination(s). Additionally, if a block ends with a
+--     conditional branch we try to invert the condition.
+--
+-- Blocks are processed using postorder DFS traversal. A side effect
+-- of determining traversal order with a graph search is elimination
+-- of any blocks that are unreachable.
+--
+-- Transformations are improved by working from the end of the graph
+-- towards the beginning, because we may be able to perform many
+-- shortcuts in one go.
+
+
+-- Note [Shortcut call returns]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+--
+-- We are going to maintain the "current" graph (LabelMap CmmBlock) as
+-- we go, and also a mapping from BlockId to BlockId, representing
+-- continuation labels that we have renamed. This latter mapping is
+-- important because we might shortcut a CmmCall continuation. For
+-- example:
+--
+--    Sp[0] = L
+--    call g returns to L
+--    L: goto M
+--    M: ...
+--
+-- So when we shortcut the L block, we need to replace not only
+-- the continuation of the call, but also references to L in the
+-- code (e.g. the assignment Sp[0] = L):
+--
+--    Sp[0] = M
+--    call g returns to M
+--    M: ...
+--
+-- So we keep track of which labels we have renamed and apply the mapping
+-- at the end with replaceLabels.
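+
+-- A sketch of that final step (illustrative only; 'l' and 'm' stand for the
+-- BlockIds of L and M above):
+--
+-- > g' = replaceLabels (mapFromList [(l, m)]) g
+--
+-- replaceLabels rewrites both control-flow occurrences of 'l' (branches and
+-- call continuations) and value-level ones such as CmmLit (CmmBlock l).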
+ + +-- Note [Shortcut call returns and proc-points] +-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +-- +-- Consider this code that you might get from a recursive +-- let-no-escape: +-- +-- goto L1 +-- L1: +-- if (Hp > HpLim) then L2 else L3 +-- L2: +-- call stg_gc_noregs returns to L4 +-- L4: +-- goto L1 +-- L3: +-- ... +-- goto L1 +-- +-- Then the control-flow optimiser shortcuts L4. But that turns L1 +-- into the call-return proc point, and every iteration of the loop +-- has to shuffle variables to and from the stack. So we must *not* +-- shortcut L4. +-- +-- Moreover not shortcutting call returns is probably fine. If L4 can +-- concat with its branch target then it will still do so. And we +-- save some compile time because we don't have to traverse all the +-- code in replaceLabels. +-- +-- However, we probably do want to do this if we are splitting proc +-- points, because L1 will be a proc-point anyway, so merging it with +-- L4 reduces the number of proc points. Unfortunately recursive +-- let-no-escapes won't generate very good code with proc-point +-- splitting on - we should probably compile them to explicitly use +-- the native calling convention instead. + +cmmCfgOpts :: Bool -> CmmGraph -> CmmGraph +cmmCfgOpts split g = fst (blockConcat split g) + +cmmCfgOptsProc :: Bool -> CmmDecl -> CmmDecl +cmmCfgOptsProc split (CmmProc info lbl live g) = CmmProc info' lbl live g' + where (g', env) = blockConcat split g + info' = info{ info_tbls = new_info_tbls } + new_info_tbls = mapFromList (map upd_info (mapToList (info_tbls info))) + + -- If we changed any labels, then we have to update the info tables + -- too, except for the top-level info table because that might be + -- referred to by other procs. + upd_info (k,info) + | Just k' <- mapLookup k env + = (k', if k' == g_entry g' + then info + else info{ cit_lbl = infoTblLbl k' }) + | otherwise + = (k,info) +cmmCfgOptsProc _ top = top + + +blockConcat :: Bool -> CmmGraph -> (CmmGraph, LabelMap BlockId) +blockConcat splitting_procs g@CmmGraph { g_entry = entry_id } + = (replaceLabels shortcut_map $ ofBlockMap new_entry new_blocks, shortcut_map') + where + -- We might be able to shortcut the entry BlockId itself. + -- Remember to update the shortcut_map, since we also have to + -- update the info_tbls mapping now. + (new_entry, shortcut_map') + | Just entry_blk <- mapLookup entry_id new_blocks + , Just dest <- canShortcut entry_blk + = (dest, mapInsert entry_id dest shortcut_map) + | otherwise + = (entry_id, shortcut_map) + + -- blocks are sorted in reverse postorder, but we want to go from the exit + -- towards beginning, so we use foldr below. + blocks = revPostorder g + blockmap = foldl' (flip addBlock) emptyBody blocks + + -- Accumulator contains three components: + -- * map of blocks in a graph + -- * map of shortcut labels. See Note [Shortcut call returns] + -- * map containing number of predecessors for each block. We discard + -- it after we process all blocks. + (new_blocks, shortcut_map, _) = + foldr maybe_concat (blockmap, mapEmpty, initialBackEdges) blocks + + -- Map of predecessors for initial graph. We increase number of + -- predecessors for entry block by one to denote that it is + -- target of a jump, even if no block in the current graph jumps + -- to it. 
+     initialBackEdges = incPreds entry_id (predMap blocks)
+
+     maybe_concat :: CmmBlock
+                  -> (LabelMap CmmBlock, LabelMap BlockId, LabelMap Int)
+                  -> (LabelMap CmmBlock, LabelMap BlockId, LabelMap Int)
+     maybe_concat block (!blocks, !shortcut_map, !backEdges)
+        -- If:
+        --   (1) current block ends with unconditional branch to b' and
+        --   (2) it has exactly one predecessor (namely, current block)
+        --
+        -- Then:
+        --   (1) append b' block at the end of current block
+        --   (2) remove b' from the map of blocks
+        --   (3) remove information about b' from predecessors map
+        --
+        -- Since we know that the block has only one predecessor we call
+        -- mapDelete directly instead of calling decPreds.
+        --
+        -- Note that we always maintain an up-to-date list of predecessors, so
+        -- we can ignore the contents of shortcut_map
+        | CmmBranch b' <- last
+        , hasOnePredecessor b'
+        , Just blk' <- mapLookup b' blocks
+        = let bid' = entryLabel blk'
+          in ( mapDelete bid' $ mapInsert bid (splice head blk') blocks
+             , shortcut_map
+             , mapDelete b' backEdges )
+
+        -- If:
+        --   (1) we are splitting proc points (see Note
+        --       [Shortcut call returns and proc-points]) and
+        --   (2) current block is a CmmCall or CmmForeignCall with
+        --       continuation b' and
+        --   (3) we can shortcut that continuation to dest
+        -- Then:
+        --   (1) we change the continuation to point to dest
+        --   (2) create mapping from b' to dest
+        --   (3) increase number of predecessors of dest by 1
+        --   (4) decrease number of predecessors of b' by 1
+        --
+        -- Later we will use replaceLabels to substitute all occurrences of b'
+        -- with dest.
+        | splitting_procs
+        , Just b' <- callContinuation_maybe last
+        , Just blk' <- mapLookup b' blocks
+        , Just dest <- canShortcut blk'
+        = ( mapInsert bid (blockJoinTail head (update_cont dest)) blocks
+          , mapInsert b' dest shortcut_map
+          , decPreds b' $ incPreds dest backEdges )
+
+        -- If:
+        --   (1) a block does not end with a call
+        -- Then:
+        --   (1) if it ends with a conditional branch, attempt to invert the
+        --       condition
+        --   (2) attempt to shortcut all destination blocks
+        --   (3) if the new successors of a block differ from the old ones,
+        --       update the number of predecessors accordingly
+        --
+        -- A special case of this is a situation when a block ends with an
+        -- unconditional jump to a block that can be shortcut.
+        | Nothing <- callContinuation_maybe last
+        = let oldSuccs = successors last
+              newSuccs = successors rewrite_last
+          in ( mapInsert bid (blockJoinTail head rewrite_last) blocks
+             , shortcut_map
+             , if oldSuccs == newSuccs
+               then backEdges
+               else foldr incPreds (foldr decPreds backEdges oldSuccs) newSuccs )
+
+        -- Otherwise don't do anything
+        | otherwise
+        = ( blocks, shortcut_map, backEdges )
+        where
+          (head, last) = blockSplitTail block
+          bid = entryLabel block
+
+          -- Changes continuation of a call to a specified label
+          update_cont dest =
+              case last of
+                CmmCall{}        -> last { cml_cont = Just dest }
+                CmmForeignCall{} -> last { succ = dest }
+                _                -> panic "Can't shortcut continuation."
+
+          -- Attempts to shortcut successors of last node
+          shortcut_last = mapSuccessors shortcut last
+            where
+              shortcut l =
+                 case mapLookup l blocks of
+                   Just b | Just dest <- canShortcut b -> dest
+                   _otherwise -> l
+
+          rewrite_last
+            -- Sometimes we can get rid of the conditional completely.
+            | CmmCondBranch _cond t f _l <- shortcut_last
+            , t == f
+            = CmmBranch t
+
+            -- See Note [Invert Cmm conditionals]
+            | CmmCondBranch cond t f l <- shortcut_last
+            , hasOnePredecessor t -- inverting will make t a fallthrough
+            , likelyTrue l || (numPreds f > 1)
+            , Just cond' <- maybeInvertCmmExpr cond
+            = CmmCondBranch cond' f t (invertLikeliness l)
+
+            -- If all jump destinations of a switch go to the
+            -- same target eliminate the switch.
+            | CmmSwitch _expr targets <- shortcut_last
+            , (t:ts) <- switchTargetsToList targets
+            , all (== t) ts
+            = CmmBranch t
+
+            | otherwise
+            = shortcut_last
+
+          likelyTrue (Just True) = True
+          likelyTrue _           = False
+
+          invertLikeliness :: Maybe Bool -> Maybe Bool
+          invertLikeliness = fmap not
+
+          -- Number of predecessors for a block
+          numPreds bid = mapLookup bid backEdges `orElse` 0
+
+          hasOnePredecessor b = numPreds b == 1
+
+{-
+  Note [Invert Cmm conditionals]
+  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  The native code generator always produces jumps to the true branch.
+  Falling through to the false branch is however faster. So we try to
+  arrange for that to happen.
+  This means we invert the condition if:
+  * The likely path will become a fallthrough.
+  * We can make the true branch a fallthrough, but could not guarantee
+    one for the false branch.
+
+  In some cases it's faster to avoid inverting when the false branch is likely.
+  However, determining when that is the case is neither easy nor cheap, so for
+  now we always invert, as this produces smaller binaries and code that is
+  equally fast on average (measured on an i7-6700K).
+
+  TODO:
+  There is also the edge case when both branches have multiple predecessors.
+  In this case we could assume that we will end up with a jump for BOTH
+  branches. In this case it might be best to put the likely path in the true
+  branch, especially if there are large numbers of predecessors, as this saves
+  us the jump that's not taken. However, I haven't tested this, and as of early
+  2018 we almost never generate Cmm where this would apply.
+-}
+
+-- Functions for incrementing and decrementing number of predecessors. If
+-- decrementing would set the predecessor count to 0, we remove the entry
+-- from the map.
+-- Invariant: if a block has no predecessors it should be dropped from the
+-- graph because it is unreachable. maybe_concat is constructed to maintain
+-- that invariant, but calling replaceLabels may introduce unreachable blocks.
+-- We rely on subsequent passes in the Cmm pipeline to remove unreachable
+-- blocks.
+incPreds, decPreds :: BlockId -> LabelMap Int -> LabelMap Int
+incPreds bid edges = mapInsertWith (+) bid 1 edges
+decPreds bid edges = case mapLookup bid edges of
+                       Just preds | preds > 1 -> mapInsert bid (preds - 1) edges
+                       Just _                 -> mapDelete bid edges
+                       _                      -> edges
+
+
+-- Checks if a block consists only of "goto dest". If it does, then we return
+-- "Just dest" label. See Note [What is shortcutting].
+canShortcut :: CmmBlock -> Maybe BlockId
+canShortcut block
+    | (_, middle, CmmBranch dest) <- blockSplit block
+    , all dont_care $ blockToList middle
+    = Just dest
+    | otherwise
+    = Nothing
+    where dont_care CmmComment{} = True
+          dont_care CmmTick{}    = True
+          dont_care _other       = False
+
+-- Concatenates two blocks. The first one is assumed to be open on exit, the
+-- second is assumed to be closed on entry (i.e. it has a label attached to
+-- it, which the splice function removes by calling snd on the result of
+-- blockSplitHead).
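+--
+-- Schematically (an illustrative sketch, not from the original patch):
+--
+-- >   head:   L1: <nodes>           -- Block CmmNode C O
+-- >   rest:   L2: <nodes'>; last    -- CmmBlock (closed/closed)
+-- >   result: L1: <nodes>; <nodes'>; last
+--
+-- L2's entry node is dropped and its tick scope is merged into L1's via
+-- combineTickScopes.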
+splice :: Block CmmNode C O -> CmmBlock -> CmmBlock +splice head rest = entry `blockJoinHead` code0 `blockAppend` code1 + where (CmmEntry lbl sc0, code0) = blockSplitHead head + (CmmEntry _ sc1, code1) = blockSplitHead rest + entry = CmmEntry lbl (combineTickScopes sc0 sc1) + +-- If node is a call with continuation call return Just label of that +-- continuation. Otherwise return Nothing. +callContinuation_maybe :: CmmNode O C -> Maybe BlockId +callContinuation_maybe (CmmCall { cml_cont = Just b }) = Just b +callContinuation_maybe (CmmForeignCall { succ = b }) = Just b +callContinuation_maybe _ = Nothing + + +-- Map over the CmmGraph, replacing each label with its mapping in the +-- supplied LabelMap. +replaceLabels :: LabelMap BlockId -> CmmGraph -> CmmGraph +replaceLabels env g + | mapNull env = g + | otherwise = replace_eid $ mapGraphNodes1 txnode g + where + replace_eid g = g {g_entry = lookup (g_entry g)} + lookup id = mapLookup id env `orElse` id + + txnode :: CmmNode e x -> CmmNode e x + txnode (CmmBranch bid) = CmmBranch (lookup bid) + txnode (CmmCondBranch p t f l) = + mkCmmCondBranch (exp p) (lookup t) (lookup f) l + txnode (CmmSwitch e ids) = + CmmSwitch (exp e) (mapSwitchTargets lookup ids) + txnode (CmmCall t k rg a res r) = + CmmCall (exp t) (liftM lookup k) rg a res r + txnode fc@CmmForeignCall{} = + fc{ args = map exp (args fc), succ = lookup (succ fc) } + txnode other = mapExpDeep exp other + + exp :: CmmExpr -> CmmExpr + exp (CmmLit (CmmBlock bid)) = CmmLit (CmmBlock (lookup bid)) + exp (CmmStackSlot (Young id) i) = CmmStackSlot (Young (lookup id)) i + exp e = e + +mkCmmCondBranch :: CmmExpr -> Label -> Label -> Maybe Bool -> CmmNode O C +mkCmmCondBranch p t f l = + if t == f then CmmBranch t else CmmCondBranch p t f l + +-- Build a map from a block to its set of predecessors. +predMap :: [CmmBlock] -> LabelMap Int +predMap blocks = foldr add_preds mapEmpty blocks + where + add_preds block env = foldr add env (successors block) + where add lbl env = mapInsertWith (+) lbl 1 env + +-- Removing unreachable blocks +removeUnreachableBlocksProc :: CmmDecl -> CmmDecl +removeUnreachableBlocksProc proc@(CmmProc info lbl live g) + | used_blocks `lengthLessThan` mapSize (toBlockMap g) + = CmmProc info' lbl live g' + | otherwise + = proc + where + g' = ofBlockList (g_entry g) used_blocks + info' = info { info_tbls = keep_used (info_tbls info) } + -- Remove any info_tbls for unreachable + + keep_used :: LabelMap CmmInfoTable -> LabelMap CmmInfoTable + keep_used bs = mapFoldlWithKey keep mapEmpty bs + + keep :: LabelMap CmmInfoTable -> Label -> CmmInfoTable -> LabelMap CmmInfoTable + keep env l i | l `setMember` used_lbls = mapInsert l i env + | otherwise = env + + used_blocks :: [CmmBlock] + used_blocks = revPostorder g + + used_lbls :: LabelSet + used_lbls = setFromList $ map entryLabel used_blocks diff --git a/compiler/GHC/Cmm/Dataflow.hs b/compiler/GHC/Cmm/Dataflow.hs new file mode 100644 index 0000000000..fcabb1df0f --- /dev/null +++ b/compiler/GHC/Cmm/Dataflow.hs @@ -0,0 +1,441 @@ +{-# LANGUAGE BangPatterns #-} +{-# LANGUAGE DataKinds #-} +{-# LANGUAGE GADTs #-} +{-# LANGUAGE MultiParamTypeClasses #-} +{-# LANGUAGE RankNTypes #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE TypeFamilies #-} + +-- +-- Copyright (c) 2010, João Dias, Simon Marlow, Simon Peyton Jones, +-- and Norman Ramsey +-- +-- Modifications copyright (c) The University of Glasgow 2012 +-- +-- This module is a specialised and optimised version of +-- Compiler.Hoopl.Dataflow in the hoopl package. 
In particular it is
+-- specialised to the UniqSM monad.
+--
+
+module GHC.Cmm.Dataflow
+  ( C, O, Block
+  , lastNode, entryLabel
+  , foldNodesBwdOO
+  , foldRewriteNodesBwdOO
+  , DataflowLattice(..), OldFact(..), NewFact(..), JoinedFact(..)
+  , TransferFun, RewriteFun
+  , Fact, FactBase
+  , getFact, mkFactBase
+  , analyzeCmmFwd, analyzeCmmBwd
+  , rewriteCmmBwd
+  , changedIf
+  , joinOutFacts
+  , joinFacts
+  )
+where
+
+import GhcPrelude
+
+import GHC.Cmm
+import UniqSupply
+
+import Data.Array
+import Data.Maybe
+import Data.IntSet (IntSet)
+import qualified Data.IntSet as IntSet
+
+import GHC.Cmm.Dataflow.Block
+import GHC.Cmm.Dataflow.Graph
+import GHC.Cmm.Dataflow.Collections
+import GHC.Cmm.Dataflow.Label
+
+type family Fact (x :: Extensibility) f :: *
+type instance Fact C f = FactBase f
+type instance Fact O f = f
+
+newtype OldFact a = OldFact a
+
+newtype NewFact a = NewFact a
+
+-- | The result of joining OldFact and NewFact.
+data JoinedFact a
+    = Changed !a     -- ^ Result is different from OldFact.
+    | NotChanged !a  -- ^ Result is the same as OldFact.
+
+getJoined :: JoinedFact a -> a
+getJoined (Changed a) = a
+getJoined (NotChanged a) = a
+
+changedIf :: Bool -> a -> JoinedFact a
+changedIf True = Changed
+changedIf False = NotChanged
+
+type JoinFun a = OldFact a -> NewFact a -> JoinedFact a
+
+data DataflowLattice a = DataflowLattice
+    { fact_bot :: a
+    , fact_join :: JoinFun a
+    }
+
+data Direction = Fwd | Bwd
+
+type TransferFun f = CmmBlock -> FactBase f -> FactBase f
+
+-- | Function for rewriting and analysis combined. To be used with
+-- @rewriteCmm@.
+--
+-- Currently set to work with the @UniqSM@ monad, but we could probably
+-- abstract that away (if we do that, we might want to specialize the
+-- fixpoint algorithms to the particular monads through SPECIALIZE).
+type RewriteFun f = CmmBlock -> FactBase f -> UniqSM (CmmBlock, FactBase f)
+
+analyzeCmmBwd, analyzeCmmFwd
+    :: DataflowLattice f
+    -> TransferFun f
+    -> CmmGraph
+    -> FactBase f
+    -> FactBase f
+analyzeCmmBwd = analyzeCmm Bwd
+analyzeCmmFwd = analyzeCmm Fwd
+
+analyzeCmm
+    :: Direction
+    -> DataflowLattice f
+    -> TransferFun f
+    -> CmmGraph
+    -> FactBase f
+    -> FactBase f
+analyzeCmm dir lattice transfer cmmGraph initFact =
+    {-# SCC analyzeCmm #-}
+    let entry = g_entry cmmGraph
+        hooplGraph = g_graph cmmGraph
+        blockMap =
+            case hooplGraph of
+                GMany NothingO bm NothingO -> bm
+    in fixpointAnalysis dir lattice transfer entry blockMap initFact
+
+-- Fixpoint algorithm.
+fixpointAnalysis
+    :: forall f.
+       Direction
+    -> DataflowLattice f
+    -> TransferFun f
+    -> Label
+    -> LabelMap CmmBlock
+    -> FactBase f
+    -> FactBase f
+fixpointAnalysis direction lattice do_block entry blockmap = loop start
+  where
+    -- Sorting the blocks helps to minimize the number of times we need to
+    -- process blocks. For instance, for forward analysis we want to look at
+    -- blocks in reverse postorder. Also, see comments for sortBlocks.
+    blocks     = sortBlocks direction entry blockmap
+    num_blocks = length blocks
+    block_arr  = {-# SCC "block_arr" #-} listArray (0, num_blocks - 1) blocks
+    start      = {-# SCC "start" #-} IntSet.fromDistinctAscList
+      [0 .. num_blocks - 1]
+    dep_blocks = {-# SCC "dep_blocks" #-} mkDepBlocks direction blocks
+    join       = fact_join lattice
+
+    loop
+        :: IntHeap     -- ^ Worklist, i.e., blocks to process
+        -> FactBase f  -- ^ Current result (increases monotonically)
+        -> FactBase f
+    loop todo !fbase1 | Just (index, todo1) <- IntSet.minView todo =
+        let block = block_arr !
index + out_facts = {-# SCC "do_block" #-} do_block block fbase1 + -- For each of the outgoing edges, we join it with the current + -- information in fbase1 and (if something changed) we update it + -- and add the affected blocks to the worklist. + (todo2, fbase2) = {-# SCC "mapFoldWithKey" #-} + mapFoldlWithKey + (updateFact join dep_blocks) (todo1, fbase1) out_facts + in loop todo2 fbase2 + loop _ !fbase1 = fbase1 + +rewriteCmmBwd + :: DataflowLattice f + -> RewriteFun f + -> CmmGraph + -> FactBase f + -> UniqSM (CmmGraph, FactBase f) +rewriteCmmBwd = rewriteCmm Bwd + +rewriteCmm + :: Direction + -> DataflowLattice f + -> RewriteFun f + -> CmmGraph + -> FactBase f + -> UniqSM (CmmGraph, FactBase f) +rewriteCmm dir lattice rwFun cmmGraph initFact = {-# SCC rewriteCmm #-} do + let entry = g_entry cmmGraph + hooplGraph = g_graph cmmGraph + blockMap1 = + case hooplGraph of + GMany NothingO bm NothingO -> bm + (blockMap2, facts) <- + fixpointRewrite dir lattice rwFun entry blockMap1 initFact + return (cmmGraph {g_graph = GMany NothingO blockMap2 NothingO}, facts) + +fixpointRewrite + :: forall f. + Direction + -> DataflowLattice f + -> RewriteFun f + -> Label + -> LabelMap CmmBlock + -> FactBase f + -> UniqSM (LabelMap CmmBlock, FactBase f) +fixpointRewrite dir lattice do_block entry blockmap = loop start blockmap + where + -- Sorting the blocks helps to minimize the number of times we need to + -- process blocks. For instance, for forward analysis we want to look at + -- blocks in reverse postorder. Also, see comments for sortBlocks. + blocks = sortBlocks dir entry blockmap + num_blocks = length blocks + block_arr = {-# SCC "block_arr_rewrite" #-} + listArray (0, num_blocks - 1) blocks + start = {-# SCC "start_rewrite" #-} + IntSet.fromDistinctAscList [0 .. num_blocks - 1] + dep_blocks = {-# SCC "dep_blocks_rewrite" #-} mkDepBlocks dir blocks + join = fact_join lattice + + loop + :: IntHeap -- ^ Worklist, i.e., blocks to process + -> LabelMap CmmBlock -- ^ Rewritten blocks. + -> FactBase f -- ^ Current facts. + -> UniqSM (LabelMap CmmBlock, FactBase f) + loop todo !blocks1 !fbase1 + | Just (index, todo1) <- IntSet.minView todo = do + -- Note that we use the *original* block here. This is important. + -- We're optimistically rewriting blocks even before reaching the fixed + -- point, which means that the rewrite might be incorrect. So if the + -- facts change, we need to rewrite the original block again (taking + -- into account the new facts). + let block = block_arr ! index + (new_block, out_facts) <- {-# SCC "do_block_rewrite" #-} + do_block block fbase1 + let blocks2 = mapInsert (entryLabel new_block) new_block blocks1 + (todo2, fbase2) = {-# SCC "mapFoldWithKey_rewrite" #-} + mapFoldlWithKey + (updateFact join dep_blocks) (todo1, fbase1) out_facts + loop todo2 blocks2 fbase2 + loop _ !blocks1 !fbase1 = return (blocks1, fbase1) + + +{- +Note [Unreachable blocks] +~~~~~~~~~~~~~~~~~~~~~~~~~ +A block that is not in the domain of tfb_fbase is "currently unreachable". +A currently-unreachable block is not even analyzed. Reason: consider +constant prop and this graph, with entry point L1: + L1: x:=3; goto L4 + L2: x:=4; goto L4 + L4: if x>3 goto L2 else goto L5 +Here L2 is actually unreachable, but if we process it with bottom input fact, +we'll propagate (x=4) to L4, and nuke the otherwise-good rewriting of L4. + +* If a currently-unreachable block is not analyzed, then its rewritten + graph will not be accumulated in tfb_rg. 
And that is good:
+  unreachable blocks simply do not appear in the output.
+
+* Note that clients must be careful to provide a fact (even if bottom)
+  for each entry point. Otherwise useful blocks may be garbage collected.
+
+* Note that updateFact must set the change-flag if a label goes from
+  not-in-fbase to in-fbase, even if its fact is bottom. In effect the
+  real fact lattice is
+       UNR
+       bottom
+       the points above bottom
+
+* Even if the fact is going from UNR to bottom, we still call the
+  client's fact_join function because it might give the client
+  some useful debugging information.
+
+* All of this only applies for *forward* fixpoints. For the backward
+  case we must treat every block as reachable; it might finish with a
+  'return', and therefore have no successors, for example.
+-}
+
+
+-----------------------------------------------------------------------------
+--  Pieces that are shared by fixpoint and fixpoint_anal
+-----------------------------------------------------------------------------
+
+-- | Sort the blocks into the right order for analysis. This means reverse
+-- postorder for a forward analysis. For the backward one, we simply reverse
+-- that (see Note [Backward vs forward analysis]).
+sortBlocks
+    :: NonLocal n
+    => Direction -> Label -> LabelMap (Block n C C) -> [Block n C C]
+sortBlocks direction entry blockmap =
+    case direction of
+        Fwd -> fwd
+        Bwd -> reverse fwd
+  where
+    fwd = revPostorderFrom blockmap entry
+
+-- Note [Backward vs forward analysis]
+--
+-- The forward and backward cases are not dual. In the forward case, the entry
+-- points are known, and one simply traverses the body blocks from those points.
+-- In the backward case, something is known about the exit points, but a
+-- backward analysis must also include reachable blocks that don't reach the
+-- exit, as in a procedure that loops forever and has side effects.
+-- For instance, let E be the entry and X the exit blocks (arrows indicate
+-- control flow)
+--   E -> X
+--   E -> B
+--   B -> C
+--   C -> B
+-- We do need to include B and C even though they're unreachable in the
+-- *reverse* graph (that we could use for backward analysis):
+--   E <- X
+--   E <- B
+--   B <- C
+--   C <- B
+-- So when sorting the blocks for the backward analysis, we simply take the
+-- reverse of what is used for the forward one.
+
+
+-- | Construct a mapping from a @Label@ to the block indexes that should be
+-- re-analyzed if the facts at that @Label@ change.
+--
+-- Note that we're considering here the entry point of the block, so if the
+-- facts change at the entry:
+-- * for a backward analysis we need to re-analyze all the predecessors, but
+-- * for a forward analysis, we only need to re-analyze the current block
+--   (and that will in turn propagate facts into its successors).
+mkDepBlocks :: Direction -> [CmmBlock] -> LabelMap IntSet
+mkDepBlocks Fwd blocks = go blocks 0 mapEmpty
+  where
+    go []     !_ !dep_map = dep_map
+    go (b:bs) !n !dep_map =
+        go bs (n + 1) $ mapInsert (entryLabel b) (IntSet.singleton n) dep_map
+mkDepBlocks Bwd blocks = go blocks 0 mapEmpty
+  where
+    go []     !_ !dep_map = dep_map
+    go (b:bs) !n !dep_map =
+        let insert m l = mapInsertWith IntSet.union l (IntSet.singleton n) m
+        in go bs (n + 1) $ foldl' insert dep_map (successors b)
+
+-- | After some new facts have been generated by analysing a block, we
+-- fold this function over them to generate (a) a list of block
+-- indices to (re-)analyse, and (b) the new FactBase.
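+--
+-- For intuition (a sketch with hypothetical pieces): when the join changes
+-- the stored fact for label l, every block index registered for l in
+-- dep_blocks rejoins the worklist:
+--
+-- > updateFact join dep_blocks (todo, fbase) l out_fact
+-- >   -- ~ (todo `IntSet.union` mapFindWithDefault IntSet.empty l dep_blocks,
+-- >   --    mapInsert l joined fbase)                  -- fact changed
+-- >   -- ~ (todo, fbase)                               -- fact unchanged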
+updateFact + :: JoinFun f + -> LabelMap IntSet + -> (IntHeap, FactBase f) + -> Label + -> f -- out fact + -> (IntHeap, FactBase f) +updateFact fact_join dep_blocks (todo, fbase) lbl new_fact + = case lookupFact lbl fbase of + Nothing -> + -- Note [No old fact] + let !z = mapInsert lbl new_fact fbase in (changed, z) + Just old_fact -> + case fact_join (OldFact old_fact) (NewFact new_fact) of + (NotChanged _) -> (todo, fbase) + (Changed f) -> let !z = mapInsert lbl f fbase in (changed, z) + where + changed = todo `IntSet.union` + mapFindWithDefault IntSet.empty lbl dep_blocks + +{- +Note [No old fact] + +We know that the new_fact is >= _|_, so we don't need to join. However, +if the new fact is also _|_, and we have already analysed its block, +we don't need to record a change. So there's a tradeoff here. It turns +out that always recording a change is faster. +-} + +---------------------------------------------------------------- +-- Utilities +---------------------------------------------------------------- + +-- Fact lookup: the fact `orelse` bottom +getFact :: DataflowLattice f -> Label -> FactBase f -> f +getFact lat l fb = case lookupFact l fb of Just f -> f + Nothing -> fact_bot lat + +-- | Returns the result of joining the facts from all the successors of the +-- provided node or block. +joinOutFacts :: (NonLocal n) => DataflowLattice f -> n e C -> FactBase f -> f +joinOutFacts lattice nonLocal fact_base = foldl' join (fact_bot lattice) facts + where + join new old = getJoined $ fact_join lattice (OldFact old) (NewFact new) + facts = + [ fromJust fact + | s <- successors nonLocal + , let fact = lookupFact s fact_base + , isJust fact + ] + +joinFacts :: DataflowLattice f -> [f] -> f +joinFacts lattice facts = foldl' join (fact_bot lattice) facts + where + join new old = getJoined $ fact_join lattice (OldFact old) (NewFact new) + +-- | Returns the joined facts for each label. +mkFactBase :: DataflowLattice f -> [(Label, f)] -> FactBase f +mkFactBase lattice = foldl' add mapEmpty + where + join = fact_join lattice + + add result (l, f1) = + let !newFact = + case mapLookup l result of + Nothing -> f1 + Just f2 -> getJoined $ join (OldFact f1) (NewFact f2) + in mapInsert l newFact result + +-- | Folds backward over all nodes of an open-open block. +-- Strict in the accumulator. +foldNodesBwdOO :: (CmmNode O O -> f -> f) -> Block CmmNode O O -> f -> f +foldNodesBwdOO funOO = go + where + go (BCat b1 b2) f = go b1 $! go b2 f + go (BSnoc h n) f = go h $! funOO n f + go (BCons n t) f = funOO n $! go t f + go (BMiddle n) f = funOO n f + go BNil f = f +{-# INLINABLE foldNodesBwdOO #-} + +-- | Folds backward over all the nodes of an open-open block and allows +-- rewriting them. The accumulator is both the block of nodes and @f@ (usually +-- dataflow facts). +-- Strict in both accumulated parts. +foldRewriteNodesBwdOO + :: forall f. 
+ (CmmNode O O -> f -> UniqSM (Block CmmNode O O, f)) + -> Block CmmNode O O + -> f + -> UniqSM (Block CmmNode O O, f) +foldRewriteNodesBwdOO rewriteOO initBlock initFacts = go initBlock initFacts + where + go (BCons node1 block1) !fact1 = (rewriteOO node1 `comp` go block1) fact1 + go (BSnoc block1 node1) !fact1 = (go block1 `comp` rewriteOO node1) fact1 + go (BCat blockA1 blockB1) !fact1 = (go blockA1 `comp` go blockB1) fact1 + go (BMiddle node) !fact1 = rewriteOO node fact1 + go BNil !fact = return (BNil, fact) + + comp rew1 rew2 = \f1 -> do + (b, f2) <- rew2 f1 + (a, !f3) <- rew1 f2 + let !c = joinBlocksOO a b + return (c, f3) + {-# INLINE comp #-} +{-# INLINABLE foldRewriteNodesBwdOO #-} + +joinBlocksOO :: Block n O O -> Block n O O -> Block n O O +joinBlocksOO BNil b = b +joinBlocksOO b BNil = b +joinBlocksOO (BMiddle n) b = blockCons n b +joinBlocksOO b (BMiddle n) = blockSnoc b n +joinBlocksOO b1 b2 = BCat b1 b2 + +type IntHeap = IntSet diff --git a/compiler/GHC/Cmm/Dataflow/Block.hs b/compiler/GHC/Cmm/Dataflow/Block.hs new file mode 100644 index 0000000000..d2e52a8904 --- /dev/null +++ b/compiler/GHC/Cmm/Dataflow/Block.hs @@ -0,0 +1,329 @@ +{-# LANGUAGE DataKinds #-} +{-# LANGUAGE DeriveFunctor #-} +{-# LANGUAGE GADTs #-} +{-# LANGUAGE PolyKinds #-} +{-# LANGUAGE RankNTypes #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE StandaloneDeriving #-} +{-# LANGUAGE TypeFamilies #-} +module GHC.Cmm.Dataflow.Block + ( Extensibility (..) + , O + , C + , MaybeO(..) + , IndexedCO + , Block(..) + , blockAppend + , blockCons + , blockFromList + , blockJoin + , blockJoinHead + , blockJoinTail + , blockSnoc + , blockSplit + , blockSplitHead + , blockSplitTail + , blockToList + , emptyBlock + , firstNode + , foldBlockNodesB + , foldBlockNodesB3 + , foldBlockNodesF + , isEmptyBlock + , lastNode + , mapBlock + , mapBlock' + , mapBlock3' + , replaceFirstNode + , replaceLastNode + ) where + +import GhcPrelude + +-- ----------------------------------------------------------------------------- +-- Shapes: Open and Closed + +-- | Used at the type level to indicate "open" vs "closed" structure. +data Extensibility + -- | An "open" structure with a unique, unnamed control-flow edge flowing in + -- or out. "Fallthrough" and concatenation are permitted at an open point. + = Open + -- | A "closed" structure which supports control transfer only through the use + -- of named labels---no "fallthrough" is permitted. The number of control-flow + -- edges is unconstrained. + | Closed + +type O = 'Open +type C = 'Closed + +-- | Either type indexed by closed/open using type families +type family IndexedCO (ex :: Extensibility) (a :: k) (b :: k) :: k +type instance IndexedCO C a _b = a +type instance IndexedCO O _a b = b + +-- | Maybe type indexed by open/closed +data MaybeO ex t where + JustO :: t -> MaybeO O t + NothingO :: MaybeO C t + +-- | Maybe type indexed by closed/open +data MaybeC ex t where + JustC :: t -> MaybeC C t + NothingC :: MaybeC O t + +deriving instance Functor (MaybeO ex) +deriving instance Functor (MaybeC ex) + +-- ----------------------------------------------------------------------------- +-- The Block type + +-- | A sequence of nodes. May be any of four shapes (O/O, O/C, C/O, C/C). +-- Open at the entry means single entry, mutatis mutandis for exit. +-- A closed/closed block is a /basic/ block and can't be extended further. +-- Clients should avoid manipulating blocks and should stick to either nodes +-- or graphs. 
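+--
+-- As a small illustration (a sketch: @entry@, @mid1@, @mid2@ and @exit@
+-- stand for nodes of the indicated shapes and are not defined here), a
+-- closed/closed basic block can be assembled from the smart constructors
+-- below:
+--
+-- > basicBlock :: Block n C C
+-- > basicBlock = blockJoin entry (blockFromList [mid1, mid2]) exit
+--
+-- which is represented as @BlockCC entry (BCons mid1 (BCons mid2 BNil)) exit@.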
+data Block n e x where + BlockCO :: n C O -> Block n O O -> Block n C O + BlockCC :: n C O -> Block n O O -> n O C -> Block n C C + BlockOC :: Block n O O -> n O C -> Block n O C + + BNil :: Block n O O + BMiddle :: n O O -> Block n O O + BCat :: Block n O O -> Block n O O -> Block n O O + BSnoc :: Block n O O -> n O O -> Block n O O + BCons :: n O O -> Block n O O -> Block n O O + + +-- ----------------------------------------------------------------------------- +-- Simple operations on Blocks + +-- Predicates + +isEmptyBlock :: Block n e x -> Bool +isEmptyBlock BNil = True +isEmptyBlock (BCat l r) = isEmptyBlock l && isEmptyBlock r +isEmptyBlock _ = False + + +-- Building + +emptyBlock :: Block n O O +emptyBlock = BNil + +blockCons :: n O O -> Block n O x -> Block n O x +blockCons n b = case b of + BlockOC b l -> (BlockOC $! (n `blockCons` b)) l + BNil{} -> BMiddle n + BMiddle{} -> n `BCons` b + BCat{} -> n `BCons` b + BSnoc{} -> n `BCons` b + BCons{} -> n `BCons` b + +blockSnoc :: Block n e O -> n O O -> Block n e O +blockSnoc b n = case b of + BlockCO f b -> BlockCO f $! (b `blockSnoc` n) + BNil{} -> BMiddle n + BMiddle{} -> b `BSnoc` n + BCat{} -> b `BSnoc` n + BSnoc{} -> b `BSnoc` n + BCons{} -> b `BSnoc` n + +blockJoinHead :: n C O -> Block n O x -> Block n C x +blockJoinHead f (BlockOC b l) = BlockCC f b l +blockJoinHead f b = BlockCO f BNil `cat` b + +blockJoinTail :: Block n e O -> n O C -> Block n e C +blockJoinTail (BlockCO f b) t = BlockCC f b t +blockJoinTail b t = b `cat` BlockOC BNil t + +blockJoin :: n C O -> Block n O O -> n O C -> Block n C C +blockJoin f b t = BlockCC f b t + +blockAppend :: Block n e O -> Block n O x -> Block n e x +blockAppend = cat + + +-- Taking apart + +firstNode :: Block n C x -> n C O +firstNode (BlockCO n _) = n +firstNode (BlockCC n _ _) = n + +lastNode :: Block n x C -> n O C +lastNode (BlockOC _ n) = n +lastNode (BlockCC _ _ n) = n + +blockSplitHead :: Block n C x -> (n C O, Block n O x) +blockSplitHead (BlockCO n b) = (n, b) +blockSplitHead (BlockCC n b t) = (n, BlockOC b t) + +blockSplitTail :: Block n e C -> (Block n e O, n O C) +blockSplitTail (BlockOC b n) = (b, n) +blockSplitTail (BlockCC f b t) = (BlockCO f b, t) + +-- | Split a closed block into its entry node, open middle block, and +-- exit node. +blockSplit :: Block n C C -> (n C O, Block n O O, n O C) +blockSplit (BlockCC f b t) = (f, b, t) + +blockToList :: Block n O O -> [n O O] +blockToList b = go b [] + where go :: Block n O O -> [n O O] -> [n O O] + go BNil r = r + go (BMiddle n) r = n : r + go (BCat b1 b2) r = go b1 $! go b2 r + go (BSnoc b1 n) r = go b1 (n:r) + go (BCons n b1) r = n : go b1 r + +blockFromList :: [n O O] -> Block n O O +blockFromList = foldr BCons BNil + +-- Modifying + +replaceFirstNode :: Block n C x -> n C O -> Block n C x +replaceFirstNode (BlockCO _ b) f = BlockCO f b +replaceFirstNode (BlockCC _ b n) f = BlockCC f b n + +replaceLastNode :: Block n x C -> n O C -> Block n x C +replaceLastNode (BlockOC b _) n = BlockOC b n +replaceLastNode (BlockCC l b _) n = BlockCC l b n + +-- ----------------------------------------------------------------------------- +-- General concatenation + +cat :: Block n e O -> Block n O x -> Block n e x +cat x y = case x of + BNil -> y + + BlockCO l b1 -> case y of + BlockOC b2 n -> (BlockCC l $! (b1 `cat` b2)) n + BNil -> x + BMiddle _ -> BlockCO l $! (b1 `cat` y) + BCat{} -> BlockCO l $! (b1 `cat` y) + BSnoc{} -> BlockCO l $! (b1 `cat` y) + BCons{} -> BlockCO l $! 
(b1 `cat` y) + + BMiddle n -> case y of + BlockOC b2 n2 -> (BlockOC $! (x `cat` b2)) n2 + BNil -> x + BMiddle{} -> BCons n y + BCat{} -> BCons n y + BSnoc{} -> BCons n y + BCons{} -> BCons n y + + BCat{} -> case y of + BlockOC b3 n2 -> (BlockOC $! (x `cat` b3)) n2 + BNil -> x + BMiddle n -> BSnoc x n + BCat{} -> BCat x y + BSnoc{} -> BCat x y + BCons{} -> BCat x y + + BSnoc{} -> case y of + BlockOC b2 n2 -> (BlockOC $! (x `cat` b2)) n2 + BNil -> x + BMiddle n -> BSnoc x n + BCat{} -> BCat x y + BSnoc{} -> BCat x y + BCons{} -> BCat x y + + + BCons{} -> case y of + BlockOC b2 n2 -> (BlockOC $! (x `cat` b2)) n2 + BNil -> x + BMiddle n -> BSnoc x n + BCat{} -> BCat x y + BSnoc{} -> BCat x y + BCons{} -> BCat x y + + +-- ----------------------------------------------------------------------------- +-- Mapping + +-- | map a function over the nodes of a 'Block' +mapBlock :: (forall e x. n e x -> n' e x) -> Block n e x -> Block n' e x +mapBlock f (BlockCO n b ) = BlockCO (f n) (mapBlock f b) +mapBlock f (BlockOC b n) = BlockOC (mapBlock f b) (f n) +mapBlock f (BlockCC n b m) = BlockCC (f n) (mapBlock f b) (f m) +mapBlock _ BNil = BNil +mapBlock f (BMiddle n) = BMiddle (f n) +mapBlock f (BCat b1 b2) = BCat (mapBlock f b1) (mapBlock f b2) +mapBlock f (BSnoc b n) = BSnoc (mapBlock f b) (f n) +mapBlock f (BCons n b) = BCons (f n) (mapBlock f b) + +-- | A strict 'mapBlock' +mapBlock' :: (forall e x. n e x -> n' e x) -> (Block n e x -> Block n' e x) +mapBlock' f = mapBlock3' (f, f, f) + +-- | map over a block, with different functions to apply to first nodes, +-- middle nodes and last nodes respectively. The map is strict. +-- +mapBlock3' :: forall n n' e x . + ( n C O -> n' C O + , n O O -> n' O O, + n O C -> n' O C) + -> Block n e x -> Block n' e x +mapBlock3' (f, m, l) b = go b + where go :: forall e x . Block n e x -> Block n' e x + go (BlockOC b y) = (BlockOC $! go b) $! l y + go (BlockCO x b) = (BlockCO $! f x) $! (go b) + go (BlockCC x b y) = ((BlockCC $! f x) $! go b) $! (l y) + go BNil = BNil + go (BMiddle n) = BMiddle $! m n + go (BCat x y) = (BCat $! go x) $! (go y) + go (BSnoc x n) = (BSnoc $! go x) $! (m n) + go (BCons n x) = (BCons $! m n) $! (go x) + +-- ----------------------------------------------------------------------------- +-- Folding + + +-- | Fold a function over every node in a block, forward or backward. +-- The fold function must be polymorphic in the shape of the nodes. +foldBlockNodesF3 :: forall n a b c . + ( n C O -> a -> b + , n O O -> b -> b + , n O C -> b -> c) + -> (forall e x . Block n e x -> IndexedCO e a b -> IndexedCO x c b) +foldBlockNodesF :: forall n a . + (forall e x . n e x -> a -> a) + -> (forall e x . Block n e x -> IndexedCO e a a -> IndexedCO x a a) +foldBlockNodesB3 :: forall n a b c . + ( n C O -> b -> c + , n O O -> b -> b + , n O C -> a -> b) + -> (forall e x . Block n e x -> IndexedCO x a b -> IndexedCO e c b) +foldBlockNodesB :: forall n a . + (forall e x . n e x -> a -> a) + -> (forall e x . Block n e x -> IndexedCO x a a -> IndexedCO e a a) + +foldBlockNodesF3 (ff, fm, fl) = block + where block :: forall e x . Block n e x -> IndexedCO e a b -> IndexedCO x c b + block (BlockCO f b ) = ff f `cat` block b + block (BlockCC f b l) = ff f `cat` block b `cat` fl l + block (BlockOC b l) = block b `cat` fl l + block BNil = id + block (BMiddle node) = fm node + block (b1 `BCat` b2) = block b1 `cat` block b2 + block (b1 `BSnoc` n) = block b1 `cat` fm n + block (n `BCons` b2) = fm n `cat` block b2 + cat :: forall a b c. 
(a -> b) -> (b -> c) -> a -> c + cat f f' = f' . f + +foldBlockNodesF f = foldBlockNodesF3 (f, f, f) + +foldBlockNodesB3 (ff, fm, fl) = block + where block :: forall e x . Block n e x -> IndexedCO x a b -> IndexedCO e c b + block (BlockCO f b ) = ff f `cat` block b + block (BlockCC f b l) = ff f `cat` block b `cat` fl l + block (BlockOC b l) = block b `cat` fl l + block BNil = id + block (BMiddle node) = fm node + block (b1 `BCat` b2) = block b1 `cat` block b2 + block (b1 `BSnoc` n) = block b1 `cat` fm n + block (n `BCons` b2) = fm n `cat` block b2 + cat :: forall a b c. (b -> c) -> (a -> b) -> a -> c + cat f f' = f . f' + +foldBlockNodesB f = foldBlockNodesB3 (f, f, f) + diff --git a/compiler/GHC/Cmm/Dataflow/Collections.hs b/compiler/GHC/Cmm/Dataflow/Collections.hs new file mode 100644 index 0000000000..f131f17cc1 --- /dev/null +++ b/compiler/GHC/Cmm/Dataflow/Collections.hs @@ -0,0 +1,177 @@ +{-# LANGUAGE DeriveFunctor #-} +{-# LANGUAGE DeriveTraversable #-} +{-# LANGUAGE DeriveFoldable #-} +{-# LANGUAGE TypeFamilies #-} +{-# LANGUAGE GeneralizedNewtypeDeriving #-} + +module GHC.Cmm.Dataflow.Collections + ( IsSet(..) + , setInsertList, setDeleteList, setUnions + , IsMap(..) + , mapInsertList, mapDeleteList, mapUnions + , UniqueMap, UniqueSet + ) where + +import GhcPrelude + +import qualified Data.IntMap.Strict as M +import qualified Data.IntSet as S + +import Data.List (foldl1') + +class IsSet set where + type ElemOf set + + setNull :: set -> Bool + setSize :: set -> Int + setMember :: ElemOf set -> set -> Bool + + setEmpty :: set + setSingleton :: ElemOf set -> set + setInsert :: ElemOf set -> set -> set + setDelete :: ElemOf set -> set -> set + + setUnion :: set -> set -> set + setDifference :: set -> set -> set + setIntersection :: set -> set -> set + setIsSubsetOf :: set -> set -> Bool + setFilter :: (ElemOf set -> Bool) -> set -> set + + setFoldl :: (b -> ElemOf set -> b) -> b -> set -> b + setFoldr :: (ElemOf set -> b -> b) -> b -> set -> b + + setElems :: set -> [ElemOf set] + setFromList :: [ElemOf set] -> set + +-- Helper functions for IsSet class +setInsertList :: IsSet set => [ElemOf set] -> set -> set +setInsertList keys set = foldl' (flip setInsert) set keys + +setDeleteList :: IsSet set => [ElemOf set] -> set -> set +setDeleteList keys set = foldl' (flip setDelete) set keys + +setUnions :: IsSet set => [set] -> set +setUnions [] = setEmpty +setUnions sets = foldl1' setUnion sets + + +class IsMap map where + type KeyOf map + + mapNull :: map a -> Bool + mapSize :: map a -> Int + mapMember :: KeyOf map -> map a -> Bool + mapLookup :: KeyOf map -> map a -> Maybe a + mapFindWithDefault :: a -> KeyOf map -> map a -> a + + mapEmpty :: map a + mapSingleton :: KeyOf map -> a -> map a + mapInsert :: KeyOf map -> a -> map a -> map a + mapInsertWith :: (a -> a -> a) -> KeyOf map -> a -> map a -> map a + mapDelete :: KeyOf map -> map a -> map a + mapAlter :: (Maybe a -> Maybe a) -> KeyOf map -> map a -> map a + mapAdjust :: (a -> a) -> KeyOf map -> map a -> map a + + mapUnion :: map a -> map a -> map a + mapUnionWithKey :: (KeyOf map -> a -> a -> a) -> map a -> map a -> map a + mapDifference :: map a -> map a -> map a + mapIntersection :: map a -> map a -> map a + mapIsSubmapOf :: Eq a => map a -> map a -> Bool + + mapMap :: (a -> b) -> map a -> map b + mapMapWithKey :: (KeyOf map -> a -> b) -> map a -> map b + mapFoldl :: (b -> a -> b) -> b -> map a -> b + mapFoldr :: (a -> b -> b) -> b -> map a -> b + mapFoldlWithKey :: (b -> KeyOf map -> a -> b) -> b -> map a -> b + 
mapFoldMapWithKey :: Monoid m => (KeyOf map -> a -> m) -> map a -> m + mapFilter :: (a -> Bool) -> map a -> map a + mapFilterWithKey :: (KeyOf map -> a -> Bool) -> map a -> map a + + + mapElems :: map a -> [a] + mapKeys :: map a -> [KeyOf map] + mapToList :: map a -> [(KeyOf map, a)] + mapFromList :: [(KeyOf map, a)] -> map a + mapFromListWith :: (a -> a -> a) -> [(KeyOf map,a)] -> map a + +-- Helper functions for IsMap class +mapInsertList :: IsMap map => [(KeyOf map, a)] -> map a -> map a +mapInsertList assocs map = foldl' (flip (uncurry mapInsert)) map assocs + +mapDeleteList :: IsMap map => [KeyOf map] -> map a -> map a +mapDeleteList keys map = foldl' (flip mapDelete) map keys + +mapUnions :: IsMap map => [map a] -> map a +mapUnions [] = mapEmpty +mapUnions maps = foldl1' mapUnion maps + +----------------------------------------------------------------------------- +-- Basic instances +----------------------------------------------------------------------------- + +newtype UniqueSet = US S.IntSet deriving (Eq, Ord, Show, Semigroup, Monoid) + +instance IsSet UniqueSet where + type ElemOf UniqueSet = Int + + setNull (US s) = S.null s + setSize (US s) = S.size s + setMember k (US s) = S.member k s + + setEmpty = US S.empty + setSingleton k = US (S.singleton k) + setInsert k (US s) = US (S.insert k s) + setDelete k (US s) = US (S.delete k s) + + setUnion (US x) (US y) = US (S.union x y) + setDifference (US x) (US y) = US (S.difference x y) + setIntersection (US x) (US y) = US (S.intersection x y) + setIsSubsetOf (US x) (US y) = S.isSubsetOf x y + setFilter f (US s) = US (S.filter f s) + + setFoldl k z (US s) = S.foldl' k z s + setFoldr k z (US s) = S.foldr k z s + + setElems (US s) = S.elems s + setFromList ks = US (S.fromList ks) + +newtype UniqueMap v = UM (M.IntMap v) + deriving (Eq, Ord, Show, Functor, Foldable, Traversable) + +instance IsMap UniqueMap where + type KeyOf UniqueMap = Int + + mapNull (UM m) = M.null m + mapSize (UM m) = M.size m + mapMember k (UM m) = M.member k m + mapLookup k (UM m) = M.lookup k m + mapFindWithDefault def k (UM m) = M.findWithDefault def k m + + mapEmpty = UM M.empty + mapSingleton k v = UM (M.singleton k v) + mapInsert k v (UM m) = UM (M.insert k v m) + mapInsertWith f k v (UM m) = UM (M.insertWith f k v m) + mapDelete k (UM m) = UM (M.delete k m) + mapAlter f k (UM m) = UM (M.alter f k m) + mapAdjust f k (UM m) = UM (M.adjust f k m) + + mapUnion (UM x) (UM y) = UM (M.union x y) + mapUnionWithKey f (UM x) (UM y) = UM (M.unionWithKey f x y) + mapDifference (UM x) (UM y) = UM (M.difference x y) + mapIntersection (UM x) (UM y) = UM (M.intersection x y) + mapIsSubmapOf (UM x) (UM y) = M.isSubmapOf x y + + mapMap f (UM m) = UM (M.map f m) + mapMapWithKey f (UM m) = UM (M.mapWithKey f m) + mapFoldl k z (UM m) = M.foldl' k z m + mapFoldr k z (UM m) = M.foldr k z m + mapFoldlWithKey k z (UM m) = M.foldlWithKey' k z m + mapFoldMapWithKey f (UM m) = M.foldMapWithKey f m + mapFilter f (UM m) = UM (M.filter f m) + mapFilterWithKey f (UM m) = UM (M.filterWithKey f m) + + mapElems (UM m) = M.elems m + mapKeys (UM m) = M.keys m + mapToList (UM m) = M.toList m + mapFromList assocs = UM (M.fromList assocs) + mapFromListWith f assocs = UM (M.fromListWith f assocs) diff --git a/compiler/GHC/Cmm/Dataflow/Graph.hs b/compiler/GHC/Cmm/Dataflow/Graph.hs new file mode 100644 index 0000000000..3f361de0fb --- /dev/null +++ b/compiler/GHC/Cmm/Dataflow/Graph.hs @@ -0,0 +1,186 @@ +{-# LANGUAGE BangPatterns #-} +{-# LANGUAGE DataKinds #-} +{-# LANGUAGE FlexibleInstances #-} +{-# 
LANGUAGE GADTs #-} +{-# LANGUAGE RankNTypes #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE TypeFamilies #-} +module GHC.Cmm.Dataflow.Graph + ( Body + , Graph + , Graph'(..) + , NonLocal(..) + , addBlock + , bodyList + , emptyBody + , labelsDefined + , mapGraph + , mapGraphBlocks + , revPostorderFrom + ) where + + +import GhcPrelude +import Util + +import GHC.Cmm.Dataflow.Label +import GHC.Cmm.Dataflow.Block +import GHC.Cmm.Dataflow.Collections + +-- | A (possibly empty) collection of closed/closed blocks +type Body n = LabelMap (Block n C C) + +-- | @Body@ abstracted over @block@ +type Body' block (n :: Extensibility -> Extensibility -> *) = LabelMap (block n C C) + +------------------------------- +-- | Gives access to the anchor points for +-- nonlocal edges as well as the edges themselves +class NonLocal thing where + entryLabel :: thing C x -> Label -- ^ The label of a first node or block + successors :: thing e C -> [Label] -- ^ Gives control-flow successors + +instance NonLocal n => NonLocal (Block n) where + entryLabel (BlockCO f _) = entryLabel f + entryLabel (BlockCC f _ _) = entryLabel f + + successors (BlockOC _ n) = successors n + successors (BlockCC _ _ n) = successors n + + +emptyBody :: Body' block n +emptyBody = mapEmpty + +bodyList :: Body' block n -> [(Label,block n C C)] +bodyList body = mapToList body + +addBlock + :: (NonLocal block, HasDebugCallStack) + => block C C -> LabelMap (block C C) -> LabelMap (block C C) +addBlock block body = mapAlter add lbl body + where + lbl = entryLabel block + add Nothing = Just block + add _ = error $ "duplicate label " ++ show lbl ++ " in graph" + + +-- --------------------------------------------------------------------------- +-- Graph + +-- | A control-flow graph, which may take any of four shapes (O/O, +-- O/C, C/O, C/C). A graph open at the entry has a single, +-- distinguished, anonymous entry point; if a graph is closed at the +-- entry, its entry point(s) are supplied by a context. +type Graph = Graph' Block + +-- | @Graph'@ is abstracted over the block type, so that we can build +-- graphs of annotated blocks for example (Compiler.Hoopl.Dataflow +-- needs this). +data Graph' block (n :: Extensibility -> Extensibility -> *) e x where + GNil :: Graph' block n O O + GUnit :: block n O O -> Graph' block n O O + GMany :: MaybeO e (block n O C) + -> Body' block n + -> MaybeO x (block n C O) + -> Graph' block n e x + + +-- ----------------------------------------------------------------------------- +-- Mapping over graphs + +-- | Maps over all nodes in a graph. +mapGraph :: (forall e x. n e x -> n' e x) -> Graph n e x -> Graph n' e x +mapGraph f = mapGraphBlocks (mapBlock f) + +-- | Function 'mapGraphBlocks' enables a change of representation of blocks, +-- nodes, or both. It lifts a polymorphic block transform into a polymorphic +-- graph transform. When the block representation stabilizes, a similar +-- function should be provided for blocks. +mapGraphBlocks :: forall block n block' n' e x . + (forall e x . block n e x -> block' n' e x) + -> (Graph' block n e x -> Graph' block' n' e x) + +mapGraphBlocks f = map + where map :: Graph' block n e x -> Graph' block' n' e x + map GNil = GNil + map (GUnit b) = GUnit (f b) + map (GMany e b x) = GMany (fmap f e) (mapMap f b) (fmap f x) + +-- ----------------------------------------------------------------------------- +-- Extracting Labels from graphs + +labelsDefined :: forall block n e x . 
NonLocal (block n) => Graph' block n e x + -> LabelSet +labelsDefined GNil = setEmpty +labelsDefined (GUnit{}) = setEmpty +labelsDefined (GMany _ body x) = mapFoldlWithKey addEntry (exitLabel x) body + where addEntry :: forall a. LabelSet -> ElemOf LabelSet -> a -> LabelSet + addEntry labels label _ = setInsert label labels + exitLabel :: MaybeO x (block n C O) -> LabelSet + exitLabel NothingO = setEmpty + exitLabel (JustO b) = setSingleton (entryLabel b) + + +---------------------------------------------------------------- + +-- | Returns a list of blocks reachable from the provided Labels in the reverse +-- postorder. +-- +-- This is the most important traversal over this data structure. It drops +-- unreachable code and puts blocks in an order that is good for solving forward +-- dataflow problems quickly. The reverse order is good for solving backward +-- dataflow problems quickly. The forward order is also reasonably good for +-- emitting instructions, except that it will not usually exploit Forrest +-- Baskett's trick of eliminating the unconditional branch from a loop. For +-- that you would need a more serious analysis, probably based on dominators, to +-- identify loop headers. +-- +-- For forward analyses we want reverse postorder visitation, consider: +-- @ +-- A -> [B,C] +-- B -> D +-- C -> D +-- @ +-- Postorder: [D, C, B, A] (or [D, B, C, A]) +-- Reverse postorder: [A, B, C, D] (or [A, C, B, D]) +-- This matters for, e.g., forward analysis, because we want to analyze *both* +-- B and C before we analyze D. +revPostorderFrom + :: forall block. (NonLocal block) + => LabelMap (block C C) -> Label -> [block C C] +revPostorderFrom graph start = go start_worklist setEmpty [] + where + start_worklist = lookup_for_descend start Nil + + -- To compute the postorder we need to "visit" a block (mark as done) + -- *after* visiting all its successors. So we need to know whether we + -- already processed all successors of each block (and @NonLocal@ allows + -- arbitrary many successors). So we use an explicit stack with an extra bit + -- of information: + -- * @ConsTodo@ means to explore the block if it wasn't visited before + -- * @ConsMark@ means that all successors were already done and we can add + -- the block to the result. + -- + -- NOTE: We add blocks to the result list in postorder, but we *prepend* + -- them (i.e., we use @(:)@), which means that the final list is in reverse + -- postorder. + go :: DfsStack (block C C) -> LabelSet -> [block C C] -> [block C C] + go Nil !_ !result = result + go (ConsMark block rest) !wip_or_done !result = + go rest wip_or_done (block : result) + go (ConsTodo block rest) !wip_or_done !result + | entryLabel block `setMember` wip_or_done = go rest wip_or_done result + | otherwise = + let new_worklist = + foldr lookup_for_descend + (ConsMark block rest) + (successors block) + in go new_worklist (setInsert (entryLabel block) wip_or_done) result + + lookup_for_descend :: Label -> DfsStack (block C C) -> DfsStack (block C C) + lookup_for_descend label wl + | Just b <- mapLookup label graph = ConsTodo b wl + | otherwise = + error $ "Label that doesn't have a block?! 
" ++ show label + +data DfsStack a = ConsTodo a (DfsStack a) | ConsMark a (DfsStack a) | Nil diff --git a/compiler/GHC/Cmm/Dataflow/Label.hs b/compiler/GHC/Cmm/Dataflow/Label.hs new file mode 100644 index 0000000000..c571cedb48 --- /dev/null +++ b/compiler/GHC/Cmm/Dataflow/Label.hs @@ -0,0 +1,142 @@ +{-# LANGUAGE DeriveFoldable #-} +{-# LANGUAGE DeriveFunctor #-} +{-# LANGUAGE DeriveTraversable #-} +{-# LANGUAGE TypeFamilies #-} +{-# LANGUAGE GeneralizedNewtypeDeriving #-} + +module GHC.Cmm.Dataflow.Label + ( Label + , LabelMap + , LabelSet + , FactBase + , lookupFact + , mkHooplLabel + ) where + +import GhcPrelude + +import Outputable + +-- TODO: This should really just use GHC's Unique and Uniq{Set,FM} +import GHC.Cmm.Dataflow.Collections + +import Unique (Uniquable(..)) +import TrieMap + + +----------------------------------------------------------------------------- +-- Label +----------------------------------------------------------------------------- + +newtype Label = Label { lblToUnique :: Int } + deriving (Eq, Ord) + +mkHooplLabel :: Int -> Label +mkHooplLabel = Label + +instance Show Label where + show (Label n) = "L" ++ show n + +instance Uniquable Label where + getUnique label = getUnique (lblToUnique label) + +instance Outputable Label where + ppr label = ppr (getUnique label) + +----------------------------------------------------------------------------- +-- LabelSet + +newtype LabelSet = LS UniqueSet deriving (Eq, Ord, Show, Monoid, Semigroup) + +instance IsSet LabelSet where + type ElemOf LabelSet = Label + + setNull (LS s) = setNull s + setSize (LS s) = setSize s + setMember (Label k) (LS s) = setMember k s + + setEmpty = LS setEmpty + setSingleton (Label k) = LS (setSingleton k) + setInsert (Label k) (LS s) = LS (setInsert k s) + setDelete (Label k) (LS s) = LS (setDelete k s) + + setUnion (LS x) (LS y) = LS (setUnion x y) + setDifference (LS x) (LS y) = LS (setDifference x y) + setIntersection (LS x) (LS y) = LS (setIntersection x y) + setIsSubsetOf (LS x) (LS y) = setIsSubsetOf x y + setFilter f (LS s) = LS (setFilter (f . mkHooplLabel) s) + setFoldl k z (LS s) = setFoldl (\a v -> k a (mkHooplLabel v)) z s + setFoldr k z (LS s) = setFoldr (\v a -> k (mkHooplLabel v) a) z s + + setElems (LS s) = map mkHooplLabel (setElems s) + setFromList ks = LS (setFromList (map lblToUnique ks)) + +----------------------------------------------------------------------------- +-- LabelMap + +newtype LabelMap v = LM (UniqueMap v) + deriving (Eq, Ord, Show, Functor, Foldable, Traversable) + +instance IsMap LabelMap where + type KeyOf LabelMap = Label + + mapNull (LM m) = mapNull m + mapSize (LM m) = mapSize m + mapMember (Label k) (LM m) = mapMember k m + mapLookup (Label k) (LM m) = mapLookup k m + mapFindWithDefault def (Label k) (LM m) = mapFindWithDefault def k m + + mapEmpty = LM mapEmpty + mapSingleton (Label k) v = LM (mapSingleton k v) + mapInsert (Label k) v (LM m) = LM (mapInsert k v m) + mapInsertWith f (Label k) v (LM m) = LM (mapInsertWith f k v m) + mapDelete (Label k) (LM m) = LM (mapDelete k m) + mapAlter f (Label k) (LM m) = LM (mapAlter f k m) + mapAdjust f (Label k) (LM m) = LM (mapAdjust f k m) + + mapUnion (LM x) (LM y) = LM (mapUnion x y) + mapUnionWithKey f (LM x) (LM y) = LM (mapUnionWithKey (f . mkHooplLabel) x y) + mapDifference (LM x) (LM y) = LM (mapDifference x y) + mapIntersection (LM x) (LM y) = LM (mapIntersection x y) + mapIsSubmapOf (LM x) (LM y) = mapIsSubmapOf x y + + mapMap f (LM m) = LM (mapMap f m) + mapMapWithKey f (LM m) = LM (mapMapWithKey (f . 
mkHooplLabel) m) + mapFoldl k z (LM m) = mapFoldl k z m + mapFoldr k z (LM m) = mapFoldr k z m + mapFoldlWithKey k z (LM m) = + mapFoldlWithKey (\a v -> k a (mkHooplLabel v)) z m + mapFoldMapWithKey f (LM m) = mapFoldMapWithKey (\k v -> f (mkHooplLabel k) v) m + mapFilter f (LM m) = LM (mapFilter f m) + mapFilterWithKey f (LM m) = LM (mapFilterWithKey (f . mkHooplLabel) m) + + mapElems (LM m) = mapElems m + mapKeys (LM m) = map mkHooplLabel (mapKeys m) + mapToList (LM m) = [(mkHooplLabel k, v) | (k, v) <- mapToList m] + mapFromList assocs = LM (mapFromList [(lblToUnique k, v) | (k, v) <- assocs]) + mapFromListWith f assocs = LM (mapFromListWith f [(lblToUnique k, v) | (k, v) <- assocs]) + +----------------------------------------------------------------------------- +-- Instances + +instance Outputable LabelSet where + ppr = ppr . setElems + +instance Outputable a => Outputable (LabelMap a) where + ppr = ppr . mapToList + +instance TrieMap LabelMap where + type Key LabelMap = Label + emptyTM = mapEmpty + lookupTM k m = mapLookup k m + alterTM k f m = mapAlter f k m + foldTM k m z = mapFoldr k z m + mapTM f m = mapMap f m + +----------------------------------------------------------------------------- +-- FactBase + +type FactBase f = LabelMap f + +lookupFact :: Label -> FactBase f -> Maybe f +lookupFact = mapLookup diff --git a/compiler/GHC/Cmm/DebugBlock.hs b/compiler/GHC/Cmm/DebugBlock.hs new file mode 100644 index 0000000000..70fc08ee94 --- /dev/null +++ b/compiler/GHC/Cmm/DebugBlock.hs @@ -0,0 +1,546 @@ +{-# LANGUAGE GADTs #-} +{-# LANGUAGE MultiWayIf #-} + +----------------------------------------------------------------------------- +-- +-- Debugging data +-- +-- Association of debug data on the Cmm level, with methods to encode it in +-- event log format for later inclusion in profiling event logs. +-- +----------------------------------------------------------------------------- + +module GHC.Cmm.DebugBlock ( + + DebugBlock(..), + cmmDebugGen, + cmmDebugLabels, + cmmDebugLink, + debugToMap, + + -- * Unwinding information + UnwindTable, UnwindPoint(..), + UnwindExpr(..), toUnwindExpr + ) where + +import GhcPrelude + +import GHC.Cmm.BlockId +import GHC.Cmm.CLabel +import GHC.Cmm +import GHC.Cmm.Utils +import CoreSyn +import FastString ( nilFS, mkFastString ) +import Module +import Outputable +import GHC.Cmm.Ppr.Expr ( pprExpr ) +import SrcLoc +import Util ( seqList ) + +import GHC.Cmm.Dataflow.Block +import GHC.Cmm.Dataflow.Collections +import GHC.Cmm.Dataflow.Graph +import GHC.Cmm.Dataflow.Label + +import Data.Maybe +import Data.List ( minimumBy, nubBy ) +import Data.Ord ( comparing ) +import qualified Data.Map as Map +import Data.Either ( partitionEithers ) + +-- | Debug information about a block of code. Ticks scope over nested +-- blocks. +data DebugBlock = + DebugBlock + { dblProcedure :: !Label -- ^ Entry label of containing proc + , dblLabel :: !Label -- ^ Hoopl label + , dblCLabel :: !CLabel -- ^ Output label + , dblHasInfoTbl :: !Bool -- ^ Has an info table? + , dblParent :: !(Maybe DebugBlock) + -- ^ The parent of this proc. See Note [Splitting DebugBlocks] + , dblTicks :: ![CmmTickish] -- ^ Ticks defined in this block + , dblSourceTick :: !(Maybe CmmTickish) -- ^ Best source tick covering block + , dblPosition :: !(Maybe Int) -- ^ Output position relative to + -- other blocks. 
@Nothing@ means
+                                          -- the block was optimized out
+  , dblUnwind :: [UnwindPoint]
+  , dblBlocks :: ![DebugBlock] -- ^ Nested blocks
+  }
+
+instance Outputable DebugBlock where
+  ppr blk = (if | dblProcedure blk == dblLabel blk
+                -> text "proc"
+                | dblHasInfoTbl blk
+                -> text "pp-blk"
+                | otherwise
+                -> text "blk") <+>
+            ppr (dblLabel blk) <+> parens (ppr (dblCLabel blk)) <+>
+            (maybe empty ppr (dblSourceTick blk)) <+>
+            (maybe (text "removed") ((text "pos " <>) . ppr)
+                   (dblPosition blk)) <+>
+            (ppr (dblUnwind blk)) $+$
+            (if null (dblBlocks blk) then empty else nest 4 (ppr (dblBlocks blk)))
+
+-- | Intermediate data structure holding debug-relevant context information
+-- about a block.
+type BlockContext = (CmmBlock, RawCmmDecl)
+
+-- | Extract debug data from a group of procedures. We will prefer
+-- source notes that come from the given module (presumably the module
+-- that we are currently compiling).
+cmmDebugGen :: ModLocation -> RawCmmGroup -> [DebugBlock]
+cmmDebugGen modLoc decls = map (blocksForScope Nothing) topScopes
+  where
+      blockCtxs :: Map.Map CmmTickScope [BlockContext]
+      blockCtxs = blockContexts decls
+
+      -- Analyse tick scope structure: Each one is either a top-level
+      -- tick scope, or the child of another.
+      (topScopes, childScopes)
+        = partitionEithers $ map (\a -> findP a a) $ Map.keys blockCtxs
+      findP tsc GlobalScope = Left tsc -- top scope
+      findP tsc scp | scp' `Map.member` blockCtxs = Right (scp', tsc)
+                    | otherwise                   = findP tsc scp'
+        where -- Note that we only follow the left parent of
+              -- combined scopes. This loses us ticks, which we will
+              -- recover by copying ticks below.
+              scp' | SubScope _ scp' <- scp      = scp'
+                   | CombinedScope scp' _ <- scp = scp'
+                   | otherwise                   = panic "findP impossible"
+
+      scopeMap = foldr (uncurry insertMulti) Map.empty childScopes
+
+      -- This allows us to recover ticks that we lost by flattening
+      -- the graph. Basically, if the parent is A but the child is
+      -- CBA, we know that there is no BA, because it would have taken
+      -- priority - but there might be a B scope, with ticks that
+      -- would not be associated with our child anymore. Note however
+      -- that there might be other children (DB), which we have to
+      -- filter out.
+      --
+      -- We expect this to be called rarely, which is why we are not
+      -- trying too hard to be efficient here. In many cases we won't
+      -- have to construct blockCtxs in the first place.
+      ticksToCopy :: CmmTickScope -> [CmmTickish]
+      ticksToCopy (CombinedScope scp s) = go s
+        where go s | scp `isTickSubScope` s   = [] -- done
+                   | SubScope _ s' <- s       = ticks ++ go s'
+                   | CombinedScope s1 s2 <- s = ticks ++ go s1 ++ go s2
+                   | otherwise                = panic "ticksToCopy impossible"
+                where ticks = bCtxsTicks $ fromMaybe [] $ Map.lookup s blockCtxs
+      ticksToCopy _ = []
+      bCtxsTicks = concatMap (blockTicks . fst)
+
+      -- Finding the "best" source tick is somewhat arbitrary -- we
+      -- select the first source span, while preferring source ticks
+      -- from the same source file. Furthermore, dumps take priority
+      -- (if we generated one, we probably want debug information to
+      -- refer to it).
+      bestSrcTick = minimumBy (comparing rangeRating)
+      rangeRating (SourceNote span _)
+        | srcSpanFile span == thisFile = 1
+        | otherwise                    = 2 :: Int
+      rangeRating note = pprPanic "rangeRating" (ppr note)
+      thisFile = maybe nilFS mkFastString $ ml_hs_file modLoc
+
+      -- Returns block tree for this scope as well as all nested
+      -- scopes.
Note that if there are multiple blocks in the (exact) + -- same scope we elect one as the "branch" node and add the rest + -- as children. + blocksForScope :: Maybe CmmTickish -> CmmTickScope -> DebugBlock + blocksForScope cstick scope = mkBlock True (head bctxs) + where bctxs = fromJust $ Map.lookup scope blockCtxs + nested = fromMaybe [] $ Map.lookup scope scopeMap + childs = map (mkBlock False) (tail bctxs) ++ + map (blocksForScope stick) nested + + mkBlock :: Bool -> BlockContext -> DebugBlock + mkBlock top (block, prc) + = DebugBlock { dblProcedure = g_entry graph + , dblLabel = label + , dblCLabel = case info of + Just (Statics infoLbl _) -> infoLbl + Nothing + | g_entry graph == label -> entryLbl + | otherwise -> blockLbl label + , dblHasInfoTbl = isJust info + , dblParent = Nothing + , dblTicks = ticks + , dblPosition = Nothing -- see cmmDebugLink + , dblSourceTick = stick + , dblBlocks = blocks + , dblUnwind = [] + } + where (CmmProc infos entryLbl _ graph) = prc + label = entryLabel block + info = mapLookup label infos + blocks | top = seqList childs childs + | otherwise = [] + + -- A source tick scopes over all nested blocks. However + -- their source ticks might take priority. + isSourceTick SourceNote {} = True + isSourceTick _ = False + -- Collect ticks from all blocks inside the tick scope. + -- We attempt to filter out duplicates while we're at it. + ticks = nubBy (flip tickishContains) $ + bCtxsTicks bctxs ++ ticksToCopy scope + stick = case filter isSourceTick ticks of + [] -> cstick + sticks -> Just $! bestSrcTick (sticks ++ maybeToList cstick) + +-- | Build a map of blocks sorted by their tick scopes +-- +-- This involves a pre-order traversal, as we want blocks in rough +-- control flow order (so ticks have a chance to be sorted in the +-- right order). +blockContexts :: RawCmmGroup -> Map.Map CmmTickScope [BlockContext] +blockContexts decls = Map.map reverse $ foldr walkProc Map.empty decls + where walkProc :: RawCmmDecl + -> Map.Map CmmTickScope [BlockContext] + -> Map.Map CmmTickScope [BlockContext] + walkProc CmmData{} m = m + walkProc prc@(CmmProc _ _ _ graph) m + | mapNull blocks = m + | otherwise = snd $ walkBlock prc entry (emptyLbls, m) + where blocks = toBlockMap graph + entry = [mapFind (g_entry graph) blocks] + emptyLbls = setEmpty :: LabelSet + + walkBlock :: RawCmmDecl -> [Block CmmNode C C] + -> (LabelSet, Map.Map CmmTickScope [BlockContext]) + -> (LabelSet, Map.Map CmmTickScope [BlockContext]) + walkBlock _ [] c = c + walkBlock prc (block:blocks) (visited, m) + | lbl `setMember` visited + = walkBlock prc blocks (visited, m) + | otherwise + = walkBlock prc blocks $ + walkBlock prc succs + (lbl `setInsert` visited, + insertMulti scope (block, prc) m) + where CmmEntry lbl scope = firstNode block + (CmmProc _ _ _ graph) = prc + succs = map (flip mapFind (toBlockMap graph)) + (successors (lastNode block)) + mapFind = mapFindWithDefault (error "contextTree: block not found!") + +insertMulti :: Ord k => k -> a -> Map.Map k [a] -> Map.Map k [a] +insertMulti k v = Map.insertWith (const (v:)) k [v] + +cmmDebugLabels :: (i -> Bool) -> GenCmmGroup d g (ListGraph i) -> [Label] +cmmDebugLabels isMeta nats = seqList lbls lbls + where -- Find order in which procedures will be generated by the + -- back-end (that actually matters for DWARF generation). 
+        --
+        -- Note that we might encounter blocks that are missing or only
+        -- consist of meta instructions -- we will declare them missing,
+        -- which will skip debug data generation without messing up the
+        -- block hierarchy.
+        lbls = map blockId $ filter (not . allMeta) $ concatMap getBlocks nats
+        getBlocks (CmmProc _ _ _ (ListGraph bs)) = bs
+        getBlocks _other = []
+        allMeta (BasicBlock _ instrs) = all isMeta instrs
+
+-- | Sets position and unwind table fields in the debug block tree according to
+-- native generated code.
+cmmDebugLink :: [Label] -> LabelMap [UnwindPoint]
+             -> [DebugBlock] -> [DebugBlock]
+cmmDebugLink labels unwindPts blocks = map link blocks
+  where blockPos :: LabelMap Int
+        blockPos = mapFromList $ flip zip [0..] labels
+        link block = block { dblPosition = mapLookup (dblLabel block) blockPos
+                           , dblBlocks  = map link (dblBlocks block)
+                           , dblUnwind  = fromMaybe mempty
+                                        $ mapLookup (dblLabel block) unwindPts
+                           }
+
+-- | Converts debug blocks into a label map for easier lookups
+debugToMap :: [DebugBlock] -> LabelMap DebugBlock
+debugToMap = mapUnions . map go
+   where go b = mapInsert (dblLabel b) b $ mapUnions $ map go (dblBlocks b)
+
+{-
+Note [What is this unwinding business?]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Unwinding tables are a variety of debugging information used by debugging tools
+to reconstruct the execution history of a program at runtime. These tables
+consist of sets of "instructions", one set for every instruction in the program,
+which describe how to reconstruct the state of the machine at the point where
+the current procedure was called. For instance, consider the following annotated
+pseudo-code,
+
+  a_fun:
+    add rsp, 8            -- unwind: rsp = rsp - 8
+    mov rax, 1            -- unwind: rax = unknown
+    call another_block
+    sub rsp, 8            -- unwind: rsp = rsp
+
+We see that attached to each instruction there is an "unwind" annotation, which
+provides a relationship between each updated register and its value at the
+time of entry to a_fun. This is the sort of information that allows gdb to give
+you a stack backtrace given the execution state of your program. This
+unwinding information is captured in various ways by various debug information
+formats; in the case of DWARF (the only format supported by GHC) it is known as
+Call Frame Information (CFI) and can be found in the .debug_frame section of
+your object files.
+
+Currently we only bother to produce unwinding information for registers which
+are necessary to reconstruct flow-of-execution. On x86_64 this includes $rbp
+(which is the STG stack pointer) and $rsp (the C stack pointer).
+
+Let's consider how GHC would annotate a typical C-- procedure, as would come
+from the STG-to-Cmm code generator, with unwinding information,
+
+    entry()
+    { c2fe:
+          v :: P64 = R2;
+          if ((Sp + 8) - 32 < SpLim) (likely: False) goto c2ff; else goto c2fg;
+      c2ff:
+          R2 = v :: P64;
+          R1 = test_closure;
+          call (stg_gc_fun)(R2, R1) args: 8, res: 0, upd: 8;
+      c2fg:
+          I64[Sp - 8] = c2dD;
+          R1 = v :: P64;
+          Sp = Sp - 8;          // Sp updated here
+          if (R1 & 7 != 0) goto c2dD; else goto c2dE;
+      c2dE:
+          call (I64[R1])(R1) returns to c2dD, args: 8, res: 8, upd: 8;
+      c2dD:
+          w :: P64 = R1;
+          Hp = Hp + 48;
+          if (Hp > HpLim) (likely: False) goto c2fj; else goto c2fi;
+      ...
+    },
+
+Let's consider how this procedure will be decorated with unwind information
+(largely by GHC.Cmm.LayoutStack). Naturally, when we enter the procedure `entry` the
+value of Sp is no different from what it was at its call site.
Therefore we will
+add an `unwind` statement saying this at the beginning of its unwind-annotated
+code,
+
+    entry()
+    { c2fe:
+          unwind Sp = Just Sp + 0;
+          v :: P64 = R2;
+          if ((Sp + 8) - 32 < SpLim) (likely: False) goto c2ff; else goto c2fg;
+
+After c2fe we may pass to either c2ff or c2fg; let's first consider the
+former. In this case there is nothing in particular that we need to do other
+than reiterate what we already know about Sp,
+
+      c2ff:
+          unwind Sp = Just Sp + 0;
+          R2 = v :: P64;
+          R1 = test_closure;
+          call (stg_gc_fun)(R2, R1) args: 8, res: 0, upd: 8;
+
+In contrast, c2fg updates Sp midway through its body. To ensure that unwinding
+can happen correctly after this point we must include an unwind statement there,
+in addition to the usual beginning-of-block statement,
+
+      c2fg:
+          unwind Sp = Just Sp + 0;
+          I64[Sp - 8] = c2dD;
+          R1 = v :: P64;
+          Sp = Sp - 8;
+          unwind Sp = Just Sp + 8;
+          if (R1 & 7 != 0) goto c2dD; else goto c2dE;
+
+The remaining blocks are simple,
+
+      c2dE:
+          unwind Sp = Just Sp + 8;
+          call (I64[R1])(R1) returns to c2dD, args: 8, res: 8, upd: 8;
+      c2dD:
+          unwind Sp = Just Sp + 8;
+          w :: P64 = R1;
+          Hp = Hp + 48;
+          if (Hp > HpLim) (likely: False) goto c2fj; else goto c2fi;
+      ...
+    },
+
+
+The flow of unwinding information through the compiler is a bit convoluted:
+
+ * C-- begins life in StgToCmm without any unwind information. This is because we
+   haven't actually done any register assignment or stack layout yet, so there
+   is no need for unwind information.
+
+ * GHC.Cmm.LayoutStack figures out how to lay out each procedure's stack, and
+   produces appropriate unwinding nodes for each adjustment of the STG Sp
+   register.
+
+ * The unwind nodes are carried through the sinking pass. Currently this is
+   guaranteed not to invalidate unwind information since it won't touch stores
+   to Sp, but this will need revisiting if CmmSink gets smarter in the future.
+
+ * Eventually we make it to the native code generator backend which can then
+   preserve the unwind nodes in its machine-specific instructions. In so doing
+   the backend can also modify or add unwinding information; this is necessary,
+   for instance, in the case of x86-64, where adjustment of $rsp may be
+   necessary during calls to native foreign code due to the native calling
+   convention.
+
+ * The NCG then retrieves the final unwinding table for each block from the
+   backend with extractUnwindPoints.
+
+ * This unwind information is converted to DebugBlocks by Debug.cmmDebugGen.
+
+ * These DebugBlocks are then converted to, e.g., DWARF unwinding tables
+   (by the Dwarf module) and emitted in the final object.
+
+See also:
+  Note [Unwinding information in the NCG] in AsmCodeGen,
+  Note [Unwind pseudo-instruction in Cmm],
+  Note [Debugging DWARF unwinding info].
+
+
+Note [Debugging DWARF unwinding info]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+For debugging generated unwinding info I've found it most useful to dump the
+disassembled binary with objdump -D and dump the debug info with
+readelf --debug-dump=frames-interp.
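+
+For example, given some object file Fib.o (the file name here is purely
+illustrative):
+
+    $ objdump -D Fib.o
+    $ readelf --debug-dump=frames-interp Fib.o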
+ +You should get something like this: + + 0000000000000010 <stg_catch_frame_info>: + 10: 48 83 c5 18 add $0x18,%rbp + 14: ff 65 00 jmpq *0x0(%rbp) + +and: + + Contents of the .debug_frame section: + + 00000000 0000000000000014 ffffffff CIE "" cf=1 df=-8 ra=16 + LOC CFA rbp rsp ra + 0000000000000000 rbp+0 v+0 s c+0 + + 00000018 0000000000000024 00000000 FDE cie=00000000 pc=000000000000000f..0000000000000017 + LOC CFA rbp rsp ra + 000000000000000f rbp+0 v+0 s c+0 + 000000000000000f rbp+24 v+0 s c+0 + +To read it http://www.dwarfstd.org/doc/dwarf-2.0.0.pdf has a nice example in +Appendix 5 (page 101 of the pdf) and more details in the relevant section. + +The key thing to keep in mind is that the value at LOC is the value from +*before* the instruction at LOC executes. In other words it answers the +question: if my $rip is at LOC, how do I get the relevant values given the +values obtained through unwinding so far. + +If the readelf --debug-dump=frames-interp output looks wrong, it may also be +useful to look at readelf --debug-dump=frames, which is closer to the +information that GHC generated. + +It's also useful to dump the relevant Cmm with -ddump-cmm -ddump-opt-cmm +-ddump-cmm-proc -ddump-cmm-verbose. Note [Unwind pseudo-instruction in Cmm] +explains how to interpret it. + +Inside gdb there are a couple useful commands for inspecting frames. +For example: + + gdb> info frame <num> + +It shows the values of registers obtained through unwinding. + +Another useful thing to try when debugging the DWARF unwinding is to enable +extra debugging output in GDB: + + gdb> set debug frame 1 + +This makes GDB produce a trace of its internal workings. Having gone this far, +it's just a tiny step to run GDB in GDB. Make sure you install debugging +symbols for gdb if you obtain it through a package manager. + +Keep in mind that the current release of GDB has an instruction pointer handling +heuristic that works well for C-like languages, but doesn't always work for +Haskell. See Note [Info Offset] in Dwarf.Types for more details. + +Note [Unwind pseudo-instruction in Cmm] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +One of the possible CmmNodes is a CmmUnwind pseudo-instruction. It doesn't +generate any assembly, but controls what DWARF unwinding information gets +generated. + +It's important to understand what ranges of code the unwind pseudo-instruction +refers to. +For a sequence of CmmNodes like: + + A // starts at addr X and ends at addr Y-1 + unwind Sp = Just Sp + 16; + B // starts at addr Y and ends at addr Z + +the unwind statement reflects the state after A has executed, but before B +has executed. If you consult the Note [Debugging DWARF unwinding info], the +LOC this information will end up in is Y. +-} + +-- | A label associated with an 'UnwindTable' +data UnwindPoint = UnwindPoint !CLabel !UnwindTable + +instance Outputable UnwindPoint where + ppr (UnwindPoint lbl uws) = + braces $ ppr lbl<>colon + <+> hsep (punctuate comma $ map pprUw $ Map.toList uws) + where + pprUw (g, expr) = ppr g <> char '=' <> ppr expr + +-- | Maps registers to expressions that yield their "old" values +-- further up the stack. Most interesting for the stack pointer @Sp@, +-- but might be useful to document saved registers, too. Note that a +-- register's value will be 'Nothing' when the register's previous +-- value cannot be reconstructed. 
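+--
+-- For example, a table recording that the caller's @Sp@ sits 16 bytes above
+-- the current @Sp@, while the previous @MachSp@ cannot be reconstructed,
+-- could be written as (a sketch using the constructors defined below):
+--
+-- > Map.fromList [ (Sp,     Just (UwReg Sp 16))
+-- >              , (MachSp, Nothing) ]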
+type UnwindTable = Map.Map GlobalReg (Maybe UnwindExpr) + +-- | Expressions, used for unwind information +data UnwindExpr = UwConst !Int -- ^ literal value + | UwReg !GlobalReg !Int -- ^ register plus offset + | UwDeref UnwindExpr -- ^ pointer dereferencing + | UwLabel CLabel + | UwPlus UnwindExpr UnwindExpr + | UwMinus UnwindExpr UnwindExpr + | UwTimes UnwindExpr UnwindExpr + deriving (Eq) + +instance Outputable UnwindExpr where + pprPrec _ (UwConst i) = ppr i + pprPrec _ (UwReg g 0) = ppr g + pprPrec p (UwReg g x) = pprPrec p (UwPlus (UwReg g 0) (UwConst x)) + pprPrec _ (UwDeref e) = char '*' <> pprPrec 3 e + pprPrec _ (UwLabel l) = pprPrec 3 l + pprPrec p (UwPlus e0 e1) | p <= 0 + = pprPrec 0 e0 <> char '+' <> pprPrec 0 e1 + pprPrec p (UwMinus e0 e1) | p <= 0 + = pprPrec 1 e0 <> char '-' <> pprPrec 1 e1 + pprPrec p (UwTimes e0 e1) | p <= 1 + = pprPrec 2 e0 <> char '*' <> pprPrec 2 e1 + pprPrec _ other = parens (pprPrec 0 other) + +-- | Conversion of Cmm expressions to unwind expressions. We check for +-- unsupported operator usages and simplify the expression as far as +-- possible. +toUnwindExpr :: CmmExpr -> UnwindExpr +toUnwindExpr (CmmLit (CmmInt i _)) = UwConst (fromIntegral i) +toUnwindExpr (CmmLit (CmmLabel l)) = UwLabel l +toUnwindExpr (CmmRegOff (CmmGlobal g) i) = UwReg g i +toUnwindExpr (CmmReg (CmmGlobal g)) = UwReg g 0 +toUnwindExpr (CmmLoad e _) = UwDeref (toUnwindExpr e) +toUnwindExpr e@(CmmMachOp op [e1, e2]) = + case (op, toUnwindExpr e1, toUnwindExpr e2) of + (MO_Add{}, UwReg r x, UwConst y) -> UwReg r (x + y) + (MO_Sub{}, UwReg r x, UwConst y) -> UwReg r (x - y) + (MO_Add{}, UwConst x, UwReg r y) -> UwReg r (x + y) + (MO_Add{}, UwConst x, UwConst y) -> UwConst (x + y) + (MO_Sub{}, UwConst x, UwConst y) -> UwConst (x - y) + (MO_Mul{}, UwConst x, UwConst y) -> UwConst (x * y) + (MO_Add{}, u1, u2 ) -> UwPlus u1 u2 + (MO_Sub{}, u1, u2 ) -> UwMinus u1 u2 + (MO_Mul{}, u1, u2 ) -> UwTimes u1 u2 + _otherwise -> pprPanic "Unsupported operator in unwind expression!" + (pprExpr e) +toUnwindExpr e + = pprPanic "Unsupported unwind expression!" (ppr e) diff --git a/compiler/GHC/Cmm/Expr.hs b/compiler/GHC/Cmm/Expr.hs new file mode 100644 index 0000000000..3b4f0156a0 --- /dev/null +++ b/compiler/GHC/Cmm/Expr.hs @@ -0,0 +1,619 @@ +{-# LANGUAGE BangPatterns #-} +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE FlexibleInstances #-} +{-# LANGUAGE MultiParamTypeClasses #-} +{-# LANGUAGE UndecidableInstances #-} + +module GHC.Cmm.Expr + ( CmmExpr(..), cmmExprType, cmmExprWidth, cmmExprAlignment, maybeInvertCmmExpr + , CmmReg(..), cmmRegType, cmmRegWidth + , CmmLit(..), cmmLitType + , LocalReg(..), localRegType + , GlobalReg(..), isArgReg, globalRegType + , spReg, hpReg, spLimReg, hpLimReg, nodeReg + , currentTSOReg, currentNurseryReg, hpAllocReg, cccsReg + , node, baseReg + , VGcPtr(..) + + , DefinerOfRegs, UserOfRegs + , foldRegsDefd, foldRegsUsed + , foldLocalRegsDefd, foldLocalRegsUsed + + , RegSet, LocalRegSet, GlobalRegSet + , emptyRegSet, elemRegSet, extendRegSet, deleteFromRegSet, mkRegSet + , plusRegSet, minusRegSet, timesRegSet, sizeRegSet, nullRegSet + , regSetToList + + , Area(..) 
+  , module GHC.Cmm.MachOp
+  , module GHC.Cmm.Type
+  )
+where
+
+import GhcPrelude
+
+import GHC.Cmm.BlockId
+import GHC.Cmm.CLabel
+import GHC.Cmm.MachOp
+import GHC.Cmm.Type
+import DynFlags
+import Outputable (panic)
+import Unique
+
+import Data.Set (Set)
+import qualified Data.Set as Set
+
+import BasicTypes (Alignment, mkAlignment, alignmentOf)
+
+-----------------------------------------------------------------------------
+--              CmmExpr
+-- An expression. Expressions have no side effects.
+-----------------------------------------------------------------------------
+
+data CmmExpr
+  = CmmLit CmmLit               -- Literal
+  | CmmLoad !CmmExpr !CmmType   -- Read memory location
+  | CmmReg !CmmReg              -- Contents of register
+  | CmmMachOp MachOp [CmmExpr]  -- Machine operation (+, -, *, etc.)
+  | CmmStackSlot Area {-# UNPACK #-} !Int
+                                -- addressing expression of a stack slot
+                                -- See Note [CmmStackSlot aliasing]
+  | CmmRegOff !CmmReg Int
+        -- CmmRegOff reg i
+        --        ** is shorthand only, meaning **
+        -- CmmMachOp (MO_Add rep) [x, CmmLit (CmmInt (fromIntegral i) rep)]
+        --      where rep = typeWidth (cmmRegType reg)
+
+instance Eq CmmExpr where       -- Equality ignores the types
+  CmmLit l1          == CmmLit l2          = l1==l2
+  CmmLoad e1 _       == CmmLoad e2 _       = e1==e2
+  CmmReg r1          == CmmReg r2          = r1==r2
+  CmmRegOff r1 i1    == CmmRegOff r2 i2    = r1==r2 && i1==i2
+  CmmMachOp op1 es1  == CmmMachOp op2 es2  = op1==op2 && es1==es2
+  CmmStackSlot a1 i1 == CmmStackSlot a2 i2 = a1==a2 && i1==i2
+  _e1                == _e2                = False
+
+data CmmReg
+  = CmmLocal  {-# UNPACK #-} !LocalReg
+  | CmmGlobal GlobalReg
+  deriving( Eq, Ord )
+
+-- | A stack area is either the stack slot where a variable is spilled
+-- or the stack space where function arguments and results are passed.
+data Area
+  = Old            -- See Note [Old Area]
+  | Young {-# UNPACK #-} !BlockId  -- Invariant: must be a continuation BlockId
+                   -- See Note [Continuation BlockId] in GHC.Cmm.Node.
+  deriving (Eq, Ord)
+
+{- Note [Old Area]
+~~~~~~~~~~~~~~~~~~
+There is a single call area 'Old', allocated at the extreme old
+end of the stack frame (ie just younger than the return address)
+which holds:
+  * incoming (overflow) parameters,
+  * outgoing (overflow) parameters to tail calls,
+  * outgoing (overflow) result values
+  * the update frame (if any)
+
+Its size is the max of all these requirements. On entry, the stack
+pointer will point to the youngest incoming parameter, which is not
+necessarily at the young end of the Old area.
+
+End of note -}
+
+
+{- Note [CmmStackSlot aliasing]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+When do two CmmStackSlots alias?
+
+ - T[old+N] aliases with U[young(L)+M] for all T, U, L, N and M
+ - T[old+N] aliases with U[old+M] only if the areas actually overlap
+
+Or more informally, different Areas may overlap with each other.
+
+An alternative semantics, which we previously had, was that different
+Areas do not overlap. The problem that led to redefining the
+semantics of stack areas is described below.
+
+e.g. if we had
+
+    x = Sp[old + 8]
+    y = Sp[old + 16]
+
+    Sp[young(L) + 8]  = L
+    Sp[young(L) + 16] = y
+    Sp[young(L) + 24] = x
+    call f() returns to L
+
+if areas semantically do not overlap, then we might optimise this to
+
+    Sp[young(L) + 8]  = L
+    Sp[young(L) + 16] = Sp[old + 8]
+    Sp[young(L) + 24] = Sp[old + 16]
+    call f() returns to L
+
+and now young(L) cannot be allocated at the same place as old, and we
+are doomed to use more stack.
+
+  - old+8  conflicts with young(L)+8
+  - old+16 conflicts with young(L)+16 and young(L)+8
+
+so young(L)+8 == old+24 and we get
+
+    Sp[-8]  = L
+    Sp[-16] = Sp[8]
+    Sp[-24] = Sp[0]
+    Sp -= 24
+    call f() returns to L
+
+However, if areas are defined to be "possibly overlapping" in the
+semantics, then we cannot commute any loads/stores of old with
+young(L), and we will be able to re-use both old+8 and old+16 for
+young(L).
+
+    x = Sp[8]
+    y = Sp[0]
+
+    Sp[8]  = L
+    Sp[0]  = y
+    Sp[-8] = x
+    Sp = Sp - 8
+    call f() returns to L
+
+Now, the assignments involving y go away,
+
+    x = Sp[8]
+    Sp[8]  = L
+    Sp[-8] = x
+    Sp = Sp - 8
+    call f() returns to L
+-}
+
+data CmmLit
+  = CmmInt !Integer  Width
+        -- Interpretation: the 2's complement representation of the value
+        -- is truncated to the specified size. This is easier than trying
+        -- to keep the value within range, because we don't know whether
+        -- it will be used as a signed or unsigned value (the CmmType doesn't
+        -- distinguish between signed & unsigned).
+  | CmmFloat  Rational Width
+  | CmmVec [CmmLit]                     -- Vector literal
+  | CmmLabel    CLabel                  -- Address of label
+  | CmmLabelOff CLabel Int              -- Address of label + byte offset
+
+        -- Due to limitations in the C backend, the following
+        -- MUST ONLY be used inside the info table indicated by label2
+        -- (label2 must be the info label), and label1 must be an
+        -- SRT, a slow entrypoint or a large bitmap (see the Mangler)
+        -- Don't use it at all unless tablesNextToCode.
+        -- It is also used inside the NCG when generating
+        -- position-independent code.
+  | CmmLabelDiffOff CLabel CLabel Int Width -- label1 - label2 + offset
+        -- In an expression, the width just has the effect of MO_SS_Conv
+        -- from wordWidth to the desired width.
+        --
+        -- In a static literal, the supported Widths depend on the
+        -- architecture: wordWidth is supported on all
+        -- architectures. Additionally W32 is supported on x86_64 when
+        -- using the small memory model.
+
+  | CmmBlock {-# UNPACK #-} !BlockId    -- Code label
+        -- Invariant: must be a continuation BlockId
+        -- See Note [Continuation BlockId] in GHC.Cmm.Node.
+
+  | CmmHighStackMark -- A late-bound constant that stands for the max
+                     -- #bytes of stack space used during a procedure.
+                     -- During the stack-layout pass, CmmHighStackMark
+                     -- is replaced by a CmmInt for the actual number
+                     -- of bytes used
+  deriving Eq
+
+cmmExprType :: DynFlags -> CmmExpr -> CmmType
+cmmExprType dflags (CmmLit lit)        = cmmLitType dflags lit
+cmmExprType _      (CmmLoad _ rep)     = rep
+cmmExprType dflags (CmmReg reg)        = cmmRegType dflags reg
+cmmExprType dflags (CmmMachOp op args) = machOpResultType dflags op (map (cmmExprType dflags) args)
+cmmExprType dflags (CmmRegOff reg _)   = cmmRegType dflags reg
+cmmExprType dflags (CmmStackSlot _ _)  = bWord dflags -- an address
+-- Careful though: what is stored at the stack slot may be bigger than
+-- an address
+
+cmmLitType :: DynFlags -> CmmLit -> CmmType
+cmmLitType _      (CmmInt _ width)     = cmmBits  width
+cmmLitType _      (CmmFloat _ width)   = cmmFloat width
+cmmLitType _      (CmmVec [])          = panic "cmmLitType: CmmVec []"
+cmmLitType cflags (CmmVec (l:ls))      = let ty = cmmLitType cflags l
+                                         in if all (`cmmEqType` ty) (map (cmmLitType cflags) ls)
+                                            then cmmVec (1+length ls) ty
+                                            else panic "cmmLitType: CmmVec"
+cmmLitType dflags (CmmLabel lbl)       = cmmLabelType dflags lbl
+cmmLitType dflags (CmmLabelOff lbl _)  = cmmLabelType dflags lbl
+cmmLitType _      (CmmLabelDiffOff _ _ _ width) = cmmBits width
+cmmLitType dflags (CmmBlock _)         = bWord dflags
+cmmLitType dflags (CmmHighStackMark)   = bWord dflags
+
+cmmLabelType :: DynFlags -> CLabel -> CmmType
+cmmLabelType dflags lbl
+ | isGcPtrLabel lbl = gcWord dflags
+ | otherwise        = bWord dflags
+
+cmmExprWidth :: DynFlags -> CmmExpr -> Width
+cmmExprWidth dflags e = typeWidth (cmmExprType dflags e)
+
+-- | Returns an alignment in bytes of a CmmExpr when it's a statically
+-- known integer constant, otherwise returns an alignment of 1 byte.
+-- The caller is responsible for using it with a sensible CmmExpr
+-- argument.
+cmmExprAlignment :: CmmExpr -> Alignment
+cmmExprAlignment (CmmLit (CmmInt intOff _)) = alignmentOf (fromInteger intOff)
+cmmExprAlignment _                          = mkAlignment 1
+--------
+--- Negation for conditional branches
+
+maybeInvertCmmExpr :: CmmExpr -> Maybe CmmExpr
+maybeInvertCmmExpr (CmmMachOp op args) = do op' <- maybeInvertComparison op
+                                            return (CmmMachOp op' args)
+maybeInvertCmmExpr _ = Nothing
+
+-----------------------------------------------------------------------------
+--              Local registers
+-----------------------------------------------------------------------------
+
+data LocalReg
+  = LocalReg {-# UNPACK #-} !Unique CmmType
+    -- ^ Parameters:
+    --   1. Identifier
+    --   2. Type
+
+instance Eq LocalReg where
+  (LocalReg u1 _) == (LocalReg u2 _) = u1 == u2
+
+-- This is non-deterministic but we do not currently support deterministic
+-- code-generation. See Note [Unique Determinism and code generation]
+-- See Note [No Ord for Unique]
+instance Ord LocalReg where
+  compare (LocalReg u1 _) (LocalReg u2 _) = nonDetCmpUnique u1 u2
+
+instance Uniquable LocalReg where
+  getUnique (LocalReg uniq _) = uniq
+
+cmmRegType :: DynFlags -> CmmReg -> CmmType
+cmmRegType _      (CmmLocal  reg) = localRegType reg
+cmmRegType dflags (CmmGlobal reg) = globalRegType dflags reg
+
+cmmRegWidth :: DynFlags -> CmmReg -> Width
+cmmRegWidth dflags = typeWidth .
+
+localRegType :: LocalReg -> CmmType
+localRegType (LocalReg _ rep) = rep
+
+-----------------------------------------------------------------------------
+-- Register-use information for expressions and other types
+-----------------------------------------------------------------------------
+
+-- | Sets of registers
+
+-- These are used for dataflow facts, and a common operation is taking
+-- the union of two RegSets and then asking whether the union is the
+-- same as one of the inputs.  UniqSet isn't good here, because
+-- sizeUniqSet is O(n) whereas Set.size is O(1), so we use ordinary
+-- Sets.
+
+type RegSet r     = Set r
+type LocalRegSet  = RegSet LocalReg
+type GlobalRegSet = RegSet GlobalReg
+
+emptyRegSet             :: RegSet r
+nullRegSet              :: RegSet r -> Bool
+elemRegSet              :: Ord r => r -> RegSet r -> Bool
+extendRegSet            :: Ord r => RegSet r -> r -> RegSet r
+deleteFromRegSet        :: Ord r => RegSet r -> r -> RegSet r
+mkRegSet                :: Ord r => [r] -> RegSet r
+minusRegSet, plusRegSet, timesRegSet :: Ord r => RegSet r -> RegSet r -> RegSet r
+sizeRegSet              :: RegSet r -> Int
+regSetToList            :: RegSet r -> [r]
+
+emptyRegSet      = Set.empty
+nullRegSet       = Set.null
+elemRegSet       = Set.member
+extendRegSet     = flip Set.insert
+deleteFromRegSet = flip Set.delete
+mkRegSet         = Set.fromList
+minusRegSet      = Set.difference
+plusRegSet       = Set.union
+timesRegSet      = Set.intersection
+sizeRegSet       = Set.size
+regSetToList     = Set.toList
+
+class Ord r => UserOfRegs r a where
+  foldRegsUsed :: DynFlags -> (b -> r -> b) -> b -> a -> b
+
+foldLocalRegsUsed :: UserOfRegs LocalReg a
+                  => DynFlags -> (b -> LocalReg -> b) -> b -> a -> b
+foldLocalRegsUsed = foldRegsUsed
+
+class Ord r => DefinerOfRegs r a where
+  foldRegsDefd :: DynFlags -> (b -> r -> b) -> b -> a -> b
+
+foldLocalRegsDefd :: DefinerOfRegs LocalReg a
+                  => DynFlags -> (b -> LocalReg -> b) -> b -> a -> b
+foldLocalRegsDefd = foldRegsDefd
+
+instance UserOfRegs LocalReg CmmReg where
+  foldRegsUsed _ f z (CmmLocal reg) = f z reg
+  foldRegsUsed _ _ z (CmmGlobal _)  = z
+
+instance DefinerOfRegs LocalReg CmmReg where
+  foldRegsDefd _ f z (CmmLocal reg) = f z reg
+  foldRegsDefd _ _ z (CmmGlobal _)  = z
+
+instance UserOfRegs GlobalReg CmmReg where
+  foldRegsUsed _ _ z (CmmLocal _)    = z
+  foldRegsUsed _ f z (CmmGlobal reg) = f z reg
+
+instance DefinerOfRegs GlobalReg CmmReg where
+  foldRegsDefd _ _ z (CmmLocal _)    = z
+  foldRegsDefd _ f z (CmmGlobal reg) = f z reg
+
+instance Ord r => UserOfRegs r r where
+  foldRegsUsed _ f z r = f z r
+
+instance Ord r => DefinerOfRegs r r where
+  foldRegsDefd _ f z r = f z r
+
+instance (Ord r, UserOfRegs r CmmReg) => UserOfRegs r CmmExpr where
+  -- The (Ord r) in the context is necessary here
+  -- See Note [Recursive superclasses] in TcInstDcls
+  foldRegsUsed dflags f !z e = expr z e
+    where expr z (CmmLit _)          = z
+          expr z (CmmLoad addr _)    = foldRegsUsed dflags f z addr
+          expr z (CmmReg r)          = foldRegsUsed dflags f z r
+          expr z (CmmMachOp _ exprs) = foldRegsUsed dflags f z exprs
+          expr z (CmmRegOff r _)     = foldRegsUsed dflags f z r
+          expr z (CmmStackSlot _ _)  = z
+
+instance UserOfRegs r a => UserOfRegs r [a] where
+  foldRegsUsed dflags f set as = foldl' (foldRegsUsed dflags f) set as
+  {-# INLINABLE foldRegsUsed #-}
+
+instance DefinerOfRegs r a => DefinerOfRegs r [a] where
+  foldRegsDefd dflags f set as = foldl' (foldRegsDefd dflags f) set as
+  {-# INLINABLE foldRegsDefd #-}
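+
+-- A sketch of the size-based change test that motivates using Data.Set
+-- here ('joinRegSets' is a hypothetical helper, not part of this
+-- module): union two fact sets, then use the O(1) Set.size to decide
+-- whether the fact actually grew.
+--
+--   joinRegSets :: Ord r => RegSet r -> RegSet r -> Maybe (RegSet r)
+--   joinRegSets old new =
+--     let u = old `plusRegSet` new
+--     in if sizeRegSet u == sizeRegSet old then Nothing else Just u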
+
+-----------------------------------------------------------------------------
+-- Global STG registers
+-----------------------------------------------------------------------------
+
+data VGcPtr = VGcPtr | VNonGcPtr deriving( Eq, Show )
+
+{-
+Note [Overlapping global registers]
+
+The backend might not faithfully implement the abstraction of the STG
+machine with independent registers for different values of type
+GlobalReg. Specifically, certain pairs of registers (r1, r2) may
+overlap in the sense that a store to r1 invalidates the value in r2,
+and vice versa.
+
+Currently this occurs only on the x86_64 architecture where FloatReg n
+and DoubleReg n are assigned the same microarchitectural register, in
+order to allow functions to receive more Float# or Double# arguments
+in registers (as opposed to on the stack).
+
+There are no specific rules about which registers might overlap with
+which other registers, but presumably it's safe to assume that nothing
+will overlap with special registers like Sp or BaseReg.
+
+Use GHC.Cmm.Utils.regsOverlap to determine whether two GlobalRegs overlap
+on a particular platform. The instance Eq GlobalReg is syntactic
+equality of STG registers and does not take overlap into
+account. However it is still used in UserOfRegs/DefinerOfRegs and
+there are likely still bugs there, beware!
+-}
+
+data GlobalReg
+  -- Argument and return registers
+  = VanillaReg                  -- pointers, unboxed ints and chars
+        {-# UNPACK #-} !Int     -- its number
+        VGcPtr
+
+  | FloatReg                    -- single-precision floating-point registers
+        {-# UNPACK #-} !Int     -- its number
+
+  | DoubleReg                   -- double-precision floating-point registers
+        {-# UNPACK #-} !Int     -- its number
+
+  | LongReg                     -- long int registers (64-bit, really)
+        {-# UNPACK #-} !Int     -- its number
+
+  | XmmReg                      -- 128-bit SIMD vector register
+        {-# UNPACK #-} !Int     -- its number
+
+  | YmmReg                      -- 256-bit SIMD vector register
+        {-# UNPACK #-} !Int     -- its number
+
+  | ZmmReg                      -- 512-bit SIMD vector register
+        {-# UNPACK #-} !Int     -- its number
+
+  -- STG registers
+  | Sp                  -- Stack ptr; points to last occupied stack location.
+  | SpLim               -- Stack limit
+  | Hp                  -- Heap ptr; points to last occupied heap location.
+  | HpLim               -- Heap limit register
+  | CCCS                -- Current cost-centre stack
+  | CurrentTSO          -- pointer to current thread's TSO
+  | CurrentNursery      -- pointer to allocation area
+  | HpAlloc             -- allocation count for heap check failure
+
+  -- We keep the address of some commonly-called
+  -- functions in the register table, to keep code
+  -- size down:
+  | EagerBlackholeInfo  -- stg_EAGER_BLACKHOLE_info
+  | GCEnter1            -- stg_gc_enter_1
+  | GCFun               -- stg_gc_fun
+
+  -- Base offset for the register table, used for accessing registers
+  -- which do not have real registers assigned to them.  This register
+  -- will only appear after we have expanded GlobalReg into memory accesses
+  -- (where necessary) in the native code generator.
+  | BaseReg
+
+  -- The register used by the platform for the C stack pointer. This is
+  -- a break in the STG abstraction used exclusively to setup stack unwinding
+  -- information.
+  | MachSp
+
+  -- This is a dummy register used to indicate to the stack unwinder where
+  -- a routine would return to.
+  | UnwindReturnReg
+
+  -- Base Register for PIC (position-independent code) calculations
+  -- Only used inside the native code generator. Its exact meaning differs
+  -- from platform to platform (see module PositionIndependentCode).
+  | PicBaseReg
+
+  deriving( Show )
+
+instance Eq GlobalReg where
+   VanillaReg i _ == VanillaReg j _ = i==j -- Ignore type when seeking clashes
+   FloatReg i  == FloatReg  j = i==j
+   DoubleReg i == DoubleReg j = i==j
+   LongReg i   == LongReg   j = i==j
+   -- NOTE: XmmReg i, YmmReg i and ZmmReg i alias the same underlying
+   -- register: a store through YmmReg i is observable when reading
+   -- XmmReg i, and similarly for ZmmReg i.
+   XmmReg i == XmmReg j = i==j
+   YmmReg i == YmmReg j = i==j
+   ZmmReg i == ZmmReg j = i==j
+   Sp == Sp = True
+   SpLim == SpLim = True
+   Hp == Hp = True
+   HpLim == HpLim = True
+   CCCS == CCCS = True
+   CurrentTSO == CurrentTSO = True
+   CurrentNursery == CurrentNursery = True
+   HpAlloc == HpAlloc = True
+   EagerBlackholeInfo == EagerBlackholeInfo = True
+   GCEnter1 == GCEnter1 = True
+   GCFun == GCFun = True
+   BaseReg == BaseReg = True
+   MachSp == MachSp = True
+   UnwindReturnReg == UnwindReturnReg = True
+   PicBaseReg == PicBaseReg = True
+   _r1 == _r2 = False
+
+instance Ord GlobalReg where
+   compare (VanillaReg i _) (VanillaReg j _) = compare i j
+     -- Ignore type when seeking clashes
+   compare (FloatReg i)  (FloatReg  j) = compare i j
+   compare (DoubleReg i) (DoubleReg j) = compare i j
+   compare (LongReg i)   (LongReg   j) = compare i j
+   compare (XmmReg i)    (XmmReg    j) = compare i j
+   compare (YmmReg i)    (YmmReg    j) = compare i j
+   compare (ZmmReg i)    (ZmmReg    j) = compare i j
+   compare Sp Sp = EQ
+   compare SpLim SpLim = EQ
+   compare Hp Hp = EQ
+   compare HpLim HpLim = EQ
+   compare CCCS CCCS = EQ
+   compare CurrentTSO CurrentTSO = EQ
+   compare CurrentNursery CurrentNursery = EQ
+   compare HpAlloc HpAlloc = EQ
+   compare EagerBlackholeInfo EagerBlackholeInfo = EQ
+   compare GCEnter1 GCEnter1 = EQ
+   compare GCFun GCFun = EQ
+   compare BaseReg BaseReg = EQ
+   compare MachSp MachSp = EQ
+   compare UnwindReturnReg UnwindReturnReg = EQ
+   compare PicBaseReg PicBaseReg = EQ
+   compare (VanillaReg _ _) _ = LT
+   compare _ (VanillaReg _ _) = GT
+   compare (FloatReg _) _ = LT
+   compare _ (FloatReg _) = GT
+   compare (DoubleReg _) _ = LT
+   compare _ (DoubleReg _) = GT
+   compare (LongReg _) _ = LT
+   compare _ (LongReg _) = GT
+   compare (XmmReg _) _ = LT
+   compare _ (XmmReg _) = GT
+   compare (YmmReg _) _ = LT
+   compare _ (YmmReg _) = GT
+   compare (ZmmReg _) _ = LT
+   compare _ (ZmmReg _) = GT
+   compare Sp _ = LT
+   compare _ Sp = GT
+   compare SpLim _ = LT
+   compare _ SpLim = GT
+   compare Hp _ = LT
+   compare _ Hp = GT
+   compare HpLim _ = LT
+   compare _ HpLim = GT
+   compare CCCS _ = LT
+   compare _ CCCS = GT
+   compare CurrentTSO _ = LT
+   compare _ CurrentTSO = GT
+   compare CurrentNursery _ = LT
+   compare _ CurrentNursery = GT
+   compare HpAlloc _ = LT
+   compare _ HpAlloc = GT
+   compare GCEnter1 _ = LT
+   compare _ GCEnter1 = GT
+   compare GCFun _ = LT
+   compare _ GCFun = GT
+   compare BaseReg _ = LT
+   compare _ BaseReg = GT
+   compare MachSp _ = LT
+   compare _ MachSp = GT
+   compare UnwindReturnReg _ = LT
+   compare _ UnwindReturnReg = GT
+   compare EagerBlackholeInfo _ = LT
+   compare _ EagerBlackholeInfo = GT
+
+-- convenient aliases
+baseReg, spReg, hpReg, spLimReg, hpLimReg, nodeReg,
+  currentTSOReg, currentNurseryReg, hpAllocReg, cccsReg  :: CmmReg
+baseReg = CmmGlobal BaseReg
+spReg = CmmGlobal Sp
+hpReg = CmmGlobal Hp
+hpLimReg = CmmGlobal HpLim
+spLimReg = CmmGlobal SpLim
+nodeReg = CmmGlobal node
+currentTSOReg = CmmGlobal CurrentTSO
+currentNurseryReg = CmmGlobal CurrentNursery
+hpAllocReg = CmmGlobal HpAlloc
+cccsReg = CmmGlobal CCCS
+
+node :: GlobalReg
+node = VanillaReg 1 VGcPtr
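+
+-- An illustration of the purely syntactic Eq instance above:
+--
+--   FloatReg 1 == DoubleReg 1   ==>  False
+--
+-- even on platforms where the two share a machine register; use
+-- GHC.Cmm.Utils.regsOverlap when aliasing matters (see
+-- Note [Overlapping global registers] above).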
+
+globalRegType :: DynFlags -> GlobalReg -> CmmType
+globalRegType dflags (VanillaReg _ VGcPtr)    = gcWord dflags
+globalRegType dflags (VanillaReg _ VNonGcPtr) = bWord dflags
+globalRegType _      (FloatReg _)             = cmmFloat W32
+globalRegType _      (DoubleReg _)            = cmmFloat W64
+globalRegType _      (LongReg _)              = cmmBits W64
+-- TODO: improve the internal model of SIMD/vectorized registers
+-- the right design should improve handling of float and double code too.
+-- see remarks in Note [SIMD Design for the future] in GHC.StgToCmm.Prim
+globalRegType _      (XmmReg _) = cmmVec 4  (cmmBits W32)
+globalRegType _      (YmmReg _) = cmmVec 8  (cmmBits W32)
+globalRegType _      (ZmmReg _) = cmmVec 16 (cmmBits W32)
+
+globalRegType dflags Hp = gcWord dflags -- The initialiser for all
+                                        -- dynamically allocated closures
+globalRegType dflags _  = bWord dflags
+
+isArgReg :: GlobalReg -> Bool
+isArgReg (VanillaReg {}) = True
+isArgReg (FloatReg {})   = True
+isArgReg (DoubleReg {})  = True
+isArgReg (LongReg {})    = True
+isArgReg (XmmReg {})     = True
+isArgReg (YmmReg {})     = True
+isArgReg (ZmmReg {})     = True
+isArgReg _               = False
diff --git a/compiler/GHC/Cmm/Graph.hs b/compiler/GHC/Cmm/Graph.hs
new file mode 100644
index 0000000000..8d19e7fdb9
--- /dev/null
+++ b/compiler/GHC/Cmm/Graph.hs
@@ -0,0 +1,484 @@
+{-# LANGUAGE BangPatterns, GADTs #-}
+
+module GHC.Cmm.Graph
+  ( CmmAGraph, CmmAGraphScoped, CgStmt(..)
+  , (<*>), catAGraphs
+  , mkLabel, mkMiddle, mkLast, outOfLine
+  , lgraphOfAGraph, labelAGraph
+
+  , stackStubExpr
+  , mkNop, mkAssign, mkStore
+  , mkUnsafeCall, mkFinalCall, mkCallReturnsTo
+  , mkJumpReturnsTo
+  , mkJump, mkJumpExtra
+  , mkRawJump
+  , mkCbranch, mkSwitch
+  , mkReturn, mkComment, mkCallEntry, mkBranch
+  , mkUnwind
+  , copyInOflow, copyOutOflow
+  , noExtraStack
+  , toCall, Transfer(..)
+  )
+where
+
+import GhcPrelude hiding ( (<*>) ) -- avoid importing (<*>)
+
+import GHC.Cmm.BlockId
+import GHC.Cmm
+import GHC.Cmm.CallConv
+import GHC.Cmm.Switch (SwitchTargets)
+
+import GHC.Cmm.Dataflow.Block
+import GHC.Cmm.Dataflow.Graph
+import GHC.Cmm.Dataflow.Label
+import DynFlags
+import FastString
+import ForeignCall
+import OrdList
+import GHC.Runtime.Layout (ByteOff)
+import UniqSupply
+import Util
+import Panic
+
+
+-----------------------------------------------------------------------------
+-- Building Graphs
+
+
+-- | CmmAGraph is a chunk of code consisting of:
+--
+--   * ordinary statements (assignments, stores etc.)
+--   * jumps
+--   * labels
+--   * out-of-line labelled blocks
+--
+-- The semantics is that control falls through labels and out-of-line
+-- blocks.  Everything after a jump up to the next label is by
+-- definition unreachable code, and will be discarded.
+--
+-- Two CmmAGraphs can be stuck together with <*>, with the meaning that
+-- control flows from the first to the second.
+--
+-- A 'CmmAGraph' can be turned into a 'CmmGraph' (closed at both ends)
+-- by providing a label for the entry point and a tick scope; see
+-- 'labelAGraph'.
+type CmmAGraph = OrdList CgStmt
+-- | Unlabeled graph with tick scope
+type CmmAGraphScoped = (CmmAGraph, CmmTickScope)
+
+data CgStmt
+  = CgLabel BlockId CmmTickScope
+  | CgStmt  (CmmNode O O)
+  | CgLast  (CmmNode O C)
+  | CgFork  BlockId CmmAGraph CmmTickScope
+
+flattenCmmAGraph :: BlockId -> CmmAGraphScoped -> CmmGraph
+flattenCmmAGraph id (stmts_t, tscope) =
+    CmmGraph { g_entry = id,
+               g_graph = GMany NothingO body NothingO }
+  where
+  body = foldr addBlock emptyBody $ flatten id stmts_t tscope []
+
+  --
+  -- flatten: given an entry label and a CmmAGraph, make a list of blocks.
+  --
+  -- NB. avoid the quadratic-append trap by passing in the tail of the
+  -- list.  This is important for Very Long Functions (e.g. in T783).
+  --
+  flatten :: Label -> CmmAGraph -> CmmTickScope -> [Block CmmNode C C]
+          -> [Block CmmNode C C]
+  flatten id g tscope blocks
+      = flatten1 (fromOL g) block' blocks
+      where !block' = blockJoinHead (CmmEntry id tscope) emptyBlock
+  --
+  -- flatten0: we are outside a block at this point: any code before
+  -- the first label is unreachable, so just drop it.
+  --
+  flatten0 :: [CgStmt] -> [Block CmmNode C C] -> [Block CmmNode C C]
+  flatten0 [] blocks = blocks
+
+  flatten0 (CgLabel id tscope : stmts) blocks
+    = flatten1 stmts block blocks
+    where !block = blockJoinHead (CmmEntry id tscope) emptyBlock
+
+  flatten0 (CgFork fork_id stmts_t tscope : rest) blocks
+    = flatten fork_id stmts_t tscope $ flatten0 rest blocks
+
+  flatten0 (CgLast _ : stmts) blocks = flatten0 stmts blocks
+  flatten0 (CgStmt _ : stmts) blocks = flatten0 stmts blocks
+
+  --
+  -- flatten1: we have a partial block, collect statements until the
+  -- next last node to make a block, then call flatten0 to get the rest
+  -- of the blocks
+  --
+  flatten1 :: [CgStmt] -> Block CmmNode C O
+           -> [Block CmmNode C C] -> [Block CmmNode C C]
+
+  -- The current block falls through to the end of a function or fork:
+  -- this code should not be reachable, but it may be referenced by
+  -- other code that is not reachable.  We'll remove it later with
+  -- dead-code analysis, but for now we have to keep the graph
+  -- well-formed, so we terminate the block with a branch to the
+  -- beginning of the current block.
+  flatten1 [] block blocks
+    = blockJoinTail block (CmmBranch (entryLabel block)) : blocks
+
+  flatten1 (CgLast stmt : stmts) block blocks
+    = block' : flatten0 stmts blocks
+    where !block' = blockJoinTail block stmt
+
+  flatten1 (CgStmt stmt : stmts) block blocks
+    = flatten1 stmts block' blocks
+    where !block' = blockSnoc block stmt
+
+  flatten1 (CgFork fork_id stmts_t tscope : rest) block blocks
+    = flatten fork_id stmts_t tscope $ flatten1 rest block blocks
+
+  -- a label here means that we should start a new block, and the
+  -- current block should fall through to the new block.
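+  -- (concretely: close the current block with an explicit CmmBranch to
+  -- the label, then open a fresh block at that label, as the code
+  -- below does)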
+  flatten1 (CgLabel id tscp : stmts) block blocks
+    = blockJoinTail block (CmmBranch id) :
+      flatten1 stmts (blockJoinHead (CmmEntry id tscp) emptyBlock) blocks
+
+
+
+---------- AGraph manipulation
+
+(<*>)          :: CmmAGraph -> CmmAGraph -> CmmAGraph
+(<*>)           = appOL
+
+catAGraphs     :: [CmmAGraph] -> CmmAGraph
+catAGraphs      = concatOL
+
+-- | creates a sequence "goto id; id:" as an AGraph
+mkLabel        :: BlockId -> CmmTickScope -> CmmAGraph
+mkLabel bid scp = unitOL (CgLabel bid scp)
+
+-- | creates an open AGraph from a given node
+mkMiddle        :: CmmNode O O -> CmmAGraph
+mkMiddle middle = unitOL (CgStmt middle)
+
+-- | creates a closed AGraph from a given node
+mkLast         :: CmmNode O C -> CmmAGraph
+mkLast last     = unitOL (CgLast last)
+
+-- | A labelled code block; should end in a last node
+outOfLine      :: BlockId -> CmmAGraphScoped -> CmmAGraph
+outOfLine l (c,s) = unitOL (CgFork l c s)
+
+-- | allocate a fresh label for the entry point
+lgraphOfAGraph :: CmmAGraphScoped -> UniqSM CmmGraph
+lgraphOfAGraph g = do
+  u <- getUniqueM
+  return (labelAGraph (mkBlockId u) g)
+
+-- | use the given BlockId as the label of the entry point
+labelAGraph    :: BlockId -> CmmAGraphScoped -> CmmGraph
+labelAGraph lbl ag = flattenCmmAGraph lbl ag
+
+---------- No-ops
+mkNop        :: CmmAGraph
+mkNop         = nilOL
+
+mkComment    :: FastString -> CmmAGraph
+mkComment fs
+  -- SDM: generating all those comments takes time, this saved about 4% for me
+  | debugIsOn = mkMiddle $ CmmComment fs
+  | otherwise = nilOL
+
+---------- Assignment and store
+mkAssign     :: CmmReg  -> CmmExpr -> CmmAGraph
+mkAssign l (CmmReg r) | l == r  = mkNop
+mkAssign l r  = mkMiddle $ CmmAssign l r
+
+mkStore      :: CmmExpr -> CmmExpr -> CmmAGraph
+mkStore  l r  = mkMiddle $ CmmStore  l r
+
+---------- Control transfer
+mkJump          :: DynFlags -> Convention -> CmmExpr
+                -> [CmmExpr]
+                -> UpdFrameOffset
+                -> CmmAGraph
+mkJump dflags conv e actuals updfr_off =
+  lastWithArgs dflags Jump Old conv actuals updfr_off $
+    toCall e Nothing updfr_off 0
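+
+-- A hedged sketch of how these combinators compose ('r', 'e' and
+-- 'exit' stand for values a code generator would have in scope):
+--
+--   example :: CmmAGraph
+--   example =
+--     mkComment (fsLit "r := e, then jump") <*>
+--     mkAssign (CmmLocal r) e               <*>
+--     mkBranch exit
+--
+-- Everything is fall-through until the mkBranch, which closes the
+-- block, exactly as described for flattenCmmAGraph above.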
+
+-- | A jump where the caller says what the live GlobalRegs are.  Used
+-- for low-level hand-written Cmm.
+mkRawJump       :: DynFlags -> CmmExpr -> UpdFrameOffset -> [GlobalReg]
+                -> CmmAGraph
+mkRawJump dflags e updfr_off vols =
+  lastWithArgs dflags Jump Old NativeNodeCall [] updfr_off $
+    \arg_space _  -> toCall e Nothing updfr_off 0 arg_space vols
+
+
+mkJumpExtra :: DynFlags -> Convention -> CmmExpr -> [CmmExpr]
+                -> UpdFrameOffset -> [CmmExpr]
+                -> CmmAGraph
+mkJumpExtra dflags conv e actuals updfr_off extra_stack =
+  lastWithArgsAndExtraStack dflags Jump Old conv actuals updfr_off extra_stack $
+    toCall e Nothing updfr_off 0
+
+mkCbranch       :: CmmExpr -> BlockId -> BlockId -> Maybe Bool -> CmmAGraph
+mkCbranch pred ifso ifnot likely =
+  mkLast (CmmCondBranch pred ifso ifnot likely)
+
+mkSwitch        :: CmmExpr -> SwitchTargets -> CmmAGraph
+mkSwitch e tbl   = mkLast $ CmmSwitch e tbl
+
+mkReturn        :: DynFlags -> CmmExpr -> [CmmExpr] -> UpdFrameOffset
+                -> CmmAGraph
+mkReturn dflags e actuals updfr_off =
+  lastWithArgs dflags Ret Old NativeReturn actuals updfr_off $
+    toCall e Nothing updfr_off 0
+
+mkBranch        :: BlockId -> CmmAGraph
+mkBranch bid     = mkLast (CmmBranch bid)
+
+mkFinalCall   :: DynFlags
+              -> CmmExpr -> CCallConv -> [CmmExpr] -> UpdFrameOffset
+              -> CmmAGraph
+mkFinalCall dflags f _ actuals updfr_off =
+  lastWithArgs dflags Call Old NativeDirectCall actuals updfr_off $
+    toCall f Nothing updfr_off 0
+
+mkCallReturnsTo :: DynFlags -> CmmExpr -> Convention -> [CmmExpr]
+                -> BlockId
+                -> ByteOff
+                -> UpdFrameOffset
+                -> [CmmExpr]
+                -> CmmAGraph
+mkCallReturnsTo dflags f callConv actuals ret_lbl ret_off updfr_off extra_stack =
+  lastWithArgsAndExtraStack dflags Call (Young ret_lbl) callConv actuals
+     updfr_off extra_stack $
+       toCall f (Just ret_lbl) updfr_off ret_off
+
+-- Like mkCallReturnsTo, but does not push the return address (it is assumed to be
+-- already on the stack).
+mkJumpReturnsTo :: DynFlags -> CmmExpr -> Convention -> [CmmExpr]
+                -> BlockId
+                -> ByteOff
+                -> UpdFrameOffset
+                -> CmmAGraph
+mkJumpReturnsTo dflags f callConv actuals ret_lbl ret_off updfr_off =
+  lastWithArgs dflags JumpRet (Young ret_lbl) callConv actuals updfr_off $
+    toCall f (Just ret_lbl) updfr_off ret_off
+
+mkUnsafeCall  :: ForeignTarget -> [CmmFormal] -> [CmmActual] -> CmmAGraph
+mkUnsafeCall t fs as = mkMiddle $ CmmUnsafeForeignCall t fs as
+
+-- | Construct a 'CmmUnwind' node for the given register and unwinding
+-- expression.
+mkUnwind :: GlobalReg -> CmmExpr -> CmmAGraph
+mkUnwind r e = mkMiddle $ CmmUnwind [(r, Just e)]
+
+--------------------------------------------------------------------------
+
+
+
+
+-- Why are we inserting extra blocks that simply branch to the successors?
+-- Because in addition to the branch instruction, @mkBranch@ will insert
+-- a necessary adjustment to the stack pointer.
+
+
+-- For debugging purposes, we can stub out dead stack slots:
+stackStubExpr :: Width -> CmmExpr
+stackStubExpr w = CmmLit (CmmInt 0 w)
+
+-- When we copy in parameters, we usually want to put overflow
+-- parameters on the stack, but sometimes we want to pass the
+-- variables in their spill slots.  Therefore, for copying arguments
+-- and results, we provide different functions to pass the arguments
+-- in an overflow area and to pass them in spill slots.
+copyInOflow :: DynFlags -> Convention -> Area + -> [CmmFormal] + -> [CmmFormal] + -> (Int, [GlobalReg], CmmAGraph) + +copyInOflow dflags conv area formals extra_stk + = (offset, gregs, catAGraphs $ map mkMiddle nodes) + where (offset, gregs, nodes) = copyIn dflags conv area formals extra_stk + +-- Return the number of bytes used for copying arguments, as well as the +-- instructions to copy the arguments. +copyIn :: DynFlags -> Convention -> Area + -> [CmmFormal] + -> [CmmFormal] + -> (ByteOff, [GlobalReg], [CmmNode O O]) +copyIn dflags conv area formals extra_stk + = (stk_size, [r | (_, RegisterParam r) <- args], map ci (stk_args ++ args)) + where + -- See Note [Width of parameters] + ci (reg, RegisterParam r@(VanillaReg {})) = + let local = CmmLocal reg + global = CmmReg (CmmGlobal r) + width = cmmRegWidth dflags local + expr + | width == wordWidth dflags = global + | width < wordWidth dflags = + CmmMachOp (MO_XX_Conv (wordWidth dflags) width) [global] + | otherwise = panic "Parameter width greater than word width" + + in CmmAssign local expr + + -- Non VanillaRegs + ci (reg, RegisterParam r) = + CmmAssign (CmmLocal reg) (CmmReg (CmmGlobal r)) + + ci (reg, StackParam off) + | isBitsType $ localRegType reg + , typeWidth (localRegType reg) < wordWidth dflags = + let + stack_slot = (CmmLoad (CmmStackSlot area off) (cmmBits $ wordWidth dflags)) + local = CmmLocal reg + width = cmmRegWidth dflags local + expr = CmmMachOp (MO_XX_Conv (wordWidth dflags) width) [stack_slot] + in CmmAssign local expr + + | otherwise = + CmmAssign (CmmLocal reg) (CmmLoad (CmmStackSlot area off) ty) + where ty = localRegType reg + + init_offset = widthInBytes (wordWidth dflags) -- infotable + + (stk_off, stk_args) = assignStack dflags init_offset localRegType extra_stk + + (stk_size, args) = assignArgumentsPos dflags stk_off conv + localRegType formals + +-- Factoring out the common parts of the copyout functions yielded something +-- more complicated: + +data Transfer = Call | JumpRet | Jump | Ret deriving Eq + +copyOutOflow :: DynFlags -> Convention -> Transfer -> Area -> [CmmExpr] + -> UpdFrameOffset + -> [CmmExpr] -- extra stack args + -> (Int, [GlobalReg], CmmAGraph) + +-- Generate code to move the actual parameters into the locations +-- required by the calling convention. This includes a store for the +-- return address. +-- +-- The argument layout function ignores the pointer to the info table, +-- so we slot that in here. When copying-out to a young area, we set +-- the info table for return and adjust the offsets of the other +-- parameters. If this is a call instruction, we adjust the offsets +-- of the other parameters. 
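+-- The result triple is (number of stack bytes used, the GlobalRegs
+-- written to, the copy-out code itself); compare copyInOflow above.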
+copyOutOflow dflags conv transfer area actuals updfr_off extra_stack_stuff
+  = (stk_size, regs, graph)
+  where
+    (regs, graph) = foldr co ([], mkNop) (setRA ++ args ++ stack_params)
+
+    -- See Note [Width of parameters]
+    co (v, RegisterParam r@(VanillaReg {})) (rs, ms) =
+        let width = cmmExprWidth dflags v
+            value
+                | width == wordWidth dflags = v
+                | width < wordWidth dflags =
+                    CmmMachOp (MO_XX_Conv width (wordWidth dflags)) [v]
+                | otherwise = panic "Parameter width greater than word width"
+
+        in (r:rs, mkAssign (CmmGlobal r) value <*> ms)
+
+    -- Non VanillaRegs
+    co (v, RegisterParam r) (rs, ms) =
+        (r:rs, mkAssign (CmmGlobal r) v <*> ms)
+
+    -- See Note [Width of parameters]
+    co (v, StackParam off)  (rs, ms)
+      = (rs, mkStore (CmmStackSlot area off) (value v) <*> ms)
+
+    width v = cmmExprWidth dflags v
+    value v
+      | isBitsType $ cmmExprType dflags v
+      , width v < wordWidth dflags =
+        CmmMachOp (MO_XX_Conv (width v) (wordWidth dflags)) [v]
+      | otherwise = v
+
+    (setRA, init_offset) =
+      case area of
+            Young id ->  -- Generate a store instruction for
+                         -- the return address if making a call
+                  case transfer of
+                     Call ->
+                       ([(CmmLit (CmmBlock id), StackParam init_offset)],
+                       widthInBytes (wordWidth dflags))
+                     JumpRet ->
+                       ([],
+                       widthInBytes (wordWidth dflags))
+                     _other ->
+                       ([], 0)
+            Old -> ([], updfr_off)
+
+    (extra_stack_off, stack_params) =
+       assignStack dflags init_offset (cmmExprType dflags) extra_stack_stuff
+
+    args :: [(CmmExpr, ParamLocation)]   -- The argument and where to put it
+    (stk_size, args) = assignArgumentsPos dflags extra_stack_off conv
+                                          (cmmExprType dflags) actuals
+
+
+-- Note [Width of parameters]
+--
+-- Consider passing a small (< word width) primitive like Int8# to a function.
+-- It's actually non-trivial to do this without extending/narrowing:
+-- * Global registers are considered to have native word width (i.e., 64-bits on
+--   x86-64), so CmmLint would complain if we assigned an 8-bit parameter to a
+--   global register.
+-- * The same problem exists with LLVM IR.
+-- * Lowering gets harder since on x86-32 not every register exposes its lower
+--   8 bits (e.g., for %eax we can use %al, but there isn't a corresponding
+--   8-bit register for %edi). So we would either need to extend/narrow anyway,
+--   or complicate the calling convention.
+-- * Passing a small integer in a stack slot, which has native word width,
+--   requires extending to word width when writing to the stack and narrowing
+--   when reading off the stack (see #16258).
+-- So instead, we always extend every parameter smaller than native word width
+-- in copyOutOflow and then truncate it back to the expected width in copyIn.
+-- Note that we do this in cmm using MO_XX_Conv to avoid requiring
+-- zero-/sign-extending - it's up to a backend to handle this in the most
+-- efficient way (e.g., a simple register move or a smaller size store).
+-- This convention (of ignoring the upper bits) is different from some C ABIs,
+-- e.g. all PowerPC ELF ABIs, that require sign or zero extending parameters.
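+--
+-- As a comment-only sketch, for an Int8# argument the two halves of the
+-- convention pair up like this on a 64-bit target (R2 stands for
+-- whichever register the convention assigns):
+--
+--   copyOutOflow:  R2  := MO_XX_Conv W8 W64 (arg)   -- widen on the way out
+--   copyIn:        arg := MO_XX_Conv W64 W8 (R2)    -- narrow on the way in
+--
+-- and since MO_XX_Conv leaves the upper bits unspecified, a backend may
+-- compile both sides to plain moves.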
+-- +-- There was some discussion about this on this PR: +-- https://github.com/ghc-proposals/ghc-proposals/pull/74 + + +mkCallEntry :: DynFlags -> Convention -> [CmmFormal] -> [CmmFormal] + -> (Int, [GlobalReg], CmmAGraph) +mkCallEntry dflags conv formals extra_stk + = copyInOflow dflags conv Old formals extra_stk + +lastWithArgs :: DynFlags -> Transfer -> Area -> Convention -> [CmmExpr] + -> UpdFrameOffset + -> (ByteOff -> [GlobalReg] -> CmmAGraph) + -> CmmAGraph +lastWithArgs dflags transfer area conv actuals updfr_off last = + lastWithArgsAndExtraStack dflags transfer area conv actuals + updfr_off noExtraStack last + +lastWithArgsAndExtraStack :: DynFlags + -> Transfer -> Area -> Convention -> [CmmExpr] + -> UpdFrameOffset -> [CmmExpr] + -> (ByteOff -> [GlobalReg] -> CmmAGraph) + -> CmmAGraph +lastWithArgsAndExtraStack dflags transfer area conv actuals updfr_off + extra_stack last = + copies <*> last outArgs regs + where + (outArgs, regs, copies) = copyOutOflow dflags conv transfer area actuals + updfr_off extra_stack + + +noExtraStack :: [CmmExpr] +noExtraStack = [] + +toCall :: CmmExpr -> Maybe BlockId -> UpdFrameOffset -> ByteOff + -> ByteOff -> [GlobalReg] + -> CmmAGraph +toCall e cont updfr_off res_space arg_space regs = + mkLast $ CmmCall e cont regs arg_space res_space updfr_off diff --git a/compiler/GHC/Cmm/Info.hs b/compiler/GHC/Cmm/Info.hs new file mode 100644 index 0000000000..a10db2b292 --- /dev/null +++ b/compiler/GHC/Cmm/Info.hs @@ -0,0 +1,593 @@ +{-# LANGUAGE CPP #-} +module GHC.Cmm.Info ( + mkEmptyContInfoTable, + cmmToRawCmm, + mkInfoTable, + srtEscape, + + -- info table accessors + closureInfoPtr, + entryCode, + getConstrTag, + cmmGetClosureType, + infoTable, + infoTableConstrTag, + infoTableSrtBitmap, + infoTableClosureType, + infoTablePtrs, + infoTableNonPtrs, + funInfoTable, + funInfoArity, + + -- info table sizes and offsets + stdInfoTableSizeW, + fixedInfoTableSizeW, + profInfoTableSizeW, + maxStdInfoTableSizeW, + maxRetInfoTableSizeW, + stdInfoTableSizeB, + conInfoTableSizeB, + stdSrtBitmapOffset, + stdClosureTypeOffset, + stdPtrsOffset, stdNonPtrsOffset, +) where + +#include "HsVersions.h" + +import GhcPrelude + +import GHC.Cmm +import GHC.Cmm.Utils +import GHC.Cmm.CLabel +import GHC.Runtime.Layout +import GHC.Data.Bitmap +import Stream (Stream) +import qualified Stream +import GHC.Cmm.Dataflow.Collections + +import GHC.Platform +import Maybes +import DynFlags +import ErrUtils (withTimingSilent) +import Panic +import UniqSupply +import MonadUtils +import Util +import Outputable + +import Data.ByteString (ByteString) +import Data.Bits + +-- When we split at proc points, we need an empty info table. +mkEmptyContInfoTable :: CLabel -> CmmInfoTable +mkEmptyContInfoTable info_lbl + = CmmInfoTable { cit_lbl = info_lbl + , cit_rep = mkStackRep [] + , cit_prof = NoProfilingInfo + , cit_srt = Nothing + , cit_clo = Nothing } + +cmmToRawCmm :: DynFlags -> Stream IO CmmGroup a + -> IO (Stream IO RawCmmGroup a) +cmmToRawCmm dflags cmms + = do { uniqs <- mkSplitUniqSupply 'i' + ; let do_one :: UniqSupply -> [CmmDecl] -> IO (UniqSupply, [RawCmmDecl]) + do_one uniqs cmm = + -- NB. strictness fixes a space leak. DO NOT REMOVE. 
+ withTimingSilent dflags (text "Cmm -> Raw Cmm") + forceRes $ + case initUs uniqs $ concatMapM (mkInfoTable dflags) cmm of + (b,uniqs') -> return (uniqs',b) + ; return (snd <$> Stream.mapAccumL_ do_one uniqs cmms) + } + + where forceRes (uniqs, rawcmms) = + uniqs `seq` foldr (\decl r -> decl `seq` r) () rawcmms + +-- Make a concrete info table, represented as a list of CmmStatic +-- (it can't be simply a list of Word, because the SRT field is +-- represented by a label+offset expression). +-- +-- With tablesNextToCode, the layout is +-- <reversed variable part> +-- <normal forward StgInfoTable, but without +-- an entry point at the front> +-- <code> +-- +-- Without tablesNextToCode, the layout of an info table is +-- <entry label> +-- <normal forward rest of StgInfoTable> +-- <forward variable part> +-- +-- See includes/rts/storage/InfoTables.h +-- +-- For return-points these are as follows +-- +-- Tables next to code: +-- +-- <srt slot> +-- <standard info table> +-- ret-addr --> <entry code (if any)> +-- +-- Not tables-next-to-code: +-- +-- ret-addr --> <ptr to entry code> +-- <standard info table> +-- <srt slot> +-- +-- * The SRT slot is only there if there is SRT info to record + +mkInfoTable :: DynFlags -> CmmDecl -> UniqSM [RawCmmDecl] +mkInfoTable _ (CmmData sec dat) + = return [CmmData sec dat] + +mkInfoTable dflags proc@(CmmProc infos entry_lbl live blocks) + -- + -- in the non-tables-next-to-code case, procs can have at most a + -- single info table associated with the entry label of the proc. + -- + | not (tablesNextToCode dflags) + = case topInfoTable proc of -- must be at most one + -- no info table + Nothing -> + return [CmmProc mapEmpty entry_lbl live blocks] + + Just info@CmmInfoTable { cit_lbl = info_lbl } -> do + (top_decls, (std_info, extra_bits)) <- + mkInfoTableContents dflags info Nothing + let + rel_std_info = map (makeRelativeRefTo dflags info_lbl) std_info + rel_extra_bits = map (makeRelativeRefTo dflags info_lbl) extra_bits + -- + -- Separately emit info table (with the function entry + -- point as first entry) and the entry code + -- + return (top_decls ++ + [CmmProc mapEmpty entry_lbl live blocks, + mkRODataLits info_lbl + (CmmLabel entry_lbl : rel_std_info ++ rel_extra_bits)]) + + -- + -- With tables-next-to-code, we can have many info tables, + -- associated with some of the BlockIds of the proc. For each info + -- table we need to turn it into CmmStatics, and collect any new + -- CmmDecls that arise from doing so. + -- + | otherwise + = do + (top_declss, raw_infos) <- + unzip `fmap` mapM do_one_info (mapToList (info_tbls infos)) + return (concat top_declss ++ + [CmmProc (mapFromList raw_infos) entry_lbl live blocks]) + + where + do_one_info (lbl,itbl) = do + (top_decls, (std_info, extra_bits)) <- + mkInfoTableContents dflags itbl Nothing + let + info_lbl = cit_lbl itbl + rel_std_info = map (makeRelativeRefTo dflags info_lbl) std_info + rel_extra_bits = map (makeRelativeRefTo dflags info_lbl) extra_bits + -- + return (top_decls, (lbl, Statics info_lbl $ map CmmStaticLit $ + reverse rel_extra_bits ++ rel_std_info)) + +----------------------------------------------------- +type InfoTableContents = ( [CmmLit] -- The standard part + , [CmmLit] ) -- The "extra bits" +-- These Lits have *not* had mkRelativeTo applied to them + +mkInfoTableContents :: DynFlags + -> CmmInfoTable + -> Maybe Int -- Override default RTS type tag? 
+ -> UniqSM ([RawCmmDecl], -- Auxiliary top decls + InfoTableContents) -- Info tbl + extra bits + +mkInfoTableContents dflags + info@(CmmInfoTable { cit_lbl = info_lbl + , cit_rep = smrep + , cit_prof = prof + , cit_srt = srt }) + mb_rts_tag + | RTSRep rts_tag rep <- smrep + = mkInfoTableContents dflags info{cit_rep = rep} (Just rts_tag) + -- Completely override the rts_tag that mkInfoTableContents would + -- otherwise compute, with the rts_tag stored in the RTSRep + -- (which in turn came from a handwritten .cmm file) + + | StackRep frame <- smrep + = do { (prof_lits, prof_data) <- mkProfLits dflags prof + ; let (srt_label, srt_bitmap) = mkSRTLit dflags info_lbl srt + ; (liveness_lit, liveness_data) <- mkLivenessBits dflags frame + ; let + std_info = mkStdInfoTable dflags prof_lits rts_tag srt_bitmap liveness_lit + rts_tag | Just tag <- mb_rts_tag = tag + | null liveness_data = rET_SMALL -- Fits in extra_bits + | otherwise = rET_BIG -- Does not; extra_bits is + -- a label + ; return (prof_data ++ liveness_data, (std_info, srt_label)) } + + | HeapRep _ ptrs nonptrs closure_type <- smrep + = do { let layout = packIntsCLit dflags ptrs nonptrs + ; (prof_lits, prof_data) <- mkProfLits dflags prof + ; let (srt_label, srt_bitmap) = mkSRTLit dflags info_lbl srt + ; (mb_srt_field, mb_layout, extra_bits, ct_data) + <- mk_pieces closure_type srt_label + ; let std_info = mkStdInfoTable dflags prof_lits + (mb_rts_tag `orElse` rtsClosureType smrep) + (mb_srt_field `orElse` srt_bitmap) + (mb_layout `orElse` layout) + ; return (prof_data ++ ct_data, (std_info, extra_bits)) } + where + mk_pieces :: ClosureTypeInfo -> [CmmLit] + -> UniqSM ( Maybe CmmLit -- Override the SRT field with this + , Maybe CmmLit -- Override the layout field with this + , [CmmLit] -- "Extra bits" for info table + , [RawCmmDecl]) -- Auxiliary data decls + mk_pieces (Constr con_tag con_descr) _no_srt -- A data constructor + = do { (descr_lit, decl) <- newStringLit con_descr + ; return ( Just (CmmInt (fromIntegral con_tag) + (halfWordWidth dflags)) + , Nothing, [descr_lit], [decl]) } + + mk_pieces Thunk srt_label + = return (Nothing, Nothing, srt_label, []) + + mk_pieces (ThunkSelector offset) _no_srt + = return (Just (CmmInt 0 (halfWordWidth dflags)), + Just (mkWordCLit dflags (fromIntegral offset)), [], []) + -- Layout known (one free var); we use the layout field for offset + + mk_pieces (Fun arity (ArgSpec fun_type)) srt_label + = do { let extra_bits = packIntsCLit dflags fun_type arity : srt_label + ; return (Nothing, Nothing, extra_bits, []) } + + mk_pieces (Fun arity (ArgGen arg_bits)) srt_label + = do { (liveness_lit, liveness_data) <- mkLivenessBits dflags arg_bits + ; let fun_type | null liveness_data = aRG_GEN + | otherwise = aRG_GEN_BIG + extra_bits = [ packIntsCLit dflags fun_type arity ] + ++ (if inlineSRT dflags then [] else [ srt_lit ]) + ++ [ liveness_lit, slow_entry ] + ; return (Nothing, Nothing, extra_bits, liveness_data) } + where + slow_entry = CmmLabel (toSlowEntryLbl info_lbl) + srt_lit = case srt_label of + [] -> mkIntCLit dflags 0 + (lit:_rest) -> ASSERT( null _rest ) lit + + mk_pieces other _ = pprPanic "mk_pieces" (ppr other) + +mkInfoTableContents _ _ _ = panic "mkInfoTableContents" -- NonInfoTable dealt with earlier + +packIntsCLit :: DynFlags -> Int -> Int -> CmmLit +packIntsCLit dflags a b = packHalfWordsCLit dflags + (toStgHalfWord dflags (fromIntegral a)) + (toStgHalfWord dflags (fromIntegral b)) + + +mkSRTLit :: DynFlags + -> CLabel + -> Maybe CLabel + -> ([CmmLit], -- srt_label, if any + CmmLit) -- 
srt_bitmap +mkSRTLit dflags info_lbl (Just lbl) + | inlineSRT dflags + = ([], CmmLabelDiffOff lbl info_lbl 0 (halfWordWidth dflags)) +mkSRTLit dflags _ Nothing = ([], CmmInt 0 (halfWordWidth dflags)) +mkSRTLit dflags _ (Just lbl) = ([CmmLabel lbl], CmmInt 1 (halfWordWidth dflags)) + + +-- | Is the SRT offset field inline in the info table on this platform? +-- +-- See the section "Referring to an SRT from the info table" in +-- Note [SRTs] in GHC.Cmm.Info.Build +inlineSRT :: DynFlags -> Bool +inlineSRT dflags = platformArch (targetPlatform dflags) == ArchX86_64 + && tablesNextToCode dflags + +------------------------------------------------------------------------- +-- +-- Lay out the info table and handle relative offsets +-- +------------------------------------------------------------------------- + +-- This function takes +-- * the standard info table portion (StgInfoTable) +-- * the "extra bits" (StgFunInfoExtraRev etc.) +-- * the entry label +-- * the code +-- and lays them out in memory, producing a list of RawCmmDecl + +------------------------------------------------------------------------- +-- +-- Position independent code +-- +------------------------------------------------------------------------- +-- In order to support position independent code, we mustn't put absolute +-- references into read-only space. Info tables in the tablesNextToCode +-- case must be in .text, which is read-only, so we doctor the CmmLits +-- to use relative offsets instead. + +-- Note that this is done even when the -fPIC flag is not specified, +-- as we want to keep binary compatibility between PIC and non-PIC. + +makeRelativeRefTo :: DynFlags -> CLabel -> CmmLit -> CmmLit + +makeRelativeRefTo dflags info_lbl (CmmLabel lbl) + | tablesNextToCode dflags + = CmmLabelDiffOff lbl info_lbl 0 (wordWidth dflags) +makeRelativeRefTo dflags info_lbl (CmmLabelOff lbl off) + | tablesNextToCode dflags + = CmmLabelDiffOff lbl info_lbl off (wordWidth dflags) +makeRelativeRefTo _ _ lit = lit + + +------------------------------------------------------------------------- +-- +-- Build a liveness mask for the stack layout +-- +------------------------------------------------------------------------- + +-- There are four kinds of things on the stack: +-- +-- - pointer variables (bound in the environment) +-- - non-pointer variables (bound in the environment) +-- - free slots (recorded in the stack free list) +-- - non-pointer data slots (recorded in the stack free list) +-- +-- The first two are represented with a 'Just' of a 'LocalReg'. +-- The last two with one or more 'Nothing' constructors. +-- Each 'Nothing' represents one used word. +-- +-- The head of the stack layout is the top of the stack and +-- the least-significant bit. + +mkLivenessBits :: DynFlags -> Liveness -> UniqSM (CmmLit, [RawCmmDecl]) + -- ^ Returns: + -- 1. The bitmap (literal value or label) + -- 2. Large bitmap CmmData if needed + +mkLivenessBits dflags liveness + | n_bits > mAX_SMALL_BITMAP_SIZE dflags -- does not fit in one word + = do { uniq <- getUniqueM + ; let bitmap_lbl = mkBitmapLabel uniq + ; return (CmmLabel bitmap_lbl, + [mkRODataLits bitmap_lbl lits]) } + + | otherwise -- Fits in one word + = return (mkStgWordCLit dflags bitmap_word, []) + where + n_bits = length liveness + + bitmap :: Bitmap + bitmap = mkBitmap dflags liveness + + small_bitmap = case bitmap of + [] -> toStgWord dflags 0 + [b] -> b + _ -> panic "mkLiveness" + bitmap_word = toStgWord dflags (fromIntegral n_bits) + .|. 
(small_bitmap `shiftL` bITMAP_BITS_SHIFT dflags)
+
+  lits = mkWordCLit dflags (fromIntegral n_bits)
+       : map (mkStgWordCLit dflags) bitmap
+    -- The first word is the size.  The structure must match
+    -- StgLargeBitmap in includes/rts/storage/InfoTable.h
+
+-------------------------------------------------------------------------
+--
+--      Generating a standard info table
+--
+-------------------------------------------------------------------------
+
+-- The standard bits of an info table.  This part of the info table
+-- corresponds to the StgInfoTable type defined in
+-- includes/rts/storage/InfoTables.h.
+--
+-- Its shape varies with ticky/profiling/tables next to code etc
+-- so we can't use constant offsets from Constants
+
+mkStdInfoTable
+   :: DynFlags
+   -> (CmmLit,CmmLit)   -- Closure type descr and closure descr (profiling)
+   -> Int               -- Closure RTS tag
+   -> CmmLit            -- SRT length
+   -> CmmLit            -- layout field
+   -> [CmmLit]
+
+mkStdInfoTable dflags (type_descr, closure_descr) cl_type srt layout_lit
+ =      -- Parallel revertible-black hole field
+    prof_info
+        -- Ticky info (none at present)
+        -- Debug info (none at present)
+ ++ [layout_lit, tag, srt]
+
+ where
+    prof_info
+        | gopt Opt_SccProfilingOn dflags = [type_descr, closure_descr]
+        | otherwise = []
+
+    tag = CmmInt (fromIntegral cl_type) (halfWordWidth dflags)
+
+-------------------------------------------------------------------------
+--
+--      Making string literals
+--
+-------------------------------------------------------------------------
+
+mkProfLits :: DynFlags -> ProfilingInfo -> UniqSM ((CmmLit,CmmLit), [RawCmmDecl])
+mkProfLits dflags NoProfilingInfo = return ((zeroCLit dflags, zeroCLit dflags), [])
+mkProfLits _ (ProfilingInfo td cd)
+  = do { (td_lit, td_decl) <- newStringLit td
+       ; (cd_lit, cd_decl) <- newStringLit cd
+       ; return ((td_lit,cd_lit), [td_decl,cd_decl]) }
+
+newStringLit :: ByteString -> UniqSM (CmmLit, GenCmmDecl CmmStatics info stmt)
+newStringLit bytes
+  = do { uniq <- getUniqueM
+       ; return (mkByteStringCLit (mkStringLitLabel uniq) bytes) }
+
+
+-- Misc utils
+
+-- | Value of the srt field of an info table when using an StgLargeSRT
+srtEscape :: DynFlags -> StgHalfWord
+srtEscape dflags = toStgHalfWord dflags (-1)
+
+-------------------------------------------------------------------------
+--
+--      Accessing fields of an info table
+--
+-------------------------------------------------------------------------
+
+-- | Wrap a 'CmmExpr' in an alignment check when @-falignment-sanitisation@ is
+-- enabled.
+wordAligned :: DynFlags -> CmmExpr -> CmmExpr
+wordAligned dflags e
+  | gopt Opt_AlignmentSanitisation dflags
+  = CmmMachOp (MO_AlignmentCheck (wORD_SIZE dflags) (wordWidth dflags)) [e]
+  | otherwise
+  = e
+
+closureInfoPtr :: DynFlags -> CmmExpr -> CmmExpr
+-- Takes a closure pointer and returns the info table pointer
+closureInfoPtr dflags e =
+    CmmLoad (wordAligned dflags e) (bWord dflags)
+
+entryCode :: DynFlags -> CmmExpr -> CmmExpr
+-- Takes an info pointer (the first word of a closure)
+-- and returns its entry code
+entryCode dflags e
+ | tablesNextToCode dflags = e
+ | otherwise               = CmmLoad e (bWord dflags)
+
+getConstrTag :: DynFlags -> CmmExpr -> CmmExpr
+-- Takes a closure pointer, and returns the *zero-indexed*
+-- constructor tag obtained from the info table
+-- This lives in the SRT field of the info table
+-- (constructors don't need SRTs).
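+-- The half-word field is widened to full word width with MO_UU_Conv below.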
+getConstrTag dflags closure_ptr
+  = CmmMachOp (MO_UU_Conv (halfWordWidth dflags) (wordWidth dflags)) [infoTableConstrTag dflags info_table]
+  where
+    info_table = infoTable dflags (closureInfoPtr dflags closure_ptr)
+
+cmmGetClosureType :: DynFlags -> CmmExpr -> CmmExpr
+-- Takes a closure pointer, and returns the closure type
+-- obtained from the info table
+cmmGetClosureType dflags closure_ptr
+  = CmmMachOp (MO_UU_Conv (halfWordWidth dflags) (wordWidth dflags)) [infoTableClosureType dflags info_table]
+  where
+    info_table = infoTable dflags (closureInfoPtr dflags closure_ptr)
+
+infoTable :: DynFlags -> CmmExpr -> CmmExpr
+-- Takes an info pointer (the first word of a closure)
+-- and returns a pointer to the first word of the standard-form
+-- info table, excluding the entry-code word (if present)
+infoTable dflags info_ptr
+  | tablesNextToCode dflags = cmmOffsetB dflags info_ptr (- stdInfoTableSizeB dflags)
+  | otherwise               = cmmOffsetW dflags info_ptr 1 -- Past the entry code pointer
+
+infoTableConstrTag :: DynFlags -> CmmExpr -> CmmExpr
+-- Takes an info table pointer (from infoTable) and returns the constr tag
+-- field of the info table (same as the srt_bitmap field)
+infoTableConstrTag = infoTableSrtBitmap
+
+infoTableSrtBitmap :: DynFlags -> CmmExpr -> CmmExpr
+-- Takes an info table pointer (from infoTable) and returns the srt_bitmap
+-- field of the info table
+infoTableSrtBitmap dflags info_tbl
+  = CmmLoad (cmmOffsetB dflags info_tbl (stdSrtBitmapOffset dflags)) (bHalfWord dflags)
+
+infoTableClosureType :: DynFlags -> CmmExpr -> CmmExpr
+-- Takes an info table pointer (from infoTable) and returns the closure type
+-- field of the info table.
+infoTableClosureType dflags info_tbl
+  = CmmLoad (cmmOffsetB dflags info_tbl (stdClosureTypeOffset dflags)) (bHalfWord dflags)
+
+infoTablePtrs :: DynFlags -> CmmExpr -> CmmExpr
+infoTablePtrs dflags info_tbl
+  = CmmLoad (cmmOffsetB dflags info_tbl (stdPtrsOffset dflags)) (bHalfWord dflags)
+
+infoTableNonPtrs :: DynFlags -> CmmExpr -> CmmExpr
+infoTableNonPtrs dflags info_tbl
+  = CmmLoad (cmmOffsetB dflags info_tbl (stdNonPtrsOffset dflags)) (bHalfWord dflags)
+
+funInfoTable :: DynFlags -> CmmExpr -> CmmExpr
+-- Takes the info pointer of a function,
+-- and returns a pointer to the first word of the StgFunInfoExtra struct
+-- in the info table.
+funInfoTable dflags info_ptr + | tablesNextToCode dflags + = cmmOffsetB dflags info_ptr (- stdInfoTableSizeB dflags - sIZEOF_StgFunInfoExtraRev dflags) + | otherwise + = cmmOffsetW dflags info_ptr (1 + stdInfoTableSizeW dflags) + -- Past the entry code pointer + +-- Takes the info pointer of a function, returns the function's arity +funInfoArity :: DynFlags -> CmmExpr -> CmmExpr +funInfoArity dflags iptr + = cmmToWord dflags (cmmLoadIndex dflags rep fun_info (offset `div` rep_bytes)) + where + fun_info = funInfoTable dflags iptr + rep = cmmBits (widthFromBytes rep_bytes) + + (rep_bytes, offset) + | tablesNextToCode dflags = ( pc_REP_StgFunInfoExtraRev_arity pc + , oFFSET_StgFunInfoExtraRev_arity dflags ) + | otherwise = ( pc_REP_StgFunInfoExtraFwd_arity pc + , oFFSET_StgFunInfoExtraFwd_arity dflags ) + + pc = platformConstants dflags + +----------------------------------------------------------------------------- +-- +-- Info table sizes & offsets +-- +----------------------------------------------------------------------------- + +stdInfoTableSizeW :: DynFlags -> WordOff +-- The size of a standard info table varies with profiling/ticky etc, +-- so we can't get it from Constants +-- It must vary in sync with mkStdInfoTable +stdInfoTableSizeW dflags + = fixedInfoTableSizeW + + if gopt Opt_SccProfilingOn dflags + then profInfoTableSizeW + else 0 + +fixedInfoTableSizeW :: WordOff +fixedInfoTableSizeW = 2 -- layout, type + +profInfoTableSizeW :: WordOff +profInfoTableSizeW = 2 + +maxStdInfoTableSizeW :: WordOff +maxStdInfoTableSizeW = + 1 {- entry, when !tablesNextToCode -} + + fixedInfoTableSizeW + + profInfoTableSizeW + +maxRetInfoTableSizeW :: WordOff +maxRetInfoTableSizeW = + maxStdInfoTableSizeW + + 1 {- srt label -} + +stdInfoTableSizeB :: DynFlags -> ByteOff +stdInfoTableSizeB dflags = stdInfoTableSizeW dflags * wORD_SIZE dflags + +stdSrtBitmapOffset :: DynFlags -> ByteOff +-- Byte offset of the SRT bitmap half-word which is +-- in the *higher-addressed* part of the type_lit +stdSrtBitmapOffset dflags = stdInfoTableSizeB dflags - halfWordSize dflags + +stdClosureTypeOffset :: DynFlags -> ByteOff +-- Byte offset of the closure type half-word +stdClosureTypeOffset dflags = stdInfoTableSizeB dflags - wORD_SIZE dflags + +stdPtrsOffset, stdNonPtrsOffset :: DynFlags -> ByteOff +stdPtrsOffset dflags = stdInfoTableSizeB dflags - 2 * wORD_SIZE dflags +stdNonPtrsOffset dflags = stdInfoTableSizeB dflags - 2 * wORD_SIZE dflags + halfWordSize dflags + +conInfoTableSizeB :: DynFlags -> Int +conInfoTableSizeB dflags = stdInfoTableSizeB dflags + wORD_SIZE dflags diff --git a/compiler/GHC/Cmm/Info/Build.hs b/compiler/GHC/Cmm/Info/Build.hs new file mode 100644 index 0000000000..1ba79befcd --- /dev/null +++ b/compiler/GHC/Cmm/Info/Build.hs @@ -0,0 +1,892 @@ +{-# LANGUAGE GADTs, BangPatterns, RecordWildCards, + GeneralizedNewtypeDeriving, NondecreasingIndentation, TupleSections #-} + +module GHC.Cmm.Info.Build + ( CAFSet, CAFEnv, cafAnal + , doSRTs, ModuleSRTInfo, emptySRT + ) where + +import GhcPrelude hiding (succ) + +import Id +import GHC.Cmm.BlockId +import GHC.Cmm.Dataflow.Block +import GHC.Cmm.Dataflow.Graph +import GHC.Cmm.Dataflow.Label +import GHC.Cmm.Dataflow.Collections +import GHC.Cmm.Dataflow +import Module +import GHC.Platform +import Digraph +import GHC.Cmm.CLabel +import GHC.Cmm +import GHC.Cmm.Utils +import DynFlags +import Maybes +import Outputable +import GHC.Runtime.Layout +import UniqSupply +import CostCentre +import GHC.StgToCmm.Heap + +import Control.Monad +import Data.Map (Map) 
+import qualified Data.Map as Map +import Data.Set (Set) +import qualified Data.Set as Set +import Data.Tuple +import Control.Monad.Trans.State +import Control.Monad.Trans.Class + + +{- Note [SRTs] + +SRTs are the mechanism by which the garbage collector can determine +the live CAFs in the program. + +Representation +^^^^^^^^^^^^^^ + ++------+ +| info | +| | +-----+---+---+---+ +| -------->|SRT_2| | | | | 0 | +|------| +-----+-|-+-|-+---+ +| | | | +| code | | | +| | v v + +An SRT is simply an object in the program's data segment. It has the +same representation as a static constructor. There are 16 +pre-compiled SRT info tables: stg_SRT_1_info, .. stg_SRT_16_info, +representing SRT objects with 1-16 pointers, respectively. + +The entries of an SRT object point to static closures, which are either +- FUN_STATIC, THUNK_STATIC or CONSTR +- Another SRT (actually just a CONSTR) + +The final field of the SRT is the static link field, used by the +garbage collector to chain together static closures that it visits and +to determine whether a static closure has been visited or not. (see +Note [STATIC_LINK fields]) + +By traversing the transitive closure of an SRT, the GC will reach all +of the CAFs that are reachable from the code associated with this SRT. + +If we need to create an SRT with more than 16 entries, we build a +chain of SRT objects with all but the last having 16 entries. + ++-----+---+- -+---+---+ +|SRT16| | | | | | 0 | ++-----+-|-+- -+-|-+---+ + | | + v v + +----+---+---+---+ + |SRT2| | | | | 0 | + +----+-|-+-|-+---+ + | | + | | + v v + +Referring to an SRT from the info table +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The following things have SRTs: + +- Static functions (FUN) +- Static thunks (THUNK), ie. CAFs +- Continuations (RET_SMALL, etc.) + +In each case, the info table points to the SRT. + +- info->srt is zero if there's no SRT, otherwise: +- info->srt == 1 and info->f.srt_offset points to the SRT + +e.g. for a FUN with an SRT: + +StgFunInfoTable +------+ + info->f.srt_offset | ------------> offset to SRT object +StgStdInfoTable +------+ + info->layout.ptrs | ... | + info->layout.nptrs | ... | + info->srt | 1 | + info->type | ... | + |------| + +On x86_64, we optimise the info table representation further. The +offset to the SRT can be stored in 32 bits (all code lives within a +2GB region in x86_64's small memory model), so we can save a word in +the info table by storing the srt_offset in the srt field, which is +half a word. + +On x86_64 with TABLES_NEXT_TO_CODE (except on MachO, due to #15169): + +- info->srt is zero if there's no SRT, otherwise: +- info->srt is an offset from the info pointer to the SRT object + +StgStdInfoTable +------+ + info->layout.ptrs | | + info->layout.nptrs | | + info->srt | ------------> offset to SRT object + |------| + + +EXAMPLE +^^^^^^^ + +f = \x. ... g ... + where + g = \y. ... h ... c1 ... + h = \z. ... c2 ... + +c1 & c2 are CAFs + +g and h are local functions, but they have no static closures. When +we generate code for f, we start with a CmmGroup of four CmmDecls: + + [ f_closure, f_entry, g_entry, h_entry ] + +we process each CmmDecl separately in cpsTop, giving us a list of +CmmDecls. e.g. for f_entry, we might end up with + + [ f_entry, f1_ret, f2_proc ] + +where f1_ret is a return point, and f2_proc is a proc-point. 
We have +a CAFSet for each of these CmmDecls, let's suppose they are + + [ f_entry{g_info}, f1_ret{g_info}, f2_proc{} ] + [ g_entry{h_info, c1_closure} ] + [ h_entry{c2_closure} ] + +Next, we make an SRT for each of these functions: + + f_srt : [g_info] + g_srt : [h_info, c1_closure] + h_srt : [c2_closure] + +Now, for g_info and h_info, we want to refer to the SRTs for g and h +respectively, which we'll label g_srt and h_srt: + + f_srt : [g_srt] + g_srt : [h_srt, c1_closure] + h_srt : [c2_closure] + +Now, when an SRT has a single entry, we don't actually generate an SRT +closure for it, instead we just replace references to it with its +single element. So, since h_srt == c2_closure, we have + + f_srt : [g_srt] + g_srt : [c2_closure, c1_closure] + h_srt : [c2_closure] + +and the only SRT closure we generate is + + g_srt = SRT_2 [c2_closure, c1_closure] + + +Optimisations +^^^^^^^^^^^^^ + +To reduce the code size overhead and the cost of traversing SRTs in +the GC, we want to simplify SRTs where possible. We therefore apply +the following optimisations. Each has a [keyword]; search for the +keyword in the code below to see where the optimisation is +implemented. + +1. [Inline] we never create an SRT with a single entry, instead we + point to the single entry directly from the info table. + + i.e. instead of + + +------+ + | info | + | | +-----+---+---+ + | -------->|SRT_1| | | 0 | + |------| +-----+-|-+---+ + | | | + | code | | + | | v + C + + we can point directly to the closure: + + +------+ + | info | + | | + | -------->C + |------| + | | + | code | + | | + + + Furthermore, the SRT for any code that refers to this info table + can point directly to C. + + The exception to this is when we're doing dynamic linking. In that + case, if the closure is not locally defined then we can't point to + it directly from the info table, because this is the text section + which cannot contain runtime relocations. In this case we skip this + optimisation and generate the singleton SRT, because SRTs are in the + data section and *can* have relocatable references. + +2. [FUN] A static function closure can also be an SRT, we simply put + the SRT entries as fields in the static closure. This makes a lot + of sense: the static references are just like the free variables of + the FUN closure. + + i.e. instead of + + f_closure: + +-----+---+ + | | | 0 | + +- |--+---+ + | +------+ + | | info | f_srt: + | | | +-----+---+---+---+ + | | -------->|SRT_2| | | | + 0 | + `----------->|------| +-----+-|-+-|-+---+ + | | | | + | code | | | + | | v v + + + We can generate: + + f_closure: + +-----+---+---+---+ + | | | | | | | 0 | + +- |--+-|-+-|-+---+ + | | | +------+ + | v v | info | + | | | + | | 0 | + `----------->|------| + | | + | code | + | | + + + (note: we can't do this for THUNKs, because the thunk gets + overwritten when it is entered, so we wouldn't be able to share + this SRT with other info tables that want to refer to it (see + [Common] below). FUNs are immutable so don't have this problem.) + +3. [Common] Identical SRTs can be commoned up. + +4. [Filter] If an SRT A refers to an SRT B and a closure C, and B also + refers to C (perhaps transitively), then we can omit the reference + to C from A. + + +Note that there are many other optimisations that we could do, but +aren't implemented. In general, we could omit any reference from an +SRT if everything reachable from it is also reachable from the other +fields in the SRT. Our [Filter] optimisation is a special case of +this. 
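+
+For example, with
+
+  A's SRT = {B, C}
+  B's SRT = {C}
+
+[Filter] lets us emit A's SRT as just {B}: C remains reachable through
+B, so dropping the direct reference loses nothing.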
+
+Another opportunity we don't exploit is this:
+
+A = {X,Y,Z}
+B = {Y,Z}
+C = {X,B}
+
+Here we could use C = {A} and therefore [Inline] C = A.
+-}
+
+-- ---------------------------------------------------------------------
+{- Note [Invalid optimisation: shortcutting]
+
+You might think that if we have something like
+
+A's SRT = {B}
+B's SRT = {X}
+
+that we could replace the reference to B in A's SRT with X.
+
+A's SRT = {X}
+B's SRT = {X}
+
+and thereby perhaps save a little work at runtime, because we don't
+have to visit B.
+
+But this is NOT valid.
+
+Consider these cases:
+
+0. B can't be a constructor, because constructors don't have SRTs.
+
+1. B is a CAF. This is the easy one. Obviously we want A's SRT to
+   point to B, so that it keeps B alive.
+
+2. B is a function. This is the tricky one. The reason we can't
+   shortcut in this case is that we aren't allowed to resurrect static
+   objects.
+
+== How does this cause a problem? ==
+
+The particular case that cropped up when we tried this was #15544.
+- A is a thunk
+- B is a static function
+- X is a CAF
+- suppose we GC when A is alive, and B is not otherwise reachable.
+- B is "collected", meaning that it doesn't make it onto the static
+  objects list during this GC, but nothing bad happens yet.
+- Next, suppose we enter A, and then call B. (Remember that A refers to B.)
+  At the entry point to B, we GC. This puts B on the stack, as part of the
+  RET_FUN stack frame that gets pushed when we GC at a function entry point.
+- This GC will now reach B.
+- But because B was previously "collected", it breaks the assumption
+  that static objects are never resurrected. See Note [STATIC_LINK
+  fields] in rts/sm/Storage.h for why this is bad.
+- In practice, the GC thinks that B has already been visited, and so
+  doesn't visit X, and catastrophe ensues.
+
+== Isn't this caused by the RET_FUN business? ==
+
+Maybe, but could you prove that RET_FUN is the only way that
+resurrection can occur?
+
+So, no shortcutting.
+-}
+
+-- ---------------------------------------------------------------------
+-- Label types
+
+-- Labels that come from cafAnal can be:
+--   - _closure labels for static functions or CAFs
+--   - _info labels for dynamic functions, thunks, or continuations
+--   - _entry labels for functions or thunks
+--
+-- Meanwhile the labels on top-level blocks are _entry labels.
+--
+-- To put everything in the same namespace we convert all labels to
+-- closure labels using toClosureLbl. Note that some of these
+-- labels will not actually exist; that's ok because we're going to
+-- map them to SRTEntry later, which ranges over labels that do exist.
+--
+newtype CAFLabel = CAFLabel CLabel
+  deriving (Eq,Ord,Outputable)
+
+type CAFSet = Set CAFLabel
+type CAFEnv = LabelMap CAFSet
+
+mkCAFLabel :: CLabel -> CAFLabel
+mkCAFLabel lbl = CAFLabel (toClosureLbl lbl)
+
+-- This is a label that we can put in an SRT. It *must* be a closure label,
+-- pointing to either a FUN_STATIC, THUNK_STATIC, or CONSTR.
+newtype SRTEntry = SRTEntry CLabel
+  deriving (Eq, Ord, Outputable)
+
+-- ---------------------------------------------------------------------
+-- CAF analysis
+
+-- |
+-- For each code block:
+--   - collect the references reachable from this code block to FUN,
+--     THUNK or RET labels for which hasCAF == True
+--
+-- This gives us a `CAFEnv`: a mapping from code block to sets of labels
+--
+cafAnal
+  :: LabelSet   -- The blocks representing continuations, i.e. those
+                -- that will get RET info tables. These labels will
+                -- get their own SRTs, so we don't aggregate CAFs from
+                -- references to these labels; we just use the label.
+  -> CLabel     -- The top label of the proc
+  -> CmmGraph
+  -> CAFEnv
+cafAnal contLbls topLbl cmmGraph =
+  analyzeCmmBwd cafLattice
+    (cafTransfers contLbls (g_entry cmmGraph) topLbl) cmmGraph mapEmpty
+
+
+cafLattice :: DataflowLattice CAFSet
+cafLattice = DataflowLattice Set.empty add
+  where
+    add (OldFact old) (NewFact new) =
+      let !new' = old `Set.union` new
+      in changedIf (Set.size new' > Set.size old) new'
+
+
+cafTransfers :: LabelSet -> Label -> CLabel -> TransferFun CAFSet
+cafTransfers contLbls entry topLbl
+    (BlockCC eNode middle xNode) fBase =
+  let joined  = cafsInNode xNode $! live'
+      !result = foldNodesBwdOO cafsInNode middle joined
+
+      facts = mapMaybe successorFact (successors xNode)
+      live' = joinFacts cafLattice facts
+
+      successorFact s
+        -- If this is a loop back to the entry, we can refer to the
+        -- entry label.
+        | s == entry = Just (add topLbl Set.empty)
+        -- If this is a continuation, we want to refer to the
+        -- SRT for the continuation's info table
+        | s `setMember` contLbls
+        = Just (Set.singleton (mkCAFLabel (infoTblLbl s)))
+        -- Otherwise, take the CAF references from the destination
+        | otherwise
+        = lookupFact s fBase
+
+      cafsInNode :: CmmNode e x -> CAFSet -> CAFSet
+      cafsInNode node set = foldExpDeep addCaf node set
+
+      addCaf expr !set =
+        case expr of
+          CmmLit (CmmLabel c)                -> add c set
+          CmmLit (CmmLabelOff c _)           -> add c set
+          CmmLit (CmmLabelDiffOff c1 c2 _ _) -> add c1 $! add c2 set
+          _ -> set
+      add l s | hasCAF l  = Set.insert (mkCAFLabel l) s
+              | otherwise = s
+
+  in mapSingleton (entryLabel eNode) result
+
+
+-- -----------------------------------------------------------------------------
+-- ModuleSRTInfo
+
+data ModuleSRTInfo = ModuleSRTInfo
+  { thisModule :: Module
+    -- ^ Current module being compiled. Required for calling labelDynamic.
+  , dedupSRTs :: Map (Set SRTEntry) SRTEntry
+    -- ^ previous SRTs we've emitted, so we can de-duplicate.
+    -- Used to implement the [Common] optimisation.
+  , flatSRTs :: Map SRTEntry (Set SRTEntry)
+    -- ^ The reverse mapping, so that we can remove redundant
+    -- entries, e.g. if we have an SRT [a,b,c], and we know that b
+    -- points to [c,d], we can omit c and emit [a,b].
+    -- Used to implement the [Filter] optimisation.
+  }
+instance Outputable ModuleSRTInfo where
+  ppr ModuleSRTInfo{..} =
+    text "ModuleSRTInfo:" <+> ppr dedupSRTs <+> ppr flatSRTs
+
+emptySRT :: Module -> ModuleSRTInfo
+emptySRT mod =
+  ModuleSRTInfo
+    { thisModule = mod
+    , dedupSRTs = Map.empty
+    , flatSRTs = Map.empty }
+
+-- -----------------------------------------------------------------------------
+-- Constructing SRTs
+
+{- Implementation notes
+
+- In each CmmDecl there is a mapping info_tbls from Label -> CmmInfoTable
+
+- The entry in info_tbls corresponding to g_entry is the closure info
+  table; the rest are continuations.
+
+- Each entry in info_tbls possibly needs an SRT. We need to make a
+  label for each of these.
+
+- We get the CAFSet for each entry from the CAFEnv
+
+-}
+
+-- | Return a (Label, CAFLabel) pair for each labelled block of a CmmDecl,
+--   where the label is
+--   - the info label for a continuation or dynamic closure
+--   - the closure label for a top-level function (not a CAF)
+getLabelledBlocks :: CmmDecl -> [(Label, CAFLabel)]
+getLabelledBlocks (CmmData _ _) = []
+getLabelledBlocks (CmmProc top_info _ _ _) =
+  [ (blockId, mkCAFLabel (cit_lbl info))
+  | (blockId, info) <- mapToList (info_tbls top_info)
+  , let rep = cit_rep info
+  , not (isStaticRep rep) || not (isThunkRep rep)
+  ]
+
+
+-- | Put the labelled blocks that we will be annotating with SRTs into
+-- dependency order. This is so that we can process them one at a
+-- time, resolving references to earlier blocks to point to their
+-- SRTs. CAFs themselves are not included here; see getCAFs below.
+depAnalSRTs
+  :: CAFEnv
+  -> [CmmDecl]
+  -> [SCC (Label, CAFLabel, Set CAFLabel)]
+depAnalSRTs cafEnv decls =
+  srtTrace "depAnalSRTs" (ppr graph) graph
+  where
+    labelledBlocks = concatMap getLabelledBlocks decls
+    labelToBlock = Map.fromList (map swap labelledBlocks)
+    graph = stronglyConnCompFromEdgedVerticesOrd
+      [ let cafs' = Set.delete lbl cafs in
+        DigraphNode (l,lbl,cafs') l
+          (mapMaybe (flip Map.lookup labelToBlock) (Set.toList cafs'))
+      | (l, lbl) <- labelledBlocks
+      , Just cafs <- [mapLookup l cafEnv] ]
+
+
+-- | Get (Label, CAFLabel, Set CAFLabel) for each block that represents a CAF.
+-- These are treated differently from other labelled blocks:
+--  - we never shortcut a reference to a CAF to the contents of its
+--    SRT, since the point of SRTs is to keep CAFs alive.
+--  - CAFs therefore don't take part in the dependency analysis in depAnalSRTs.
+--    Instead we generate their SRTs after everything else.
+getCAFs :: CAFEnv -> [CmmDecl] -> [(Label, CAFLabel, Set CAFLabel)]
+getCAFs cafEnv decls =
+  [ (g_entry g, mkCAFLabel topLbl, cafs)
+  | CmmProc top_info topLbl _ g <- decls
+  , Just info <- [mapLookup (g_entry g) (info_tbls top_info)]
+  , let rep = cit_rep info
+  , isStaticRep rep && isThunkRep rep
+  , Just cafs <- [mapLookup (g_entry g) cafEnv]
+  ]
+
+
+-- | Get the list of blocks that correspond to the entry points for
+-- FUN_STATIC closures. These are the blocks for which, if we have an
+-- SRT, we can merge it with the static closure. [FUN]
+getStaticFuns :: [CmmDecl] -> [(BlockId, CLabel)]
+getStaticFuns decls =
+  [ (g_entry g, lbl)
+  | CmmProc top_info _ _ g <- decls
+  , Just info <- [mapLookup (g_entry g) (info_tbls top_info)]
+  , Just (id, _) <- [cit_clo info]
+  , let rep = cit_rep info
+  , isStaticRep rep && isFunRep rep
+  , let lbl = mkLocalClosureLabel (idName id) (idCafInfo id)
+  ]
+
+
+-- | Maps labels from 'cafAnal' to the final CLabel that will appear
+-- in the SRT.
+--  - closures with singleton SRTs resolve to their single entry
+--  - closures with larger SRTs map to the label for that SRT
+--  - CAFs must not map to anything!
+--  - if a label maps to Nothing, we found that this label's SRT
+--    is empty, so we don't need to refer to it from other SRTs.
+type SRTMap = Map CAFLabel (Maybe SRTEntry)
+
+-- | resolve a CAFLabel to its SRTEntry using the SRTMap
+resolveCAF :: SRTMap -> CAFLabel -> Maybe SRTEntry
+resolveCAF srtMap lbl@(CAFLabel l) =
+  Map.findWithDefault (Just (SRTEntry (toClosureLbl l))) lbl srtMap
+
+
+-- | Attach SRTs to all info tables in the CmmDecls, and add SRT
+-- declarations to the ModuleSRTInfo.
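+--
+-- A rough usage sketch (illustrative, not actual call-site code;
+-- 'cafEnv' and 'decls' stand for results of cafAnal and the earlier
+-- pipeline stages, 'srtInfo' for the threaded ModuleSRTInfo):
+--
+-- > (srtInfo', decls') <- doSRTs dflags srtInfo [(cafEnv, decls)]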
+-- +doSRTs + :: DynFlags + -> ModuleSRTInfo + -> [(CAFEnv, [CmmDecl])] + -> IO (ModuleSRTInfo, [CmmDecl]) + +doSRTs dflags moduleSRTInfo tops = do + us <- mkSplitUniqSupply 'u' + + -- Ignore the original grouping of decls, and combine all the + -- CAFEnvs into a single CAFEnv. + let (cafEnvs, declss) = unzip tops + cafEnv = mapUnions cafEnvs + decls = concat declss + staticFuns = mapFromList (getStaticFuns decls) + + -- Put the decls in dependency order. Why? So that we can implement + -- [Inline] and [Filter]. If we need to refer to an SRT that has + -- a single entry, we use the entry itself, which means that we + -- don't need to generate the singleton SRT in the first place. But + -- to do this we need to process blocks before things that depend on + -- them. + let + sccs = depAnalSRTs cafEnv decls + cafsWithSRTs = getCAFs cafEnv decls + + -- On each strongly-connected group of decls, construct the SRT + -- closures and the SRT fields for info tables. + let result :: + [ ( [CmmDecl] -- generated SRTs + , [(Label, CLabel)] -- SRT fields for info tables + , [(Label, [SRTEntry])] -- SRTs to attach to static functions + ) ] + ((result, _srtMap), moduleSRTInfo') = + initUs_ us $ + flip runStateT moduleSRTInfo $ + flip runStateT Map.empty $ do + nonCAFs <- mapM (doSCC dflags staticFuns) sccs + cAFs <- forM cafsWithSRTs $ \(l, cafLbl, cafs) -> + oneSRT dflags staticFuns [l] [cafLbl] True{-is a CAF-} cafs + return (nonCAFs ++ cAFs) + + (declss, pairs, funSRTs) = unzip3 result + + -- Next, update the info tables with the SRTs + let + srtFieldMap = mapFromList (concat pairs) + funSRTMap = mapFromList (concat funSRTs) + decls' = concatMap (updInfoSRTs dflags srtFieldMap funSRTMap) decls + + return (moduleSRTInfo', concat declss ++ decls') + + +-- | Build the SRT for a strongly-connected component of blocks +doSCC + :: DynFlags + -> LabelMap CLabel -- which blocks are static function entry points + -> SCC (Label, CAFLabel, Set CAFLabel) + -> StateT SRTMap + (StateT ModuleSRTInfo UniqSM) + ( [CmmDecl] -- generated SRTs + , [(Label, CLabel)] -- SRT fields for info tables + , [(Label, [SRTEntry])] -- SRTs to attach to static functions + ) + +doSCC dflags staticFuns (AcyclicSCC (l, cafLbl, cafs)) = + oneSRT dflags staticFuns [l] [cafLbl] False cafs + +doSCC dflags staticFuns (CyclicSCC nodes) = do + -- build a single SRT for the whole cycle, see Note [recursive SRTs] + let (blockids, lbls, cafsets) = unzip3 nodes + cafs = Set.unions cafsets + oneSRT dflags staticFuns blockids lbls False cafs + + +{- Note [recursive SRTs] + +If the dependency analyser has found us a recursive group of +declarations, then we build a single SRT for the whole group, on the +grounds that everything in the group is reachable from everything +else, so we lose nothing by having a single SRT. + +However, there are a couple of wrinkles to be aware of. + +* The Set CAFLabel for this SRT will contain labels in the group +itself. The SRTMap will therefore not contain entries for these labels +yet, so we can't turn them into SRTEntries using resolveCAF. BUT we +can just remove recursive references from the Set CAFLabel before +generating the SRT - the SRT will still contain all the CAFLabels that +we need to refer to from this group's SRT. + +* That is, EXCEPT for static function closures. For the same reason +described in Note [Invalid optimisation: shortcutting], we cannot omit +references to static function closures. 
+  - But, since we will merge the SRT with one of the static function
+    closures (see [FUN]), we can omit references to *that* static
+    function closure from the SRT.
+-}
+
+-- | Build an SRT for a set of blocks
+oneSRT
+  :: DynFlags
+  -> LabelMap CLabel            -- which blocks are static function entry points
+  -> [Label]                    -- blocks in this set
+  -> [CAFLabel]                 -- labels for those blocks
+  -> Bool                       -- True <=> this SRT is for a CAF
+  -> Set CAFLabel               -- SRT for this set
+  -> StateT SRTMap
+       (StateT ModuleSRTInfo UniqSM)
+       ( [CmmDecl]              -- SRT objects we built
+       , [(Label, CLabel)]      -- SRT fields for these blocks' itbls
+       , [(Label, [SRTEntry])]  -- SRTs to attach to static functions
+       )
+
+oneSRT dflags staticFuns blockids lbls isCAF cafs = do
+  srtMap <- get
+  topSRT <- lift get
+  let
+    -- Can we merge this SRT with a FUN_STATIC closure?
+    (maybeFunClosure, otherFunLabels) =
+      case [ (l,b) | b <- blockids, Just l <- [mapLookup b staticFuns] ] of
+        [] -> (Nothing, [])
+        ((l,b):xs) -> (Just (l,b), map (mkCAFLabel . fst) xs)
+
+    -- Remove recursive references from the SRT, except for (all but
+    -- one of the) static functions. See Note [recursive SRTs].
+    nonRec = cafs `Set.difference`
+      (Set.fromList lbls `Set.difference` Set.fromList otherFunLabels)
+
+    -- First resolve all the CAFLabels to SRTEntries
+    -- Implements the [Inline] optimisation.
+    resolved = mapMaybe (resolveCAF srtMap) (Set.toList nonRec)
+
+    -- The set of all SRTEntries in SRTs that we refer to from here.
+    allBelow =
+      Set.unions [ lbls | caf <- resolved
+                        , Just lbls <- [Map.lookup caf (flatSRTs topSRT)] ]
+
+    -- Remove SRTEntries that are also in an SRT that we refer to.
+    -- Implements the [Filter] optimisation.
+    filtered = Set.difference (Set.fromList resolved) allBelow
+
+  srtTrace "oneSRT:"
+    (ppr cafs <+> ppr resolved <+> ppr allBelow <+> ppr filtered) $ return ()
+
+  let
+    isStaticFun = isJust maybeFunClosure
+
+    -- For a label without a closure (e.g. a continuation), we must
+    -- update the SRTMap for the label to point to a closure. It's
+    -- important that we don't do this for static functions or CAFs,
+    -- see Note [Invalid optimisation: shortcutting].
+    updateSRTMap srtEntry =
+      when (not isCAF && (not isStaticFun || isNothing srtEntry)) $ do
+        let newSRTMap = Map.fromList [(cafLbl, srtEntry) | cafLbl <- lbls]
+        put (Map.union newSRTMap srtMap)
+
+    this_mod = thisModule topSRT
+
+  case Set.toList filtered of
+    [] -> do
+      srtTrace "oneSRT: empty" (ppr lbls) $ return ()
+      updateSRTMap Nothing
+      return ([], [], [])
+
+    -- [Inline] - when we have only one entry there is no need to
+    -- build an SRT object at all, instead we put the singleton SRT
+    -- entry in the info table.
+    [one@(SRTEntry lbl)]
+      | -- Info tables refer to SRTs by offset (as noted in the section
+        -- "Referring to an SRT from the info table" of Note [SRTs]). However,
+        -- when dynamic linking is used we cannot guarantee that the offset
+        -- between the SRT and the info table will fit in the offset field.
+        -- Consequently we build a singleton SRT in this case.
+        not (labelDynamic dflags this_mod lbl)
+
+        -- MachO relocations can't express offsets between compilation units at
+        -- all, so we are always forced to build a singleton SRT in this case.
+        && (not (osMachOTarget $ platformOS $ targetPlatform dflags)
+           || isLocalCLabel this_mod lbl) -> do
+
+        -- If we have a static function closure, then it becomes the
+        -- SRT object, and everything else points to it.
(the only way + -- we could have multiple labels here is if this is a + -- recursive group, see Note [recursive SRTs]) + case maybeFunClosure of + Just (staticFunLbl,staticFunBlock) -> return ([], withLabels, []) + where + withLabels = + [ (b, if b == staticFunBlock then lbl else staticFunLbl) + | b <- blockids ] + Nothing -> do + updateSRTMap (Just one) + return ([], map (,lbl) blockids, []) + + cafList -> + -- Check whether an SRT with the same entries has been emitted already. + -- Implements the [Common] optimisation. + case Map.lookup filtered (dedupSRTs topSRT) of + Just srtEntry@(SRTEntry srtLbl) -> do + srtTrace "oneSRT [Common]" (ppr lbls <+> ppr srtLbl) $ return () + updateSRTMap (Just srtEntry) + return ([], map (,srtLbl) blockids, []) + Nothing -> do + -- No duplicates: we have to build a new SRT object + srtTrace "oneSRT: new" (ppr lbls <+> ppr filtered) $ return () + (decls, funSRTs, srtEntry) <- + case maybeFunClosure of + Just (fun,block) -> + return ( [], [(block, cafList)], SRTEntry fun ) + Nothing -> do + (decls, entry) <- lift . lift $ buildSRTChain dflags cafList + return (decls, [], entry) + updateSRTMap (Just srtEntry) + let allBelowThis = Set.union allBelow filtered + oldFlatSRTs = flatSRTs topSRT + newFlatSRTs = Map.insert srtEntry allBelowThis oldFlatSRTs + newDedupSRTs = Map.insert filtered srtEntry (dedupSRTs topSRT) + lift (put (topSRT { dedupSRTs = newDedupSRTs + , flatSRTs = newFlatSRTs })) + let SRTEntry lbl = srtEntry + return (decls, map (,lbl) blockids, funSRTs) + + +-- | build a static SRT object (or a chain of objects) from a list of +-- SRTEntries. +buildSRTChain + :: DynFlags + -> [SRTEntry] + -> UniqSM + ( [CmmDecl] -- The SRT object(s) + , SRTEntry -- label to use in the info table + ) +buildSRTChain _ [] = panic "buildSRT: empty" +buildSRTChain dflags cafSet = + case splitAt mAX_SRT_SIZE cafSet of + (these, []) -> do + (decl,lbl) <- buildSRT dflags these + return ([decl], lbl) + (these,those) -> do + (rest, rest_lbl) <- buildSRTChain dflags (head these : those) + (decl,lbl) <- buildSRT dflags (rest_lbl : tail these) + return (decl:rest, lbl) + where + mAX_SRT_SIZE = 16 + + +buildSRT :: DynFlags -> [SRTEntry] -> UniqSM (CmmDecl, SRTEntry) +buildSRT dflags refs = do + id <- getUniqueM + let + lbl = mkSRTLabel id + srt_n_info = mkSRTInfoLabel (length refs) + fields = + mkStaticClosure dflags srt_n_info dontCareCCS + [ CmmLabel lbl | SRTEntry lbl <- refs ] + [] -- no padding + [mkIntCLit dflags 0] -- link field + [] -- no saved info + return (mkDataLits (Section Data lbl) lbl fields, SRTEntry lbl) + + +-- | Update info tables with references to their SRTs. Also generate +-- static closures, splicing in SRT fields as necessary. +updInfoSRTs + :: DynFlags + -> LabelMap CLabel -- SRT labels for each block + -> LabelMap [SRTEntry] -- SRTs to merge into FUN_STATIC closures + -> CmmDecl + -> [CmmDecl] + +updInfoSRTs dflags srt_env funSRTEnv (CmmProc top_info top_l live g) + | Just (_,closure) <- maybeStaticClosure = [ proc, closure ] + | otherwise = [ proc ] + where + proc = CmmProc top_info { info_tbls = newTopInfo } top_l live g + newTopInfo = mapMapWithKey updInfoTbl (info_tbls top_info) + updInfoTbl l info_tbl + | l == g_entry g, Just (inf, _) <- maybeStaticClosure = inf + | otherwise = info_tbl { cit_srt = mapLookup l srt_env } + + -- Generate static closures [FUN]. Note that this also generates + -- static closures for thunks (CAFs), because it's easier to treat + -- them uniformly in the code generator. 
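+    -- For instance (schematic): merging N SRT entries into a
+    -- FUN_STATIC closure turns its representation from
+    --   HeapRep sta ptrs nptrs ty
+    -- into
+    --   HeapRep sta (ptrs + N) nptrs ty
+    -- i.e. the entries become N extra pointer fields of the closure;
+    -- this is exactly the cit_rep update performed below.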
+ maybeStaticClosure :: Maybe (CmmInfoTable, CmmDecl) + maybeStaticClosure + | Just info_tbl@CmmInfoTable{..} <- + mapLookup (g_entry g) (info_tbls top_info) + , Just (id, ccs) <- cit_clo + , isStaticRep cit_rep = + let + (newInfo, srtEntries) = case mapLookup (g_entry g) funSRTEnv of + Nothing -> + -- if we don't add SRT entries to this closure, then we + -- want to set the srt field in its info table as usual + (info_tbl { cit_srt = mapLookup (g_entry g) srt_env }, []) + Just srtEntries -> srtTrace "maybeStaticFun" (ppr res) + (info_tbl { cit_rep = new_rep }, res) + where res = [ CmmLabel lbl | SRTEntry lbl <- srtEntries ] + fields = mkStaticClosureFields dflags info_tbl ccs (idCafInfo id) + srtEntries + new_rep = case cit_rep of + HeapRep sta ptrs nptrs ty -> + HeapRep sta (ptrs + length srtEntries) nptrs ty + _other -> panic "maybeStaticFun" + lbl = mkLocalClosureLabel (idName id) (idCafInfo id) + in + Just (newInfo, mkDataLits (Section Data lbl) lbl fields) + | otherwise = Nothing + +updInfoSRTs _ _ _ t = [t] + + +srtTrace :: String -> SDoc -> b -> b +-- srtTrace = pprTrace +srtTrace _ _ b = b diff --git a/compiler/GHC/Cmm/LayoutStack.hs b/compiler/GHC/Cmm/LayoutStack.hs new file mode 100644 index 0000000000..f6dda7728c --- /dev/null +++ b/compiler/GHC/Cmm/LayoutStack.hs @@ -0,0 +1,1236 @@ +{-# LANGUAGE BangPatterns, RecordWildCards, GADTs #-} +module GHC.Cmm.LayoutStack ( + cmmLayoutStack, setInfoTableStackMap + ) where + +import GhcPrelude hiding ((<*>)) + +import GHC.StgToCmm.Utils ( callerSaveVolatileRegs, newTemp ) -- XXX layering violation +import GHC.StgToCmm.Foreign ( saveThreadState, loadThreadState ) -- XXX layering violation + +import BasicTypes +import GHC.Cmm +import GHC.Cmm.Info +import GHC.Cmm.BlockId +import GHC.Cmm.CLabel +import GHC.Cmm.Utils +import GHC.Cmm.Graph +import ForeignCall +import GHC.Cmm.Liveness +import GHC.Cmm.ProcPoint +import GHC.Runtime.Layout +import GHC.Cmm.Dataflow.Block +import GHC.Cmm.Dataflow.Collections +import GHC.Cmm.Dataflow +import GHC.Cmm.Dataflow.Graph +import GHC.Cmm.Dataflow.Label +import UniqSupply +import Maybes +import UniqFM +import Util + +import DynFlags +import FastString +import Outputable hiding ( isEmpty ) +import qualified Data.Set as Set +import Control.Monad.Fix +import Data.Array as Array +import Data.Bits +import Data.List (nub) + +{- Note [Stack Layout] + +The job of this pass is to + + - replace references to abstract stack Areas with fixed offsets from Sp. + + - replace the CmmHighStackMark constant used in the stack check with + the maximum stack usage of the proc. + + - save any variables that are live across a call, and reload them as + necessary. + +Before stack allocation, local variables remain live across native +calls (CmmCall{ cmm_cont = Just _ }), and after stack allocation local +variables are clobbered by native calls. + +We want to do stack allocation so that as far as possible + - stack use is minimized, and + - unnecessary stack saves and loads are avoided. + +The algorithm we use is a variant of linear-scan register allocation, +where the stack is our register file. + +We proceed in two passes, see Note [Two pass approach] for why they are not easy +to merge into one. + +Pass 1: + + - First, we do a liveness analysis, which annotates every block with + the variables live on entry to the block. + + - We traverse blocks in reverse postorder DFS; that is, we visit at + least one predecessor of a block before the block itself. 
The + stack layout flowing from the predecessor of the block will + determine the stack layout on entry to the block. + + - We maintain a data structure + + Map Label StackMap + + which describes the contents of the stack and the stack pointer on + entry to each block that is a successor of a block that we have + visited. + + - For each block we visit: + + - Look up the StackMap for this block. + + - If this block is a proc point (or a call continuation, if we aren't + splitting proc points), we need to reload all the live variables from the + stack - but this is done in Pass 2, which calculates more precise liveness + information (see description of Pass 2). + + - Walk forwards through the instructions: + - At an assignment x = Sp[loc] + - Record the fact that Sp[loc] contains x, so that we won't + need to save x if it ever needs to be spilled. + - At an assignment x = E + - If x was previously on the stack, it isn't any more + - At the last node, if it is a call or a jump to a proc point + - Lay out the stack frame for the call (see setupStackFrame) + - emit instructions to save all the live variables + - Remember the StackMaps for all the successors + - emit an instruction to adjust Sp + - If the last node is a branch, then the current StackMap is the + StackMap for the successors. + + - Manifest Sp: replace references to stack areas in this block + with real Sp offsets. We cannot do this until we have laid out + the stack area for the successors above. + + In this phase we also eliminate redundant stores to the stack; + see elimStackStores. + + - There is one important gotcha: sometimes we'll encounter a control + transfer to a block that we've already processed (a join point), + and in that case we might need to rearrange the stack to match + what the block is expecting. (exactly the same as in linear-scan + register allocation, except here we have the luxury of an infinite + supply of temporary variables). + + - Finally, we update the magic CmmHighStackMark constant with the + stack usage of the function, and eliminate the whole stack check + if there was no stack use. (in fact this is done as part of the + main traversal, by feeding the high-water-mark output back in as + an input. I hate cyclic programming, but it's just too convenient + sometimes.) + + There are plenty of tricky details: update frames, proc points, return + addresses, foreign calls, and some ad-hoc optimisations that are + convenient to do here and effective in common cases. Comments in the + code below explain these. + +Pass 2: + +- Calculate live registers, but taking into account that nothing is live at the + entry to a proc point. + +- At each proc point and call continuation insert reloads of live registers from + the stack (they were saved by Pass 1). + + +Note [Two pass approach] + +The main reason for Pass 2 is being able to insert only the reloads that are +needed and the fact that the two passes need different liveness information. +Let's consider an example: + + ..... + \ / + D <- proc point + / \ + E F + \ / + G <- proc point + | + X + +Pass 1 needs liveness assuming that local variables are preserved across calls. +This is important because it needs to save any local registers to the stack +(e.g., if register a is used in block X, it must be saved before any native +call). 
+However, for Pass 2, where we want to reload registers from stack (in a proc +point), this is overly conservative and would lead us to generate reloads in D +for things used in X, even though we're going to generate reloads in G anyway +(since it's also a proc point). +So Pass 2 calculates liveness knowing that nothing is live at the entry to a +proc point. This means that in D we only need to reload things used in E or F. +This can be quite important, for an extreme example see testcase for #3294. + +Merging the two passes is not trivial - Pass 2 is a backward rewrite and Pass 1 +is a forward one. Furthermore, Pass 1 is creating code that uses local registers +(saving them before a call), which the liveness analysis for Pass 2 must see to +be correct. + +-} + + +-- All stack locations are expressed as positive byte offsets from the +-- "base", which is defined to be the address above the return address +-- on the stack on entry to this CmmProc. +-- +-- Lower addresses have higher StackLocs. +-- +type StackLoc = ByteOff + +{- + A StackMap describes the stack at any given point. At a continuation + it has a particular layout, like this: + + | | <- base + |-------------| + | ret0 | <- base + 8 + |-------------| + . upd frame . <- base + sm_ret_off + |-------------| + | | + . vars . + . (live/dead) . + | | <- base + sm_sp - sm_args + |-------------| + | ret1 | + . ret vals . <- base + sm_sp (<--- Sp points here) + |-------------| + +Why do we include the final return address (ret0) in our stack map? I +have absolutely no idea, but it seems to be done that way consistently +in the rest of the code generator, so I played along here. --SDM + +Note that we will be constructing an info table for the continuation +(ret1), which needs to describe the stack down to, but not including, +the update frame (or ret0, if there is no update frame). +-} + +data StackMap = StackMap + { sm_sp :: StackLoc + -- ^ the offset of Sp relative to the base on entry + -- to this block. + , sm_args :: ByteOff + -- ^ the number of bytes of arguments in the area for this block + -- Defn: the offset of young(L) relative to the base is given by + -- (sm_sp - sm_args) of the StackMap for block L. + , sm_ret_off :: ByteOff + -- ^ Number of words of stack that we do not describe with an info + -- table, because it contains an update frame. + , sm_regs :: UniqFM (LocalReg,StackLoc) + -- ^ regs on the stack + } + +instance Outputable StackMap where + ppr StackMap{..} = + text "Sp = " <> int sm_sp $$ + text "sm_args = " <> int sm_args $$ + text "sm_ret_off = " <> int sm_ret_off $$ + text "sm_regs = " <> pprUFM sm_regs ppr + + +cmmLayoutStack :: DynFlags -> ProcPointSet -> ByteOff -> CmmGraph + -> UniqSM (CmmGraph, LabelMap StackMap) +cmmLayoutStack dflags procpoints entry_args + graph@(CmmGraph { g_entry = entry }) + = do + -- We need liveness info. Dead assignments are removed later + -- by the sinking pass. 
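+    -- (The final stack maps and the Sp high-water mark are computed
+    -- knot-tied, via the mfix below: 'layout' consumes its own final
+    -- results, which works because they are only used lazily.)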
+ let liveness = cmmLocalLiveness dflags graph + blocks = revPostorder graph + + (final_stackmaps, _final_high_sp, new_blocks) <- + mfix $ \ ~(rec_stackmaps, rec_high_sp, _new_blocks) -> + layout dflags procpoints liveness entry entry_args + rec_stackmaps rec_high_sp blocks + + blocks_with_reloads <- + insertReloadsAsNeeded dflags procpoints final_stackmaps entry new_blocks + new_blocks' <- mapM (lowerSafeForeignCall dflags) blocks_with_reloads + return (ofBlockList entry new_blocks', final_stackmaps) + +-- ----------------------------------------------------------------------------- +-- Pass 1 +-- ----------------------------------------------------------------------------- + +layout :: DynFlags + -> LabelSet -- proc points + -> LabelMap CmmLocalLive -- liveness + -> BlockId -- entry + -> ByteOff -- stack args on entry + + -> LabelMap StackMap -- [final] stack maps + -> ByteOff -- [final] Sp high water mark + + -> [CmmBlock] -- [in] blocks + + -> UniqSM + ( LabelMap StackMap -- [out] stack maps + , ByteOff -- [out] Sp high water mark + , [CmmBlock] -- [out] new blocks + ) + +layout dflags procpoints liveness entry entry_args final_stackmaps final_sp_high blocks + = go blocks init_stackmap entry_args [] + where + (updfr, cont_info) = collectContInfo blocks + + init_stackmap = mapSingleton entry StackMap{ sm_sp = entry_args + , sm_args = entry_args + , sm_ret_off = updfr + , sm_regs = emptyUFM + } + + go [] acc_stackmaps acc_hwm acc_blocks + = return (acc_stackmaps, acc_hwm, acc_blocks) + + go (b0 : bs) acc_stackmaps acc_hwm acc_blocks + = do + let (entry0@(CmmEntry entry_lbl tscope), middle0, last0) = blockSplit b0 + + let stack0@StackMap { sm_sp = sp0 } + = mapFindWithDefault + (pprPanic "no stack map for" (ppr entry_lbl)) + entry_lbl acc_stackmaps + + -- (a) Update the stack map to include the effects of + -- assignments in this block + let stack1 = foldBlockNodesF (procMiddle acc_stackmaps) middle0 stack0 + + -- (b) Look at the last node and if we are making a call or + -- jumping to a proc point, we must save the live + -- variables, adjust Sp, and construct the StackMaps for + -- each of the successor blocks. See handleLastNode for + -- details. + (middle1, sp_off, last1, fixup_blocks, out) + <- handleLastNode dflags procpoints liveness cont_info + acc_stackmaps stack1 tscope middle0 last0 + + -- (c) Manifest Sp: run over the nodes in the block and replace + -- CmmStackSlot with CmmLoad from Sp with a concrete offset. + -- + -- our block: + -- middle0 -- the original middle nodes + -- middle1 -- live variable saves from handleLastNode + -- Sp = Sp + sp_off -- Sp adjustment goes here + -- last1 -- the last node + -- + let middle_pre = blockToList $ foldl' blockSnoc middle0 middle1 + + let final_blocks = + manifestSp dflags final_stackmaps stack0 sp0 final_sp_high + entry0 middle_pre sp_off last1 fixup_blocks + + let acc_stackmaps' = mapUnion acc_stackmaps out + + -- If this block jumps to the GC, then we do not take its + -- stack usage into account for the high-water mark. + -- Otherwise, if the only stack usage is in the stack-check + -- failure block itself, we will do a redundant stack + -- check. The stack has a buffer designed to accommodate + -- the largest amount of stack needed for calling the GC. 
+ -- + this_sp_hwm | isGcJump last0 = 0 + | otherwise = sp0 - sp_off + + hwm' = maximum (acc_hwm : this_sp_hwm : map sm_sp (mapElems out)) + + go bs acc_stackmaps' hwm' (final_blocks ++ acc_blocks) + + +-- ----------------------------------------------------------------------------- + +-- Not foolproof, but GCFun is the culprit we most want to catch +isGcJump :: CmmNode O C -> Bool +isGcJump (CmmCall { cml_target = CmmReg (CmmGlobal l) }) + = l == GCFun || l == GCEnter1 +isGcJump _something_else = False + +-- ----------------------------------------------------------------------------- + +-- This doesn't seem right somehow. We need to find out whether this +-- proc will push some update frame material at some point, so that we +-- can avoid using that area of the stack for spilling. The +-- updfr_space field of the CmmProc *should* tell us, but it doesn't +-- (I think maybe it gets filled in later when we do proc-point +-- splitting). +-- +-- So we'll just take the max of all the cml_ret_offs. This could be +-- unnecessarily pessimistic, but probably not in the code we +-- generate. + +collectContInfo :: [CmmBlock] -> (ByteOff, LabelMap ByteOff) +collectContInfo blocks + = (maximum ret_offs, mapFromList (catMaybes mb_argss)) + where + (mb_argss, ret_offs) = mapAndUnzip get_cont blocks + + get_cont :: Block CmmNode x C -> (Maybe (Label, ByteOff), ByteOff) + get_cont b = + case lastNode b of + CmmCall { cml_cont = Just l, .. } + -> (Just (l, cml_ret_args), cml_ret_off) + CmmForeignCall { .. } + -> (Just (succ, ret_args), ret_off) + _other -> (Nothing, 0) + + +-- ----------------------------------------------------------------------------- +-- Updating the StackMap from middle nodes + +-- Look for loads from stack slots, and update the StackMap. This is +-- purely for optimisation reasons, so that we can avoid saving a +-- variable back to a different stack slot if it is already on the +-- stack. +-- +-- This happens a lot: for example when function arguments are passed +-- on the stack and need to be immediately saved across a call, we +-- want to just leave them where they are on the stack. +-- +procMiddle :: LabelMap StackMap -> CmmNode e x -> StackMap -> StackMap +procMiddle stackmaps node sm + = case node of + CmmAssign (CmmLocal r) (CmmLoad (CmmStackSlot area off) _) + -> sm { sm_regs = addToUFM (sm_regs sm) r (r,loc) } + where loc = getStackLoc area off stackmaps + CmmAssign (CmmLocal r) _other + -> sm { sm_regs = delFromUFM (sm_regs sm) r } + _other + -> sm + +getStackLoc :: Area -> ByteOff -> LabelMap StackMap -> StackLoc +getStackLoc Old n _ = n +getStackLoc (Young l) n stackmaps = + case mapLookup l stackmaps of + Nothing -> pprPanic "getStackLoc" (ppr l) + Just sm -> sm_sp sm - sm_args sm + n + + +-- ----------------------------------------------------------------------------- +-- Handling stack allocation for a last node + +-- We take a single last node and turn it into: +-- +-- C1 (some statements) +-- Sp = Sp + N +-- C2 (some more statements) +-- call f() -- the actual last node +-- +-- plus possibly some more blocks (we may have to add some fixup code +-- between the last node and the continuation). +-- +-- C1: is the code for saving the variables across this last node onto +-- the stack, if the continuation is a call or jumps to a proc point. +-- +-- C2: if the last node is a safe foreign call, we have to inject some +-- extra code that goes *after* the Sp adjustment. 
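+--
+-- Schematically, the tuple we return looks like this (an informal
+-- sketch; the real type follows below):
+--
+--   ( c1_assignments      -- C1: saves to go before the Sp adjustment
+--   , sp_off              -- the Sp adjustment
+--   , last_node           -- the call or jump itself
+--   , fixup_blocks        -- any new blocks, e.g. for join points
+--   , succ_stackmaps )    -- StackMaps for the continuations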
+ +handleLastNode + :: DynFlags -> ProcPointSet -> LabelMap CmmLocalLive -> LabelMap ByteOff + -> LabelMap StackMap -> StackMap -> CmmTickScope + -> Block CmmNode O O + -> CmmNode O C + -> UniqSM + ( [CmmNode O O] -- nodes to go *before* the Sp adjustment + , ByteOff -- amount to adjust Sp + , CmmNode O C -- new last node + , [CmmBlock] -- new blocks + , LabelMap StackMap -- stackmaps for the continuations + ) + +handleLastNode dflags procpoints liveness cont_info stackmaps + stack0@StackMap { sm_sp = sp0 } tscp middle last + = case last of + -- At each return / tail call, + -- adjust Sp to point to the last argument pushed, which + -- is cml_args, after popping any other junk from the stack. + CmmCall{ cml_cont = Nothing, .. } -> do + let sp_off = sp0 - cml_args + return ([], sp_off, last, [], mapEmpty) + + -- At each CmmCall with a continuation: + CmmCall{ cml_cont = Just cont_lbl, .. } -> + return $ lastCall cont_lbl cml_args cml_ret_args cml_ret_off + + CmmForeignCall{ succ = cont_lbl, .. } -> do + return $ lastCall cont_lbl (wORD_SIZE dflags) ret_args ret_off + -- one word of args: the return address + + CmmBranch {} -> handleBranches + CmmCondBranch {} -> handleBranches + CmmSwitch {} -> handleBranches + + where + -- Calls and ForeignCalls are handled the same way: + lastCall :: BlockId -> ByteOff -> ByteOff -> ByteOff + -> ( [CmmNode O O] + , ByteOff + , CmmNode O C + , [CmmBlock] + , LabelMap StackMap + ) + lastCall lbl cml_args cml_ret_args cml_ret_off + = ( assignments + , spOffsetForCall sp0 cont_stack cml_args + , last + , [] -- no new blocks + , mapSingleton lbl cont_stack ) + where + (assignments, cont_stack) = prepareStack lbl cml_ret_args cml_ret_off + + + prepareStack lbl cml_ret_args cml_ret_off + | Just cont_stack <- mapLookup lbl stackmaps + -- If we have already seen this continuation before, then + -- we just have to make the stack look the same: + = (fixupStack stack0 cont_stack, cont_stack) + -- Otherwise, we have to allocate the stack frame + | otherwise + = (save_assignments, new_cont_stack) + where + (new_cont_stack, save_assignments) + = setupStackFrame dflags lbl liveness cml_ret_off cml_ret_args stack0 + + + -- For other last nodes (branches), if any of the targets is a + -- proc point, we have to set up the stack to match what the proc + -- point is expecting. + -- + handleBranches :: UniqSM ( [CmmNode O O] + , ByteOff + , CmmNode O C + , [CmmBlock] + , LabelMap StackMap ) + + handleBranches + -- Note [diamond proc point] + | Just l <- futureContinuation middle + , (nub $ filter (`setMember` procpoints) $ successors last) == [l] + = do + let cont_args = mapFindWithDefault 0 l cont_info + (assigs, cont_stack) = prepareStack l cont_args (sm_ret_off stack0) + out = mapFromList [ (l', cont_stack) + | l' <- successors last ] + return ( assigs + , spOffsetForCall sp0 cont_stack (wORD_SIZE dflags) + , last + , [] + , out) + + | otherwise = do + pps <- mapM handleBranch (successors last) + let lbl_map :: LabelMap Label + lbl_map = mapFromList [ (l,tmp) | (l,tmp,_,_) <- pps ] + fix_lbl l = mapFindWithDefault l l lbl_map + return ( [] + , 0 + , mapSuccessors fix_lbl last + , concat [ blk | (_,_,_,blk) <- pps ] + , mapFromList [ (l, sm) | (l,_,sm,_) <- pps ] ) + + -- For each successor of this block + handleBranch :: BlockId -> UniqSM (BlockId, BlockId, StackMap, [CmmBlock]) + handleBranch l + -- (a) if the successor already has a stackmap, we need to + -- shuffle the current stack to make it look the same. + -- We have to insert a new block to make this happen. 
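+       --       For example (illustrative): if x sits at Sp[8] here
+       --       but the join point's StackMap expects it at Sp[16],
+       --       the fixup block stores x to the expected slot (see
+       --       fixupStack) before branching on.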
+ | Just stack2 <- mapLookup l stackmaps + = do + let assigs = fixupStack stack0 stack2 + (tmp_lbl, block) <- makeFixupBlock dflags sp0 l stack2 tscp assigs + return (l, tmp_lbl, stack2, block) + + -- (b) if the successor is a proc point, save everything + -- on the stack. + | l `setMember` procpoints + = do + let cont_args = mapFindWithDefault 0 l cont_info + (stack2, assigs) = + setupStackFrame dflags l liveness (sm_ret_off stack0) + cont_args stack0 + (tmp_lbl, block) <- makeFixupBlock dflags sp0 l stack2 tscp assigs + return (l, tmp_lbl, stack2, block) + + -- (c) otherwise, the current StackMap is the StackMap for + -- the continuation. But we must remember to remove any + -- variables from the StackMap that are *not* live at + -- the destination, because this StackMap might be used + -- by fixupStack if this is a join point. + | otherwise = return (l, l, stack1, []) + where live = mapFindWithDefault (panic "handleBranch") l liveness + stack1 = stack0 { sm_regs = filterUFM is_live (sm_regs stack0) } + is_live (r,_) = r `elemRegSet` live + + +makeFixupBlock :: DynFlags -> ByteOff -> Label -> StackMap + -> CmmTickScope -> [CmmNode O O] + -> UniqSM (Label, [CmmBlock]) +makeFixupBlock dflags sp0 l stack tscope assigs + | null assigs && sp0 == sm_sp stack = return (l, []) + | otherwise = do + tmp_lbl <- newBlockId + let sp_off = sp0 - sm_sp stack + block = blockJoin (CmmEntry tmp_lbl tscope) + ( maybeAddSpAdj dflags sp0 sp_off + $ blockFromList assigs ) + (CmmBranch l) + return (tmp_lbl, [block]) + + +-- Sp is currently pointing to current_sp, +-- we want it to point to +-- (sm_sp cont_stack - sm_args cont_stack + args) +-- so the difference is +-- sp0 - (sm_sp cont_stack - sm_args cont_stack + args) +spOffsetForCall :: ByteOff -> StackMap -> ByteOff -> ByteOff +spOffsetForCall current_sp cont_stack args + = current_sp - (sm_sp cont_stack - sm_args cont_stack + args) + + +-- | create a sequence of assignments to establish the new StackMap, +-- given the old StackMap. +fixupStack :: StackMap -> StackMap -> [CmmNode O O] +fixupStack old_stack new_stack = concatMap move new_locs + where + old_map = sm_regs old_stack + new_locs = stackSlotRegs new_stack + + move (r,n) + | Just (_,m) <- lookupUFM old_map r, n == m = [] + | otherwise = [CmmStore (CmmStackSlot Old n) + (CmmReg (CmmLocal r))] + + + +setupStackFrame + :: DynFlags + -> BlockId -- label of continuation + -> LabelMap CmmLocalLive -- liveness + -> ByteOff -- updfr + -> ByteOff -- bytes of return values on stack + -> StackMap -- current StackMap + -> (StackMap, [CmmNode O O]) + +setupStackFrame dflags lbl liveness updfr_off ret_args stack0 + = (cont_stack, assignments) + where + -- get the set of LocalRegs live in the continuation + live = mapFindWithDefault Set.empty lbl liveness + + -- the stack from the base to updfr_off is off-limits. + -- our new stack frame contains: + -- * saved live variables + -- * the return address [young(C) + 8] + -- * the args for the call, + -- which are replaced by the return values at the return + -- point. 
+
+    -- everything up to updfr_off is off-limits
+    -- stack1 contains updfr_off, plus everything we need to save
+    (stack1, assignments) = allocate dflags updfr_off live stack0
+
+    -- And the Sp at the continuation is:
+    -- sm_sp stack1 + ret_args
+    cont_stack = stack1{ sm_sp = sm_sp stack1 + ret_args
+                       , sm_args = ret_args
+                       , sm_ret_off = updfr_off
+                       }
+
+
+-- -----------------------------------------------------------------------------
+-- Note [diamond proc point]
+--
+-- This special case looks for the pattern we get from a typical
+-- tagged case expression:
+--
+--    Sp[young(L1)] = L1
+--    if (R1 & 7) != 0 goto L1 else goto L2
+--  L2:
+--    call [R1] returns to L1
+--  L1: live: {y}
+--    x = R1
+--
+-- If we let the generic case handle this, we get
+--
+--    Sp[-16] = L1
+--    if (R1 & 7) != 0 goto L1a else goto L2
+--  L2:
+--    Sp[-8] = y
+--    Sp = Sp - 16
+--    call [R1] returns to L1
+--  L1a:
+--    Sp[-8] = y
+--    Sp = Sp - 16
+--    goto L1
+--  L1:
+--    x = R1
+--
+-- The code for saving the live vars is duplicated in each branch, and
+-- furthermore there is an extra jump in the fast path (assuming L1 is
+-- a proc point, which it probably is if there is a heap check).
+--
+-- So to fix this we want to set up the stack frame before the
+-- conditional jump. How do we know when to do this, and when it is
+-- safe? The basic idea is, when we see the assignment
+--
+--   Sp[young(L)] = L
+--
+-- we know that
+--   * we are definitely heading for L
+--   * there can be no more reads from another stack area, because young(L)
+--     overlaps with it.
+--
+-- We don't necessarily know that everything live at L is live now
+-- (some might be assigned between here and the jump to L). So we
+-- simplify and only do the optimisation when we see
+--
+--   (1) a block containing an assignment of a return address L
+--   (2) ending in a branch where one (and only one) continuation goes to L,
+--       and no other continuations go to proc points.
+--
+-- then we allocate the stack frame for L at the end of the block,
+-- before the branch.
+--
+-- We could generalise (2), but that would make it a bit more
+-- complicated to handle, and this currently catches the common case.
+
+futureContinuation :: Block CmmNode O O -> Maybe BlockId
+futureContinuation middle = foldBlockNodesB f middle Nothing
+  where f :: CmmNode a b -> Maybe BlockId -> Maybe BlockId
+        f (CmmStore (CmmStackSlot (Young l) _) (CmmLit (CmmBlock _))) _
+            = Just l
+        f _ r = r
+
+-- -----------------------------------------------------------------------------
+-- Saving live registers
+
+-- | Given a set of live registers and a StackMap, save all the registers
+-- on the stack and return the new StackMap and the assignments to do
+-- the saving.
+--
+allocate :: DynFlags -> ByteOff -> LocalRegSet -> StackMap
+         -> (StackMap, [CmmNode O O])
+allocate dflags ret_off live stackmap@StackMap{ sm_sp = sp0
+                                              , sm_regs = regs0 }
+ =
+  -- we only have to save regs that are not already in a slot
+  let to_save = filter (not . (`elemUFM` regs0)) (Set.elems live)
+      regs1   = filterUFM (\(r,_) -> elemRegSet r live) regs0
+  in
+
+  -- make a map of the stack
+  let stack = reverse $ Array.elems $
+              accumArray (\_ x -> x) Empty (1, toWords dflags (max sp0 ret_off)) $
+                ret_words ++ live_words
+           where ret_words =
+                  [ (x, Occupied)
+                  | x <- [ 1 .. toWords dflags ret_off] ]
+                 live_words =
+                  [ (toWords dflags x, Occupied)
+                  | (r,off) <- nonDetEltsUFM regs1,
+                    -- See Note [Unique Determinism and code generation]
+                    let w = localRegBytes dflags r,
+                    x <- [ off, off - wORD_SIZE dflags ..
off - w + 1] ] + in + + -- Pass over the stack: find slots to save all the new live variables, + -- choosing the oldest slots first (hence a foldr). + let + save slot ([], stack, n, assigs, regs) -- no more regs to save + = ([], slot:stack, plusW dflags n 1, assigs, regs) + save slot (to_save, stack, n, assigs, regs) + = case slot of + Occupied -> (to_save, Occupied:stack, plusW dflags n 1, assigs, regs) + Empty + | Just (stack', r, to_save') <- + select_save to_save (slot:stack) + -> let assig = CmmStore (CmmStackSlot Old n') + (CmmReg (CmmLocal r)) + n' = plusW dflags n 1 + in + (to_save', stack', n', assig : assigs, (r,(r,n')):regs) + + | otherwise + -> (to_save, slot:stack, plusW dflags n 1, assigs, regs) + + -- we should do better here: right now we'll fit the smallest first, + -- but it would make more sense to fit the biggest first. + select_save :: [LocalReg] -> [StackSlot] + -> Maybe ([StackSlot], LocalReg, [LocalReg]) + select_save regs stack = go regs [] + where go [] _no_fit = Nothing + go (r:rs) no_fit + | Just rest <- dropEmpty words stack + = Just (replicate words Occupied ++ rest, r, rs++no_fit) + | otherwise + = go rs (r:no_fit) + where words = localRegWords dflags r + + -- fill in empty slots as much as possible + (still_to_save, save_stack, n, save_assigs, save_regs) + = foldr save (to_save, [], 0, [], []) stack + + -- push any remaining live vars on the stack + (push_sp, push_assigs, push_regs) + = foldr push (n, [], []) still_to_save + where + push r (n, assigs, regs) + = (n', assig : assigs, (r,(r,n')) : regs) + where + n' = n + localRegBytes dflags r + assig = CmmStore (CmmStackSlot Old n') + (CmmReg (CmmLocal r)) + + trim_sp + | not (null push_regs) = push_sp + | otherwise + = plusW dflags n (- length (takeWhile isEmpty save_stack)) + + final_regs = regs1 `addListToUFM` push_regs + `addListToUFM` save_regs + + in + -- XXX should be an assert + if ( n /= max sp0 ret_off ) then pprPanic "allocate" (ppr n <+> ppr sp0 <+> ppr ret_off) else + + if (trim_sp .&. (wORD_SIZE dflags - 1)) /= 0 then pprPanic "allocate2" (ppr trim_sp <+> ppr final_regs <+> ppr push_sp) else + + ( stackmap { sm_regs = final_regs , sm_sp = trim_sp } + , push_assigs ++ save_assigs ) + + +-- ----------------------------------------------------------------------------- +-- Manifesting Sp + +-- | Manifest Sp: turn all the CmmStackSlots into CmmLoads from Sp. The +-- block looks like this: +-- +-- middle_pre -- the middle nodes +-- Sp = Sp + sp_off -- Sp adjustment goes here +-- last -- the last node +-- +-- And we have some extra blocks too (that don't contain Sp adjustments) +-- +-- The adjustment for middle_pre will be different from that for +-- middle_post, because the Sp adjustment intervenes. +-- +manifestSp + :: DynFlags + -> LabelMap StackMap -- StackMaps for other blocks + -> StackMap -- StackMap for this block + -> ByteOff -- Sp on entry to the block + -> ByteOff -- SpHigh + -> CmmNode C O -- first node + -> [CmmNode O O] -- middle + -> ByteOff -- sp_off + -> CmmNode O C -- last node + -> [CmmBlock] -- new blocks + -> [CmmBlock] -- final blocks with Sp manifest + +manifestSp dflags stackmaps stack0 sp0 sp_high + first middle_pre sp_off last fixup_blocks + = final_block : fixup_blocks' + where + area_off = getAreaOff stackmaps + + adj_pre_sp, adj_post_sp :: CmmNode e x -> CmmNode e x + adj_pre_sp = mapExpDeep (areaToSp dflags sp0 sp_high area_off) + adj_post_sp = mapExpDeep (areaToSp dflags (sp0 - sp_off) sp_high area_off) + + final_middle = maybeAddSpAdj dflags sp0 sp_off + . 
blockFromList
+                   . map adj_pre_sp
+                   . elimStackStores stack0 stackmaps area_off
+                   $ middle_pre
+      final_last    = optStackCheck (adj_post_sp last)
+
+      final_block   = blockJoin first final_middle final_last
+
+      fixup_blocks' = map (mapBlock3' (id, adj_post_sp, id)) fixup_blocks
+
+getAreaOff :: LabelMap StackMap -> (Area -> StackLoc)
+getAreaOff _ Old = 0
+getAreaOff stackmaps (Young l) =
+  case mapLookup l stackmaps of
+    Just sm -> sm_sp sm - sm_args sm
+    Nothing -> pprPanic "getAreaOff" (ppr l)
+
+
+maybeAddSpAdj
+  :: DynFlags -> ByteOff -> ByteOff -> Block CmmNode O O -> Block CmmNode O O
+maybeAddSpAdj dflags sp0 sp_off block =
+  add_initial_unwind $ add_adj_unwind $ adj block
+  where
+    adj block
+      | sp_off /= 0
+      = block `blockSnoc` CmmAssign spReg (cmmOffset dflags spExpr sp_off)
+      | otherwise = block
+    -- Add unwind pseudo-instruction at the beginning of each block to
+    -- document Sp level for debugging
+    add_initial_unwind block
+      | debugLevel dflags > 0
+      = CmmUnwind [(Sp, Just sp_unwind)] `blockCons` block
+      | otherwise
+      = block
+      where sp_unwind = CmmRegOff spReg (sp0 - wORD_SIZE dflags)
+
+    -- Add unwind pseudo-instruction right after the Sp adjustment
+    -- if there is one.
+    add_adj_unwind block
+      | debugLevel dflags > 0
+      , sp_off /= 0
+      = block `blockSnoc` CmmUnwind [(Sp, Just sp_unwind)]
+      | otherwise
+      = block
+      where sp_unwind = CmmRegOff spReg (sp0 - wORD_SIZE dflags - sp_off)
+
+{- Note [SP old/young offsets]
+
+Sp(L) is the Sp offset on entry to block L relative to the base of the
+OLD area.
+
+SpArgs(L) is the size of the young area for L, i.e. the number of
+arguments.
+
+ - in block L, each reference to [old + N] turns into
+   [Sp + Sp(L) - N]
+
+ - in block L, each reference to [young(L') + N] turns into
+   [Sp + Sp(L) - Sp(L') + SpArgs(L') - N]
+
+ - be careful with the last node of each block: Sp has already been adjusted
+   to be Sp + Sp(L) - Sp(L')
+-}
+
+areaToSp :: DynFlags -> ByteOff -> ByteOff -> (Area -> StackLoc) -> CmmExpr -> CmmExpr
+
+areaToSp dflags sp_old _sp_hwm area_off (CmmStackSlot area n)
+  = cmmOffset dflags spExpr (sp_old - area_off area - n)
+    -- Replace (CmmStackSlot area n) with an offset from Sp
+
+areaToSp dflags _ sp_hwm _ (CmmLit CmmHighStackMark)
+  = mkIntExpr dflags sp_hwm
+    -- Replace CmmHighStackMark with the number of bytes of stack used,
+    -- the sp_hwm. See Note [Stack usage] in GHC.StgToCmm.Heap
+
+areaToSp dflags _ _ _ (CmmMachOp (MO_U_Lt _) args)
+  | falseStackCheck args
+  = zeroExpr dflags
+areaToSp dflags _ _ _ (CmmMachOp (MO_U_Ge _) args)
+  | falseStackCheck args
+  = mkIntExpr dflags 1
+    -- Replace a stack-overflow test that cannot fail with a no-op
+    -- See Note [Always false stack check]
+
+areaToSp _ _ _ _ other = other
+
+-- | Determine whether a stack check cannot fail.
+falseStackCheck :: [CmmExpr] -> Bool
+falseStackCheck [ CmmMachOp (MO_Sub _)
+                    [ CmmRegOff (CmmGlobal Sp) x_off
+                    , CmmLit (CmmInt y_lit _)]
+                , CmmReg (CmmGlobal SpLim)]
+  = fromIntegral x_off >= y_lit
+falseStackCheck _ = False
+
+-- Note [Always false stack check]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+-- We can optimise stack checks of the form
+--
+--   if ((Sp + x) - y < SpLim) then .. else ..
+--
+-- where x and y are non-negative integer byte offsets. Since we know
+-- that SpLim <= Sp (remember the stack grows downwards), this test
+-- must yield False if (x >= y), so we can rewrite the comparison
+-- to False. A subsequent sinking pass will later drop the dead code.
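+--
+-- A concrete instance (illustrative numbers): with x = 64 and y = 32,
+--
+--   (Sp + 64) - 32  =  Sp + 32  >=  Sp  >=  SpLim
+--
+-- so the comparison is always False; falseStackCheck sees
+-- x_off = 64 >= y_lit = 32 and returns True.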
+-- Optimising this away depends on knowing that SpLim <= Sp, so it is +-- really the job of the stack layout algorithm, hence we do it now. +-- +-- The control flow optimiser may negate a conditional to increase +-- the likelihood of a fallthrough if the branch is not taken. But +-- not every conditional is inverted as the control flow optimiser +-- places some requirements on the predecessors of both branch targets. +-- So we better look for the inverted comparison too. + +optStackCheck :: CmmNode O C -> CmmNode O C +optStackCheck n = -- Note [Always false stack check] + case n of + CmmCondBranch (CmmLit (CmmInt 0 _)) _true false _ -> CmmBranch false + CmmCondBranch (CmmLit (CmmInt _ _)) true _false _ -> CmmBranch true + other -> other + + +-- ----------------------------------------------------------------------------- + +-- | Eliminate stores of the form +-- +-- Sp[area+n] = r +-- +-- when we know that r is already in the same slot as Sp[area+n]. We +-- could do this in a later optimisation pass, but that would involve +-- a separate analysis and we already have the information to hand +-- here. It helps clean up some extra stack stores in common cases. +-- +-- Note that we may have to modify the StackMap as we walk through the +-- code using procMiddle, since an assignment to a variable in the +-- StackMap will invalidate its mapping there. +-- +elimStackStores :: StackMap + -> LabelMap StackMap + -> (Area -> ByteOff) + -> [CmmNode O O] + -> [CmmNode O O] +elimStackStores stackmap stackmaps area_off nodes + = go stackmap nodes + where + go _stackmap [] = [] + go stackmap (n:ns) + = case n of + CmmStore (CmmStackSlot area m) (CmmReg (CmmLocal r)) + | Just (_,off) <- lookupUFM (sm_regs stackmap) r + , area_off area + m == off + -> go stackmap ns + _otherwise + -> n : go (procMiddle stackmaps n stackmap) ns + + +-- ----------------------------------------------------------------------------- +-- Update info tables to include stack liveness + + +setInfoTableStackMap :: DynFlags -> LabelMap StackMap -> CmmDecl -> CmmDecl +setInfoTableStackMap dflags stackmaps (CmmProc top_info@TopInfo{..} l v g) + = CmmProc top_info{ info_tbls = mapMapWithKey fix_info info_tbls } l v g + where + fix_info lbl info_tbl@CmmInfoTable{ cit_rep = StackRep _ } = + info_tbl { cit_rep = StackRep (get_liveness lbl) } + fix_info _ other = other + + get_liveness :: BlockId -> Liveness + get_liveness lbl + = case mapLookup lbl stackmaps of + Nothing -> pprPanic "setInfoTableStackMap" (ppr lbl <+> ppr info_tbls) + Just sm -> stackMapToLiveness dflags sm + +setInfoTableStackMap _ _ d = d + + +stackMapToLiveness :: DynFlags -> StackMap -> Liveness +stackMapToLiveness dflags StackMap{..} = + reverse $ Array.elems $ + accumArray (\_ x -> x) True (toWords dflags sm_ret_off + 1, + toWords dflags (sm_sp - sm_args)) live_words + where + live_words = [ (toWords dflags off, False) + | (r,off) <- nonDetEltsUFM sm_regs + , isGcPtrType (localRegType r) ] + -- See Note [Unique Determinism and code generation] + +-- ----------------------------------------------------------------------------- +-- Pass 2 +-- ----------------------------------------------------------------------------- + +insertReloadsAsNeeded + :: DynFlags + -> ProcPointSet + -> LabelMap StackMap + -> BlockId + -> [CmmBlock] + -> UniqSM [CmmBlock] +insertReloadsAsNeeded dflags procpoints final_stackmaps entry blocks = do + toBlockList . 
+    rewriteCmmBwd liveLattice rewriteCC (ofBlockList entry blocks) mapEmpty
+  where
+    rewriteCC :: RewriteFun CmmLocalLive
+    rewriteCC (BlockCC e_node middle0 x_node) fact_base0 = do
+        let entry_label = entryLabel e_node
+            stackmap = case mapLookup entry_label final_stackmaps of
+                Just sm -> sm
+                Nothing -> panic "insertReloadsAsNeeded: rewriteCC: stackmap"
+
+            -- Merge the liveness from successor blocks and analyse the last
+            -- node.
+            joined = gen_kill dflags x_node $!
+                         joinOutFacts liveLattice x_node fact_base0
+            -- What is live at the start of middle0.
+            live_at_middle0 = foldNodesBwdOO (gen_kill dflags) middle0 joined
+
+            -- If this is a procpoint we need to add the reloads, but only if
+            -- they're actually live. Furthermore, nothing is live at the entry
+            -- to a proc point.
+            (middle1, live_with_reloads)
+                | entry_label `setMember` procpoints
+                = let reloads = insertReloads dflags stackmap live_at_middle0
+                  in (foldr blockCons middle0 reloads, emptyRegSet)
+                | otherwise
+                = (middle0, live_at_middle0)
+
+            -- Final liveness for this block.
+            !fact_base2 = mapSingleton entry_label live_with_reloads
+
+        return (BlockCC e_node middle1 x_node, fact_base2)
+
+insertReloads :: DynFlags -> StackMap -> CmmLocalLive -> [CmmNode O O]
+insertReloads dflags stackmap live =
+     [ CmmAssign (CmmLocal reg)
+                 -- This cmmOffset basically corresponds to manifesting
+                 -- @CmmStackSlot Old sp_off@, see Note [SP old/young offsets]
+                 (CmmLoad (cmmOffset dflags spExpr (sp_off - reg_off))
+                          (localRegType reg))
+     | (reg, reg_off) <- stackSlotRegs stackmap
+     , reg `elemRegSet` live
+     ]
+   where
+     sp_off = sm_sp stackmap
+
+-- -----------------------------------------------------------------------------
+-- Lowering safe foreign calls
+
+{-
+Note [Lower safe foreign calls]
+
+We start with
+
+   Sp[young(L1)] = L1
+ ,-----------------------
+ | r1 = foo(x,y,z) returns to L1
+ '-----------------------
+ L1:
+   R1 = r1 -- copyIn, inserted by mkSafeCall
+   ...
+
+the stack layout algorithm will arrange to save and reload everything
+live across the call. Our job now is to expand the call so we get
+
+   Sp[young(L1)] = L1
+ ,-----------------------
+ | SAVE_THREAD_STATE()
+ | token = suspendThread(BaseReg, interruptible)
+ | r = foo(x,y,z)
+ | BaseReg = resumeThread(token)
+ | LOAD_THREAD_STATE()
+ | R1 = r -- copyOut
+ | jump Sp[0]
+ '-----------------------
+ L1:
+   r = R1 -- copyIn, inserted by mkSafeCall
+   ...
+
+Note the copyOut, which saves the results in the places that L1 is
+expecting them (see Note [safe foreign call convention]). Note also
+that the safe foreign call is replaced by an unsafe one in the Cmm graph.
+-}
+
+lowerSafeForeignCall :: DynFlags -> CmmBlock -> UniqSM CmmBlock
+lowerSafeForeignCall dflags block
+  | (entry@(CmmEntry _ tscp), middle, CmmForeignCall { .. }) <- blockSplit block
+  = do
+    -- Both 'id' and 'new_base' are KindNonPtr because they're
+    -- RTS-only objects and are not subject to garbage collection
+    id <- newTemp (bWord dflags)
+    new_base <- newTemp (cmmRegType dflags baseReg)
+    let (caller_save, caller_load) = callerSaveVolatileRegs dflags
+    save_state_code <- saveThreadState dflags
+    load_state_code <- loadThreadState dflags
+    let suspend = save_state_code <*>
+                  caller_save <*>
+                  mkMiddle (callSuspendThread dflags id intrbl)
+        midCall = mkUnsafeCall tgt res args
+        resume  = mkMiddle (callResumeThread new_base id) <*>
+                  -- Assign the result to BaseReg: we
+                  -- might now have a different Capability!
+ mkAssign baseReg (CmmReg (CmmLocal new_base)) <*> + caller_load <*> + load_state_code + + (_, regs, copyout) = + copyOutOflow dflags NativeReturn Jump (Young succ) + (map (CmmReg . CmmLocal) res) + ret_off [] + + -- NB. after resumeThread returns, the top-of-stack probably contains + -- the stack frame for succ, but it might not: if the current thread + -- received an exception during the call, then the stack might be + -- different. Hence we continue by jumping to the top stack frame, + -- not by jumping to succ. + jump = CmmCall { cml_target = entryCode dflags $ + CmmLoad spExpr (bWord dflags) + , cml_cont = Just succ + , cml_args_regs = regs + , cml_args = widthInBytes (wordWidth dflags) + , cml_ret_args = ret_args + , cml_ret_off = ret_off } + + graph' <- lgraphOfAGraph ( suspend <*> + midCall <*> + resume <*> + copyout <*> + mkLast jump, tscp) + + case toBlockList graph' of + [one] -> let (_, middle', last) = blockSplit one + in return (blockJoin entry (middle `blockAppend` middle') last) + _ -> panic "lowerSafeForeignCall0" + + -- Block doesn't end in a safe foreign call: + | otherwise = return block + + +foreignLbl :: FastString -> CmmExpr +foreignLbl name = CmmLit (CmmLabel (mkForeignLabel name Nothing ForeignLabelInExternalPackage IsFunction)) + +callSuspendThread :: DynFlags -> LocalReg -> Bool -> CmmNode O O +callSuspendThread dflags id intrbl = + CmmUnsafeForeignCall + (ForeignTarget (foreignLbl (fsLit "suspendThread")) + (ForeignConvention CCallConv [AddrHint, NoHint] [AddrHint] CmmMayReturn)) + [id] [baseExpr, mkIntExpr dflags (fromEnum intrbl)] + +callResumeThread :: LocalReg -> LocalReg -> CmmNode O O +callResumeThread new_base id = + CmmUnsafeForeignCall + (ForeignTarget (foreignLbl (fsLit "resumeThread")) + (ForeignConvention CCallConv [AddrHint] [AddrHint] CmmMayReturn)) + [new_base] [CmmReg (CmmLocal id)] + +-- ----------------------------------------------------------------------------- + +plusW :: DynFlags -> ByteOff -> WordOff -> ByteOff +plusW dflags b w = b + w * wORD_SIZE dflags + +data StackSlot = Occupied | Empty + -- Occupied: a return address or part of an update frame + +instance Outputable StackSlot where + ppr Occupied = text "XXX" + ppr Empty = text "---" + +dropEmpty :: WordOff -> [StackSlot] -> Maybe [StackSlot] +dropEmpty 0 ss = Just ss +dropEmpty n (Empty : ss) = dropEmpty (n-1) ss +dropEmpty _ _ = Nothing + +isEmpty :: StackSlot -> Bool +isEmpty Empty = True +isEmpty _ = False + +localRegBytes :: DynFlags -> LocalReg -> ByteOff +localRegBytes dflags r + = roundUpToWords dflags (widthInBytes (typeWidth (localRegType r))) + +localRegWords :: DynFlags -> LocalReg -> WordOff +localRegWords dflags = toWords dflags . localRegBytes dflags + +toWords :: DynFlags -> ByteOff -> WordOff +toWords dflags x = x `quot` wORD_SIZE dflags + + +stackSlotRegs :: StackMap -> [(LocalReg, StackLoc)] +stackSlotRegs sm = nonDetEltsUFM (sm_regs sm) + -- See Note [Unique Determinism and code generation] diff --git a/compiler/GHC/Cmm/Lexer.x b/compiler/GHC/Cmm/Lexer.x new file mode 100644 index 0000000000..d8f15b916c --- /dev/null +++ b/compiler/GHC/Cmm/Lexer.x @@ -0,0 +1,368 @@ +----------------------------------------------------------------------------- +-- +-- (c) The University of Glasgow, 2004-2006 +-- +-- Lexer for concrete Cmm. We try to stay close to the C-- spec, but there +-- are a few minor differences: +-- +-- * extra keywords for our macros, and float32/float64 types +-- * global registers (Sp,Hp, etc.) 
+-- +----------------------------------------------------------------------------- + +{ +module GHC.Cmm.Lexer ( + CmmToken(..), cmmlex, + ) where + +import GhcPrelude + +import GHC.Cmm.Expr + +import Lexer +import GHC.Cmm.Monad +import SrcLoc +import UniqFM +import StringBuffer +import FastString +import Ctype +import Util +--import TRACE + +import Data.Word +import Data.Char +} + +$whitechar = [\ \t\n\r\f\v\xa0] -- \xa0 is Unicode no-break space +$white_no_nl = $whitechar # \n + +$ascdigit = 0-9 +$unidigit = \x01 -- Trick Alex into handling Unicode. See alexGetChar. +$digit = [$ascdigit $unidigit] +$octit = 0-7 +$hexit = [$digit A-F a-f] + +$unilarge = \x03 -- Trick Alex into handling Unicode. See alexGetChar. +$asclarge = [A-Z \xc0-\xd6 \xd8-\xde] +$large = [$asclarge $unilarge] + +$unismall = \x04 -- Trick Alex into handling Unicode. See alexGetChar. +$ascsmall = [a-z \xdf-\xf6 \xf8-\xff] +$small = [$ascsmall $unismall \_] + +$namebegin = [$large $small \. \$ \@] +$namechar = [$namebegin $digit] + +@decimal = $digit+ +@octal = $octit+ +@hexadecimal = $hexit+ +@exponent = [eE] [\-\+]? @decimal + +@floating_point = @decimal \. @decimal @exponent? | @decimal @exponent + +@escape = \\ ([abfnrt\\\'\"\?] | x $hexit{1,2} | $octit{1,3}) +@strchar = ($printable # [\"\\]) | @escape + +cmm :- + +$white_no_nl+ ; +^\# pragma .* \n ; -- Apple GCC 3.3 CPP generates pragmas in its output + +^\# (line)? { begin line_prag } + +-- single-line line pragmas, of the form +-- # <line> "<file>" <extra-stuff> \n +<line_prag> $digit+ { setLine line_prag1 } +<line_prag1> \" [^\"]* \" { setFile line_prag2 } +<line_prag2> .* { pop } + +<0> { + \n ; + + [\:\;\{\}\[\]\(\)\=\`\~\/\*\%\-\+\&\^\|\>\<\,\!] { special_char } + + ".." { kw CmmT_DotDot } + "::" { kw CmmT_DoubleColon } + ">>" { kw CmmT_Shr } + "<<" { kw CmmT_Shl } + ">=" { kw CmmT_Ge } + "<=" { kw CmmT_Le } + "==" { kw CmmT_Eq } + "!=" { kw CmmT_Ne } + "&&" { kw CmmT_BoolAnd } + "||" { kw CmmT_BoolOr } + + "True" { kw CmmT_True } + "False" { kw CmmT_False } + "likely" { kw CmmT_likely} + + P@decimal { global_regN (\n -> VanillaReg n VGcPtr) } + R@decimal { global_regN (\n -> VanillaReg n VNonGcPtr) } + F@decimal { global_regN FloatReg } + D@decimal { global_regN DoubleReg } + L@decimal { global_regN LongReg } + Sp { global_reg Sp } + SpLim { global_reg SpLim } + Hp { global_reg Hp } + HpLim { global_reg HpLim } + CCCS { global_reg CCCS } + CurrentTSO { global_reg CurrentTSO } + CurrentNursery { global_reg CurrentNursery } + HpAlloc { global_reg HpAlloc } + BaseReg { global_reg BaseReg } + MachSp { global_reg MachSp } + UnwindReturnReg { global_reg UnwindReturnReg } + + $namebegin $namechar* { name } + + 0 @octal { tok_octal } + @decimal { tok_decimal } + 0[xX] @hexadecimal { tok_hexadecimal } + @floating_point { strtoken tok_float } + + \" @strchar* \" { strtoken tok_string } +} + +{ +data CmmToken + = CmmT_SpecChar Char + | CmmT_DotDot + | CmmT_DoubleColon + | CmmT_Shr + | CmmT_Shl + | CmmT_Ge + | CmmT_Le + | CmmT_Eq + | CmmT_Ne + | CmmT_BoolAnd + | CmmT_BoolOr + | CmmT_CLOSURE + | CmmT_INFO_TABLE + | CmmT_INFO_TABLE_RET + | CmmT_INFO_TABLE_FUN + | CmmT_INFO_TABLE_CONSTR + | CmmT_INFO_TABLE_SELECTOR + | CmmT_else + | CmmT_export + | CmmT_section + | CmmT_goto + | CmmT_if + | CmmT_call + | CmmT_jump + | CmmT_foreign + | CmmT_never + | CmmT_prim + | CmmT_reserve + | CmmT_return + | CmmT_returns + | CmmT_import + | CmmT_switch + | CmmT_case + | CmmT_default + | CmmT_push + | CmmT_unwind + | CmmT_bits8 + | CmmT_bits16 + | CmmT_bits32 + | CmmT_bits64 + | 
CmmT_bits128 + | CmmT_bits256 + | CmmT_bits512 + | CmmT_float32 + | CmmT_float64 + | CmmT_gcptr + | CmmT_GlobalReg GlobalReg + | CmmT_Name FastString + | CmmT_String String + | CmmT_Int Integer + | CmmT_Float Rational + | CmmT_EOF + | CmmT_False + | CmmT_True + | CmmT_likely + deriving (Show) + +-- ----------------------------------------------------------------------------- +-- Lexer actions + +type Action = RealSrcSpan -> StringBuffer -> Int -> PD (RealLocated CmmToken) + +begin :: Int -> Action +begin code _span _str _len = do liftP (pushLexState code); lexToken + +pop :: Action +pop _span _buf _len = liftP popLexState >> lexToken + +special_char :: Action +special_char span buf _len = return (L span (CmmT_SpecChar (currentChar buf))) + +kw :: CmmToken -> Action +kw tok span _buf _len = return (L span tok) + +global_regN :: (Int -> GlobalReg) -> Action +global_regN con span buf len + = return (L span (CmmT_GlobalReg (con (fromIntegral n)))) + where buf' = stepOn buf + n = parseUnsignedInteger buf' (len-1) 10 octDecDigit + +global_reg :: GlobalReg -> Action +global_reg r span _buf _len = return (L span (CmmT_GlobalReg r)) + +strtoken :: (String -> CmmToken) -> Action +strtoken f span buf len = + return (L span $! (f $! lexemeToString buf len)) + +name :: Action +name span buf len = + case lookupUFM reservedWordsFM fs of + Just tok -> return (L span tok) + Nothing -> return (L span (CmmT_Name fs)) + where + fs = lexemeToFastString buf len + +reservedWordsFM = listToUFM $ + map (\(x, y) -> (mkFastString x, y)) [ + ( "CLOSURE", CmmT_CLOSURE ), + ( "INFO_TABLE", CmmT_INFO_TABLE ), + ( "INFO_TABLE_RET", CmmT_INFO_TABLE_RET ), + ( "INFO_TABLE_FUN", CmmT_INFO_TABLE_FUN ), + ( "INFO_TABLE_CONSTR", CmmT_INFO_TABLE_CONSTR ), + ( "INFO_TABLE_SELECTOR",CmmT_INFO_TABLE_SELECTOR ), + ( "else", CmmT_else ), + ( "export", CmmT_export ), + ( "section", CmmT_section ), + ( "goto", CmmT_goto ), + ( "if", CmmT_if ), + ( "call", CmmT_call ), + ( "jump", CmmT_jump ), + ( "foreign", CmmT_foreign ), + ( "never", CmmT_never ), + ( "prim", CmmT_prim ), + ( "reserve", CmmT_reserve ), + ( "return", CmmT_return ), + ( "returns", CmmT_returns ), + ( "import", CmmT_import ), + ( "switch", CmmT_switch ), + ( "case", CmmT_case ), + ( "default", CmmT_default ), + ( "push", CmmT_push ), + ( "unwind", CmmT_unwind ), + ( "bits8", CmmT_bits8 ), + ( "bits16", CmmT_bits16 ), + ( "bits32", CmmT_bits32 ), + ( "bits64", CmmT_bits64 ), + ( "bits128", CmmT_bits128 ), + ( "bits256", CmmT_bits256 ), + ( "bits512", CmmT_bits512 ), + ( "float32", CmmT_float32 ), + ( "float64", CmmT_float64 ), +-- New forms + ( "b8", CmmT_bits8 ), + ( "b16", CmmT_bits16 ), + ( "b32", CmmT_bits32 ), + ( "b64", CmmT_bits64 ), + ( "b128", CmmT_bits128 ), + ( "b256", CmmT_bits256 ), + ( "b512", CmmT_bits512 ), + ( "f32", CmmT_float32 ), + ( "f64", CmmT_float64 ), + ( "gcptr", CmmT_gcptr ), + ( "likely", CmmT_likely), + ( "True", CmmT_True ), + ( "False", CmmT_False ) + ] + +tok_decimal span buf len + = return (L span (CmmT_Int $! parseUnsignedInteger buf len 10 octDecDigit)) + +tok_octal span buf len + = return (L span (CmmT_Int $! parseUnsignedInteger (offsetBytes 1 buf) (len-1) 8 octDecDigit)) + +tok_hexadecimal span buf len + = return (L span (CmmT_Int $! parseUnsignedInteger (offsetBytes 2 buf) (len-2) 16 hexDigit)) + +tok_float str = CmmT_Float $! 
readRational str + +tok_string str = CmmT_String (read str) + -- urk, not quite right, but it'll do for now + +-- ----------------------------------------------------------------------------- +-- Line pragmas + +setLine :: Int -> Action +setLine code span buf len = do + let line = parseUnsignedInteger buf len 10 octDecDigit + liftP $ do + setSrcLoc (mkRealSrcLoc (srcSpanFile span) (fromIntegral line - 1) 1) + -- subtract one: the line number refers to the *following* line + -- trace ("setLine " ++ show line) $ do + popLexState >> pushLexState code + lexToken + +setFile :: Int -> Action +setFile code span buf len = do + let file = lexemeToFastString (stepOn buf) (len-2) + liftP $ do + setSrcLoc (mkRealSrcLoc file (srcSpanEndLine span) (srcSpanEndCol span)) + popLexState >> pushLexState code + lexToken + +-- ----------------------------------------------------------------------------- +-- This is the top-level function: called from the parser each time a +-- new token is to be read from the input. + +cmmlex :: (Located CmmToken -> PD a) -> PD a +cmmlex cont = do + (L span tok) <- lexToken + --trace ("token: " ++ show tok) $ do + cont (L (RealSrcSpan span) tok) + +lexToken :: PD (RealLocated CmmToken) +lexToken = do + inp@(loc1,buf) <- getInput + sc <- liftP getLexState + case alexScan inp sc of + AlexEOF -> do let span = mkRealSrcSpan loc1 loc1 + liftP (setLastToken span 0) + return (L span CmmT_EOF) + AlexError (loc2,_) -> liftP $ failLocMsgP loc1 loc2 "lexical error" + AlexSkip inp2 _ -> do + setInput inp2 + lexToken + AlexToken inp2@(end,_buf2) len t -> do + setInput inp2 + let span = mkRealSrcSpan loc1 end + span `seq` liftP (setLastToken span len) + t span buf len + +-- ----------------------------------------------------------------------------- +-- Monad stuff + +-- Stuff that Alex needs to know about our input type: +type AlexInput = (RealSrcLoc,StringBuffer) + +alexInputPrevChar :: AlexInput -> Char +alexInputPrevChar (_,s) = prevChar s '\n' + +-- backwards compatibility for Alex 2.x +alexGetChar :: AlexInput -> Maybe (Char,AlexInput) +alexGetChar inp = case alexGetByte inp of + Nothing -> Nothing + Just (b,i) -> c `seq` Just (c,i) + where c = chr $ fromIntegral b + +alexGetByte :: AlexInput -> Maybe (Word8,AlexInput) +alexGetByte (loc,s) + | atEnd s = Nothing + | otherwise = b `seq` loc' `seq` s' `seq` Just (b, (loc', s')) + where c = currentChar s + b = fromIntegral $ ord $ c + loc' = advanceSrcLoc loc c + s' = stepOn s + +getInput :: PD AlexInput +getInput = PD $ \_ s@PState{ loc=l, buffer=b } -> POk s (l,b) + +setInput :: AlexInput -> PD () +setInput (l,b) = PD $ \_ s -> POk s{ loc=l, buffer=b } () +} diff --git a/compiler/GHC/Cmm/Lint.hs b/compiler/GHC/Cmm/Lint.hs new file mode 100644 index 0000000000..d70fed3b9e --- /dev/null +++ b/compiler/GHC/Cmm/Lint.hs @@ -0,0 +1,261 @@ +----------------------------------------------------------------------------- +-- +-- (c) The University of Glasgow 2011 +-- +-- CmmLint: checking the correctness of Cmm statements and expressions +-- +----------------------------------------------------------------------------- +{-# LANGUAGE DeriveFunctor #-} +{-# LANGUAGE GADTs #-} +module GHC.Cmm.Lint ( + cmmLint, cmmLintGraph + ) where + +import GhcPrelude + +import GHC.Cmm.Dataflow.Block +import GHC.Cmm.Dataflow.Collections +import GHC.Cmm.Dataflow.Graph +import GHC.Cmm.Dataflow.Label +import GHC.Cmm +import GHC.Cmm.Utils +import GHC.Cmm.Liveness +import GHC.Cmm.Switch (switchTargetsToList) +import GHC.Cmm.Ppr () -- For Outputable instances +import 
Outputable +import DynFlags + +import Control.Monad (ap) + +-- Things to check: +-- - invariant on CmmBlock in GHC.Cmm.Expr (see comment there) +-- - check for branches to blocks that don't exist +-- - check types + +-- ----------------------------------------------------------------------------- +-- Exported entry points: + +cmmLint :: (Outputable d, Outputable h) + => DynFlags -> GenCmmGroup d h CmmGraph -> Maybe SDoc +cmmLint dflags tops = runCmmLint dflags (mapM_ (lintCmmDecl dflags)) tops + +cmmLintGraph :: DynFlags -> CmmGraph -> Maybe SDoc +cmmLintGraph dflags g = runCmmLint dflags (lintCmmGraph dflags) g + +runCmmLint :: Outputable a => DynFlags -> (a -> CmmLint b) -> a -> Maybe SDoc +runCmmLint dflags l p = + case unCL (l p) dflags of + Left err -> Just (vcat [text "Cmm lint error:", + nest 2 err, + text "Program was:", + nest 2 (ppr p)]) + Right _ -> Nothing + +lintCmmDecl :: DynFlags -> GenCmmDecl h i CmmGraph -> CmmLint () +lintCmmDecl dflags (CmmProc _ lbl _ g) + = addLintInfo (text "in proc " <> ppr lbl) $ lintCmmGraph dflags g +lintCmmDecl _ (CmmData {}) + = return () + + +lintCmmGraph :: DynFlags -> CmmGraph -> CmmLint () +lintCmmGraph dflags g = + cmmLocalLiveness dflags g `seq` mapM_ (lintCmmBlock labels) blocks + -- cmmLiveness throws an error if there are registers + -- live on entry to the graph (i.e. undefined + -- variables) + where + blocks = toBlockList g + labels = setFromList (map entryLabel blocks) + + +lintCmmBlock :: LabelSet -> CmmBlock -> CmmLint () +lintCmmBlock labels block + = addLintInfo (text "in basic block " <> ppr (entryLabel block)) $ do + let (_, middle, last) = blockSplit block + mapM_ lintCmmMiddle (blockToList middle) + lintCmmLast labels last + +-- ----------------------------------------------------------------------------- +-- lintCmmExpr + +-- Checks whether a CmmExpr is "type-correct", and check for obvious-looking +-- byte/word mismatches. + +lintCmmExpr :: CmmExpr -> CmmLint CmmType +lintCmmExpr (CmmLoad expr rep) = do + _ <- lintCmmExpr expr + -- Disabled, if we have the inlining phase before the lint phase, + -- we can have funny offsets due to pointer tagging. -- EZY + -- when (widthInBytes (typeWidth rep) >= wORD_SIZE) $ + -- cmmCheckWordAddress expr + return rep +lintCmmExpr expr@(CmmMachOp op args) = do + dflags <- getDynFlags + tys <- mapM lintCmmExpr args + if map (typeWidth . cmmExprType dflags) args == machOpArgReps dflags op + then cmmCheckMachOp op args tys + else cmmLintMachOpErr expr (map (cmmExprType dflags) args) (machOpArgReps dflags op) +lintCmmExpr (CmmRegOff reg offset) + = do dflags <- getDynFlags + let rep = typeWidth (cmmRegType dflags reg) + lintCmmExpr (CmmMachOp (MO_Add rep) + [CmmReg reg, CmmLit (CmmInt (fromIntegral offset) rep)]) +lintCmmExpr expr = + do dflags <- getDynFlags + return (cmmExprType dflags expr) + +-- Check for some common byte/word mismatches (eg. Sp + 1) +cmmCheckMachOp :: MachOp -> [CmmExpr] -> [CmmType] -> CmmLint CmmType +cmmCheckMachOp op [lit@(CmmLit (CmmInt { })), reg@(CmmReg _)] tys + = cmmCheckMachOp op [reg, lit] tys +cmmCheckMachOp op _ tys + = do dflags <- getDynFlags + return (machOpResultType dflags op tys) + +{- +isOffsetOp :: MachOp -> Bool +isOffsetOp (MO_Add _) = True +isOffsetOp (MO_Sub _) = True +isOffsetOp _ = False + +-- This expression should be an address from which a word can be loaded: +-- check for funny-looking sub-word offsets. 
+_cmmCheckWordAddress :: CmmExpr -> CmmLint () +_cmmCheckWordAddress e@(CmmMachOp op [arg, CmmLit (CmmInt i _)]) + | isOffsetOp op && notNodeReg arg && i `rem` fromIntegral (wORD_SIZE dflags) /= 0 + = cmmLintDubiousWordOffset e +_cmmCheckWordAddress e@(CmmMachOp op [CmmLit (CmmInt i _), arg]) + | isOffsetOp op && notNodeReg arg && i `rem` fromIntegral (wORD_SIZE dflags) /= 0 + = cmmLintDubiousWordOffset e +_cmmCheckWordAddress _ + = return () + +-- No warnings for unaligned arithmetic with the node register, +-- which is used to extract fields from tagged constructor closures. +notNodeReg :: CmmExpr -> Bool +notNodeReg (CmmReg reg) | reg == nodeReg = False +notNodeReg _ = True +-} + +lintCmmMiddle :: CmmNode O O -> CmmLint () +lintCmmMiddle node = case node of + CmmComment _ -> return () + CmmTick _ -> return () + CmmUnwind{} -> return () + + CmmAssign reg expr -> do + dflags <- getDynFlags + erep <- lintCmmExpr expr + let reg_ty = cmmRegType dflags reg + if (erep `cmmEqType_ignoring_ptrhood` reg_ty) + then return () + else cmmLintAssignErr (CmmAssign reg expr) erep reg_ty + + CmmStore l r -> do + _ <- lintCmmExpr l + _ <- lintCmmExpr r + return () + + CmmUnsafeForeignCall target _formals actuals -> do + lintTarget target + mapM_ lintCmmExpr actuals + + +lintCmmLast :: LabelSet -> CmmNode O C -> CmmLint () +lintCmmLast labels node = case node of + CmmBranch id -> checkTarget id + + CmmCondBranch e t f _ -> do + dflags <- getDynFlags + mapM_ checkTarget [t,f] + _ <- lintCmmExpr e + checkCond dflags e + + CmmSwitch e ids -> do + dflags <- getDynFlags + mapM_ checkTarget $ switchTargetsToList ids + erep <- lintCmmExpr e + if (erep `cmmEqType_ignoring_ptrhood` bWord dflags) + then return () + else cmmLintErr (text "switch scrutinee is not a word: " <> + ppr e <> text " :: " <> ppr erep) + + CmmCall { cml_target = target, cml_cont = cont } -> do + _ <- lintCmmExpr target + maybe (return ()) checkTarget cont + + CmmForeignCall tgt _ args succ _ _ _ -> do + lintTarget tgt + mapM_ lintCmmExpr args + checkTarget succ + where + checkTarget id + | setMember id labels = return () + | otherwise = cmmLintErr (text "Branch to nonexistent id" <+> ppr id) + + +lintTarget :: ForeignTarget -> CmmLint () +lintTarget (ForeignTarget e _) = lintCmmExpr e >> return () +lintTarget (PrimTarget {}) = return () + + +checkCond :: DynFlags -> CmmExpr -> CmmLint () +checkCond _ (CmmMachOp mop _) | isComparisonMachOp mop = return () +checkCond dflags (CmmLit (CmmInt x t)) | x == 0 || x == 1, t == wordWidth dflags = return () -- constant values +checkCond _ expr + = cmmLintErr (hang (text "expression is not a conditional:") 2 + (ppr expr)) + +-- ----------------------------------------------------------------------------- +-- CmmLint monad + +-- just a basic error monad: + +newtype CmmLint a = CmmLint { unCL :: DynFlags -> Either SDoc a } + deriving (Functor) + +instance Applicative CmmLint where + pure a = CmmLint (\_ -> Right a) + (<*>) = ap + +instance Monad CmmLint where + CmmLint m >>= k = CmmLint $ \dflags -> + case m dflags of + Left e -> Left e + Right a -> unCL (k a) dflags + +instance HasDynFlags CmmLint where + getDynFlags = CmmLint (\dflags -> Right dflags) + +cmmLintErr :: SDoc -> CmmLint a +cmmLintErr msg = CmmLint (\_ -> Left msg) + +addLintInfo :: SDoc -> CmmLint a -> CmmLint a +addLintInfo info thing = CmmLint $ \dflags -> + case unCL thing dflags of + Left err -> Left (hang info 2 err) + Right a -> Right a + +cmmLintMachOpErr :: CmmExpr -> [CmmType] -> [Width] -> CmmLint a +cmmLintMachOpErr expr argsRep 
opExpectsRep + = cmmLintErr (text "in MachOp application: " $$ + nest 2 (ppr expr) $$ + (text "op is expecting: " <+> ppr opExpectsRep) $$ + (text "arguments provide: " <+> ppr argsRep)) + +cmmLintAssignErr :: CmmNode e x -> CmmType -> CmmType -> CmmLint a +cmmLintAssignErr stmt e_ty r_ty + = cmmLintErr (text "in assignment: " $$ + nest 2 (vcat [ppr stmt, + text "Reg ty:" <+> ppr r_ty, + text "Rhs ty:" <+> ppr e_ty])) + + +{- +cmmLintDubiousWordOffset :: CmmExpr -> CmmLint a +cmmLintDubiousWordOffset expr + = cmmLintErr (text "offset is not a multiple of words: " $$ + nest 2 (ppr expr)) +-} + diff --git a/compiler/GHC/Cmm/Liveness.hs b/compiler/GHC/Cmm/Liveness.hs new file mode 100644 index 0000000000..2b598f52e5 --- /dev/null +++ b/compiler/GHC/Cmm/Liveness.hs @@ -0,0 +1,93 @@ +{-# LANGUAGE BangPatterns #-} +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE GADTs #-} +{-# LANGUAGE ScopedTypeVariables #-} + +module GHC.Cmm.Liveness + ( CmmLocalLive + , cmmLocalLiveness + , cmmGlobalLiveness + , liveLattice + , gen_kill + ) +where + +import GhcPrelude + +import DynFlags +import GHC.Cmm.BlockId +import GHC.Cmm +import GHC.Cmm.Ppr.Expr () -- For Outputable instances +import GHC.Cmm.Dataflow.Block +import GHC.Cmm.Dataflow.Collections +import GHC.Cmm.Dataflow +import GHC.Cmm.Dataflow.Label + +import Maybes +import Outputable + +----------------------------------------------------------------------------- +-- Calculating what variables are live on entry to a basic block +----------------------------------------------------------------------------- + +-- | The variables live on entry to a block +type CmmLive r = RegSet r +type CmmLocalLive = CmmLive LocalReg + +-- | The dataflow lattice +liveLattice :: Ord r => DataflowLattice (CmmLive r) +{-# SPECIALIZE liveLattice :: DataflowLattice (CmmLive LocalReg) #-} +{-# SPECIALIZE liveLattice :: DataflowLattice (CmmLive GlobalReg) #-} +liveLattice = DataflowLattice emptyRegSet add + where + add (OldFact old) (NewFact new) = + let !join = plusRegSet old new + in changedIf (sizeRegSet join > sizeRegSet old) join + +-- | A mapping from block labels to the variables live on entry +type BlockEntryLiveness r = LabelMap (CmmLive r) + +----------------------------------------------------------------------------- +-- | Calculated liveness info for a CmmGraph +----------------------------------------------------------------------------- + +cmmLocalLiveness :: DynFlags -> CmmGraph -> BlockEntryLiveness LocalReg +cmmLocalLiveness dflags graph = + check $ analyzeCmmBwd liveLattice (xferLive dflags) graph mapEmpty + where + entry = g_entry graph + check facts = + noLiveOnEntry entry (expectJust "check" $ mapLookup entry facts) facts + +cmmGlobalLiveness :: DynFlags -> CmmGraph -> BlockEntryLiveness GlobalReg +cmmGlobalLiveness dflags graph = + analyzeCmmBwd liveLattice (xferLive dflags) graph mapEmpty + +-- | On entry to the procedure, there had better not be any LocalReg's live-in. +noLiveOnEntry :: BlockId -> CmmLive LocalReg -> a -> a +noLiveOnEntry bid in_fact x = + if nullRegSet in_fact then x + else pprPanic "LocalReg's live-in to graph" (ppr bid <+> ppr in_fact) + +gen_kill + :: (DefinerOfRegs r n, UserOfRegs r n) + => DynFlags -> n -> CmmLive r -> CmmLive r +gen_kill dflags node set = + let !afterKill = foldRegsDefd dflags deleteFromRegSet set node + in foldRegsUsed dflags extendRegSet afterKill node +{-# INLINE gen_kill #-} + +xferLive + :: forall r. 
+ ( UserOfRegs r (CmmNode O O) + , DefinerOfRegs r (CmmNode O O) + , UserOfRegs r (CmmNode O C) + , DefinerOfRegs r (CmmNode O C) + ) + => DynFlags -> TransferFun (CmmLive r) +xferLive dflags (BlockCC eNode middle xNode) fBase = + let joined = gen_kill dflags xNode $! joinOutFacts liveLattice xNode fBase + !result = foldNodesBwdOO (gen_kill dflags) middle joined + in mapSingleton (entryLabel eNode) result +{-# SPECIALIZE xferLive :: DynFlags -> TransferFun (CmmLive LocalReg) #-} +{-# SPECIALIZE xferLive :: DynFlags -> TransferFun (CmmLive GlobalReg) #-} diff --git a/compiler/GHC/Cmm/MachOp.hs b/compiler/GHC/Cmm/MachOp.hs new file mode 100644 index 0000000000..234001545c --- /dev/null +++ b/compiler/GHC/Cmm/MachOp.hs @@ -0,0 +1,664 @@ +module GHC.Cmm.MachOp + ( MachOp(..) + , pprMachOp, isCommutableMachOp, isAssociativeMachOp + , isComparisonMachOp, maybeIntComparison, machOpResultType + , machOpArgReps, maybeInvertComparison, isFloatComparison + + -- MachOp builders + , mo_wordAdd, mo_wordSub, mo_wordEq, mo_wordNe,mo_wordMul, mo_wordSQuot + , mo_wordSRem, mo_wordSNeg, mo_wordUQuot, mo_wordURem + , mo_wordSGe, mo_wordSLe, mo_wordSGt, mo_wordSLt, mo_wordUGe + , mo_wordULe, mo_wordUGt, mo_wordULt + , mo_wordAnd, mo_wordOr, mo_wordXor, mo_wordNot + , mo_wordShl, mo_wordSShr, mo_wordUShr + , mo_u_8To32, mo_s_8To32, mo_u_16To32, mo_s_16To32 + , mo_u_8ToWord, mo_s_8ToWord, mo_u_16ToWord, mo_s_16ToWord + , mo_u_32ToWord, mo_s_32ToWord + , mo_32To8, mo_32To16, mo_WordTo8, mo_WordTo16, mo_WordTo32, mo_WordTo64 + + -- CallishMachOp + , CallishMachOp(..), callishMachOpHints + , pprCallishMachOp + , machOpMemcpyishAlign + + -- Atomic read-modify-write + , AtomicMachOp(..) + ) +where + +import GhcPrelude + +import GHC.Cmm.Type +import Outputable +import DynFlags + +----------------------------------------------------------------------------- +-- MachOp +----------------------------------------------------------------------------- + +{- | +Machine-level primops; ones which we can reasonably delegate to the +native code generators to handle. + +Most operations are parameterised by the 'Width' that they operate on. +Some operations have separate signed and unsigned versions, and float +and integer versions. +-} + +data MachOp + -- Integer operations (insensitive to signed/unsigned) + = MO_Add Width + | MO_Sub Width + | MO_Eq Width + | MO_Ne Width + | MO_Mul Width -- low word of multiply + + -- Signed multiply/divide + | MO_S_MulMayOflo Width -- nonzero if signed multiply overflows + | MO_S_Quot Width -- signed / (same semantics as IntQuotOp) + | MO_S_Rem Width -- signed % (same semantics as IntRemOp) + | MO_S_Neg Width -- unary - + + -- Unsigned multiply/divide + | MO_U_MulMayOflo Width -- nonzero if unsigned multiply overflows + | MO_U_Quot Width -- unsigned / (same semantics as WordQuotOp) + | MO_U_Rem Width -- unsigned % (same semantics as WordRemOp) + + -- Signed comparisons + | MO_S_Ge Width + | MO_S_Le Width + | MO_S_Gt Width + | MO_S_Lt Width + + -- Unsigned comparisons + | MO_U_Ge Width + | MO_U_Le Width + | MO_U_Gt Width + | MO_U_Lt Width + + -- Floating point arithmetic + | MO_F_Add Width + | MO_F_Sub Width + | MO_F_Neg Width -- unary - + | MO_F_Mul Width + | MO_F_Quot Width + + -- Floating point comparison + | MO_F_Eq Width + | MO_F_Ne Width + | MO_F_Ge Width + | MO_F_Le Width + | MO_F_Gt Width + | MO_F_Lt Width + + -- Bitwise operations. Not all of these may be supported + -- at all sizes, and only integral Widths are valid. 
+ | MO_And Width + | MO_Or Width + | MO_Xor Width + | MO_Not Width + | MO_Shl Width + | MO_U_Shr Width -- unsigned shift right + | MO_S_Shr Width -- signed shift right + + -- Conversions. Some of these will be NOPs. + -- Floating-point conversions use the signed variant. + | MO_SF_Conv Width Width -- Signed int -> Float + | MO_FS_Conv Width Width -- Float -> Signed int + | MO_SS_Conv Width Width -- Signed int -> Signed int + | MO_UU_Conv Width Width -- unsigned int -> unsigned int + | MO_XX_Conv Width Width -- int -> int; puts no requirements on the + -- contents of upper bits when extending; + -- narrowing is simply truncation; the only + -- expectation is that we can recover the + -- original value by applying the opposite + -- MO_XX_Conv, e.g., + -- MO_XX_CONV W64 W8 (MO_XX_CONV W8 W64 x) + -- is equivalent to just x. + | MO_FF_Conv Width Width -- Float -> Float + + -- Vector element insertion and extraction operations + | MO_V_Insert Length Width -- Insert scalar into vector + | MO_V_Extract Length Width -- Extract scalar from vector + + -- Integer vector operations + | MO_V_Add Length Width + | MO_V_Sub Length Width + | MO_V_Mul Length Width + + -- Signed vector multiply/divide + | MO_VS_Quot Length Width + | MO_VS_Rem Length Width + | MO_VS_Neg Length Width + + -- Unsigned vector multiply/divide + | MO_VU_Quot Length Width + | MO_VU_Rem Length Width + + -- Floating point vector element insertion and extraction operations + | MO_VF_Insert Length Width -- Insert scalar into vector + | MO_VF_Extract Length Width -- Extract scalar from vector + + -- Floating point vector operations + | MO_VF_Add Length Width + | MO_VF_Sub Length Width + | MO_VF_Neg Length Width -- unary negation + | MO_VF_Mul Length Width + | MO_VF_Quot Length Width + + -- Alignment check (for -falignment-sanitisation) + | MO_AlignmentCheck Int Width + deriving (Eq, Show) + +pprMachOp :: MachOp -> SDoc +pprMachOp mo = text (show mo) + + + +-- ----------------------------------------------------------------------------- +-- Some common MachReps + +-- A 'wordRep' is a machine word on the target architecture +-- Specifically, it is the size of an Int#, Word#, Addr# +-- and the unit of allocation on the stack and the heap +-- Any pointer is also guaranteed to be a wordRep. 
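+-- The @mo_word*@ builders below construct word-sized MachOps without
+-- naming the width explicitly. As an illustrative sketch (the @bump@
+-- helper is hypothetical, not code in this module; mkIntExpr comes
+-- from GHC.Cmm.Utils), incrementing a word-sized expression looks like:
+--
+-- > bump :: DynFlags -> CmmExpr -> CmmExpr
+-- > bump dflags e = CmmMachOp (mo_wordAdd dflags) [e, mkIntExpr dflags 1]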
+ +mo_wordAdd, mo_wordSub, mo_wordEq, mo_wordNe,mo_wordMul, mo_wordSQuot + , mo_wordSRem, mo_wordSNeg, mo_wordUQuot, mo_wordURem + , mo_wordSGe, mo_wordSLe, mo_wordSGt, mo_wordSLt, mo_wordUGe + , mo_wordULe, mo_wordUGt, mo_wordULt + , mo_wordAnd, mo_wordOr, mo_wordXor, mo_wordNot, mo_wordShl, mo_wordSShr, mo_wordUShr + , mo_u_8ToWord, mo_s_8ToWord, mo_u_16ToWord, mo_s_16ToWord, mo_u_32ToWord, mo_s_32ToWord + , mo_WordTo8, mo_WordTo16, mo_WordTo32, mo_WordTo64 + :: DynFlags -> MachOp + +mo_u_8To32, mo_s_8To32, mo_u_16To32, mo_s_16To32 + , mo_32To8, mo_32To16 + :: MachOp + +mo_wordAdd dflags = MO_Add (wordWidth dflags) +mo_wordSub dflags = MO_Sub (wordWidth dflags) +mo_wordEq dflags = MO_Eq (wordWidth dflags) +mo_wordNe dflags = MO_Ne (wordWidth dflags) +mo_wordMul dflags = MO_Mul (wordWidth dflags) +mo_wordSQuot dflags = MO_S_Quot (wordWidth dflags) +mo_wordSRem dflags = MO_S_Rem (wordWidth dflags) +mo_wordSNeg dflags = MO_S_Neg (wordWidth dflags) +mo_wordUQuot dflags = MO_U_Quot (wordWidth dflags) +mo_wordURem dflags = MO_U_Rem (wordWidth dflags) + +mo_wordSGe dflags = MO_S_Ge (wordWidth dflags) +mo_wordSLe dflags = MO_S_Le (wordWidth dflags) +mo_wordSGt dflags = MO_S_Gt (wordWidth dflags) +mo_wordSLt dflags = MO_S_Lt (wordWidth dflags) + +mo_wordUGe dflags = MO_U_Ge (wordWidth dflags) +mo_wordULe dflags = MO_U_Le (wordWidth dflags) +mo_wordUGt dflags = MO_U_Gt (wordWidth dflags) +mo_wordULt dflags = MO_U_Lt (wordWidth dflags) + +mo_wordAnd dflags = MO_And (wordWidth dflags) +mo_wordOr dflags = MO_Or (wordWidth dflags) +mo_wordXor dflags = MO_Xor (wordWidth dflags) +mo_wordNot dflags = MO_Not (wordWidth dflags) +mo_wordShl dflags = MO_Shl (wordWidth dflags) +mo_wordSShr dflags = MO_S_Shr (wordWidth dflags) +mo_wordUShr dflags = MO_U_Shr (wordWidth dflags) + +mo_u_8To32 = MO_UU_Conv W8 W32 +mo_s_8To32 = MO_SS_Conv W8 W32 +mo_u_16To32 = MO_UU_Conv W16 W32 +mo_s_16To32 = MO_SS_Conv W16 W32 + +mo_u_8ToWord dflags = MO_UU_Conv W8 (wordWidth dflags) +mo_s_8ToWord dflags = MO_SS_Conv W8 (wordWidth dflags) +mo_u_16ToWord dflags = MO_UU_Conv W16 (wordWidth dflags) +mo_s_16ToWord dflags = MO_SS_Conv W16 (wordWidth dflags) +mo_s_32ToWord dflags = MO_SS_Conv W32 (wordWidth dflags) +mo_u_32ToWord dflags = MO_UU_Conv W32 (wordWidth dflags) + +mo_WordTo8 dflags = MO_UU_Conv (wordWidth dflags) W8 +mo_WordTo16 dflags = MO_UU_Conv (wordWidth dflags) W16 +mo_WordTo32 dflags = MO_UU_Conv (wordWidth dflags) W32 +mo_WordTo64 dflags = MO_UU_Conv (wordWidth dflags) W64 + +mo_32To8 = MO_UU_Conv W32 W8 +mo_32To16 = MO_UU_Conv W32 W16 + + +-- ---------------------------------------------------------------------------- +-- isCommutableMachOp + +{- | +Returns 'True' if the MachOp has commutable arguments. This is used +in the platform-independent Cmm optimisations. + +If in doubt, return 'False'. This generates worse code on the +native routes, but is otherwise harmless. +-} +isCommutableMachOp :: MachOp -> Bool +isCommutableMachOp mop = + case mop of + MO_Add _ -> True + MO_Eq _ -> True + MO_Ne _ -> True + MO_Mul _ -> True + MO_S_MulMayOflo _ -> True + MO_U_MulMayOflo _ -> True + MO_And _ -> True + MO_Or _ -> True + MO_Xor _ -> True + MO_F_Add _ -> True + MO_F_Mul _ -> True + _other -> False + +-- ---------------------------------------------------------------------------- +-- isAssociativeMachOp + +{- | +Returns 'True' if the MachOp is associative (i.e. @(x+y)+z == x+(y+z)@) +This is used in the platform-independent Cmm optimisations. + +If in doubt, return 'False'. 
This generates worse code on the
+native routes, but is otherwise harmless.
+-}
+isAssociativeMachOp :: MachOp -> Bool
+isAssociativeMachOp mop =
+  case mop of
+        MO_Add {} -> True       -- NB: does not include
+        MO_Mul {} -> True       -- floating point!
+        MO_And {} -> True
+        MO_Or  {} -> True
+        MO_Xor {} -> True
+        _other    -> False
+
+
+-- ----------------------------------------------------------------------------
+-- isComparisonMachOp
+
+{- |
+Returns 'True' if the MachOp is a comparison.
+
+If in doubt, return False. This generates worse code on the
+native routes, but is otherwise harmless.
+-}
+isComparisonMachOp :: MachOp -> Bool
+isComparisonMachOp mop =
+  case mop of
+    MO_Eq   _  -> True
+    MO_Ne   _  -> True
+    MO_S_Ge _  -> True
+    MO_S_Le _  -> True
+    MO_S_Gt _  -> True
+    MO_S_Lt _  -> True
+    MO_U_Ge _  -> True
+    MO_U_Le _  -> True
+    MO_U_Gt _  -> True
+    MO_U_Lt _  -> True
+    MO_F_Eq {} -> True
+    MO_F_Ne {} -> True
+    MO_F_Ge {} -> True
+    MO_F_Le {} -> True
+    MO_F_Gt {} -> True
+    MO_F_Lt {} -> True
+    _other     -> False
+
+{- |
+Returns @Just w@ if the operation is an integer comparison with width
+@w@, or @Nothing@ otherwise.
+-}
+maybeIntComparison :: MachOp -> Maybe Width
+maybeIntComparison mop =
+  case mop of
+    MO_Eq   w  -> Just w
+    MO_Ne   w  -> Just w
+    MO_S_Ge w  -> Just w
+    MO_S_Le w  -> Just w
+    MO_S_Gt w  -> Just w
+    MO_S_Lt w  -> Just w
+    MO_U_Ge w  -> Just w
+    MO_U_Le w  -> Just w
+    MO_U_Gt w  -> Just w
+    MO_U_Lt w  -> Just w
+    _ -> Nothing
+
+isFloatComparison :: MachOp -> Bool
+isFloatComparison mop =
+  case mop of
+    MO_F_Eq {} -> True
+    MO_F_Ne {} -> True
+    MO_F_Ge {} -> True
+    MO_F_Le {} -> True
+    MO_F_Gt {} -> True
+    MO_F_Lt {} -> True
+    _other     -> False
+
+-- -----------------------------------------------------------------------------
+-- Inverting conditions
+
+-- Sometimes it's useful to be able to invert the sense of a
+-- condition. Not all conditional tests are invertible: in
+-- particular, floating point conditionals cannot be inverted, because
+-- there exist floating-point values which return False for both senses
+-- of a condition (eg. !(NaN > NaN) && !(NaN /<= NaN)).
+
+maybeInvertComparison :: MachOp -> Maybe MachOp
+maybeInvertComparison op
+  = case op of  -- None of these Just cases include floating point
+        MO_Eq r   -> Just (MO_Ne r)
+        MO_Ne r   -> Just (MO_Eq r)
+        MO_U_Lt r -> Just (MO_U_Ge r)
+        MO_U_Gt r -> Just (MO_U_Le r)
+        MO_U_Le r -> Just (MO_U_Gt r)
+        MO_U_Ge r -> Just (MO_U_Lt r)
+        MO_S_Lt r -> Just (MO_S_Ge r)
+        MO_S_Gt r -> Just (MO_S_Le r)
+        MO_S_Le r -> Just (MO_S_Gt r)
+        MO_S_Ge r -> Just (MO_S_Lt r)
+        _other    -> Nothing
+
+-- ----------------------------------------------------------------------------
+-- machOpResultType
+
+{- |
+Returns the MachRep of the result of a MachOp.
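+
+For example (derived from the cases below), comparisons always return a
+word-sized result (via comparisonResultRep), conversions such as
+@MO_SS_Conv _ to@ return @cmmBits to@, and @MO_Add@ returns the type of
+its first argument, preserving GC-pointer-hood.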
+-} +machOpResultType :: DynFlags -> MachOp -> [CmmType] -> CmmType +machOpResultType dflags mop tys = + case mop of + MO_Add {} -> ty1 -- Preserve GC-ptr-hood + MO_Sub {} -> ty1 -- of first arg + MO_Mul r -> cmmBits r + MO_S_MulMayOflo r -> cmmBits r + MO_S_Quot r -> cmmBits r + MO_S_Rem r -> cmmBits r + MO_S_Neg r -> cmmBits r + MO_U_MulMayOflo r -> cmmBits r + MO_U_Quot r -> cmmBits r + MO_U_Rem r -> cmmBits r + + MO_Eq {} -> comparisonResultRep dflags + MO_Ne {} -> comparisonResultRep dflags + MO_S_Ge {} -> comparisonResultRep dflags + MO_S_Le {} -> comparisonResultRep dflags + MO_S_Gt {} -> comparisonResultRep dflags + MO_S_Lt {} -> comparisonResultRep dflags + + MO_U_Ge {} -> comparisonResultRep dflags + MO_U_Le {} -> comparisonResultRep dflags + MO_U_Gt {} -> comparisonResultRep dflags + MO_U_Lt {} -> comparisonResultRep dflags + + MO_F_Add r -> cmmFloat r + MO_F_Sub r -> cmmFloat r + MO_F_Mul r -> cmmFloat r + MO_F_Quot r -> cmmFloat r + MO_F_Neg r -> cmmFloat r + MO_F_Eq {} -> comparisonResultRep dflags + MO_F_Ne {} -> comparisonResultRep dflags + MO_F_Ge {} -> comparisonResultRep dflags + MO_F_Le {} -> comparisonResultRep dflags + MO_F_Gt {} -> comparisonResultRep dflags + MO_F_Lt {} -> comparisonResultRep dflags + + MO_And {} -> ty1 -- Used for pointer masking + MO_Or {} -> ty1 + MO_Xor {} -> ty1 + MO_Not r -> cmmBits r + MO_Shl r -> cmmBits r + MO_U_Shr r -> cmmBits r + MO_S_Shr r -> cmmBits r + + MO_SS_Conv _ to -> cmmBits to + MO_UU_Conv _ to -> cmmBits to + MO_XX_Conv _ to -> cmmBits to + MO_FS_Conv _ to -> cmmBits to + MO_SF_Conv _ to -> cmmFloat to + MO_FF_Conv _ to -> cmmFloat to + + MO_V_Insert l w -> cmmVec l (cmmBits w) + MO_V_Extract _ w -> cmmBits w + + MO_V_Add l w -> cmmVec l (cmmBits w) + MO_V_Sub l w -> cmmVec l (cmmBits w) + MO_V_Mul l w -> cmmVec l (cmmBits w) + + MO_VS_Quot l w -> cmmVec l (cmmBits w) + MO_VS_Rem l w -> cmmVec l (cmmBits w) + MO_VS_Neg l w -> cmmVec l (cmmBits w) + + MO_VU_Quot l w -> cmmVec l (cmmBits w) + MO_VU_Rem l w -> cmmVec l (cmmBits w) + + MO_VF_Insert l w -> cmmVec l (cmmFloat w) + MO_VF_Extract _ w -> cmmFloat w + + MO_VF_Add l w -> cmmVec l (cmmFloat w) + MO_VF_Sub l w -> cmmVec l (cmmFloat w) + MO_VF_Mul l w -> cmmVec l (cmmFloat w) + MO_VF_Quot l w -> cmmVec l (cmmFloat w) + MO_VF_Neg l w -> cmmVec l (cmmFloat w) + + MO_AlignmentCheck _ _ -> ty1 + where + (ty1:_) = tys + +comparisonResultRep :: DynFlags -> CmmType +comparisonResultRep = bWord -- is it? + + +-- ----------------------------------------------------------------------------- +-- machOpArgReps + +-- | This function is used for debugging only: we can check whether an +-- application of a MachOp is "type-correct" by checking that the MachReps of +-- its arguments are the same as the MachOp expects. This is used when +-- linting a CmmExpr. 
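+--
+-- For example (readable off the table below): both operands of an
+-- addition must have the operation's width, while a shift amount is
+-- always word-sized:
+--
+-- > machOpArgReps dflags (MO_Add W64) == [W64, W64]
+-- > machOpArgReps dflags (MO_Shl W64) == [W64, wordWidth dflags]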
+ +machOpArgReps :: DynFlags -> MachOp -> [Width] +machOpArgReps dflags op = + case op of + MO_Add r -> [r,r] + MO_Sub r -> [r,r] + MO_Eq r -> [r,r] + MO_Ne r -> [r,r] + MO_Mul r -> [r,r] + MO_S_MulMayOflo r -> [r,r] + MO_S_Quot r -> [r,r] + MO_S_Rem r -> [r,r] + MO_S_Neg r -> [r] + MO_U_MulMayOflo r -> [r,r] + MO_U_Quot r -> [r,r] + MO_U_Rem r -> [r,r] + + MO_S_Ge r -> [r,r] + MO_S_Le r -> [r,r] + MO_S_Gt r -> [r,r] + MO_S_Lt r -> [r,r] + + MO_U_Ge r -> [r,r] + MO_U_Le r -> [r,r] + MO_U_Gt r -> [r,r] + MO_U_Lt r -> [r,r] + + MO_F_Add r -> [r,r] + MO_F_Sub r -> [r,r] + MO_F_Mul r -> [r,r] + MO_F_Quot r -> [r,r] + MO_F_Neg r -> [r] + MO_F_Eq r -> [r,r] + MO_F_Ne r -> [r,r] + MO_F_Ge r -> [r,r] + MO_F_Le r -> [r,r] + MO_F_Gt r -> [r,r] + MO_F_Lt r -> [r,r] + + MO_And r -> [r,r] + MO_Or r -> [r,r] + MO_Xor r -> [r,r] + MO_Not r -> [r] + MO_Shl r -> [r, wordWidth dflags] + MO_U_Shr r -> [r, wordWidth dflags] + MO_S_Shr r -> [r, wordWidth dflags] + + MO_SS_Conv from _ -> [from] + MO_UU_Conv from _ -> [from] + MO_XX_Conv from _ -> [from] + MO_SF_Conv from _ -> [from] + MO_FS_Conv from _ -> [from] + MO_FF_Conv from _ -> [from] + + MO_V_Insert l r -> [typeWidth (vec l (cmmBits r)),r,wordWidth dflags] + MO_V_Extract l r -> [typeWidth (vec l (cmmBits r)),wordWidth dflags] + + MO_V_Add _ r -> [r,r] + MO_V_Sub _ r -> [r,r] + MO_V_Mul _ r -> [r,r] + + MO_VS_Quot _ r -> [r,r] + MO_VS_Rem _ r -> [r,r] + MO_VS_Neg _ r -> [r] + + MO_VU_Quot _ r -> [r,r] + MO_VU_Rem _ r -> [r,r] + + MO_VF_Insert l r -> [typeWidth (vec l (cmmFloat r)),r,wordWidth dflags] + MO_VF_Extract l r -> [typeWidth (vec l (cmmFloat r)),wordWidth dflags] + + MO_VF_Add _ r -> [r,r] + MO_VF_Sub _ r -> [r,r] + MO_VF_Mul _ r -> [r,r] + MO_VF_Quot _ r -> [r,r] + MO_VF_Neg _ r -> [r] + + MO_AlignmentCheck _ r -> [r] + +----------------------------------------------------------------------------- +-- CallishMachOp +----------------------------------------------------------------------------- + +-- CallishMachOps tend to be implemented by foreign calls in some backends, +-- so we separate them out. In Cmm, these can only occur in a +-- statement position, in contrast to an ordinary MachOp which can occur +-- anywhere in an expression. +data CallishMachOp + = MO_F64_Pwr + | MO_F64_Sin + | MO_F64_Cos + | MO_F64_Tan + | MO_F64_Sinh + | MO_F64_Cosh + | MO_F64_Tanh + | MO_F64_Asin + | MO_F64_Acos + | MO_F64_Atan + | MO_F64_Asinh + | MO_F64_Acosh + | MO_F64_Atanh + | MO_F64_Log + | MO_F64_Log1P + | MO_F64_Exp + | MO_F64_ExpM1 + | MO_F64_Fabs + | MO_F64_Sqrt + | MO_F32_Pwr + | MO_F32_Sin + | MO_F32_Cos + | MO_F32_Tan + | MO_F32_Sinh + | MO_F32_Cosh + | MO_F32_Tanh + | MO_F32_Asin + | MO_F32_Acos + | MO_F32_Atan + | MO_F32_Asinh + | MO_F32_Acosh + | MO_F32_Atanh + | MO_F32_Log + | MO_F32_Log1P + | MO_F32_Exp + | MO_F32_ExpM1 + | MO_F32_Fabs + | MO_F32_Sqrt + + | MO_UF_Conv Width + + | MO_S_Mul2 Width + | MO_S_QuotRem Width + | MO_U_QuotRem Width + | MO_U_QuotRem2 Width + | MO_Add2 Width + | MO_AddWordC Width + | MO_SubWordC Width + | MO_AddIntC Width + | MO_SubIntC Width + | MO_U_Mul2 Width + + | MO_ReadBarrier + | MO_WriteBarrier + | MO_Touch -- Keep variables live (when using interior pointers) + + -- Prefetch + | MO_Prefetch_Data Int -- Prefetch hint. May change program performance but not + -- program behavior. + -- the Int can be 0-3. Needs to be known at compile time + -- to interact with code generation correctly. 
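+                         -- (The 0-3 levels are presumably meant to mirror
+                         -- the locality argument of C compilers'
+                         -- __builtin_prefetch.)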
+                         -- TODO: add support for prefetch WRITES,
+                         -- currently only exposes prefetch reads, which
+                         -- would cover the majority of use cases in GHC anyway
+
+
+  -- These MachOps are parameterised by the known alignment
+  -- of the destination and source (for memcpy/memmove) pointers.
+  -- This information may be used for optimisation in backends.
+  | MO_Memcpy Int
+  | MO_Memset Int
+  | MO_Memmove Int
+  | MO_Memcmp Int
+
+  | MO_PopCnt Width
+  | MO_Pdep Width
+  | MO_Pext Width
+  | MO_Clz Width
+  | MO_Ctz Width
+
+  | MO_BSwap Width
+  | MO_BRev Width
+
+  -- Atomic read-modify-write.
+  | MO_AtomicRMW Width AtomicMachOp
+  | MO_AtomicRead Width
+  | MO_AtomicWrite Width
+  | MO_Cmpxchg Width
+  deriving (Eq, Show)
+
+-- | The operation to perform atomically.
+data AtomicMachOp =
+   AMO_Add
+ | AMO_Sub
+ | AMO_And
+ | AMO_Nand
+ | AMO_Or
+ | AMO_Xor
+ deriving (Eq, Show)
+
+pprCallishMachOp :: CallishMachOp -> SDoc
+pprCallishMachOp mo = text (show mo)
+
+callishMachOpHints :: CallishMachOp -> ([ForeignHint], [ForeignHint])
+callishMachOpHints op = case op of
+  MO_Memcpy _  -> ([], [AddrHint,AddrHint,NoHint])
+  MO_Memset _  -> ([], [AddrHint,NoHint,NoHint])
+  MO_Memmove _ -> ([], [AddrHint,AddrHint,NoHint])
+  MO_Memcmp _  -> ([], [AddrHint, AddrHint, NoHint])
+  _            -> ([],[])
+  -- empty lists indicate NoHint
+
+-- | The alignment of a 'memcpy'-ish operation.
+machOpMemcpyishAlign :: CallishMachOp -> Maybe Int
+machOpMemcpyishAlign op = case op of
+  MO_Memcpy align  -> Just align
+  MO_Memset align  -> Just align
+  MO_Memmove align -> Just align
+  MO_Memcmp align  -> Just align
+  _                -> Nothing
diff --git a/compiler/GHC/Cmm/Monad.hs b/compiler/GHC/Cmm/Monad.hs
new file mode 100644
index 0000000000..6b8d00a118
--- /dev/null
+++ b/compiler/GHC/Cmm/Monad.hs
@@ -0,0 +1,59 @@
+{-# LANGUAGE CPP #-}
+
+-----------------------------------------------------------------------------
+-- A Parser monad with access to the 'DynFlags'.
+--
+-- The 'P' monad only has access to the subset of 'DynFlags'
+-- required for parsing Haskell.
+
+-- The parser for C-- requires access to a lot more of the 'DynFlags',
+-- so 'PD' provides access to 'DynFlags' via a 'HasDynFlags' instance.
+-----------------------------------------------------------------------------
+module GHC.Cmm.Monad (
+    PD(..)
+  , liftP
+  ) where
+
+import GhcPrelude
+
+import Control.Monad
+import qualified Control.Monad.Fail as MonadFail
+
+import DynFlags
+import Lexer
+
+newtype PD a = PD { unPD :: DynFlags -> PState -> ParseResult a }
+
+instance Functor PD where
+  fmap = liftM
+
+instance Applicative PD where
+  pure = returnPD
+  (<*>) = ap
+
+instance Monad PD where
+  (>>=) = thenPD
+#if !MIN_VERSION_base(4,13,0)
+  fail = MonadFail.fail
+#endif
+
+instance MonadFail.MonadFail PD where
+  fail = failPD
+
+liftP :: P a -> PD a
+liftP (P f) = PD $ \_ s -> f s
+
+returnPD :: a -> PD a
+returnPD = liftP . return
+
+thenPD :: PD a -> (a -> PD b) -> PD b
+(PD m) `thenPD` k = PD $ \d s ->
+        case m d s of
+                POk s1 a   -> unPD (k a) d s1
+                PFailed s1 -> PFailed s1
+
+failPD :: String -> PD a
+failPD = liftP .
fail + +instance HasDynFlags PD where + getDynFlags = PD $ \d s -> POk s d diff --git a/compiler/GHC/Cmm/Node.hs b/compiler/GHC/Cmm/Node.hs new file mode 100644 index 0000000000..bb74647910 --- /dev/null +++ b/compiler/GHC/Cmm/Node.hs @@ -0,0 +1,724 @@ +{-# LANGUAGE BangPatterns #-} +{-# LANGUAGE CPP #-} +{-# LANGUAGE ExplicitForAll #-} +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE FlexibleInstances #-} +{-# LANGUAGE GADTs #-} +{-# LANGUAGE MultiParamTypeClasses #-} +{-# LANGUAGE StandaloneDeriving #-} +{-# LANGUAGE UndecidableInstances #-} +{-# LANGUAGE ScopedTypeVariables #-} + + +-- CmmNode type for representation using Hoopl graphs. + +module GHC.Cmm.Node ( + CmmNode(..), CmmFormal, CmmActual, CmmTickish, + UpdFrameOffset, Convention(..), + ForeignConvention(..), ForeignTarget(..), foreignTargetHints, + CmmReturnInfo(..), + mapExp, mapExpDeep, wrapRecExp, foldExp, foldExpDeep, wrapRecExpf, + mapExpM, mapExpDeepM, wrapRecExpM, mapSuccessors, mapCollectSuccessors, + + -- * Tick scopes + CmmTickScope(..), isTickSubScope, combineTickScopes, + ) where + +import GhcPrelude hiding (succ) + +import GHC.Platform.Regs +import GHC.Cmm.Expr +import GHC.Cmm.Switch +import DynFlags +import FastString +import ForeignCall +import Outputable +import GHC.Runtime.Layout +import CoreSyn (Tickish) +import qualified Unique as U + +import GHC.Cmm.Dataflow.Block +import GHC.Cmm.Dataflow.Graph +import GHC.Cmm.Dataflow.Collections +import GHC.Cmm.Dataflow.Label +import Data.Maybe +import Data.List (tails,sortBy) +import Unique (nonDetCmpUnique) +import Util + + +------------------------ +-- CmmNode + +#define ULabel {-# UNPACK #-} !Label + +data CmmNode e x where + CmmEntry :: ULabel -> CmmTickScope -> CmmNode C O + + CmmComment :: FastString -> CmmNode O O + + -- Tick annotation, covering Cmm code in our tick scope. We only + -- expect non-code @Tickish@ at this point (e.g. @SourceNote@). + -- See Note [CmmTick scoping details] + CmmTick :: !CmmTickish -> CmmNode O O + + -- Unwind pseudo-instruction, encoding stack unwinding + -- instructions for a debugger. This describes how to reconstruct + -- the "old" value of a register if we want to navigate the stack + -- up one frame. Having unwind information for @Sp@ will allow the + -- debugger to "walk" the stack. + -- + -- See Note [What is this unwinding business?] in Debug + CmmUnwind :: [(GlobalReg, Maybe CmmExpr)] -> CmmNode O O + + CmmAssign :: !CmmReg -> !CmmExpr -> CmmNode O O + -- Assign to register + + CmmStore :: !CmmExpr -> !CmmExpr -> CmmNode O O + -- Assign to memory location. Size is + -- given by cmmExprType of the rhs. + + CmmUnsafeForeignCall :: -- An unsafe foreign call; + -- see Note [Foreign calls] + -- Like a "fat machine instruction"; can occur + -- in the middle of a block + ForeignTarget -> -- call target + [CmmFormal] -> -- zero or more results + [CmmActual] -> -- zero or more arguments + CmmNode O O + -- Semantics: clobbers any GlobalRegs for which callerSaves r == True + -- See Note [Unsafe foreign calls clobber caller-save registers] + -- + -- Invariant: the arguments and the ForeignTarget must not + -- mention any registers for which GHC.Platform.callerSaves + -- is True. See Note [Register Parameter Passing]. 
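+      --
+      -- As an illustrative example (shape only, not code from this
+      -- module), a memcpy lowered to a callish MachOp appears as:
+      --   CmmUnsafeForeignCall (PrimTarget (MO_Memcpy align)) [] [dst, src, n]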
+
+  CmmBranch :: ULabel -> CmmNode O C
+                                   -- Goto another block in the same procedure
+
+  CmmCondBranch :: {                 -- conditional branch
+      cml_pred :: CmmExpr,
+      cml_true, cml_false :: ULabel,
+      cml_likely :: Maybe Bool       -- likely result of the conditional,
+                                     -- if known
+  } -> CmmNode O C
+
+  CmmSwitch
+    :: CmmExpr       -- Scrutinee, of some integral type
+    -> SwitchTargets -- Cases. See [Note SwitchTargets]
+    -> CmmNode O C
+
+  CmmCall :: {                -- A native call or tail call
+      cml_target :: CmmExpr,  -- never a CmmPrim to a CallishMachOp!
+
+      cml_cont :: Maybe Label,
+          -- Label of continuation (Nothing for return or tail call)
+          --
+          -- Note [Continuation BlockIds]: these BlockIds are called
+          -- Continuation BlockIds, and are the only BlockIds that can
+          -- occur in CmmExprs, namely as (CmmLit (CmmBlock b)) or
+          -- (CmmStackSlot (Young b) _).
+
+      cml_args_regs :: [GlobalReg],
+          -- The argument GlobalRegs (Rx, Fx, Dx, Lx) that are passed
+          -- to the call. This is essential information for the
+          -- native code generator's register allocator; without
+          -- knowing which GlobalRegs are live it has to assume that
+          -- they are all live. This list should only include
+          -- GlobalRegs that are mapped to real machine registers on
+          -- the target platform.
+
+      cml_args :: ByteOff,
+          -- Byte offset, from the *old* end of the Area associated with
+          -- the Label (if cml_cont = Nothing, then Old area), of
+          -- the youngest outgoing arg. Set the stack pointer to this before
+          -- transferring control.
+          -- (NB: an update frame might also have been stored in the Old
+          -- area, but it'll be in an older part than the args.)
+
+      cml_ret_args :: ByteOff,
+          -- For calls *only*, the byte offset for the youngest returned value.
+          -- This is really needed at the *return* point rather than here
+          -- at the call, but in practice it's convenient to record it here.
+
+      cml_ret_off :: ByteOff
+          -- For calls *only*, the byte offset of the base of the frame that
+          -- must be described by the info table for the return point.
+          -- The older words are in update frames, which have their own
+          -- info-table and layout information.
+
+          -- From a liveness point of view, the stack words older than
+          -- cml_ret_off are treated as live, even if the sequel of
+          -- the call goes into a loop.
+  } -> CmmNode O C
+
+  CmmForeignCall :: {           -- A safe foreign call; see Note [Foreign calls]
+                                -- Always the last node of a block
+      tgt   :: ForeignTarget,   -- call target and convention
+      res   :: [CmmFormal],     -- zero or more results
+      args  :: [CmmActual],     -- zero or more arguments; see Note [Register parameter passing]
+      succ  :: ULabel,          -- Label of continuation
+      ret_args :: ByteOff,      -- same as cml_ret_args
+      ret_off :: ByteOff,       -- same as cml_ret_off
+      intrbl :: Bool            -- whether or not the call is interruptible
+  } -> CmmNode O C
+
+{- Note [Foreign calls]
+~~~~~~~~~~~~~~~~~~~~~~~
+A CmmUnsafeForeignCall is used for *unsafe* foreign calls;
+a CmmForeignCall call is used for *safe* foreign calls.
+
+Unsafe ones are mostly easy: think of them as a "fat machine
+instruction". In particular, they do *not* kill all live registers,
+just the registers they return to (there was a bit of code in GHC that
+conservatively assumed otherwise). However, see [Register parameter passing].
+
+Safe ones are trickier. A safe foreign call
+     r = f(x)
+ultimately expands to
+     push "return address"      -- Never used to return to;
+                                -- just points to an info table
+     save registers into TSO
+     call suspendThread
+     r = f(x)                   -- Make the call
+     call resumeThread
+     restore registers
+     pop "return address"
+We cannot "lower" a safe foreign call to this sequence of Cmms, because
+after we've saved Sp all the Cmm optimiser's assumptions are broken.
+
+Note that a safe foreign call needs an info table.
+
+So Safe Foreign Calls must remain as last nodes until the stack is
+made manifest in GHC.Cmm.LayoutStack, where they are lowered into the above
+sequence.
+-}
+
+{- Note [Unsafe foreign calls clobber caller-save registers]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+A foreign call is defined to clobber any GlobalRegs that are mapped to
+caller-saves machine registers (according to the prevailing C ABI).
+GHC.StgToCmm.Utils.callerSaves tells you which GlobalRegs are caller-saves.
+
+This is a design choice that makes it easier to generate code later.
+We could instead choose to say that foreign calls do *not* clobber
+caller-saves regs, but then we would have to figure out which regs
+were live across the call later and insert some saves/restores.
+
+Furthermore when we generate code we never have any GlobalRegs live
+across a call, because they are always copied-in to LocalRegs and
+copied-out again before making a call/jump. So all we have to do is
+avoid any code motion that would make a caller-saves GlobalReg live
+across a foreign call during subsequent optimisations.
+-}
+
+{- Note [Register parameter passing]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+On certain architectures, some registers are utilized for parameter
+passing in the C calling convention. For example, in x86-64 Linux
+convention, rdi, rsi, rdx and rcx (as well as r8 and r9) may be used for
+argument passing. These are registers R3-R6, which our generated
+code may also be using; as a result, it's necessary to save these
+values before doing a foreign call. This is done during initial
+code generation in callerSaveVolatileRegs in GHC.StgToCmm.Utils. However,
+one result of doing this is that the contents of these registers
+may mysteriously change if referenced inside the arguments. This
+is dangerous, so you'll need to disable inlining much in the same
+way as is done in GHC.Cmm.Opt currently. We should fix this!
+-}
+
+---------------------------------------------
+-- Eq instance of CmmNode
+
+deriving instance Eq (CmmNode e x)
+
+----------------------------------------------
+-- Hoopl instances of CmmNode
+
+instance NonLocal CmmNode where
+  entryLabel (CmmEntry l _) = l
+
+  successors (CmmBranch l) = [l]
+  successors (CmmCondBranch {cml_true=t, cml_false=f}) = [f, t] -- meets layout constraint
+  successors (CmmSwitch _ ids) = switchTargetsToList ids
+  successors (CmmCall {cml_cont=l}) = maybeToList l
+  successors (CmmForeignCall {succ=l}) = [l]
+
+
+--------------------------------------------------
+-- Various helper types
+
+type CmmActual = CmmExpr
+type CmmFormal = LocalReg
+
+type UpdFrameOffset = ByteOff
+
+-- | A convention maps a list of values (function arguments or return
+-- values) to registers or stack locations.
+data Convention
+  = NativeDirectCall
+       -- ^ top-level Haskell functions use @NativeDirectCall@, which
+       -- maps arguments to registers starting with R2, according to
+       -- how many registers are available on the platform. This
This + -- convention ignores R1, because for a top-level function call + -- the function closure is implicit, and doesn't need to be passed. + | NativeNodeCall + -- ^ non-top-level Haskell functions, which pass the address of + -- the function closure in R1 (regardless of whether R1 is a + -- real register or not), and the rest of the arguments in + -- registers or on the stack. + | NativeReturn + -- ^ a native return. The convention for returns depends on + -- how many values are returned: for just one value returned, + -- the appropriate register is used (R1, F1, etc.). regardless + -- of whether it is a real register or not. For multiple + -- values returned, they are mapped to registers or the stack. + | Slow + -- ^ Slow entry points: all args pushed on the stack + | GC + -- ^ Entry to the garbage collector: uses the node reg! + -- (TODO: I don't think we need this --SDM) + deriving( Eq ) + +data ForeignConvention + = ForeignConvention + CCallConv -- Which foreign-call convention + [ForeignHint] -- Extra info about the args + [ForeignHint] -- Extra info about the result + CmmReturnInfo + deriving Eq + +data CmmReturnInfo + = CmmMayReturn + | CmmNeverReturns + deriving ( Eq ) + +data ForeignTarget -- The target of a foreign call + = ForeignTarget -- A foreign procedure + CmmExpr -- Its address + ForeignConvention -- Its calling convention + | PrimTarget -- A possibly-side-effecting machine operation + CallishMachOp -- Which one + deriving Eq + +foreignTargetHints :: ForeignTarget -> ([ForeignHint], [ForeignHint]) +foreignTargetHints target + = ( res_hints ++ repeat NoHint + , arg_hints ++ repeat NoHint ) + where + (res_hints, arg_hints) = + case target of + PrimTarget op -> callishMachOpHints op + ForeignTarget _ (ForeignConvention _ arg_hints res_hints _) -> + (res_hints, arg_hints) + +-------------------------------------------------- +-- Instances of register and slot users / definers + +instance UserOfRegs LocalReg (CmmNode e x) where + foldRegsUsed dflags f !z n = case n of + CmmAssign _ expr -> fold f z expr + CmmStore addr rval -> fold f (fold f z addr) rval + CmmUnsafeForeignCall t _ args -> fold f (fold f z t) args + CmmCondBranch expr _ _ _ -> fold f z expr + CmmSwitch expr _ -> fold f z expr + CmmCall {cml_target=tgt} -> fold f z tgt + CmmForeignCall {tgt=tgt, args=args} -> fold f (fold f z tgt) args + _ -> z + where fold :: forall a b. UserOfRegs LocalReg a + => (b -> LocalReg -> b) -> b -> a -> b + fold f z n = foldRegsUsed dflags f z n + +instance UserOfRegs GlobalReg (CmmNode e x) where + foldRegsUsed dflags f !z n = case n of + CmmAssign _ expr -> fold f z expr + CmmStore addr rval -> fold f (fold f z addr) rval + CmmUnsafeForeignCall t _ args -> fold f (fold f z t) args + CmmCondBranch expr _ _ _ -> fold f z expr + CmmSwitch expr _ -> fold f z expr + CmmCall {cml_target=tgt, cml_args_regs=args} -> fold f (fold f z args) tgt + CmmForeignCall {tgt=tgt, args=args} -> fold f (fold f z tgt) args + _ -> z + where fold :: forall a b. 
UserOfRegs GlobalReg a
+                 => (b -> GlobalReg -> b) -> b -> a -> b
+              fold f z n = foldRegsUsed dflags f z n
+
+instance (Ord r, UserOfRegs r CmmReg) => UserOfRegs r ForeignTarget where
+  -- The (Ord r) in the context is necessary here
+  -- See Note [Recursive superclasses] in TcInstDcls
+  foldRegsUsed _      _ !z (PrimTarget _)      = z
+  foldRegsUsed dflags f !z (ForeignTarget e _) = foldRegsUsed dflags f z e
+
+instance DefinerOfRegs LocalReg (CmmNode e x) where
+  foldRegsDefd dflags f !z n = case n of
+    CmmAssign lhs _ -> fold f z lhs
+    CmmUnsafeForeignCall _ fs _ -> fold f z fs
+    CmmForeignCall {res=res} -> fold f z res
+    _ -> z
+      where fold :: forall a b. DefinerOfRegs LocalReg a
+                 => (b -> LocalReg -> b) -> b -> a -> b
+            fold f z n = foldRegsDefd dflags f z n
+
+instance DefinerOfRegs GlobalReg (CmmNode e x) where
+  foldRegsDefd dflags f !z n = case n of
+    CmmAssign lhs _ -> fold f z lhs
+    CmmUnsafeForeignCall tgt _ _ -> fold f z (foreignTargetRegs tgt)
+    CmmCall {} -> fold f z activeRegs
+    CmmForeignCall {} -> fold f z activeRegs
+      -- See Note [Safe foreign calls clobber STG registers]
+    _ -> z
+      where fold :: forall a b. DefinerOfRegs GlobalReg a
+                 => (b -> GlobalReg -> b) -> b -> a -> b
+            fold f z n = foldRegsDefd dflags f z n
+
+            platform = targetPlatform dflags
+            activeRegs = activeStgRegs platform
+            activeCallerSavesRegs = filter (callerSaves platform) activeRegs
+
+            foreignTargetRegs (ForeignTarget _ (ForeignConvention _ _ _ CmmNeverReturns)) = []
+            foreignTargetRegs _ = activeCallerSavesRegs
+
+-- Note [Safe foreign calls clobber STG registers]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+--
+-- During the stack layout phase, every safe foreign call is expanded into a
+-- block that contains an unsafe foreign call (instead of a safe one) and
+-- ends with a normal call (see Note [Foreign calls]). This means that we
+-- must treat a safe foreign call as if it were a normal call (because
+-- eventually it will be). This is important if we try to run the sinking
+-- pass before the stack layout phase. Consider this example of what might
+-- go wrong (this is Cmm code from the stablename001 test). Here is the
+-- code after common block elimination (before stack layout):
+--
+--   c1q6:
+--       _s1pf::P64 = R1;
+--       _c1q8::I64 = performMajorGC;
+--       I64[(young<c1q9> + 8)] = c1q9;
+--       foreign call "ccall" arg hints:  []  result hints:  [] (_c1q8::I64)(...)
+--                    returns to c1q9 args: ([]) ress: ([])ret_args: 8ret_off: 8;
+--   c1q9:
+--       I64[(young<c1qb> + 8)] = c1qb;
+--       R1 = _s1pc::P64;
+--       call stg_makeStableName#(R1) returns to c1qb, args: 8, res: 8, upd: 8;
+--
+-- If we run the sinking pass now (still before stack layout) we will get
+-- this:
+--
+--   c1q6:
+--       I64[(young<c1q9> + 8)] = c1q9;
+--       foreign call "ccall" arg hints:  []  result hints:  [] performMajorGC(...)
+--                    returns to c1q9 args: ([]) ress: ([])ret_args: 8ret_off: 8;
+--   c1q9:
+--       I64[(young<c1qb> + 8)] = c1qb;
+--       _s1pf::P64 = R1;         <------ _s1pf sunk past safe foreign call
+--       R1 = _s1pc::P64;
+--       call stg_makeStableName#(R1) returns to c1qb, args: 8, res: 8, upd: 8;
+--
+-- Notice that _s1pf was sunk past a foreign call. When we run stack layout,
+-- the safe call to performMajorGC will be turned into:
+--
+--   c1q6:
+--       _s1pc::P64 = P64[Sp + 8];
+--       I64[Sp - 8] = c1q9;
+--       Sp = Sp - 8;
+--       I64[I64[CurrentTSO + 24] + 16] = Sp;
+--       P64[CurrentNursery + 8] = Hp + 8;
+--       (_u1qI::I64) = call "ccall" arg hints:  [PtrHint,]
+--                            result hints:  [PtrHint] suspendThread(BaseReg, 0);
+--       call "ccall" arg hints:  []  result hints:  [] performMajorGC();
+--       (_u1qJ::I64) = call "ccall" arg hints:  [PtrHint]
+--                            result hints:  [PtrHint] resumeThread(_u1qI::I64);
+--       BaseReg = _u1qJ::I64;
+--       _u1qK::P64 = CurrentTSO;
+--       _u1qL::P64 = I64[_u1qK::P64 + 24];
+--       Sp = I64[_u1qL::P64 + 16];
+--       SpLim = _u1qL::P64 + 192;
+--       HpAlloc = 0;
+--       Hp = I64[CurrentNursery + 8] - 8;
+--       HpLim = I64[CurrentNursery] + (%MO_SS_Conv_W32_W64(I32[CurrentNursery + 48]) * 4096 - 1);
+--       call (I64[Sp])() returns to c1q9, args: 8, res: 8, upd: 8;
+--   c1q9:
+--       I64[(young<c1qb> + 8)] = c1qb;
+--       _s1pf::P64 = R1;         <------ INCORRECT!
+--       R1 = _s1pc::P64;
+--       call stg_makeStableName#(R1) returns to c1qb, args: 8, res: 8, upd: 8;
+--
+-- Notice that c1q6 now ends with a call. Sinking _s1pf::P64 = R1 past that
+-- call is clearly incorrect. This is what would happen if we assumed that a
+-- safe foreign call has the same semantics as an unsafe one. To prevent
+-- this we need to treat a safe foreign call as if it were a normal call.
+
+-----------------------------------
+-- mapping Expr in GHC.Cmm.Node
+
+mapForeignTarget :: (CmmExpr -> CmmExpr) -> ForeignTarget -> ForeignTarget
+mapForeignTarget exp (ForeignTarget e c) = ForeignTarget (exp e) c
+mapForeignTarget _   m@(PrimTarget _)    = m
+
+wrapRecExp :: (CmmExpr -> CmmExpr) -> CmmExpr -> CmmExpr
+-- Take a transformer on expressions and apply it recursively.
+-- (wrapRecExp f e) first recursively applies itself to sub-expressions of e
+--   then uses f to rewrite the resulting expression
+wrapRecExp f (CmmMachOp op es) = f (CmmMachOp op $ map (wrapRecExp f) es)
+wrapRecExp f (CmmLoad addr ty) = f (CmmLoad (wrapRecExp f addr) ty)
+wrapRecExp f e                 = f e
+
+mapExp :: (CmmExpr -> CmmExpr) -> CmmNode e x -> CmmNode e x
+mapExp _ f@(CmmEntry{})                     = f
+mapExp _ m@(CmmComment _)                   = m
+mapExp _ m@(CmmTick _)                      = m
+mapExp f   (CmmUnwind regs)                 = CmmUnwind (map (fmap (fmap f)) regs)
+mapExp f   (CmmAssign r e)                  = CmmAssign r (f e)
+mapExp f   (CmmStore addr e)                = CmmStore (f addr) (f e)
+mapExp f   (CmmUnsafeForeignCall tgt fs as) = CmmUnsafeForeignCall (mapForeignTarget f tgt) fs (map f as)
+mapExp _ l@(CmmBranch _)                    = l
+mapExp f   (CmmCondBranch e ti fi l)        = CmmCondBranch (f e) ti fi l
+mapExp f   (CmmSwitch e ids)                = CmmSwitch (f e) ids
+mapExp f n@CmmCall {cml_target=tgt}         = n{cml_target = f tgt}
+mapExp f   (CmmForeignCall tgt fs as succ ret_args updfr intrbl) = CmmForeignCall (mapForeignTarget f tgt) fs (map f as) succ ret_args updfr intrbl
+
+mapExpDeep :: (CmmExpr -> CmmExpr) -> CmmNode e x -> CmmNode e x
+mapExpDeep f = mapExp $ wrapRecExp f
+
+------------------------------------------------------------------------
+-- mapping Expr in GHC.Cmm.Node, but not performing allocation if no changes
+
+mapForeignTargetM :: (CmmExpr -> Maybe CmmExpr) -> ForeignTarget -> Maybe ForeignTarget
+mapForeignTargetM f (ForeignTarget e c) = (\x -> ForeignTarget x c) `fmap` f e
+mapForeignTargetM _ (PrimTarget _)      = Nothing
+
+wrapRecExpM :: (CmmExpr -> Maybe CmmExpr) -> (CmmExpr -> Maybe CmmExpr)
+-- (wrapRecExpM f e) first recursively applies itself to sub-expressions of e
+--   then gives f a chance to rewrite the resulting expression
+wrapRecExpM f n@(CmmMachOp 
op es) = maybe (f n) (f . CmmMachOp op) (mapListM (wrapRecExpM f) es) +wrapRecExpM f n@(CmmLoad addr ty) = maybe (f n) (f . flip CmmLoad ty) (wrapRecExpM f addr) +wrapRecExpM f e = f e + +mapExpM :: (CmmExpr -> Maybe CmmExpr) -> CmmNode e x -> Maybe (CmmNode e x) +mapExpM _ (CmmEntry{}) = Nothing +mapExpM _ (CmmComment _) = Nothing +mapExpM _ (CmmTick _) = Nothing +mapExpM f (CmmUnwind regs) = CmmUnwind `fmap` mapM (\(r,e) -> mapM f e >>= \e' -> pure (r,e')) regs +mapExpM f (CmmAssign r e) = CmmAssign r `fmap` f e +mapExpM f (CmmStore addr e) = (\[addr', e'] -> CmmStore addr' e') `fmap` mapListM f [addr, e] +mapExpM _ (CmmBranch _) = Nothing +mapExpM f (CmmCondBranch e ti fi l) = (\x -> CmmCondBranch x ti fi l) `fmap` f e +mapExpM f (CmmSwitch e tbl) = (\x -> CmmSwitch x tbl) `fmap` f e +mapExpM f (CmmCall tgt mb_id r o i s) = (\x -> CmmCall x mb_id r o i s) `fmap` f tgt +mapExpM f (CmmUnsafeForeignCall tgt fs as) + = case mapForeignTargetM f tgt of + Just tgt' -> Just (CmmUnsafeForeignCall tgt' fs (mapListJ f as)) + Nothing -> (\xs -> CmmUnsafeForeignCall tgt fs xs) `fmap` mapListM f as +mapExpM f (CmmForeignCall tgt fs as succ ret_args updfr intrbl) + = case mapForeignTargetM f tgt of + Just tgt' -> Just (CmmForeignCall tgt' fs (mapListJ f as) succ ret_args updfr intrbl) + Nothing -> (\xs -> CmmForeignCall tgt fs xs succ ret_args updfr intrbl) `fmap` mapListM f as + +-- share as much as possible +mapListM :: (a -> Maybe a) -> [a] -> Maybe [a] +mapListM f xs = let (b, r) = mapListT f xs + in if b then Just r else Nothing + +mapListJ :: (a -> Maybe a) -> [a] -> [a] +mapListJ f xs = snd (mapListT f xs) + +mapListT :: (a -> Maybe a) -> [a] -> (Bool, [a]) +mapListT f xs = foldr g (False, []) (zip3 (tails xs) xs (map f xs)) + where g (_, y, Nothing) (True, ys) = (True, y:ys) + g (_, _, Just y) (True, ys) = (True, y:ys) + g (ys', _, Nothing) (False, _) = (False, ys') + g (_, _, Just y) (False, ys) = (True, y:ys) + +mapExpDeepM :: (CmmExpr -> Maybe CmmExpr) -> CmmNode e x -> Maybe (CmmNode e x) +mapExpDeepM f = mapExpM $ wrapRecExpM f + +----------------------------------- +-- folding Expr in GHC.Cmm.Node + +foldExpForeignTarget :: (CmmExpr -> z -> z) -> ForeignTarget -> z -> z +foldExpForeignTarget exp (ForeignTarget e _) z = exp e z +foldExpForeignTarget _ (PrimTarget _) z = z + +-- Take a folder on expressions and apply it recursively. +-- Specifically (wrapRecExpf f e z) deals with CmmMachOp and CmmLoad +-- itself, delegating all the other CmmExpr forms to 'f'. 
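+--
+-- For example (an illustrative sketch, not part of this patch): combined
+-- with foldExpDeep below, this lets a client collect every global register
+-- mentioned in a node's expressions:
+--
+--     globalsReferenced :: CmmNode e x -> [GlobalReg]
+--     globalsReferenced node = foldExpDeep collect node []
+--       where collect (CmmReg (CmmGlobal g)) acc = g : acc
+--             collect _                      acc = acc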
+wrapRecExpf :: (CmmExpr -> z -> z) -> CmmExpr -> z -> z +wrapRecExpf f e@(CmmMachOp _ es) z = foldr (wrapRecExpf f) (f e z) es +wrapRecExpf f e@(CmmLoad addr _) z = wrapRecExpf f addr (f e z) +wrapRecExpf f e z = f e z + +foldExp :: (CmmExpr -> z -> z) -> CmmNode e x -> z -> z +foldExp _ (CmmEntry {}) z = z +foldExp _ (CmmComment {}) z = z +foldExp _ (CmmTick {}) z = z +foldExp f (CmmUnwind xs) z = foldr (maybe id f) z (map snd xs) +foldExp f (CmmAssign _ e) z = f e z +foldExp f (CmmStore addr e) z = f addr $ f e z +foldExp f (CmmUnsafeForeignCall t _ as) z = foldr f (foldExpForeignTarget f t z) as +foldExp _ (CmmBranch _) z = z +foldExp f (CmmCondBranch e _ _ _) z = f e z +foldExp f (CmmSwitch e _) z = f e z +foldExp f (CmmCall {cml_target=tgt}) z = f tgt z +foldExp f (CmmForeignCall {tgt=tgt, args=args}) z = foldr f (foldExpForeignTarget f tgt z) args + +foldExpDeep :: (CmmExpr -> z -> z) -> CmmNode e x -> z -> z +foldExpDeep f = foldExp (wrapRecExpf f) + +-- ----------------------------------------------------------------------------- + +mapSuccessors :: (Label -> Label) -> CmmNode O C -> CmmNode O C +mapSuccessors f (CmmBranch bid) = CmmBranch (f bid) +mapSuccessors f (CmmCondBranch p y n l) = CmmCondBranch p (f y) (f n) l +mapSuccessors f (CmmSwitch e ids) = CmmSwitch e (mapSwitchTargets f ids) +mapSuccessors _ n = n + +mapCollectSuccessors :: forall a. (Label -> (Label,a)) -> CmmNode O C + -> (CmmNode O C, [a]) +mapCollectSuccessors f (CmmBranch bid) + = let (bid', acc) = f bid in (CmmBranch bid', [acc]) +mapCollectSuccessors f (CmmCondBranch p y n l) + = let (bidt, acct) = f y + (bidf, accf) = f n + in (CmmCondBranch p bidt bidf l, [accf, acct]) +mapCollectSuccessors f (CmmSwitch e ids) + = let lbls = switchTargetsToList ids :: [Label] + lblMap = mapFromList $ zip lbls (map f lbls) :: LabelMap (Label, a) + in ( CmmSwitch e + (mapSwitchTargets + (\l -> fst $ mapFindWithDefault (error "impossible") l lblMap) ids) + , map snd (mapElems lblMap) + ) +mapCollectSuccessors _ n = (n, []) + +-- ----------------------------------------------------------------------------- + +-- | Tickish in Cmm context (annotations only) +type CmmTickish = Tickish () + +-- | Tick scope identifier, allowing us to reason about what +-- annotations in a Cmm block should scope over. We especially take +-- care to allow optimisations to reorganise blocks without losing +-- tick association in the process. +data CmmTickScope + = GlobalScope + -- ^ The global scope is the "root" of the scope graph. Every + -- scope is a sub-scope of the global scope. It doesn't make sense + -- to add ticks to this scope. On the other hand, this means that + -- setting this scope on a block means no ticks apply to it. + + | SubScope !U.Unique CmmTickScope + -- ^ Constructs a new sub-scope to an existing scope. This allows + -- us to translate Core-style scoping rules (see @tickishScoped@) + -- into the Cmm world. Suppose the following code: + -- + -- tick<1> case ... of + -- A -> tick<2> ... + -- B -> tick<3> ... + -- + -- We want the top-level tick annotation to apply to blocks + -- generated for the A and B alternatives. We can achieve that by + -- generating tick<1> into a block with scope a, while the code + -- for alternatives A and B gets generated into sub-scopes a/b and + -- a/c respectively. + + | CombinedScope CmmTickScope CmmTickScope + -- ^ A combined scope scopes over everything that the two given + -- scopes cover. It is therefore a sub-scope of either scope. This + -- is required for optimisations. 
Consider common block elimination:
+  --
+  --     A -> tick<2> case ... of
+  --            C -> [common]
+  --     B -> tick<3> case ... of
+  --            D -> [common]
+  --
+  -- We will generate code for the C and D alternatives, and figure
+  -- out afterwards that it's actually common code. Scoping rules
+  -- dictate that the resulting common block needs to be covered by
+  -- both tick<2> and tick<3>, therefore we need to construct a
+  -- scope that is a child of *both* scopes. Now we can do that: if
+  -- we assign the scopes a/c and b/d to the common-ed up blocks,
+  -- the new block could have a combined tick scope a/c+b/d, which
+  -- both tick<2> and tick<3> apply to.
+
+-- Note [CmmTick scoping details]:
+--
+-- The scope of a @CmmTick@ is given by the @CmmEntry@ node of the
+-- same block. Note that as a result of this, optimisations making
+-- tick scopes more specific can *reduce* the amount of code a tick
+-- scopes over. Fixing this would require a separate @CmmTickScope@
+-- field for @CmmTick@. Right now we do not do this simply because I
+-- couldn't find an example where it actually mattered -- multiple
+-- blocks within the same scope generally jump to each other, which
+-- prevents common block elimination from happening in the first
+-- place. But this is not a strong reason, so if Cmm optimisations
+-- become more involved in the future this might have to be revisited.
+
+-- | Output all scope paths.
+scopeToPaths :: CmmTickScope -> [[U.Unique]]
+scopeToPaths GlobalScope           = [[]]
+scopeToPaths (SubScope u s)        = map (u:) (scopeToPaths s)
+scopeToPaths (CombinedScope s1 s2) = scopeToPaths s1 ++ scopeToPaths s2
+
+-- | Returns the head uniques of the scopes. This is based on the
+-- assumption that the @Unique@ of @SubScope@ identifies the
+-- underlying super-scope. Used for efficient equality and comparison,
+-- see below.
+scopeUniques :: CmmTickScope -> [U.Unique]
+scopeUniques GlobalScope           = []
+scopeUniques (SubScope u _)        = [u]
+scopeUniques (CombinedScope s1 s2) = scopeUniques s1 ++ scopeUniques s2
+
+-- Equality and order are based on the head uniques defined above. We
+-- take care to short-cut the (extremely) common cases.
+instance Eq CmmTickScope where
+  GlobalScope    == GlobalScope     = True
+  GlobalScope    == _               = False
+  _              == GlobalScope     = False
+  (SubScope u _) == (SubScope u' _) = u == u'
+  (SubScope _ _) == _               = False
+  _              == (SubScope _ _)  = False
+  scope          == scope'          =
+    sortBy nonDetCmpUnique (scopeUniques scope) ==
+    sortBy nonDetCmpUnique (scopeUniques scope')
+    -- This is still deterministic because
+    -- the order is the same for equal lists
+
+-- This is non-deterministic but we do not currently support deterministic
+-- code-generation. See Note [Unique Determinism and code generation]
+-- See Note [No Ord for Unique]
+instance Ord CmmTickScope where
+  compare GlobalScope GlobalScope = EQ
+  compare GlobalScope _           = LT
+  compare _ GlobalScope           = GT
+  compare (SubScope u _) (SubScope u' _) = nonDetCmpUnique u u'
+  compare scope scope'            = cmpList nonDetCmpUnique
+     (sortBy nonDetCmpUnique $ scopeUniques scope)
+     (sortBy nonDetCmpUnique $ scopeUniques scope')
+
+instance Outputable CmmTickScope where
+  ppr GlobalScope     = text "global"
+  ppr (SubScope us GlobalScope)
+                      = ppr us
+  ppr (SubScope us s) = ppr s <> char '/' <> ppr us
+  ppr combined        = parens $ hcat $ punctuate (char '+') $
+                        map (hcat . punctuate (char '/') . map ppr . reverse) $
+                        scopeToPaths combined
+
+-- | Checks whether two tick scopes are sub-scopes of each other. True
+-- if the two scopes are equal.
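+--
+-- For example (illustration only): given scopes built as
+--
+--     a  = SubScope u GlobalScope
+--     ab = SubScope v a
+--
+-- we have  ab `isTickSubScope` a  but not  a `isTickSubScope` ab,  and
+-- combineTickScopes a ab (defined below) simplifies to ab, since the
+-- deeper scope is already a sub-scope of both arguments.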
+isTickSubScope :: CmmTickScope -> CmmTickScope -> Bool +isTickSubScope = cmp + where cmp _ GlobalScope = True + cmp GlobalScope _ = False + cmp (CombinedScope s1 s2) s' = cmp s1 s' && cmp s2 s' + cmp s (CombinedScope s1' s2') = cmp s s1' || cmp s s2' + cmp (SubScope u s) s'@(SubScope u' _) = u == u' || cmp s s' + +-- | Combine two tick scopes. The new scope should be sub-scope of +-- both parameters. We simplify automatically if one tick scope is a +-- sub-scope of the other already. +combineTickScopes :: CmmTickScope -> CmmTickScope -> CmmTickScope +combineTickScopes s1 s2 + | s1 `isTickSubScope` s2 = s1 + | s2 `isTickSubScope` s1 = s2 + | otherwise = CombinedScope s1 s2 diff --git a/compiler/GHC/Cmm/Opt.hs b/compiler/GHC/Cmm/Opt.hs new file mode 100644 index 0000000000..1db37ae58c --- /dev/null +++ b/compiler/GHC/Cmm/Opt.hs @@ -0,0 +1,423 @@ +----------------------------------------------------------------------------- +-- +-- Cmm optimisation +-- +-- (c) The University of Glasgow 2006 +-- +----------------------------------------------------------------------------- + +module GHC.Cmm.Opt ( + constantFoldNode, + constantFoldExpr, + cmmMachOpFold, + cmmMachOpFoldM + ) where + +import GhcPrelude + +import GHC.Cmm.Utils +import GHC.Cmm +import DynFlags +import Util + +import Outputable +import GHC.Platform + +import Data.Bits +import Data.Maybe + + +constantFoldNode :: DynFlags -> CmmNode e x -> CmmNode e x +constantFoldNode dflags = mapExp (constantFoldExpr dflags) + +constantFoldExpr :: DynFlags -> CmmExpr -> CmmExpr +constantFoldExpr dflags = wrapRecExp f + where f (CmmMachOp op args) = cmmMachOpFold dflags op args + f (CmmRegOff r 0) = CmmReg r + f e = e + +-- ----------------------------------------------------------------------------- +-- MachOp constant folder + +-- Now, try to constant-fold the MachOps. The arguments have already +-- been optimized and folded. + +cmmMachOpFold + :: DynFlags + -> MachOp -- The operation from an CmmMachOp + -> [CmmExpr] -- The optimized arguments + -> CmmExpr + +cmmMachOpFold dflags op args = fromMaybe (CmmMachOp op args) (cmmMachOpFoldM dflags op args) + +-- Returns Nothing if no changes, useful for Hoopl, also reduces +-- allocation! +cmmMachOpFoldM + :: DynFlags + -> MachOp + -> [CmmExpr] + -> Maybe CmmExpr + +cmmMachOpFoldM _ op [CmmLit (CmmInt x rep)] + = Just $ case op of + MO_S_Neg _ -> CmmLit (CmmInt (-x) rep) + MO_Not _ -> CmmLit (CmmInt (complement x) rep) + + -- these are interesting: we must first narrow to the + -- "from" type, in order to truncate to the correct size. + -- The final narrow/widen to the destination type + -- is implicit in the CmmLit. 
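+      -- For example (illustration only): folding MO_SS_Conv W8 W32 applied
+      -- to the literal 255 computes narrowS W8 255 == -1 and so yields
+      -- CmmLit (CmmInt (-1) W32), the sign-extension of 0xff.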
+ MO_SF_Conv _from to -> CmmLit (CmmFloat (fromInteger x) to) + MO_SS_Conv from to -> CmmLit (CmmInt (narrowS from x) to) + MO_UU_Conv from to -> CmmLit (CmmInt (narrowU from x) to) + + _ -> panic $ "cmmMachOpFoldM: unknown unary op: " ++ show op + + +-- Eliminate conversion NOPs +cmmMachOpFoldM _ (MO_SS_Conv rep1 rep2) [x] | rep1 == rep2 = Just x +cmmMachOpFoldM _ (MO_UU_Conv rep1 rep2) [x] | rep1 == rep2 = Just x + +-- Eliminate nested conversions where possible +cmmMachOpFoldM dflags conv_outer [CmmMachOp conv_inner [x]] + | Just (rep1,rep2,signed1) <- isIntConversion conv_inner, + Just (_, rep3,signed2) <- isIntConversion conv_outer + = case () of + -- widen then narrow to the same size is a nop + _ | rep1 < rep2 && rep1 == rep3 -> Just x + -- Widen then narrow to different size: collapse to single conversion + -- but remember to use the signedness from the widening, just in case + -- the final conversion is a widen. + | rep1 < rep2 && rep2 > rep3 -> + Just $ cmmMachOpFold dflags (intconv signed1 rep1 rep3) [x] + -- Nested widenings: collapse if the signedness is the same + | rep1 < rep2 && rep2 < rep3 && signed1 == signed2 -> + Just $ cmmMachOpFold dflags (intconv signed1 rep1 rep3) [x] + -- Nested narrowings: collapse + | rep1 > rep2 && rep2 > rep3 -> + Just $ cmmMachOpFold dflags (MO_UU_Conv rep1 rep3) [x] + | otherwise -> + Nothing + where + isIntConversion (MO_UU_Conv rep1 rep2) + = Just (rep1,rep2,False) + isIntConversion (MO_SS_Conv rep1 rep2) + = Just (rep1,rep2,True) + isIntConversion _ = Nothing + + intconv True = MO_SS_Conv + intconv False = MO_UU_Conv + +-- ToDo: a narrow of a load can be collapsed into a narrow load, right? +-- but what if the architecture only supports word-sized loads, should +-- we do the transformation anyway? + +cmmMachOpFoldM dflags mop [CmmLit (CmmInt x xrep), CmmLit (CmmInt y _)] + = case mop of + -- for comparisons: don't forget to narrow the arguments before + -- comparing, since they might be out of range. + MO_Eq _ -> Just $ CmmLit (CmmInt (if x_u == y_u then 1 else 0) (wordWidth dflags)) + MO_Ne _ -> Just $ CmmLit (CmmInt (if x_u /= y_u then 1 else 0) (wordWidth dflags)) + + MO_U_Gt _ -> Just $ CmmLit (CmmInt (if x_u > y_u then 1 else 0) (wordWidth dflags)) + MO_U_Ge _ -> Just $ CmmLit (CmmInt (if x_u >= y_u then 1 else 0) (wordWidth dflags)) + MO_U_Lt _ -> Just $ CmmLit (CmmInt (if x_u < y_u then 1 else 0) (wordWidth dflags)) + MO_U_Le _ -> Just $ CmmLit (CmmInt (if x_u <= y_u then 1 else 0) (wordWidth dflags)) + + MO_S_Gt _ -> Just $ CmmLit (CmmInt (if x_s > y_s then 1 else 0) (wordWidth dflags)) + MO_S_Ge _ -> Just $ CmmLit (CmmInt (if x_s >= y_s then 1 else 0) (wordWidth dflags)) + MO_S_Lt _ -> Just $ CmmLit (CmmInt (if x_s < y_s then 1 else 0) (wordWidth dflags)) + MO_S_Le _ -> Just $ CmmLit (CmmInt (if x_s <= y_s then 1 else 0) (wordWidth dflags)) + + MO_Add r -> Just $ CmmLit (CmmInt (x + y) r) + MO_Sub r -> Just $ CmmLit (CmmInt (x - y) r) + MO_Mul r -> Just $ CmmLit (CmmInt (x * y) r) + MO_U_Quot r | y /= 0 -> Just $ CmmLit (CmmInt (x_u `quot` y_u) r) + MO_U_Rem r | y /= 0 -> Just $ CmmLit (CmmInt (x_u `rem` y_u) r) + MO_S_Quot r | y /= 0 -> Just $ CmmLit (CmmInt (x `quot` y) r) + MO_S_Rem r | y /= 0 -> Just $ CmmLit (CmmInt (x `rem` y) r) + + MO_And r -> Just $ CmmLit (CmmInt (x .&. y) r) + MO_Or r -> Just $ CmmLit (CmmInt (x .|. 
y) r) + MO_Xor r -> Just $ CmmLit (CmmInt (x `xor` y) r) + + MO_Shl r -> Just $ CmmLit (CmmInt (x `shiftL` fromIntegral y) r) + MO_U_Shr r -> Just $ CmmLit (CmmInt (x_u `shiftR` fromIntegral y) r) + MO_S_Shr r -> Just $ CmmLit (CmmInt (x `shiftR` fromIntegral y) r) + + _ -> Nothing + + where + x_u = narrowU xrep x + y_u = narrowU xrep y + x_s = narrowS xrep x + y_s = narrowS xrep y + + +-- When possible, shift the constants to the right-hand side, so that we +-- can match for strength reductions. Note that the code generator will +-- also assume that constants have been shifted to the right when +-- possible. + +cmmMachOpFoldM dflags op [x@(CmmLit _), y] + | not (isLit y) && isCommutableMachOp op + = Just (cmmMachOpFold dflags op [y, x]) + +-- Turn (a+b)+c into a+(b+c) where possible. Because literals are +-- moved to the right, it is more likely that we will find +-- opportunities for constant folding when the expression is +-- right-associated. +-- +-- ToDo: this appears to introduce a quadratic behaviour due to the +-- nested cmmMachOpFold. Can we fix this? +-- +-- Why do we check isLit arg1? If arg1 is a lit, it means that arg2 +-- is also a lit (otherwise arg1 would be on the right). If we +-- put arg1 on the left of the rearranged expression, we'll get into a +-- loop: (x1+x2)+x3 => x1+(x2+x3) => (x2+x3)+x1 => x2+(x3+x1) ... +-- +-- Also don't do it if arg1 is PicBaseReg, so that we don't separate the +-- PicBaseReg from the corresponding label (or label difference). +-- +cmmMachOpFoldM dflags mop1 [CmmMachOp mop2 [arg1,arg2], arg3] + | mop2 `associates_with` mop1 + && not (isLit arg1) && not (isPicReg arg1) + = Just (cmmMachOpFold dflags mop2 [arg1, cmmMachOpFold dflags mop1 [arg2,arg3]]) + where + MO_Add{} `associates_with` MO_Sub{} = True + mop1 `associates_with` mop2 = + mop1 == mop2 && isAssociativeMachOp mop1 + +-- special case: (a - b) + c ==> a + (c - b) +cmmMachOpFoldM dflags mop1@(MO_Add{}) [CmmMachOp mop2@(MO_Sub{}) [arg1,arg2], arg3] + | not (isLit arg1) && not (isPicReg arg1) + = Just (cmmMachOpFold dflags mop1 [arg1, cmmMachOpFold dflags mop2 [arg3,arg2]]) + +-- special case: (PicBaseReg + lit) + N ==> PicBaseReg + (lit+N) +-- +-- this is better because lit+N is a single link-time constant (e.g. a +-- CmmLabelOff), so the right-hand expression needs only one +-- instruction, whereas the left needs two. This happens when pointer +-- tagging gives us label+offset, and PIC turns the label into +-- PicBaseReg + label. 
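+--
+-- For example (illustration only):
+--
+--      (PicBaseReg + Lbl) + 8   ==>   PicBaseReg + (Lbl+8)
+--
+-- where Lbl+8 is emitted as a single CmmLabelOff constant.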
+-- +cmmMachOpFoldM _ MO_Add{} [ CmmMachOp op@MO_Add{} [pic, CmmLit lit] + , CmmLit (CmmInt n rep) ] + | isPicReg pic + = Just $ CmmMachOp op [pic, CmmLit $ cmmOffsetLit lit off ] + where off = fromIntegral (narrowS rep n) + +-- Make a RegOff if we can +cmmMachOpFoldM _ (MO_Add _) [CmmReg reg, CmmLit (CmmInt n rep)] + = Just $ cmmRegOff reg (fromIntegral (narrowS rep n)) +cmmMachOpFoldM _ (MO_Add _) [CmmRegOff reg off, CmmLit (CmmInt n rep)] + = Just $ cmmRegOff reg (off + fromIntegral (narrowS rep n)) +cmmMachOpFoldM _ (MO_Sub _) [CmmReg reg, CmmLit (CmmInt n rep)] + = Just $ cmmRegOff reg (- fromIntegral (narrowS rep n)) +cmmMachOpFoldM _ (MO_Sub _) [CmmRegOff reg off, CmmLit (CmmInt n rep)] + = Just $ cmmRegOff reg (off - fromIntegral (narrowS rep n)) + +-- Fold label(+/-)offset into a CmmLit where possible + +cmmMachOpFoldM _ (MO_Add _) [CmmLit lit, CmmLit (CmmInt i rep)] + = Just $ CmmLit (cmmOffsetLit lit (fromIntegral (narrowU rep i))) +cmmMachOpFoldM _ (MO_Add _) [CmmLit (CmmInt i rep), CmmLit lit] + = Just $ CmmLit (cmmOffsetLit lit (fromIntegral (narrowU rep i))) +cmmMachOpFoldM _ (MO_Sub _) [CmmLit lit, CmmLit (CmmInt i rep)] + = Just $ CmmLit (cmmOffsetLit lit (fromIntegral (negate (narrowU rep i)))) + + +-- Comparison of literal with widened operand: perform the comparison +-- at the smaller width, as long as the literal is within range. + +-- We can't do the reverse trick, when the operand is narrowed: +-- narrowing throws away bits from the operand, there's no way to do +-- the same comparison at the larger size. + +cmmMachOpFoldM dflags cmp [CmmMachOp conv [x], CmmLit (CmmInt i _)] + | -- powerPC NCG has a TODO for I8/I16 comparisons, so don't try + platformArch (targetPlatform dflags) `elem` [ArchX86, ArchX86_64], + -- if the operand is widened: + Just (rep, signed, narrow_fn) <- maybe_conversion conv, + -- and this is a comparison operation: + Just narrow_cmp <- maybe_comparison cmp rep signed, + -- and the literal fits in the smaller size: + i == narrow_fn rep i + -- then we can do the comparison at the smaller size + = Just (cmmMachOpFold dflags narrow_cmp [x, CmmLit (CmmInt i rep)]) + where + maybe_conversion (MO_UU_Conv from to) + | to > from + = Just (from, False, narrowU) + maybe_conversion (MO_SS_Conv from to) + | to > from + = Just (from, True, narrowS) + + -- don't attempt to apply this optimisation when the source + -- is a float; see #1916 + maybe_conversion _ = Nothing + + -- careful (#2080): if the original comparison was signed, but + -- we were doing an unsigned widen, then we must do an + -- unsigned comparison at the smaller size. + maybe_comparison (MO_U_Gt _) rep _ = Just (MO_U_Gt rep) + maybe_comparison (MO_U_Ge _) rep _ = Just (MO_U_Ge rep) + maybe_comparison (MO_U_Lt _) rep _ = Just (MO_U_Lt rep) + maybe_comparison (MO_U_Le _) rep _ = Just (MO_U_Le rep) + maybe_comparison (MO_Eq _) rep _ = Just (MO_Eq rep) + maybe_comparison (MO_S_Gt _) rep True = Just (MO_S_Gt rep) + maybe_comparison (MO_S_Ge _) rep True = Just (MO_S_Ge rep) + maybe_comparison (MO_S_Lt _) rep True = Just (MO_S_Lt rep) + maybe_comparison (MO_S_Le _) rep True = Just (MO_S_Le rep) + maybe_comparison (MO_S_Gt _) rep False = Just (MO_U_Gt rep) + maybe_comparison (MO_S_Ge _) rep False = Just (MO_U_Ge rep) + maybe_comparison (MO_S_Lt _) rep False = Just (MO_U_Lt rep) + maybe_comparison (MO_S_Le _) rep False = Just (MO_U_Le rep) + maybe_comparison _ _ _ = Nothing + +-- We can often do something with constants of 0 and 1 ... 
+-- See Note [Comparison operators] + +cmmMachOpFoldM dflags mop [x, y@(CmmLit (CmmInt 0 _))] + = case mop of + -- Arithmetic + MO_Add _ -> Just x -- x + 0 = x + MO_Sub _ -> Just x -- x - 0 = x + MO_Mul _ -> Just y -- x * 0 = 0 + + -- Logical operations + MO_And _ -> Just y -- x & 0 = 0 + MO_Or _ -> Just x -- x | 0 = x + MO_Xor _ -> Just x -- x `xor` 0 = x + + -- Shifts + MO_Shl _ -> Just x -- x << 0 = x + MO_S_Shr _ -> Just x -- ditto shift-right + MO_U_Shr _ -> Just x + + -- Comparisons; these ones are trickier + -- See Note [Comparison operators] + MO_Ne _ | isComparisonExpr x -> Just x -- (x > y) != 0 = x > y + MO_Eq _ | Just x' <- maybeInvertCmmExpr x -> Just x' -- (x > y) == 0 = x <= y + MO_U_Gt _ | isComparisonExpr x -> Just x -- (x > y) > 0 = x > y + MO_S_Gt _ | isComparisonExpr x -> Just x -- ditto + MO_U_Lt _ | isComparisonExpr x -> Just zero -- (x > y) < 0 = 0 + MO_S_Lt _ | isComparisonExpr x -> Just zero + MO_U_Ge _ | isComparisonExpr x -> Just one -- (x > y) >= 0 = 1 + MO_S_Ge _ | isComparisonExpr x -> Just one + + MO_U_Le _ | Just x' <- maybeInvertCmmExpr x -> Just x' -- (x > y) <= 0 = x <= y + MO_S_Le _ | Just x' <- maybeInvertCmmExpr x -> Just x' + _ -> Nothing + where + zero = CmmLit (CmmInt 0 (wordWidth dflags)) + one = CmmLit (CmmInt 1 (wordWidth dflags)) + +cmmMachOpFoldM dflags mop [x, (CmmLit (CmmInt 1 rep))] + = case mop of + -- Arithmetic: x*1 = x, etc + MO_Mul _ -> Just x + MO_S_Quot _ -> Just x + MO_U_Quot _ -> Just x + MO_S_Rem _ -> Just $ CmmLit (CmmInt 0 rep) + MO_U_Rem _ -> Just $ CmmLit (CmmInt 0 rep) + + -- Comparisons; trickier + -- See Note [Comparison operators] + MO_Ne _ | Just x' <- maybeInvertCmmExpr x -> Just x' -- (x>y) != 1 = x<=y + MO_Eq _ | isComparisonExpr x -> Just x -- (x>y) == 1 = x>y + MO_U_Lt _ | Just x' <- maybeInvertCmmExpr x -> Just x' -- (x>y) < 1 = x<=y + MO_S_Lt _ | Just x' <- maybeInvertCmmExpr x -> Just x' -- ditto + MO_U_Gt _ | isComparisonExpr x -> Just zero -- (x>y) > 1 = 0 + MO_S_Gt _ | isComparisonExpr x -> Just zero + MO_U_Le _ | isComparisonExpr x -> Just one -- (x>y) <= 1 = 1 + MO_S_Le _ | isComparisonExpr x -> Just one + MO_U_Ge _ | isComparisonExpr x -> Just x -- (x>y) >= 1 = x>y + MO_S_Ge _ | isComparisonExpr x -> Just x + _ -> Nothing + where + zero = CmmLit (CmmInt 0 (wordWidth dflags)) + one = CmmLit (CmmInt 1 (wordWidth dflags)) + +-- Now look for multiplication/division by powers of 2 (integers). + +cmmMachOpFoldM dflags mop [x, (CmmLit (CmmInt n _))] + = case mop of + MO_Mul rep + | Just p <- exactLog2 n -> + Just (cmmMachOpFold dflags (MO_Shl rep) [x, CmmLit (CmmInt p rep)]) + MO_U_Quot rep + | Just p <- exactLog2 n -> + Just (cmmMachOpFold dflags (MO_U_Shr rep) [x, CmmLit (CmmInt p rep)]) + MO_U_Rem rep + | Just _ <- exactLog2 n -> + Just (cmmMachOpFold dflags (MO_And rep) [x, CmmLit (CmmInt (n - 1) rep)]) + MO_S_Quot rep + | Just p <- exactLog2 n, + CmmReg _ <- x -> -- We duplicate x in signedQuotRemHelper, hence require + -- it is a reg. FIXME: remove this restriction. + Just (cmmMachOpFold dflags (MO_S_Shr rep) + [signedQuotRemHelper rep p, CmmLit (CmmInt p rep)]) + MO_S_Rem rep + | Just p <- exactLog2 n, + CmmReg _ <- x -> -- We duplicate x in signedQuotRemHelper, hence require + -- it is a reg. FIXME: remove this restriction. + -- We replace (x `rem` 2^p) by (x - (x `quot` 2^p) * 2^p). + -- Moreover, we fuse MO_S_Shr (last operation of MO_S_Quot) + -- and MO_S_Shl (multiplication by 2^p) into a single MO_And operation. 
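+                -- For example (illustration only), with n = 8 (so p = 3):
+                --     x `rem` 8  ==>  x - (signedQuotRemHelper rep 3 & (-8))
+                -- where ANDing with (-8) clears the low three bits, i.e. it
+                -- computes (x `quot` 8) * 8 without a second shift.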
+                Just (cmmMachOpFold dflags (MO_Sub rep)
+                  [x, cmmMachOpFold dflags (MO_And rep)
+                    [signedQuotRemHelper rep p, CmmLit (CmmInt (- n) rep)]])
+        _ -> Nothing
+    where
+        -- In contrast with unsigned integers, for signed ones
+        -- shift right is not the same as quot, because it rounds
+        -- to minus infinity, whereas quot rounds toward zero.
+        -- To fix this up, we add one less than the divisor to the
+        -- dividend if it is a negative number.
+        --
+        -- to avoid a test/jump, we use the following sequence:
+        --      x1 = x >> word_size-1  (all 1s if -ve, all 0s if +ve)
+        --      x2 = x1 & (divisor-1)
+        --      result = x + x2
+        -- this could be done a bit more simply using conditional moves,
+        -- but we're processor independent here.
+        --
+        -- we optimise the divide by 2 case slightly, generating
+        --      x1 = x >> word_size-1  (unsigned)
+        --      return = x + x1
+        signedQuotRemHelper :: Width -> Integer -> CmmExpr
+        signedQuotRemHelper rep p = CmmMachOp (MO_Add rep) [x, x2]
+          where
+            bits = fromIntegral (widthInBits rep) - 1
+            shr = if p == 1 then MO_U_Shr rep else MO_S_Shr rep
+            x1 = CmmMachOp shr [x, CmmLit (CmmInt bits rep)]
+            x2 = if p == 1 then x1 else
+                 CmmMachOp (MO_And rep) [x1, CmmLit (CmmInt (n-1) rep)]
+
+-- ToDo (#7116): optimise floating-point multiplication, e.g. x*2.0 -> x+x
+-- Unfortunately this needs a unique supply because x might not be a
+-- register.  See #2253 (program 6) for an example.
+
+
+-- Anything else is just too hard.
+
+cmmMachOpFoldM _ _ _ = Nothing
+
+{- Note [Comparison operators]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+If we have
+   CmmCondBranch ((x>#y) == 1) t f
+we really want to convert to
+   CmmCondBranch (x>#y) t f
+
+That's what the constant-folding operations on comparison operators do above.
+-}
+
+
+-- -----------------------------------------------------------------------------
+-- Utils
+
+isPicReg :: CmmExpr -> Bool
+isPicReg (CmmReg (CmmGlobal PicBaseReg)) = True
+isPicReg _ = False
diff --git a/compiler/GHC/Cmm/Parser.y b/compiler/GHC/Cmm/Parser.y
new file mode 100644
index 0000000000..d7235d0167
--- /dev/null
+++ b/compiler/GHC/Cmm/Parser.y
@@ -0,0 +1,1442 @@
+-----------------------------------------------------------------------------
+--
+-- (c) The University of Glasgow, 2004-2012
+--
+-- Parser for concrete Cmm.
+--
+-----------------------------------------------------------------------------
+
+{- -----------------------------------------------------------------------------
+Note [Syntax of .cmm files]
+
+NOTE: You are very much on your own in .cmm.  There is very little
+error checking at all:
+
+  * Type errors are detected by the (optional) -dcmm-lint pass; if you
+    don't turn this on, a type error will likely result in a panic
+    from the native code generator.
+
+  * Passing the wrong number of arguments or arguments of the wrong
+    type is not detected.
+
+There are two ways to write .cmm code:
+
+ (1) High-level Cmm code delegates the stack handling to GHC, and
+     never explicitly mentions Sp or registers.
+
+ (2) Low-level Cmm manages the stack itself, and must know about
+     calling conventions.
+
+Whether you want high-level or low-level Cmm is indicated by the
+presence of an argument list on a procedure. 
For example: + +foo ( gcptr a, bits32 b ) +{ + // this is high-level cmm code + + if (b > 0) { + // we can make tail calls passing arguments: + jump stg_ap_0_fast(a); + } + + push (stg_upd_frame_info, a) { + // stack frames can be explicitly pushed + + (x,y) = call wibble(a,b,3,4); + // calls pass arguments and return results using the native + // Haskell calling convention. The code generator will automatically + // construct a stack frame and an info table for the continuation. + + return (x,y); + // we can return multiple values from the current proc + } +} + +bar +{ + // this is low-level cmm code, indicated by the fact that we did not + // put an argument list on bar. + + x = R1; // the calling convention is explicit: better be careful + // that this works on all platforms! + + jump %ENTRY_CODE(Sp(0)) +} + +Here is a list of rules for high-level and low-level code. If you +break the rules, you get a panic (for using a high-level construct in +a low-level proc), or wrong code (when using low-level code in a +high-level proc). This stuff isn't checked! (TODO!) + +High-level only: + + - tail-calls with arguments, e.g. + jump stg_fun (arg1, arg2); + + - function calls: + (ret1,ret2) = call stg_fun (arg1, arg2); + + This makes a call with the NativeNodeCall convention, and the + values are returned to the following code using the NativeReturn + convention. + + - returning: + return (ret1, ret2) + + These use the NativeReturn convention to return zero or more + results to the caller. + + - pushing stack frames: + push (info_ptr, field1, ..., fieldN) { ... statements ... } + + - reserving temporary stack space: + + reserve N = x { ... } + + this reserves an area of size N (words) on the top of the stack, + and binds its address to x (a local register). Typically this is + used for allocating temporary storage for passing to foreign + functions. + + Note that if you make any native calls or invoke the GC in the + scope of the reserve block, you are responsible for ensuring that + the stack you reserved is laid out correctly with an info table. + +Low-level only: + + - References to Sp, R1-R8, F1-F4 etc. + + NB. foreign calls may clobber the argument registers R1-R8, F1-F4 + etc., so ensure they are saved into variables around foreign + calls. + + - SAVE_THREAD_STATE() and LOAD_THREAD_STATE(), which modify Sp + directly. + +Both high-level and low-level code can use a raw tail-call: + + jump stg_fun [R1,R2] + +NB. you *must* specify the list of GlobalRegs that are passed via a +jump, otherwise the register allocator will assume that all the +GlobalRegs are dead at the jump. + + +Calling Conventions +------------------- + +High-level procedures use the NativeNode calling convention, or the +NativeReturn convention if the 'return' keyword is used (see Stack +Frames below). + +Low-level procedures implement their own calling convention, so it can +be anything at all. + +If a low-level procedure implements the NativeNode calling convention, +then it can be called by high-level code using an ordinary function +call. In general this is hard to arrange because the calling +convention depends on the number of physical registers available for +parameter passing, but there are two cases where the calling +convention is platform-independent: + + - Zero arguments. + + - One argument of pointer or non-pointer word type; this is always + passed in R1 according to the NativeNode convention. + + - Returning a single value; these conventions are fixed and platform + independent. 
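+
+For example (an illustrative sketch, not taken from the RTS): the
+one-argument case above looks like this on the callee side, and
+high-level code can call my_fn directly because the convention is the
+same on every platform:
+
+  my_fn    // low-level: no argument list
+  {
+      x = R1;    // the single word-sized argument always arrives in R1
+      ... code ...
+  }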
+ + +Stack Frames +------------ + +A stack frame is written like this: + +INFO_TABLE_RET ( label, FRAME_TYPE, info_ptr, field1, ..., fieldN ) + return ( arg1, ..., argM ) +{ + ... code ... +} + +where field1 ... fieldN are the fields of the stack frame (with types) +arg1...argN are the values returned to the stack frame (with types). +The return values are assumed to be passed according to the +NativeReturn convention. + +On entry to the code, the stack frame looks like: + + |----------| + | fieldN | + | ... | + | field1 | + |----------| + | info_ptr | + |----------| + | argN | + | ... | <- Sp + +and some of the args may be in registers. + +We prepend the code by a copyIn of the args, and assign all the stack +frame fields to their formals. The initial "arg offset" for stack +layout purposes consists of the whole stack frame plus any args that +might be on the stack. + +A tail-call may pass a stack frame to the callee using the following +syntax: + +jump f (info_ptr, field1,..,fieldN) (arg1,..,argN) + +where info_ptr and field1..fieldN describe the stack frame, and +arg1..argN are the arguments passed to f using the NativeNodeCall +convention. Note if a field is longer than a word (e.g. a D_ on +a 32-bit machine) then the call will push as many words as +necessary to the stack to accommodate it (e.g. 2). + + +----------------------------------------------------------------------------- -} + +{ +{-# LANGUAGE TupleSections #-} + +module GHC.Cmm.Parser ( parseCmmFile ) where + +import GhcPrelude + +import GHC.StgToCmm.ExtCode +import GHC.Cmm.CallConv +import GHC.StgToCmm.Prof +import GHC.StgToCmm.Heap +import GHC.StgToCmm.Monad hiding ( getCode, getCodeR, getCodeScoped, emitLabel, emit + , emitStore, emitAssign, emitOutOfLine, withUpdFrameOff + , getUpdFrameOff ) +import qualified GHC.StgToCmm.Monad as F +import GHC.StgToCmm.Utils +import GHC.StgToCmm.Foreign +import GHC.StgToCmm.Expr +import GHC.StgToCmm.Closure +import GHC.StgToCmm.Layout hiding (ArgRep(..)) +import GHC.StgToCmm.Ticky +import GHC.StgToCmm.Bind ( emitBlackHoleCode, emitUpdateFrame ) +import CoreSyn ( Tickish(SourceNote) ) + +import GHC.Cmm.Opt +import GHC.Cmm.Graph +import GHC.Cmm +import GHC.Cmm.Utils +import GHC.Cmm.Switch ( mkSwitchTargets ) +import GHC.Cmm.Info +import GHC.Cmm.BlockId +import GHC.Cmm.Lexer +import GHC.Cmm.CLabel +import GHC.Cmm.Monad +import GHC.Runtime.Layout +import Lexer + +import CostCentre +import ForeignCall +import Module +import GHC.Platform +import Literal +import Unique +import UniqFM +import SrcLoc +import DynFlags +import ErrUtils +import StringBuffer +import FastString +import Panic +import Constants +import Outputable +import BasicTypes +import Bag ( emptyBag, unitBag ) +import Var + +import Control.Monad +import Data.Array +import Data.Char ( ord ) +import System.Exit +import Data.Maybe +import qualified Data.Map as M +import qualified Data.ByteString.Char8 as BS8 + +#include "HsVersions.h" +} + +%expect 0 + +%token + ':' { L _ (CmmT_SpecChar ':') } + ';' { L _ (CmmT_SpecChar ';') } + '{' { L _ (CmmT_SpecChar '{') } + '}' { L _ (CmmT_SpecChar '}') } + '[' { L _ (CmmT_SpecChar '[') } + ']' { L _ (CmmT_SpecChar ']') } + '(' { L _ (CmmT_SpecChar '(') } + ')' { L _ (CmmT_SpecChar ')') } + '=' { L _ (CmmT_SpecChar '=') } + '`' { L _ (CmmT_SpecChar '`') } + '~' { L _ (CmmT_SpecChar '~') } + '/' { L _ (CmmT_SpecChar '/') } + '*' { L _ (CmmT_SpecChar '*') } + '%' { L _ (CmmT_SpecChar '%') } + '-' { L _ (CmmT_SpecChar '-') } + '+' { L _ (CmmT_SpecChar '+') } + '&' { L _ (CmmT_SpecChar '&') } + '^' 
{ L _ (CmmT_SpecChar '^') } + '|' { L _ (CmmT_SpecChar '|') } + '>' { L _ (CmmT_SpecChar '>') } + '<' { L _ (CmmT_SpecChar '<') } + ',' { L _ (CmmT_SpecChar ',') } + '!' { L _ (CmmT_SpecChar '!') } + + '..' { L _ (CmmT_DotDot) } + '::' { L _ (CmmT_DoubleColon) } + '>>' { L _ (CmmT_Shr) } + '<<' { L _ (CmmT_Shl) } + '>=' { L _ (CmmT_Ge) } + '<=' { L _ (CmmT_Le) } + '==' { L _ (CmmT_Eq) } + '!=' { L _ (CmmT_Ne) } + '&&' { L _ (CmmT_BoolAnd) } + '||' { L _ (CmmT_BoolOr) } + + 'True' { L _ (CmmT_True ) } + 'False' { L _ (CmmT_False) } + 'likely'{ L _ (CmmT_likely)} + + 'CLOSURE' { L _ (CmmT_CLOSURE) } + 'INFO_TABLE' { L _ (CmmT_INFO_TABLE) } + 'INFO_TABLE_RET'{ L _ (CmmT_INFO_TABLE_RET) } + 'INFO_TABLE_FUN'{ L _ (CmmT_INFO_TABLE_FUN) } + 'INFO_TABLE_CONSTR'{ L _ (CmmT_INFO_TABLE_CONSTR) } + 'INFO_TABLE_SELECTOR'{ L _ (CmmT_INFO_TABLE_SELECTOR) } + 'else' { L _ (CmmT_else) } + 'export' { L _ (CmmT_export) } + 'section' { L _ (CmmT_section) } + 'goto' { L _ (CmmT_goto) } + 'if' { L _ (CmmT_if) } + 'call' { L _ (CmmT_call) } + 'jump' { L _ (CmmT_jump) } + 'foreign' { L _ (CmmT_foreign) } + 'never' { L _ (CmmT_never) } + 'prim' { L _ (CmmT_prim) } + 'reserve' { L _ (CmmT_reserve) } + 'return' { L _ (CmmT_return) } + 'returns' { L _ (CmmT_returns) } + 'import' { L _ (CmmT_import) } + 'switch' { L _ (CmmT_switch) } + 'case' { L _ (CmmT_case) } + 'default' { L _ (CmmT_default) } + 'push' { L _ (CmmT_push) } + 'unwind' { L _ (CmmT_unwind) } + 'bits8' { L _ (CmmT_bits8) } + 'bits16' { L _ (CmmT_bits16) } + 'bits32' { L _ (CmmT_bits32) } + 'bits64' { L _ (CmmT_bits64) } + 'bits128' { L _ (CmmT_bits128) } + 'bits256' { L _ (CmmT_bits256) } + 'bits512' { L _ (CmmT_bits512) } + 'float32' { L _ (CmmT_float32) } + 'float64' { L _ (CmmT_float64) } + 'gcptr' { L _ (CmmT_gcptr) } + + GLOBALREG { L _ (CmmT_GlobalReg $$) } + NAME { L _ (CmmT_Name $$) } + STRING { L _ (CmmT_String $$) } + INT { L _ (CmmT_Int $$) } + FLOAT { L _ (CmmT_Float $$) } + +%monad { PD } { >>= } { return } +%lexer { cmmlex } { L _ CmmT_EOF } +%name cmmParse cmm +%tokentype { Located CmmToken } + +-- C-- operator precedences, taken from the C-- spec +%right '||' -- non-std extension, called %disjoin in C-- +%right '&&' -- non-std extension, called %conjoin in C-- +%right '!' +%nonassoc '>=' '>' '<=' '<' '!=' '==' +%left '|' +%left '^' +%left '&' +%left '>>' '<<' +%left '-' '+' +%left '/' '*' '%' +%right '~' + +%% + +cmm :: { CmmParse () } + : {- empty -} { return () } + | cmmtop cmm { do $1; $2 } + +cmmtop :: { CmmParse () } + : cmmproc { $1 } + | cmmdata { $1 } + | decl { $1 } + | 'CLOSURE' '(' NAME ',' NAME lits ')' ';' + {% liftP . withThisPackage $ \pkg -> + do lits <- sequence $6; + staticClosure pkg $3 $5 (map getLit lits) } + +-- The only static closures in the RTS are dummy closures like +-- stg_END_TSO_QUEUE_closure and stg_dummy_ret. We don't need +-- to provide the full generality of static closures here. +-- In particular: +-- * CCS can always be CCS_DONT_CARE +-- * closure is always extern +-- * payload is always empty +-- * we can derive closure and info table labels from a single NAME + +cmmdata :: { CmmParse () } + : 'section' STRING '{' data_label statics '}' + { do lbl <- $4; + ss <- sequence $5; + code (emitDecl (CmmData (Section (section $2) lbl) (Statics lbl $ concat ss))) } + +data_label :: { CmmParse CLabel } + : NAME ':' + {% liftP . 
withThisPackage $ \pkg -> + return (mkCmmDataLabel pkg $1) } + +statics :: { [CmmParse [CmmStatic]] } + : {- empty -} { [] } + | static statics { $1 : $2 } + +static :: { CmmParse [CmmStatic] } + : type expr ';' { do e <- $2; + return [CmmStaticLit (getLit e)] } + | type ';' { return [CmmUninitialised + (widthInBytes (typeWidth $1))] } + | 'bits8' '[' ']' STRING ';' { return [mkString $4] } + | 'bits8' '[' INT ']' ';' { return [CmmUninitialised + (fromIntegral $3)] } + | typenot8 '[' INT ']' ';' { return [CmmUninitialised + (widthInBytes (typeWidth $1) * + fromIntegral $3)] } + | 'CLOSURE' '(' NAME lits ')' + { do { lits <- sequence $4 + ; dflags <- getDynFlags + ; return $ map CmmStaticLit $ + mkStaticClosure dflags (mkForeignLabel $3 Nothing ForeignLabelInExternalPackage IsData) + -- mkForeignLabel because these are only used + -- for CHARLIKE and INTLIKE closures in the RTS. + dontCareCCS (map getLit lits) [] [] [] } } + -- arrays of closures required for the CHARLIKE & INTLIKE arrays + +lits :: { [CmmParse CmmExpr] } + : {- empty -} { [] } + | ',' expr lits { $2 : $3 } + +cmmproc :: { CmmParse () } + : info maybe_conv maybe_formals maybe_body + { do ((entry_ret_label, info, stk_formals, formals), agraph) <- + getCodeScoped $ loopDecls $ do { + (entry_ret_label, info, stk_formals) <- $1; + dflags <- getDynFlags; + formals <- sequence (fromMaybe [] $3); + withName (showSDoc dflags (ppr entry_ret_label)) + $4; + return (entry_ret_label, info, stk_formals, formals) } + let do_layout = isJust $3 + code (emitProcWithStackFrame $2 info + entry_ret_label stk_formals formals agraph + do_layout ) } + +maybe_conv :: { Convention } + : {- empty -} { NativeNodeCall } + | 'return' { NativeReturn } + +maybe_body :: { CmmParse () } + : ';' { return () } + | '{' body '}' { withSourceNote $1 $3 $2 } + +info :: { CmmParse (CLabel, Maybe CmmInfoTable, [LocalReg]) } + : NAME + {% liftP . withThisPackage $ \pkg -> + do newFunctionName $1 pkg + return (mkCmmCodeLabel pkg $1, Nothing, []) } + + + | 'INFO_TABLE' '(' NAME ',' INT ',' INT ',' INT ',' STRING ',' STRING ')' + -- ptrs, nptrs, closure type, description, type + {% liftP . withThisPackage $ \pkg -> + do dflags <- getDynFlags + let prof = profilingInfo dflags $11 $13 + rep = mkRTSRep (fromIntegral $9) $ + mkHeapRep dflags False (fromIntegral $5) + (fromIntegral $7) Thunk + -- not really Thunk, but that makes the info table + -- we want. + return (mkCmmEntryLabel pkg $3, + Just $ CmmInfoTable { cit_lbl = mkCmmInfoLabel pkg $3 + , cit_rep = rep + , cit_prof = prof, cit_srt = Nothing, cit_clo = Nothing }, + []) } + + | 'INFO_TABLE_FUN' '(' NAME ',' INT ',' INT ',' INT ',' STRING ',' STRING ',' INT ')' + -- ptrs, nptrs, closure type, description, type, fun type + {% liftP . withThisPackage $ \pkg -> + do dflags <- getDynFlags + let prof = profilingInfo dflags $11 $13 + ty = Fun 0 (ArgSpec (fromIntegral $15)) + -- Arity zero, arg_type $15 + rep = mkRTSRep (fromIntegral $9) $ + mkHeapRep dflags False (fromIntegral $5) + (fromIntegral $7) ty + return (mkCmmEntryLabel pkg $3, + Just $ CmmInfoTable { cit_lbl = mkCmmInfoLabel pkg $3 + , cit_rep = rep + , cit_prof = prof, cit_srt = Nothing, cit_clo = Nothing }, + []) } + -- we leave most of the fields zero here. This is only used + -- to generate the BCO info table in the RTS at the moment. + + | 'INFO_TABLE_CONSTR' '(' NAME ',' INT ',' INT ',' INT ',' INT ',' STRING ',' STRING ')' + -- ptrs, nptrs, tag, closure type, description, type + {% liftP . 
withThisPackage $ \pkg ->
+                 do dflags <- getDynFlags
+                    let prof = profilingInfo dflags $13 $15
+                        ty  = Constr (fromIntegral $9)  -- Tag
+                                     (BS8.pack $13)
+                        rep = mkRTSRep (fromIntegral $11) $
+                                mkHeapRep dflags False (fromIntegral $5)
+                                   (fromIntegral $7) ty
+                    return (mkCmmEntryLabel pkg $3,
+                            Just $ CmmInfoTable { cit_lbl = mkCmmInfoLabel pkg $3
+                                           , cit_rep = rep
+                                           , cit_prof = prof, cit_srt = Nothing, cit_clo = Nothing },
+                            []) }
+
+        -- If profiling is on, this string gets duplicated, but that's the
+        -- way the old code did it; we can fix it some other time.
+
+        | 'INFO_TABLE_SELECTOR' '(' NAME ',' INT ',' INT ',' STRING ',' STRING ')'
+                -- selector, closure type, description, type
+                {% liftP . withThisPackage $ \pkg ->
+                 do dflags <- getDynFlags
+                    let prof = profilingInfo dflags $9 $11
+                        ty  = ThunkSelector (fromIntegral $5)
+                        rep = mkRTSRep (fromIntegral $7) $
+                                 mkHeapRep dflags False 0 0 ty
+                    return (mkCmmEntryLabel pkg $3,
+                            Just $ CmmInfoTable { cit_lbl = mkCmmInfoLabel pkg $3
+                                           , cit_rep = rep
+                                           , cit_prof = prof, cit_srt = Nothing, cit_clo = Nothing },
+                            []) }
+
+        | 'INFO_TABLE_RET' '(' NAME ',' INT ')'
+                -- closure type (no live regs)
+                {% liftP . withThisPackage $ \pkg ->
+                 do let prof = NoProfilingInfo
+                        rep  = mkRTSRep (fromIntegral $5) $ mkStackRep []
+                    return (mkCmmRetLabel pkg $3,
+                            Just $ CmmInfoTable { cit_lbl = mkCmmRetInfoLabel pkg $3
+                                           , cit_rep = rep
+                                           , cit_prof = prof, cit_srt = Nothing, cit_clo = Nothing },
+                            []) }
+
+        | 'INFO_TABLE_RET' '(' NAME ',' INT ',' formals0 ')'
+                -- closure type, live regs
+                {% liftP . withThisPackage $ \pkg ->
+                 do dflags <- getDynFlags
+                    live <- sequence $7
+                    let prof = NoProfilingInfo
+                        -- drop one for the info pointer
+                        bitmap = mkLiveness dflags (drop 1 live)
+                        rep  = mkRTSRep (fromIntegral $5) $ mkStackRep bitmap
+                    return (mkCmmRetLabel pkg $3,
+                            Just $ CmmInfoTable { cit_lbl = mkCmmRetInfoLabel pkg $3
+                                           , cit_rep = rep
+                                           , cit_prof = prof, cit_srt = Nothing, cit_clo = Nothing },
+                            live) }
+
+body    :: { CmmParse () }
+        : {- empty -}                   { return () }
+        | decl body                     { do $1; $2 }
+        | stmt body                     { do $1; $2 }
+
+decl    :: { CmmParse () }
+        : type names ';'                { mapM_ (newLocal $1) $2 }
+        | 'import' importNames ';'      { mapM_ newImport $2 }
+        | 'export' names ';'            { return () }  -- ignore exports
+
+
+-- an imported function name, with optional packageId
+importNames
+        :: { [(FastString, CLabel)] }
+        : importName                    { [$1] }
+        | importName ',' importNames    { $1 : $3 }
+
+importName
+        :: { (FastString, CLabel) }
+
+        -- A label imported without an explicit packageId.
+        --      These are taken to come from some foreign, unnamed package.
+        : NAME
+        { ($1, mkForeignLabel $1 Nothing ForeignLabelInExternalPackage IsFunction) }
+
+        -- as previous 'NAME', but 'IsData'
+        | 'CLOSURE' NAME
+        { ($2, mkForeignLabel $2 Nothing ForeignLabelInExternalPackage IsData) }
+
+        -- A label imported with an explicit packageId.
+        | STRING NAME
+        { ($2, mkCmmCodeLabel (fsToUnitId (mkFastString $1)) $2) }
+
+
+names   :: { [FastString] }
+        : NAME                          { [$1] }
+        | NAME ',' names                { $1 : $3 }
+
+stmt    :: { CmmParse () }
+        : ';'                           { return () }
+
+        | NAME ':'
+                { do l <- newLabel $1; emitLabel l }
+
+
+
+        | lreg '=' expr ';'
+                { do reg <- $1; e <- $3; withSourceNote $2 $4 (emitAssign reg e) }
+        | type '[' expr ']' '=' expr ';'
+                { withSourceNote $2 $7 (doStore $1 $3 $6) }
+
+        -- Gah! We really want to say "foreign_results" but that causes
+        -- a shift/reduce conflict with assignment.  Either we expand out
+        -- the no-result and single-result cases, or we tweak the syntax
+        -- to avoid the conflict.  The latter option is taken here, because
+        -- the other way would require multiple levels of expansion and
+        -- would get unwieldy.
+        | foreign_results 'foreign' STRING foreignLabel '(' cmm_hint_exprs0 ')' safety opt_never_returns ';'
+                {% foreignCall $3 $1 $4 $6 $8 $9 }
+        | foreign_results 'prim' '%' NAME '(' exprs0 ')' ';'
+                {% primCall $1 $4 $6 }
+        -- stmt-level macros, stealing syntax from ordinary C-- function calls.
+        -- Perhaps we ought to use the %%-form?
+        | NAME '(' exprs0 ')' ';'
+                {% stmtMacro $1 $3 }
+        | 'switch' maybe_range expr '{' arms default '}'
+                { do as <- sequence $5; doSwitch $2 $3 as $6 }
+        | 'goto' NAME ';'
+                { do l <- lookupLabel $2; emit (mkBranch l) }
+        | 'return' '(' exprs0 ')' ';'
+                { doReturn $3 }
+        | 'jump' expr vols ';'
+                { doRawJump $2 $3 }
+        | 'jump' expr '(' exprs0 ')' ';'
+                { doJumpWithStack $2 [] $4 }
+        | 'jump' expr '(' exprs0 ')' '(' exprs0 ')' ';'
+                { doJumpWithStack $2 $4 $7 }
+        | 'call' expr '(' exprs0 ')' ';'
+                { doCall $2 [] $4 }
+        | '(' formals ')' '=' 'call' expr '(' exprs0 ')' ';'
+                { doCall $6 $2 $8 }
+        | 'if' bool_expr cond_likely 'goto' NAME
+                { do l <- lookupLabel $5; cmmRawIf $2 l $3 }
+        | 'if' bool_expr cond_likely '{' body '}' else
+                { cmmIfThenElse $2 (withSourceNote $4 $6 $5) $7 $3 }
+        | 'push' '(' exprs0 ')' maybe_body
+                { pushStackFrame $3 $5 }
+        | 'reserve' expr '=' lreg maybe_body
+                { reserveStackFrame $2 $4 $5 }
+        | 'unwind' unwind_regs ';'
+                { $2 >>= code . emitUnwind }
+
+unwind_regs
+        :: { CmmParse [(GlobalReg, Maybe CmmExpr)] }
+        : GLOBALREG '=' expr_or_unknown ',' unwind_regs
+                { do e <- $3; rest <- $5; return (($1, e) : rest) }
+        | GLOBALREG '=' expr_or_unknown
+                { do e <- $3; return [($1, e)] }
+
+-- | Used by unwind to indicate unknown unwinding values.
+expr_or_unknown
+        :: { CmmParse (Maybe CmmExpr) }
+        : 'return'
+                { do return Nothing }
+        | expr
+                { do e <- $1; return (Just e) }
+
+foreignLabel     :: { CmmParse CmmExpr }
+        : NAME                          { return (CmmLit (CmmLabel (mkForeignLabel $1 Nothing ForeignLabelInThisPackage IsFunction))) }
+
+opt_never_returns :: { CmmReturnInfo }
+        :                               { CmmMayReturn }
+        | 'never' 'returns'             { CmmNeverReturns }
+
+bool_expr :: { CmmParse BoolExpr }
+        : bool_op                       { $1 }
+        | expr                          { do e <- $1; return (BoolTest e) }
+
+bool_op :: { CmmParse BoolExpr }
+        : bool_expr '&&' bool_expr      { do e1 <- $1; e2 <- $3;
+                                             return (BoolAnd e1 e2) }
+        | bool_expr '||' bool_expr      { do e1 <- $1; e2 <- $3;
+                                             return (BoolOr e1 e2)  }
+        | '!' bool_expr                 { do e <- $2; return (BoolNot e) }
+        | '(' bool_op ')'               { $2 }
+
+safety  :: { Safety }
+        : {- empty -}                   { PlayRisky }
+        | STRING                        {% parseSafety $1 }
+
+vols    :: { [GlobalReg] }
+        : '[' ']'                       { [] }
+        | '[' '*' ']'                   {% do df <- getDynFlags
+                                         ; return (realArgRegsCover df) }
+                                           -- All of them. See comment attached
+                                           -- to realArgRegsCover
+        | '[' globals ']'               { $2 }
+
+globals :: { [GlobalReg] }
+        : GLOBALREG                     { [$1] }
+        | GLOBALREG ',' globals         { $1 : $3 }
+
+maybe_range :: { Maybe (Integer,Integer) }
+        : '[' INT '..' 
INT ']' { Just ($2, $4) } + | {- empty -} { Nothing } + +arms :: { [CmmParse ([Integer],Either BlockId (CmmParse ()))] } + : {- empty -} { [] } + | arm arms { $1 : $2 } + +arm :: { CmmParse ([Integer],Either BlockId (CmmParse ())) } + : 'case' ints ':' arm_body { do b <- $4; return ($2, b) } + +arm_body :: { CmmParse (Either BlockId (CmmParse ())) } + : '{' body '}' { return (Right (withSourceNote $1 $3 $2)) } + | 'goto' NAME ';' { do l <- lookupLabel $2; return (Left l) } + +ints :: { [Integer] } + : INT { [ $1 ] } + | INT ',' ints { $1 : $3 } + +default :: { Maybe (CmmParse ()) } + : 'default' ':' '{' body '}' { Just (withSourceNote $3 $5 $4) } + -- taking a few liberties with the C-- syntax here; C-- doesn't have + -- 'default' branches + | {- empty -} { Nothing } + +-- Note: OldCmm doesn't support a first class 'else' statement, though +-- CmmNode does. +else :: { CmmParse () } + : {- empty -} { return () } + | 'else' '{' body '}' { withSourceNote $2 $4 $3 } + +cond_likely :: { Maybe Bool } + : '(' 'likely' ':' 'True' ')' { Just True } + | '(' 'likely' ':' 'False' ')' { Just False } + | {- empty -} { Nothing } + + +-- we have to write this out longhand so that Happy's precedence rules +-- can kick in. +expr :: { CmmParse CmmExpr } + : expr '/' expr { mkMachOp MO_U_Quot [$1,$3] } + | expr '*' expr { mkMachOp MO_Mul [$1,$3] } + | expr '%' expr { mkMachOp MO_U_Rem [$1,$3] } + | expr '-' expr { mkMachOp MO_Sub [$1,$3] } + | expr '+' expr { mkMachOp MO_Add [$1,$3] } + | expr '>>' expr { mkMachOp MO_U_Shr [$1,$3] } + | expr '<<' expr { mkMachOp MO_Shl [$1,$3] } + | expr '&' expr { mkMachOp MO_And [$1,$3] } + | expr '^' expr { mkMachOp MO_Xor [$1,$3] } + | expr '|' expr { mkMachOp MO_Or [$1,$3] } + | expr '>=' expr { mkMachOp MO_U_Ge [$1,$3] } + | expr '>' expr { mkMachOp MO_U_Gt [$1,$3] } + | expr '<=' expr { mkMachOp MO_U_Le [$1,$3] } + | expr '<' expr { mkMachOp MO_U_Lt [$1,$3] } + | expr '!=' expr { mkMachOp MO_Ne [$1,$3] } + | expr '==' expr { mkMachOp MO_Eq [$1,$3] } + | '~' expr { mkMachOp MO_Not [$2] } + | '-' expr { mkMachOp MO_S_Neg [$2] } + | expr0 '`' NAME '`' expr0 {% do { mo <- nameToMachOp $3 ; + return (mkMachOp mo [$1,$5]) } } + | expr0 { $1 } + +expr0 :: { CmmParse CmmExpr } + : INT maybe_ty { return (CmmLit (CmmInt $1 (typeWidth $2))) } + | FLOAT maybe_ty { return (CmmLit (CmmFloat $1 (typeWidth $2))) } + | STRING { do s <- code (newStringCLit $1); + return (CmmLit s) } + | reg { $1 } + | type '[' expr ']' { do e <- $3; return (CmmLoad e $1) } + | '%' NAME '(' exprs0 ')' {% exprOp $2 $4 } + | '(' expr ')' { $2 } + + +-- leaving out the type of a literal gives you the native word size in C-- +maybe_ty :: { CmmType } + : {- empty -} {% do dflags <- getDynFlags; return $ bWord dflags } + | '::' type { $2 } + +cmm_hint_exprs0 :: { [CmmParse (CmmExpr, ForeignHint)] } + : {- empty -} { [] } + | cmm_hint_exprs { $1 } + +cmm_hint_exprs :: { [CmmParse (CmmExpr, ForeignHint)] } + : cmm_hint_expr { [$1] } + | cmm_hint_expr ',' cmm_hint_exprs { $1 : $3 } + +cmm_hint_expr :: { CmmParse (CmmExpr, ForeignHint) } + : expr { do e <- $1; + return (e, inferCmmHint e) } + | expr STRING {% do h <- parseCmmHint $2; + return $ do + e <- $1; return (e, h) } + +exprs0 :: { [CmmParse CmmExpr] } + : {- empty -} { [] } + | exprs { $1 } + +exprs :: { [CmmParse CmmExpr] } + : expr { [ $1 ] } + | expr ',' exprs { $1 : $3 } + +reg :: { CmmParse CmmExpr } + : NAME { lookupName $1 } + | GLOBALREG { return (CmmReg (CmmGlobal $1)) } + +foreign_results :: { [CmmParse (LocalReg, ForeignHint)] } + : {- empty -} 
{ [] } + | '(' foreign_formals ')' '=' { $2 } + +foreign_formals :: { [CmmParse (LocalReg, ForeignHint)] } + : foreign_formal { [$1] } + | foreign_formal ',' { [$1] } + | foreign_formal ',' foreign_formals { $1 : $3 } + +foreign_formal :: { CmmParse (LocalReg, ForeignHint) } + : local_lreg { do e <- $1; return (e, inferCmmHint (CmmReg (CmmLocal e))) } + | STRING local_lreg {% do h <- parseCmmHint $1; + return $ do + e <- $2; return (e,h) } + +local_lreg :: { CmmParse LocalReg } + : NAME { do e <- lookupName $1; + return $ + case e of + CmmReg (CmmLocal r) -> r + other -> pprPanic "CmmParse:" (ftext $1 <> text " not a local register") } + +lreg :: { CmmParse CmmReg } + : NAME { do e <- lookupName $1; + return $ + case e of + CmmReg r -> r + other -> pprPanic "CmmParse:" (ftext $1 <> text " not a register") } + | GLOBALREG { return (CmmGlobal $1) } + +maybe_formals :: { Maybe [CmmParse LocalReg] } + : {- empty -} { Nothing } + | '(' formals0 ')' { Just $2 } + +formals0 :: { [CmmParse LocalReg] } + : {- empty -} { [] } + | formals { $1 } + +formals :: { [CmmParse LocalReg] } + : formal ',' { [$1] } + | formal { [$1] } + | formal ',' formals { $1 : $3 } + +formal :: { CmmParse LocalReg } + : type NAME { newLocal $1 $2 } + +type :: { CmmType } + : 'bits8' { b8 } + | typenot8 { $1 } + +typenot8 :: { CmmType } + : 'bits16' { b16 } + | 'bits32' { b32 } + | 'bits64' { b64 } + | 'bits128' { b128 } + | 'bits256' { b256 } + | 'bits512' { b512 } + | 'float32' { f32 } + | 'float64' { f64 } + | 'gcptr' {% do dflags <- getDynFlags; return $ gcWord dflags } + +{ +section :: String -> SectionType +section "text" = Text +section "data" = Data +section "rodata" = ReadOnlyData +section "relrodata" = RelocatableReadOnlyData +section "bss" = UninitialisedData +section s = OtherSection s + +mkString :: String -> CmmStatic +mkString s = CmmString (BS8.pack s) + +-- | +-- Given an info table, decide what the entry convention for the proc +-- is. That is, for an INFO_TABLE_RET we want the return convention, +-- otherwise it is a NativeNodeCall. +-- +infoConv :: Maybe CmmInfoTable -> Convention +infoConv Nothing = NativeNodeCall +infoConv (Just info) + | isStackRep (cit_rep info) = NativeReturn + | otherwise = NativeNodeCall + +-- mkMachOp infers the type of the MachOp from the type of its first +-- argument. We assume that this is correct: for MachOps that don't have +-- symmetrical args (e.g. shift ops), the first arg determines the type of +-- the op. 
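+-- For example (an illustrative sketch, not from this patch): parsing
+-- 'x + 1' where 'x' was declared as bits64 produces
+--     mkMachOp MO_Add [x_code, one_code]
+-- and the width is read off the first argument, so the op becomes
+-- 'MO_Add W64' even though the literal 1 carries no explicit type.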
+mkMachOp :: (Width -> MachOp) -> [CmmParse CmmExpr] -> CmmParse CmmExpr +mkMachOp fn args = do + dflags <- getDynFlags + arg_exprs <- sequence args + return (CmmMachOp (fn (typeWidth (cmmExprType dflags (head arg_exprs)))) arg_exprs) + +getLit :: CmmExpr -> CmmLit +getLit (CmmLit l) = l +getLit (CmmMachOp (MO_S_Neg _) [CmmLit (CmmInt i r)]) = CmmInt (negate i) r +getLit _ = panic "invalid literal" -- TODO messy failure + +nameToMachOp :: FastString -> PD (Width -> MachOp) +nameToMachOp name = + case lookupUFM machOps name of + Nothing -> fail ("unknown primitive " ++ unpackFS name) + Just m -> return m + +exprOp :: FastString -> [CmmParse CmmExpr] -> PD (CmmParse CmmExpr) +exprOp name args_code = do + dflags <- getDynFlags + case lookupUFM (exprMacros dflags) name of + Just f -> return $ do + args <- sequence args_code + return (f args) + Nothing -> do + mo <- nameToMachOp name + return $ mkMachOp mo args_code + +exprMacros :: DynFlags -> UniqFM ([CmmExpr] -> CmmExpr) +exprMacros dflags = listToUFM [ + ( fsLit "ENTRY_CODE", \ [x] -> entryCode dflags x ), + ( fsLit "INFO_PTR", \ [x] -> closureInfoPtr dflags x ), + ( fsLit "STD_INFO", \ [x] -> infoTable dflags x ), + ( fsLit "FUN_INFO", \ [x] -> funInfoTable dflags x ), + ( fsLit "GET_ENTRY", \ [x] -> entryCode dflags (closureInfoPtr dflags x) ), + ( fsLit "GET_STD_INFO", \ [x] -> infoTable dflags (closureInfoPtr dflags x) ), + ( fsLit "GET_FUN_INFO", \ [x] -> funInfoTable dflags (closureInfoPtr dflags x) ), + ( fsLit "INFO_TYPE", \ [x] -> infoTableClosureType dflags x ), + ( fsLit "INFO_PTRS", \ [x] -> infoTablePtrs dflags x ), + ( fsLit "INFO_NPTRS", \ [x] -> infoTableNonPtrs dflags x ) + ] + +-- we understand a subset of C-- primitives: +machOps = listToUFM $ + map (\(x, y) -> (mkFastString x, y)) [ + ( "add", MO_Add ), + ( "sub", MO_Sub ), + ( "eq", MO_Eq ), + ( "ne", MO_Ne ), + ( "mul", MO_Mul ), + ( "neg", MO_S_Neg ), + ( "quot", MO_S_Quot ), + ( "rem", MO_S_Rem ), + ( "divu", MO_U_Quot ), + ( "modu", MO_U_Rem ), + + ( "ge", MO_S_Ge ), + ( "le", MO_S_Le ), + ( "gt", MO_S_Gt ), + ( "lt", MO_S_Lt ), + + ( "geu", MO_U_Ge ), + ( "leu", MO_U_Le ), + ( "gtu", MO_U_Gt ), + ( "ltu", MO_U_Lt ), + + ( "and", MO_And ), + ( "or", MO_Or ), + ( "xor", MO_Xor ), + ( "com", MO_Not ), + ( "shl", MO_Shl ), + ( "shrl", MO_U_Shr ), + ( "shra", MO_S_Shr ), + + ( "fadd", MO_F_Add ), + ( "fsub", MO_F_Sub ), + ( "fneg", MO_F_Neg ), + ( "fmul", MO_F_Mul ), + ( "fquot", MO_F_Quot ), + + ( "feq", MO_F_Eq ), + ( "fne", MO_F_Ne ), + ( "fge", MO_F_Ge ), + ( "fle", MO_F_Le ), + ( "fgt", MO_F_Gt ), + ( "flt", MO_F_Lt ), + + ( "lobits8", flip MO_UU_Conv W8 ), + ( "lobits16", flip MO_UU_Conv W16 ), + ( "lobits32", flip MO_UU_Conv W32 ), + ( "lobits64", flip MO_UU_Conv W64 ), + + ( "zx16", flip MO_UU_Conv W16 ), + ( "zx32", flip MO_UU_Conv W32 ), + ( "zx64", flip MO_UU_Conv W64 ), + + ( "sx16", flip MO_SS_Conv W16 ), + ( "sx32", flip MO_SS_Conv W32 ), + ( "sx64", flip MO_SS_Conv W64 ), + + ( "f2f32", flip MO_FF_Conv W32 ), -- TODO; rounding mode + ( "f2f64", flip MO_FF_Conv W64 ), -- TODO; rounding mode + ( "f2i8", flip MO_FS_Conv W8 ), + ( "f2i16", flip MO_FS_Conv W16 ), + ( "f2i32", flip MO_FS_Conv W32 ), + ( "f2i64", flip MO_FS_Conv W64 ), + ( "i2f32", flip MO_SF_Conv W32 ), + ( "i2f64", flip MO_SF_Conv W64 ) + ] + +callishMachOps :: UniqFM ([CmmExpr] -> (CallishMachOp, [CmmExpr])) +callishMachOps = listToUFM $ + map (\(x, y) -> (mkFastString x, y)) [ + ( "read_barrier", (MO_ReadBarrier,)), + ( "write_barrier", (MO_WriteBarrier,)), + ( "memcpy", memcpyLikeTweakArgs 
MO_Memcpy ), + ( "memset", memcpyLikeTweakArgs MO_Memset ), + ( "memmove", memcpyLikeTweakArgs MO_Memmove ), + ( "memcmp", memcpyLikeTweakArgs MO_Memcmp ), + + ("prefetch0", (MO_Prefetch_Data 0,)), + ("prefetch1", (MO_Prefetch_Data 1,)), + ("prefetch2", (MO_Prefetch_Data 2,)), + ("prefetch3", (MO_Prefetch_Data 3,)), + + ( "popcnt8", (MO_PopCnt W8,)), + ( "popcnt16", (MO_PopCnt W16,)), + ( "popcnt32", (MO_PopCnt W32,)), + ( "popcnt64", (MO_PopCnt W64,)), + + ( "pdep8", (MO_Pdep W8,)), + ( "pdep16", (MO_Pdep W16,)), + ( "pdep32", (MO_Pdep W32,)), + ( "pdep64", (MO_Pdep W64,)), + + ( "pext8", (MO_Pext W8,)), + ( "pext16", (MO_Pext W16,)), + ( "pext32", (MO_Pext W32,)), + ( "pext64", (MO_Pext W64,)), + + ( "cmpxchg8", (MO_Cmpxchg W8,)), + ( "cmpxchg16", (MO_Cmpxchg W16,)), + ( "cmpxchg32", (MO_Cmpxchg W32,)), + ( "cmpxchg64", (MO_Cmpxchg W64,)) + + -- ToDo: the rest, maybe + -- edit: which rest? + -- also: how do we tell CMM Lint how to type check callish macops? + ] + where + memcpyLikeTweakArgs :: (Int -> CallishMachOp) -> [CmmExpr] -> (CallishMachOp, [CmmExpr]) + memcpyLikeTweakArgs op [] = pgmError "memcpy-like function requires at least one argument" + memcpyLikeTweakArgs op args@(_:_) = + (op align, args') + where + args' = init args + align = case last args of + CmmLit (CmmInt alignInteger _) -> fromInteger alignInteger + e -> pprPgmError "Non-constant alignment in memcpy-like function:" (ppr e) + -- The alignment of memcpy-ish operations must be a + -- compile-time constant. We verify this here, passing it around + -- in the MO_* constructor. In order to do this, however, we + -- must intercept the arguments in primCall. + +parseSafety :: String -> PD Safety +parseSafety "safe" = return PlaySafe +parseSafety "unsafe" = return PlayRisky +parseSafety "interruptible" = return PlayInterruptible +parseSafety str = fail ("unrecognised safety: " ++ str) + +parseCmmHint :: String -> PD ForeignHint +parseCmmHint "ptr" = return AddrHint +parseCmmHint "signed" = return SignedHint +parseCmmHint str = fail ("unrecognised hint: " ++ str) + +-- labels are always pointers, so we might as well infer the hint +inferCmmHint :: CmmExpr -> ForeignHint +inferCmmHint (CmmLit (CmmLabel _)) = AddrHint +inferCmmHint (CmmReg (CmmGlobal g)) | isPtrGlobalReg g = AddrHint +inferCmmHint _ = NoHint + +isPtrGlobalReg Sp = True +isPtrGlobalReg SpLim = True +isPtrGlobalReg Hp = True +isPtrGlobalReg HpLim = True +isPtrGlobalReg CCCS = True +isPtrGlobalReg CurrentTSO = True +isPtrGlobalReg CurrentNursery = True +isPtrGlobalReg (VanillaReg _ VGcPtr) = True +isPtrGlobalReg _ = False + +happyError :: PD a +happyError = PD $ \_ s -> unP srcParseFail s + +-- ----------------------------------------------------------------------------- +-- Statement-level macros + +stmtMacro :: FastString -> [CmmParse CmmExpr] -> PD (CmmParse ()) +stmtMacro fun args_code = do + case lookupUFM stmtMacros fun of + Nothing -> fail ("unknown macro: " ++ unpackFS fun) + Just fcode -> return $ do + args <- sequence args_code + code (fcode args) + +stmtMacros :: UniqFM ([CmmExpr] -> FCode ()) +stmtMacros = listToUFM [ + ( fsLit "CCS_ALLOC", \[words,ccs] -> profAlloc words ccs ), + ( fsLit "ENTER_CCS_THUNK", \[e] -> enterCostCentreThunk e ), + + ( fsLit "CLOSE_NURSERY", \[] -> emitCloseNursery ), + ( fsLit "OPEN_NURSERY", \[] -> emitOpenNursery ), + + -- completely generic heap and stack checks, for use in high-level cmm. 
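+    -- In hand-written .cmm these are invoked with ordinary call syntax;
+    -- a sketch (the argument name is invented):
+    --     HP_CHK_GEN(bytes);
+    --     STK_CHK_GEN();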
+ ( fsLit "HP_CHK_GEN", \[bytes] -> + heapStackCheckGen Nothing (Just bytes) ), + ( fsLit "STK_CHK_GEN", \[] -> + heapStackCheckGen (Just (CmmLit CmmHighStackMark)) Nothing ), + + -- A stack check for a fixed amount of stack. Sounds a bit strange, but + -- we use the stack for a bit of temporary storage in a couple of primops + ( fsLit "STK_CHK_GEN_N", \[bytes] -> + heapStackCheckGen (Just bytes) Nothing ), + + -- A stack check on entry to a thunk, where the argument is the thunk pointer. + ( fsLit "STK_CHK_NP" , \[node] -> entryHeapCheck' False node 0 [] (return ())), + + ( fsLit "LOAD_THREAD_STATE", \[] -> emitLoadThreadState ), + ( fsLit "SAVE_THREAD_STATE", \[] -> emitSaveThreadState ), + + ( fsLit "LDV_ENTER", \[e] -> ldvEnter e ), + ( fsLit "LDV_RECORD_CREATE", \[e] -> ldvRecordCreate e ), + + ( fsLit "PUSH_UPD_FRAME", \[sp,e] -> emitPushUpdateFrame sp e ), + ( fsLit "SET_HDR", \[ptr,info,ccs] -> + emitSetDynHdr ptr info ccs ), + ( fsLit "TICK_ALLOC_PRIM", \[hdr,goods,slop] -> + tickyAllocPrim hdr goods slop ), + ( fsLit "TICK_ALLOC_PAP", \[goods,slop] -> + tickyAllocPAP goods slop ), + ( fsLit "TICK_ALLOC_UP_THK", \[goods,slop] -> + tickyAllocThunk goods slop ), + ( fsLit "UPD_BH_UPDATABLE", \[reg] -> emitBlackHoleCode reg ) + ] + +emitPushUpdateFrame :: CmmExpr -> CmmExpr -> FCode () +emitPushUpdateFrame sp e = do + dflags <- getDynFlags + emitUpdateFrame dflags sp mkUpdInfoLabel e + +pushStackFrame :: [CmmParse CmmExpr] -> CmmParse () -> CmmParse () +pushStackFrame fields body = do + dflags <- getDynFlags + exprs <- sequence fields + updfr_off <- getUpdFrameOff + let (new_updfr_off, _, g) = copyOutOflow dflags NativeReturn Ret Old + [] updfr_off exprs + emit g + withUpdFrameOff new_updfr_off body + +reserveStackFrame + :: CmmParse CmmExpr + -> CmmParse CmmReg + -> CmmParse () + -> CmmParse () +reserveStackFrame psize preg body = do + dflags <- getDynFlags + old_updfr_off <- getUpdFrameOff + reg <- preg + esize <- psize + let size = case constantFoldExpr dflags esize of + CmmLit (CmmInt n _) -> n + _other -> pprPanic "CmmParse: not a compile-time integer: " + (ppr esize) + let frame = old_updfr_off + wORD_SIZE dflags * fromIntegral size + emitAssign reg (CmmStackSlot Old frame) + withUpdFrameOff frame body + +profilingInfo dflags desc_str ty_str + = if not (gopt Opt_SccProfilingOn dflags) + then NoProfilingInfo + else ProfilingInfo (BS8.pack desc_str) (BS8.pack ty_str) + +staticClosure :: UnitId -> FastString -> FastString -> [CmmLit] -> CmmParse () +staticClosure pkg cl_label info payload + = do dflags <- getDynFlags + let lits = mkStaticClosure dflags (mkCmmInfoLabel pkg info) dontCareCCS payload [] [] [] + code $ emitDataLits (mkCmmDataLabel pkg cl_label) lits + +foreignCall + :: String + -> [CmmParse (LocalReg, ForeignHint)] + -> CmmParse CmmExpr + -> [CmmParse (CmmExpr, ForeignHint)] + -> Safety + -> CmmReturnInfo + -> PD (CmmParse ()) +foreignCall conv_string results_code expr_code args_code safety ret + = do conv <- case conv_string of + "C" -> return CCallConv + "stdcall" -> return StdCallConv + _ -> fail ("unknown calling convention: " ++ conv_string) + return $ do + dflags <- getDynFlags + results <- sequence results_code + expr <- expr_code + args <- sequence args_code + let + expr' = adjCallTarget dflags conv expr args + (arg_exprs, arg_hints) = unzip args + (res_regs, res_hints) = unzip results + fc = ForeignConvention conv arg_hints res_hints ret + target = ForeignTarget expr' fc + _ <- code $ emitForeignCall safety res_regs target arg_exprs + return () + + +doReturn 
:: [CmmParse CmmExpr] -> CmmParse () +doReturn exprs_code = do + dflags <- getDynFlags + exprs <- sequence exprs_code + updfr_off <- getUpdFrameOff + emit (mkReturnSimple dflags exprs updfr_off) + +mkReturnSimple :: DynFlags -> [CmmActual] -> UpdFrameOffset -> CmmAGraph +mkReturnSimple dflags actuals updfr_off = + mkReturn dflags e actuals updfr_off + where e = entryCode dflags (CmmLoad (CmmStackSlot Old updfr_off) + (gcWord dflags)) + +doRawJump :: CmmParse CmmExpr -> [GlobalReg] -> CmmParse () +doRawJump expr_code vols = do + dflags <- getDynFlags + expr <- expr_code + updfr_off <- getUpdFrameOff + emit (mkRawJump dflags expr updfr_off vols) + +doJumpWithStack :: CmmParse CmmExpr -> [CmmParse CmmExpr] + -> [CmmParse CmmExpr] -> CmmParse () +doJumpWithStack expr_code stk_code args_code = do + dflags <- getDynFlags + expr <- expr_code + stk_args <- sequence stk_code + args <- sequence args_code + updfr_off <- getUpdFrameOff + emit (mkJumpExtra dflags NativeNodeCall expr args updfr_off stk_args) + +doCall :: CmmParse CmmExpr -> [CmmParse LocalReg] -> [CmmParse CmmExpr] + -> CmmParse () +doCall expr_code res_code args_code = do + dflags <- getDynFlags + expr <- expr_code + args <- sequence args_code + ress <- sequence res_code + updfr_off <- getUpdFrameOff + c <- code $ mkCall expr (NativeNodeCall,NativeReturn) ress args updfr_off [] + emit c + +adjCallTarget :: DynFlags -> CCallConv -> CmmExpr -> [(CmmExpr, ForeignHint) ] + -> CmmExpr +-- On Windows, we have to add the '@N' suffix to the label when making +-- a call with the stdcall calling convention. +adjCallTarget dflags StdCallConv (CmmLit (CmmLabel lbl)) args + | platformOS (targetPlatform dflags) == OSMinGW32 + = CmmLit (CmmLabel (addLabelSize lbl (sum (map size args)))) + where size (e, _) = max (wORD_SIZE dflags) (widthInBytes (typeWidth (cmmExprType dflags e))) + -- c.f. CgForeignCall.emitForeignCall +adjCallTarget _ _ expr _ + = expr + +primCall + :: [CmmParse (CmmFormal, ForeignHint)] + -> FastString + -> [CmmParse CmmExpr] + -> PD (CmmParse ()) +primCall results_code name args_code + = case lookupUFM callishMachOps name of + Nothing -> fail ("unknown primitive " ++ unpackFS name) + Just f -> return $ do + results <- sequence results_code + args <- sequence args_code + let (p, args') = f args + code (emitPrimCall (map fst results) p args') + +doStore :: CmmType -> CmmParse CmmExpr -> CmmParse CmmExpr -> CmmParse () +doStore rep addr_code val_code + = do dflags <- getDynFlags + addr <- addr_code + val <- val_code + -- if the specified store type does not match the type of the expr + -- on the rhs, then we insert a coercion that will cause the type + -- mismatch to be flagged by cmm-lint. If we don't do this, then + -- the store will happen at the wrong type, and the error will not + -- be noticed. + let val_width = typeWidth (cmmExprType dflags val) + rep_width = typeWidth rep + let coerce_val + | val_width /= rep_width = CmmMachOp (MO_UU_Conv val_width rep_width) [val] + | otherwise = val + emitStore addr coerce_val + +-- ----------------------------------------------------------------------------- +-- If-then-else and boolean expressions + +data BoolExpr + = BoolExpr `BoolAnd` BoolExpr + | BoolExpr `BoolOr` BoolExpr + | BoolNot BoolExpr + | BoolTest CmmExpr + +-- ToDo: smart constructors which simplify the boolean expression. 
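+
+-- As an illustrative sketch (names invented): a source condition such as
+-- 'x > 0 && y > 0' parses to
+--     BoolAnd (BoolTest {x > 0}) (BoolTest {y > 0})
+-- and emitCond below lowers it to short-circuit branches rather than
+-- materialising a boolean value in a register.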
+ +cmmIfThenElse cond then_part else_part likely = do + then_id <- newBlockId + join_id <- newBlockId + c <- cond + emitCond c then_id likely + else_part + emit (mkBranch join_id) + emitLabel then_id + then_part + -- fall through to join + emitLabel join_id + +cmmRawIf cond then_id likely = do + c <- cond + emitCond c then_id likely + +-- 'emitCond cond true_id' emits code to test whether the cond is true, +-- branching to true_id if so, and falling through otherwise. +emitCond (BoolTest e) then_id likely = do + else_id <- newBlockId + emit (mkCbranch e then_id else_id likely) + emitLabel else_id +emitCond (BoolNot (BoolTest (CmmMachOp op args))) then_id likely + | Just op' <- maybeInvertComparison op + = emitCond (BoolTest (CmmMachOp op' args)) then_id (not <$> likely) +emitCond (BoolNot e) then_id likely = do + else_id <- newBlockId + emitCond e else_id likely + emit (mkBranch then_id) + emitLabel else_id +emitCond (e1 `BoolOr` e2) then_id likely = do + emitCond e1 then_id likely + emitCond e2 then_id likely +emitCond (e1 `BoolAnd` e2) then_id likely = do + -- we'd like to invert one of the conditionals here to avoid an + -- extra branch instruction, but we can't use maybeInvertComparison + -- here because we can't look too closely at the expression since + -- we're in a loop. + and_id <- newBlockId + else_id <- newBlockId + emitCond e1 and_id likely + emit (mkBranch else_id) + emitLabel and_id + emitCond e2 then_id likely + emitLabel else_id + +-- ----------------------------------------------------------------------------- +-- Source code notes + +-- | Generate a source note spanning from "a" to "b" (inclusive), then +-- proceed with parsing. This allows debugging tools to reason about +-- locations in Cmm code. +withSourceNote :: Located a -> Located b -> CmmParse c -> CmmParse c +withSourceNote a b parse = do + name <- getName + case combineSrcSpans (getLoc a) (getLoc b) of + RealSrcSpan span -> code (emitTick (SourceNote span name)) >> parse + _other -> parse + +-- ----------------------------------------------------------------------------- +-- Table jumps + +-- We use a simplified form of C-- switch statements for now. A +-- switch statement always compiles to a table jump. Each arm can +-- specify a list of values (not ranges), and there can be a single +-- default branch. The range of the table is given either by the +-- optional range on the switch (eg. switch [0..7] {...}), or by +-- the minimum/maximum values from the branches. 
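+--
+-- A sketch of the accepted form (labels and scrutinee invented):
+--
+--     switch [0 .. 2] x {
+--         case 0, 1: { goto L1; }
+--         case 2:    { goto L2; }
+--         default:   { goto L3; }
+--     }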
+ +doSwitch :: Maybe (Integer,Integer) + -> CmmParse CmmExpr + -> [([Integer],Either BlockId (CmmParse ()))] + -> Maybe (CmmParse ()) -> CmmParse () +doSwitch mb_range scrut arms deflt + = do + -- Compile code for the default branch + dflt_entry <- + case deflt of + Nothing -> return Nothing + Just e -> do b <- forkLabelledCode e; return (Just b) + + -- Compile each case branch + table_entries <- mapM emitArm arms + let table = M.fromList (concat table_entries) + + dflags <- getDynFlags + let range = fromMaybe (0, tARGET_MAX_WORD dflags) mb_range + + expr <- scrut + -- ToDo: check for out of range and jump to default if necessary + emit $ mkSwitch expr (mkSwitchTargets False range dflt_entry table) + where + emitArm :: ([Integer],Either BlockId (CmmParse ())) -> CmmParse [(Integer,BlockId)] + emitArm (ints,Left blockid) = return [ (i,blockid) | i <- ints ] + emitArm (ints,Right code) = do + blockid <- forkLabelledCode code + return [ (i,blockid) | i <- ints ] + +forkLabelledCode :: CmmParse () -> CmmParse BlockId +forkLabelledCode p = do + (_,ag) <- getCodeScoped p + l <- newBlockId + emitOutOfLine l ag + return l + +-- ----------------------------------------------------------------------------- +-- Putting it all together + +-- The initial environment: we define some constants that the compiler +-- knows about here. +initEnv :: DynFlags -> Env +initEnv dflags = listToUFM [ + ( fsLit "SIZEOF_StgHeader", + VarN (CmmLit (CmmInt (fromIntegral (fixedHdrSize dflags)) (wordWidth dflags)) )), + ( fsLit "SIZEOF_StgInfoTable", + VarN (CmmLit (CmmInt (fromIntegral (stdInfoTableSizeB dflags)) (wordWidth dflags)) )) + ] + +parseCmmFile :: DynFlags -> FilePath -> IO (Messages, Maybe CmmGroup) +parseCmmFile dflags filename = withTiming dflags (text "ParseCmm"<+>brackets (text filename)) (\_ -> ()) $ do + buf <- hGetStringBuffer filename + let + init_loc = mkRealSrcLoc (mkFastString filename) 1 1 + init_state = (mkPState dflags buf init_loc) { lex_state = [0] } + -- reset the lex_state: the Lexer monad leaves some stuff + -- in there we don't want. + case unPD cmmParse dflags init_state of + PFailed pst -> + return (getMessages pst dflags, Nothing) + POk pst code -> do + st <- initC + let fcode = getCmm $ unEC code "global" (initEnv dflags) [] >> return () + (cmm,_) = runC dflags no_module st fcode + let ms = getMessages pst dflags + if (errorsFound dflags ms) + then return (ms, Nothing) + else return (ms, Just cmm) + where + no_module = panic "parseCmmFile: no module" +} diff --git a/compiler/GHC/Cmm/Pipeline.hs b/compiler/GHC/Cmm/Pipeline.hs new file mode 100644 index 0000000000..6db9e23ee1 --- /dev/null +++ b/compiler/GHC/Cmm/Pipeline.hs @@ -0,0 +1,367 @@ +{-# LANGUAGE BangPatterns #-} + +module GHC.Cmm.Pipeline ( + -- | Converts C-- with an implicit stack and native C-- calls into + -- optimized, CPS converted and native-call-less C--. The latter + -- C-- can be used to generate assembly. 
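+  -- A rough sketch of the stage order (see cpsTop below): control-flow
+  -- optimisation, common block elimination, switch lowering, proc-point
+  -- analysis, stack layout, sinking, CAF analysis, and finally SRT
+  -- generation in cmmPipeline itself.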
+ cmmPipeline +) where + +import GhcPrelude + +import GHC.Cmm +import GHC.Cmm.Lint +import GHC.Cmm.Info.Build +import GHC.Cmm.CommonBlockElim +import GHC.Cmm.Switch.Implement +import GHC.Cmm.ProcPoint +import GHC.Cmm.ContFlowOpt +import GHC.Cmm.LayoutStack +import GHC.Cmm.Sink +import GHC.Cmm.Dataflow.Collections + +import UniqSupply +import DynFlags +import ErrUtils +import HscTypes +import Control.Monad +import Outputable +import GHC.Platform + +----------------------------------------------------------------------------- +-- | Top level driver for C-- pipeline +----------------------------------------------------------------------------- + +cmmPipeline + :: HscEnv -- Compilation env including + -- dynamic flags: -dcmm-lint -ddump-cmm-cps + -> ModuleSRTInfo -- Info about SRTs generated so far + -> CmmGroup -- Input C-- with Procedures + -> IO (ModuleSRTInfo, CmmGroup) -- Output CPS transformed C-- + +cmmPipeline hsc_env srtInfo prog = withTimingSilent dflags (text "Cmm pipeline") forceRes $ + do let dflags = hsc_dflags hsc_env + + tops <- {-# SCC "tops" #-} mapM (cpsTop hsc_env) prog + + (srtInfo, cmms) <- {-# SCC "doSRTs" #-} doSRTs dflags srtInfo tops + dumpWith dflags Opt_D_dump_cmm_cps "Post CPS Cmm" FormatCMM (ppr cmms) + + return (srtInfo, cmms) + + where forceRes (info, group) = + info `seq` foldr (\decl r -> decl `seq` r) () group + + dflags = hsc_dflags hsc_env + +cpsTop :: HscEnv -> CmmDecl -> IO (CAFEnv, [CmmDecl]) +cpsTop _ p@(CmmData {}) = return (mapEmpty, [p]) +cpsTop hsc_env proc = + do + ----------- Control-flow optimisations ---------------------------------- + + -- The first round of control-flow optimisation speeds up the + -- later passes by removing lots of empty blocks, so we do it + -- even when optimisation isn't turned on. 
+ -- + CmmProc h l v g <- {-# SCC "cmmCfgOpts(1)" #-} + return $ cmmCfgOptsProc splitting_proc_points proc + dump Opt_D_dump_cmm_cfg "Post control-flow optimisations" g + + let !TopInfo {stack_info=StackInfo { arg_space = entry_off + , do_layout = do_layout }} = h + + ----------- Eliminate common blocks ------------------------------------- + g <- {-# SCC "elimCommonBlocks" #-} + condPass Opt_CmmElimCommonBlocks elimCommonBlocks g + Opt_D_dump_cmm_cbe "Post common block elimination" + + -- Any work storing block Labels must be performed _after_ + -- elimCommonBlocks + + ----------- Implement switches ------------------------------------------ + g <- {-# SCC "createSwitchPlans" #-} + runUniqSM $ cmmImplementSwitchPlans dflags g + dump Opt_D_dump_cmm_switch "Post switch plan" g + + ----------- Proc points ------------------------------------------------- + let call_pps = {-# SCC "callProcPoints" #-} callProcPoints g + proc_points <- + if splitting_proc_points + then do + pp <- {-# SCC "minimalProcPointSet" #-} runUniqSM $ + minimalProcPointSet (targetPlatform dflags) call_pps g + dumpWith dflags Opt_D_dump_cmm_proc "Proc points" + FormatCMM (ppr l $$ ppr pp $$ ppr g) + return pp + else + return call_pps + + ----------- Layout the stack and manifest Sp ---------------------------- + (g, stackmaps) <- + {-# SCC "layoutStack" #-} + if do_layout + then runUniqSM $ cmmLayoutStack dflags proc_points entry_off g + else return (g, mapEmpty) + dump Opt_D_dump_cmm_sp "Layout Stack" g + + ----------- Sink and inline assignments -------------------------------- + g <- {-# SCC "sink" #-} -- See Note [Sinking after stack layout] + condPass Opt_CmmSink (cmmSink dflags) g + Opt_D_dump_cmm_sink "Sink assignments" + + ------------- CAF analysis ---------------------------------------------- + let cafEnv = {-# SCC "cafAnal" #-} cafAnal call_pps l g + dumpWith dflags Opt_D_dump_cmm_caf "CAFEnv" FormatText (ppr cafEnv) + + g <- if splitting_proc_points + then do + ------------- Split into separate procedures ----------------------- + let pp_map = {-# SCC "procPointAnalysis" #-} + procPointAnalysis proc_points g + dumpWith dflags Opt_D_dump_cmm_procmap "procpoint map" + FormatCMM (ppr pp_map) + g <- {-# SCC "splitAtProcPoints" #-} runUniqSM $ + splitAtProcPoints dflags l call_pps proc_points pp_map + (CmmProc h l v g) + dumps Opt_D_dump_cmm_split "Post splitting" g + return g + else do + -- attach info tables to return points + return $ [attachContInfoTables call_pps (CmmProc h l v g)] + + ------------- Populate info tables with stack info ----------------- + g <- {-# SCC "setInfoTableStackMap" #-} + return $ map (setInfoTableStackMap dflags stackmaps) g + dumps Opt_D_dump_cmm_info "after setInfoTableStackMap" g + + ----------- Control-flow optimisations ----------------------------- + g <- {-# SCC "cmmCfgOpts(2)" #-} + return $ if optLevel dflags >= 1 + then map (cmmCfgOptsProc splitting_proc_points) g + else g + g <- return (map removeUnreachableBlocksProc g) + -- See Note [unreachable blocks] + dumps Opt_D_dump_cmm_cfg "Post control-flow optimisations" g + + return (cafEnv, g) + + where dflags = hsc_dflags hsc_env + platform = targetPlatform dflags + dump = dumpGraph dflags + + dumps flag name + = mapM_ (dumpWith dflags flag name FormatCMM . ppr) + + condPass flag pass g dumpflag dumpname = + if gopt flag dflags + then do + g <- return $ pass g + dump dumpflag dumpname g + return g + else return g + + -- we don't need to split proc points for the NCG, unless + -- tablesNextToCode is off. 
The latter is because we have no
+          -- label to put on info tables for basic blocks that are not
+          -- the entry point.
+          splitting_proc_points = hscTarget dflags /= HscAsm
+              || not (tablesNextToCode dflags)
+              || -- Note [inconsistent-pic-reg]
+                 usingInconsistentPicReg
+          usingInconsistentPicReg
+             = case (platformArch platform, platformOS platform, positionIndependent dflags)
+               of   (ArchX86, OSDarwin, pic) -> pic
+                    _                        -> False
+
+-- Note [Sinking after stack layout]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+--
+-- In the past we considered running the sinking pass also before stack
+-- layout, but after making some measurements we realized that:
+--
+--   a) running sinking only before stack layout produces slower
+--      code than running sinking only after stack layout.
+--
+--   b) running sinking both before and after stack layout produces
+--      code that has the same performance as when running sinking
+--      only after stack layout.
+--
+-- In other words, sinking before stack layout doesn't buy us anything.
+--
+-- An interesting question is "why is it better to run sinking after
+-- stack layout"? It seems that the major reason is the stores and loads
+-- generated by stack layout. Consider this code before stack layout:
+--
+--  c1E:
+--      _c1C::P64 = R3;
+--      _c1B::P64 = R2;
+--      _c1A::P64 = R1;
+--      I64[(young<c1D> + 8)] = c1D;
+--      call stg_gc_noregs() returns to c1D, args: 8, res: 8, upd: 8;
+--  c1D:
+--      R3 = _c1C::P64;
+--      R2 = _c1B::P64;
+--      R1 = _c1A::P64;
+--      call (P64[(old + 8)])(R3, R2, R1) args: 8, res: 0, upd: 8;
+--
+-- The stack layout pass will save all local variables live across a call
+-- (_c1C, _c1B and _c1A in this example) on the stack just before
+-- making a call and reload them from the stack after returning from a
+-- call:
+--
+--  c1E:
+--      _c1C::P64 = R3;
+--      _c1B::P64 = R2;
+--      _c1A::P64 = R1;
+--      I64[Sp - 32] = c1D;
+--      P64[Sp - 24] = _c1A::P64;
+--      P64[Sp - 16] = _c1B::P64;
+--      P64[Sp - 8] = _c1C::P64;
+--      Sp = Sp - 32;
+--      call stg_gc_noregs() returns to c1D, args: 8, res: 8, upd: 8;
+--  c1D:
+--      _c1A::P64 = P64[Sp + 8];
+--      _c1B::P64 = P64[Sp + 16];
+--      _c1C::P64 = P64[Sp + 24];
+--      R3 = _c1C::P64;
+--      R2 = _c1B::P64;
+--      R1 = _c1A::P64;
+--      Sp = Sp + 32;
+--      call (P64[Sp])(R3, R2, R1) args: 8, res: 0, upd: 8;
+--
+-- If we don't run the sinking pass after stack layout we are basically
+-- left with code like this. However, running sinking on this code can
+-- lead to significant improvements:
+--
+--  c1E:
+--      I64[Sp - 32] = c1D;
+--      P64[Sp - 24] = R1;
+--      P64[Sp - 16] = R2;
+--      P64[Sp - 8] = R3;
+--      Sp = Sp - 32;
+--      call stg_gc_noregs() returns to c1D, args: 8, res: 8, upd: 8;
+--  c1D:
+--      R3 = P64[Sp + 24];
+--      R2 = P64[Sp + 16];
+--      R1 = P64[Sp + 8];
+--      Sp = Sp + 32;
+--      call (P64[Sp])(R3, R2, R1) args: 8, res: 0, upd: 8;
+--
+-- Now we only have 9 assignments instead of 15.
+--
+-- There is one case where running sinking before stack layout could
+-- be beneficial. Consider this:
+--
+-- L1:
+--    x = y
+--    call f() returns L2
+-- L2: ...x...y...
+--
+-- Since both x and y are live across a call to f, they will be stored
+-- on the stack during stack layout and restored after the call:
+--
+-- L1:
+--    x = y
+--    P64[Sp - 24] = L2
+--    P64[Sp - 16] = x
+--    P64[Sp - 8]  = y
+--    Sp = Sp - 24
+--    call f() returns L2
+-- L2:
+--    y = P64[Sp + 16]
+--    x = P64[Sp + 8]
+--    Sp = Sp + 24
+--    ...x...y...
+-- +-- However, if we run sinking before stack layout we would propagate x +-- to its usage place (both x and y must be local register for this to +-- be possible - global registers cannot be floated past a call): +-- +-- L1: +-- x = y +-- call f() returns L2 +-- L2: ...y...y... +-- +-- Thus making x dead at the call to f(). If we ran stack layout now +-- we would generate less stores and loads: +-- +-- L1: +-- x = y +-- P64[Sp - 16] = L2 +-- P64[Sp - 8] = y +-- Sp = Sp - 16 +-- call f() returns L2 +-- L2: +-- y = P64[Sp + 8] +-- Sp = Sp + 16 +-- ...y...y... +-- +-- But since we don't see any benefits from running sinking before stack +-- layout, this situation probably doesn't arise too often in practice. +-- + +{- Note [inconsistent-pic-reg] + +On x86/Darwin, PIC is implemented by inserting a sequence like + + call 1f + 1: popl %reg + +at the proc entry point, and then referring to labels as offsets from +%reg. If we don't split proc points, then we could have many entry +points in a proc that would need this sequence, and each entry point +would then get a different value for %reg. If there are any join +points, then at the join point we don't have a consistent value for +%reg, so we don't know how to refer to labels. + +Hence, on x86/Darwin, we have to split proc points, and then each proc +point will get its own PIC initialisation sequence. + +This isn't an issue on x86/ELF, where the sequence is + + call 1f + 1: popl %reg + addl $_GLOBAL_OFFSET_TABLE_+(.-1b), %reg + +so %reg always has a consistent value: the address of +_GLOBAL_OFFSET_TABLE_, regardless of which entry point we arrived via. + +-} + +{- Note [unreachable blocks] + +The control-flow optimiser sometimes leaves unreachable blocks behind +containing junk code. These aren't necessarily a problem, but +removing them is good because it might save time in the native code +generator later. + +-} + +runUniqSM :: UniqSM a -> IO a +runUniqSM m = do + us <- mkSplitUniqSupply 'u' + return (initUs_ us m) + + +dumpGraph :: DynFlags -> DumpFlag -> String -> CmmGraph -> IO () +dumpGraph dflags flag name g = do + when (gopt Opt_DoCmmLinting dflags) $ do_lint g + dumpWith dflags flag name FormatCMM (ppr g) + where + do_lint g = case cmmLintGraph dflags g of + Just err -> do { fatalErrorMsg dflags err + ; ghcExit dflags 1 + } + Nothing -> return () + +dumpWith :: DynFlags -> DumpFlag -> String -> DumpFormat -> SDoc -> IO () +dumpWith dflags flag txt fmt sdoc = do + dumpIfSet_dyn dflags flag txt fmt sdoc + when (not (dopt flag dflags)) $ + -- If `-ddump-cmm-verbose -ddump-to-file` is specified, + -- dump each Cmm pipeline stage output to a separate file. 
#16930
+    when (dopt Opt_D_dump_cmm_verbose dflags)
+      $ dumpAction dflags (mkDumpStyle dflags alwaysQualify)
+          (dumpOptionsFromFlag flag) txt fmt sdoc
+  dumpIfSet_dyn dflags Opt_D_dump_cmm_verbose_by_proc txt fmt sdoc
diff --git a/compiler/GHC/Cmm/Ppr.hs b/compiler/GHC/Cmm/Ppr.hs
new file mode 100644
index 0000000000..891cbd9c6d
--- /dev/null
+++ b/compiler/GHC/Cmm/Ppr.hs
@@ -0,0 +1,309 @@
+{-# LANGUAGE GADTs, TypeFamilies, FlexibleContexts, FlexibleInstances #-}
+{-# OPTIONS_GHC -fno-warn-orphans #-}
+
+----------------------------------------------------------------------------
+--
+-- Pretty-printing of Cmm as (a superset of) C--
+--
+-- (c) The University of Glasgow 2004-2006
+--
+-----------------------------------------------------------------------------
+--
+-- This is where we walk over CmmNode emitting an external representation,
+-- suitable for parsing, in a syntax strongly reminiscent of C--. This
+-- is the "External Core" for the Cmm layer.
+--
+-- As such, this should be a well-defined syntax: we want it to look nice.
+-- Thus, we try wherever possible to use syntax defined in [1],
+-- "The C-- Reference Manual", http://www.cs.tufts.edu/~nr/c--/index.html. We
+-- differ slightly, in some cases. For one, we use I8 .. I64 for types, rather
+-- than C--'s bits8 .. bits64.
+--
+-- We try to ensure that all information available in the abstract
+-- syntax is reproduced, or reproducible, in the concrete syntax.
+-- Data that is not printed out can be reconstructed according to
+-- conventions used in the pretty printer. There are at least two such
+-- cases:
+--   1) if a value has wordRep type, the type is not appended in the
+--      output.
+--   2) MachOps that operate over wordRep type are printed in a
+--      C-style, rather than as their internal MachRep name.
+--
+-- These conventions produce much more readable Cmm output.
+-- +-- A useful example pass over Cmm is in nativeGen/MachCodeGen.hs + +module GHC.Cmm.Ppr + ( module GHC.Cmm.Ppr.Decl + , module GHC.Cmm.Ppr.Expr + ) +where + +import GhcPrelude hiding (succ) + +import GHC.Cmm.CLabel +import GHC.Cmm +import GHC.Cmm.Utils +import GHC.Cmm.Switch +import DynFlags +import FastString +import Outputable +import GHC.Cmm.Ppr.Decl +import GHC.Cmm.Ppr.Expr +import Util + +import BasicTypes +import GHC.Cmm.Dataflow.Block +import GHC.Cmm.Dataflow.Graph + +------------------------------------------------- +-- Outputable instances + +instance Outputable CmmStackInfo where + ppr = pprStackInfo + +instance Outputable CmmTopInfo where + ppr = pprTopInfo + + +instance Outputable (CmmNode e x) where + ppr = pprNode + +instance Outputable Convention where + ppr = pprConvention + +instance Outputable ForeignConvention where + ppr = pprForeignConvention + +instance Outputable ForeignTarget where + ppr = pprForeignTarget + +instance Outputable CmmReturnInfo where + ppr = pprReturnInfo + +instance Outputable (Block CmmNode C C) where + ppr = pprBlock +instance Outputable (Block CmmNode C O) where + ppr = pprBlock +instance Outputable (Block CmmNode O C) where + ppr = pprBlock +instance Outputable (Block CmmNode O O) where + ppr = pprBlock + +instance Outputable (Graph CmmNode e x) where + ppr = pprGraph + +instance Outputable CmmGraph where + ppr = pprCmmGraph + +---------------------------------------------------------- +-- Outputting types Cmm contains + +pprStackInfo :: CmmStackInfo -> SDoc +pprStackInfo (StackInfo {arg_space=arg_space, updfr_space=updfr_space}) = + text "arg_space: " <> ppr arg_space <+> + text "updfr_space: " <> ppr updfr_space + +pprTopInfo :: CmmTopInfo -> SDoc +pprTopInfo (TopInfo {info_tbls=info_tbl, stack_info=stack_info}) = + vcat [text "info_tbls: " <> ppr info_tbl, + text "stack_info: " <> ppr stack_info] + +---------------------------------------------------------- +-- Outputting blocks and graphs + +pprBlock :: IndexedCO x SDoc SDoc ~ SDoc + => Block CmmNode e x -> IndexedCO e SDoc SDoc +pprBlock block + = foldBlockNodesB3 ( ($$) . ppr + , ($$) . (nest 4) . ppr + , ($$) . (nest 4) . ppr + ) + block + empty + +pprGraph :: Graph CmmNode e x -> SDoc +pprGraph GNil = empty +pprGraph (GUnit block) = ppr block +pprGraph (GMany entry body exit) + = text "{" + $$ nest 2 (pprMaybeO entry $$ (vcat $ map ppr $ bodyToBlockList body) $$ pprMaybeO exit) + $$ text "}" + where pprMaybeO :: Outputable (Block CmmNode e x) + => MaybeO ex (Block CmmNode e x) -> SDoc + pprMaybeO NothingO = empty + pprMaybeO (JustO block) = ppr block + +pprCmmGraph :: CmmGraph -> SDoc +pprCmmGraph g + = text "{" <> text "offset" + $$ nest 2 (vcat $ map ppr blocks) + $$ text "}" + where blocks = revPostorder g + -- revPostorder has the side-effect of discarding unreachable code, + -- so pretty-printed Cmm will omit any unreachable blocks. This can + -- sometimes be confusing. 
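+
+-- For orientation, a sketch of the printed shape (block ids invented):
+--
+--     {offset
+--       c1: ...entry block...
+--       c2: ...
+--     }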
+ +--------------------------------------------- +-- Outputting CmmNode and types which it contains + +pprConvention :: Convention -> SDoc +pprConvention (NativeNodeCall {}) = text "<native-node-call-convention>" +pprConvention (NativeDirectCall {}) = text "<native-direct-call-convention>" +pprConvention (NativeReturn {}) = text "<native-ret-convention>" +pprConvention Slow = text "<slow-convention>" +pprConvention GC = text "<gc-convention>" + +pprForeignConvention :: ForeignConvention -> SDoc +pprForeignConvention (ForeignConvention c args res ret) = + doubleQuotes (ppr c) <+> text "arg hints: " <+> ppr args <+> text " result hints: " <+> ppr res <+> ppr ret + +pprReturnInfo :: CmmReturnInfo -> SDoc +pprReturnInfo CmmMayReturn = empty +pprReturnInfo CmmNeverReturns = text "never returns" + +pprForeignTarget :: ForeignTarget -> SDoc +pprForeignTarget (ForeignTarget fn c) = ppr c <+> ppr_target fn + where + ppr_target :: CmmExpr -> SDoc + ppr_target t@(CmmLit _) = ppr t + ppr_target fn' = parens (ppr fn') + +pprForeignTarget (PrimTarget op) + -- HACK: We're just using a ForeignLabel to get this printed, the label + -- might not really be foreign. + = ppr + (CmmLabel (mkForeignLabel + (mkFastString (show op)) + Nothing ForeignLabelInThisPackage IsFunction)) + +pprNode :: CmmNode e x -> SDoc +pprNode node = pp_node <+> pp_debug + where + pp_node :: SDoc + pp_node = sdocWithDynFlags $ \dflags -> case node of + -- label: + CmmEntry id tscope -> lbl <> colon <+> + (sdocWithDynFlags $ \dflags -> + ppUnless (gopt Opt_SuppressTicks dflags) (text "//" <+> ppr tscope)) + where + lbl = if gopt Opt_SuppressUniques dflags + then text "_lbl_" + else ppr id + + -- // text + CmmComment s -> text "//" <+> ftext s + + -- //tick bla<...> + CmmTick t -> ppUnless (gopt Opt_SuppressTicks dflags) $ + text "//tick" <+> ppr t + + -- unwind reg = expr; + CmmUnwind regs -> + text "unwind " + <> commafy (map (\(r,e) -> ppr r <+> char '=' <+> ppr e) regs) <> semi + + -- reg = expr; + CmmAssign reg expr -> ppr reg <+> equals <+> ppr expr <> semi + + -- rep[lv] = expr; + CmmStore lv expr -> rep <> brackets(ppr lv) <+> equals <+> ppr expr <> semi + where + rep = sdocWithDynFlags $ \dflags -> + ppr ( cmmExprType dflags expr ) + + -- call "ccall" foo(x, y)[r1, r2]; + -- ToDo ppr volatile + CmmUnsafeForeignCall target results args -> + hsep [ ppUnless (null results) $ + parens (commafy $ map ppr results) <+> equals, + text "call", + ppr target <> parens (commafy $ map ppr args) <> semi] + + -- goto label; + CmmBranch ident -> text "goto" <+> ppr ident <> semi + + -- if (expr) goto t; else goto f; + CmmCondBranch expr t f l -> + hsep [ text "if" + , parens(ppr expr) + , case l of + Nothing -> empty + Just b -> parens (text "likely:" <+> ppr b) + , text "goto" + , ppr t <> semi + , text "else goto" + , ppr f <> semi + ] + + CmmSwitch expr ids -> + hang (hsep [ text "switch" + , range + , if isTrivialCmmExpr expr + then ppr expr + else parens (ppr expr) + , text "{" + ]) + 4 (vcat (map ppCase cases) $$ def) $$ rbrace + where + (cases, mbdef) = switchTargetsFallThrough ids + ppCase (is,l) = hsep + [ text "case" + , commafy $ map integer is + , text ": goto" + , ppr l <> semi + ] + def | Just l <- mbdef = hsep + [ text "default:" + , braces (text "goto" <+> ppr l <> semi) + ] + | otherwise = empty + + range = brackets $ hsep [integer lo, text "..", integer hi] + where (lo,hi) = switchTargetsRange ids + + CmmCall tgt k regs out res updfr_off -> + hcat [ text "call", space + , pprFun tgt, parens (interpp'SP regs), space + , 
returns <+>
+               text "args: " <> ppr out <> comma <+>
+               text "res: " <> ppr res <> comma <+>
+               text "upd: " <> ppr updfr_off
+             , semi ]
+          where pprFun f@(CmmLit _) = ppr f
+                pprFun f = parens (ppr f)
+
+                returns
+                  | Just r <- k = text "returns to" <+> ppr r <> comma
+                  | otherwise   = empty
+
+      CmmForeignCall {tgt=t, res=rs, args=as, succ=s, ret_args=a, ret_off=u, intrbl=i} ->
+          hcat $ (if i then [text "interruptible", space] else []) ++
+               [ text "foreign call", space
+               , ppr t, text "(...)", space
+               , text "returns to" <+> ppr s
+                    <+> text "args:" <+> parens (ppr as)
+                    <+> text "ress:" <+> parens (ppr rs)
+               , text "ret_args:" <+> ppr a
+               , text "ret_off:" <+> ppr u
+               , semi ]
+
+    pp_debug :: SDoc
+    pp_debug =
+      if not debugIsOn then empty
+      else case node of
+             CmmEntry {}             -> empty -- Looks terrible with text " // CmmEntry"
+             CmmComment {}           -> empty -- Also looks terrible with text " // CmmComment"
+             CmmTick {}              -> empty
+             CmmUnwind {}            -> text " // CmmUnwind"
+             CmmAssign {}            -> text " // CmmAssign"
+             CmmStore {}             -> text " // CmmStore"
+             CmmUnsafeForeignCall {} -> text " // CmmUnsafeForeignCall"
+             CmmBranch {}            -> text " // CmmBranch"
+             CmmCondBranch {}        -> text " // CmmCondBranch"
+             CmmSwitch {}            -> text " // CmmSwitch"
+             CmmCall {}              -> text " // CmmCall"
+             CmmForeignCall {}       -> text " // CmmForeignCall"
+
+    commafy :: [SDoc] -> SDoc
+    commafy xs = hsep $ punctuate comma xs
diff --git a/compiler/GHC/Cmm/Ppr/Decl.hs b/compiler/GHC/Cmm/Ppr/Decl.hs
new file mode 100644
index 0000000000..2544e6a0d3
--- /dev/null
+++ b/compiler/GHC/Cmm/Ppr/Decl.hs
@@ -0,0 +1,169 @@
+----------------------------------------------------------------------------
+--
+-- Pretty-printing of common Cmm types
+--
+-- (c) The University of Glasgow 2004-2006
+--
+-----------------------------------------------------------------------------
+
+--
+-- This is where we walk over Cmm emitting an external representation,
+-- suitable for parsing, in a syntax strongly reminiscent of C--. This
+-- is the "External Core" for the Cmm layer.
+--
+-- As such, this should be a well-defined syntax: we want it to look nice.
+-- Thus, we try wherever possible to use syntax defined in [1],
+-- "The C-- Reference Manual", http://www.cs.tufts.edu/~nr/c--/index.html. We
+-- differ slightly, in some cases. For one, we use I8 .. I64 for types, rather
+-- than C--'s bits8 .. bits64.
+--
+-- We try to ensure that all information available in the abstract
+-- syntax is reproduced, or reproducible, in the concrete syntax.
+-- Data that is not printed out can be reconstructed according to
+-- conventions used in the pretty printer. There are at least two such
+-- cases:
+--   1) if a value has wordRep type, the type is not appended in the
+--      output.
+--   2) MachOps that operate over wordRep type are printed in a
+--      C-style, rather than as their internal MachRep name.
+--
+-- These conventions produce much more readable Cmm output.
+-- +-- A useful example pass over Cmm is in nativeGen/MachCodeGen.hs +-- + +{-# OPTIONS_GHC -fno-warn-orphans #-} +module GHC.Cmm.Ppr.Decl + ( writeCmms, pprCmms, pprCmmGroup, pprSection, pprStatic + ) +where + +import GhcPrelude + +import GHC.Cmm.Ppr.Expr +import GHC.Cmm + +import DynFlags +import Outputable +import FastString + +import Data.List +import System.IO + +import qualified Data.ByteString as BS + + +pprCmms :: (Outputable info, Outputable g) + => [GenCmmGroup CmmStatics info g] -> SDoc +pprCmms cmms = pprCode CStyle (vcat (intersperse separator $ map ppr cmms)) + where + separator = space $$ text "-------------------" $$ space + +writeCmms :: (Outputable info, Outputable g) + => DynFlags -> Handle -> [GenCmmGroup CmmStatics info g] -> IO () +writeCmms dflags handle cmms = printForC dflags handle (pprCmms cmms) + +----------------------------------------------------------------------------- + +instance (Outputable d, Outputable info, Outputable i) + => Outputable (GenCmmDecl d info i) where + ppr t = pprTop t + +instance Outputable CmmStatics where + ppr = pprStatics + +instance Outputable CmmStatic where + ppr = pprStatic + +instance Outputable CmmInfoTable where + ppr = pprInfoTable + + +----------------------------------------------------------------------------- + +pprCmmGroup :: (Outputable d, Outputable info, Outputable g) + => GenCmmGroup d info g -> SDoc +pprCmmGroup tops + = vcat $ intersperse blankLine $ map pprTop tops + +-- -------------------------------------------------------------------------- +-- Top level `procedure' blocks. +-- +pprTop :: (Outputable d, Outputable info, Outputable i) + => GenCmmDecl d info i -> SDoc + +pprTop (CmmProc info lbl live graph) + + = vcat [ ppr lbl <> lparen <> rparen <+> lbrace <+> text "// " <+> ppr live + , nest 8 $ lbrace <+> ppr info $$ rbrace + , nest 4 $ ppr graph + , rbrace ] + +-- -------------------------------------------------------------------------- +-- We follow [1], 4.5 +-- +-- section "data" { ... } +-- +pprTop (CmmData section ds) = + (hang (pprSection section <+> lbrace) 4 (ppr ds)) + $$ rbrace + +-- -------------------------------------------------------------------------- +-- Info tables. + +pprInfoTable :: CmmInfoTable -> SDoc +pprInfoTable (CmmInfoTable { cit_lbl = lbl, cit_rep = rep + , cit_prof = prof_info + , cit_srt = srt }) + = vcat [ text "label: " <> ppr lbl + , text "rep: " <> ppr rep + , case prof_info of + NoProfilingInfo -> empty + ProfilingInfo ct cd -> + vcat [ text "type: " <> text (show (BS.unpack ct)) + , text "desc: " <> text (show (BS.unpack cd)) ] + , text "srt: " <> ppr srt ] + +instance Outputable ForeignHint where + ppr NoHint = empty + ppr SignedHint = quotes(text "signed") +-- ppr AddrHint = quotes(text "address") +-- Temp Jan08 + ppr AddrHint = (text "PtrHint") + +-- -------------------------------------------------------------------------- +-- Static data. 
+-- Strings are printed as C strings, and we print them as I8[],
+-- following C--
+--
+pprStatics :: CmmStatics -> SDoc
+pprStatics (Statics lbl ds) = vcat ((ppr lbl <> colon) : map ppr ds)
+
+pprStatic :: CmmStatic -> SDoc
+pprStatic s = case s of
+    CmmStaticLit lit   -> nest 4 $ text "const" <+> pprLit lit <> semi
+    CmmUninitialised i -> nest 4 $ text "I8" <> brackets (int i)
+    CmmString s'       -> nest 4 $ text "I8[]" <+> text (show s')
+
+-- --------------------------------------------------------------------------
+-- data sections
+--
+pprSection :: Section -> SDoc
+pprSection (Section t suffix) =
+  section <+> doubleQuotes (pprSectionType t <+> char '.' <+> ppr suffix)
+  where
+    section = text "section"
+
+pprSectionType :: SectionType -> SDoc
+pprSectionType s = doubleQuotes (ptext t)
+ where
+  t = case s of
+    Text                    -> sLit "text"
+    Data                    -> sLit "data"
+    ReadOnlyData            -> sLit "readonly"
+    ReadOnlyData16          -> sLit "readonly16"
+    RelocatableReadOnlyData -> sLit "relreadonly"
+    UninitialisedData       -> sLit "uninitialised"
+    CString                 -> sLit "cstring"
+    OtherSection s'         -> sLit s' -- Not actually a literal though.
diff --git a/compiler/GHC/Cmm/Ppr/Expr.hs b/compiler/GHC/Cmm/Ppr/Expr.hs
new file mode 100644
index 0000000000..53a335e561
--- /dev/null
+++ b/compiler/GHC/Cmm/Ppr/Expr.hs
@@ -0,0 +1,286 @@
+----------------------------------------------------------------------------
+--
+-- Pretty-printing of common Cmm types
+--
+-- (c) The University of Glasgow 2004-2006
+--
+-----------------------------------------------------------------------------
+
+--
+-- This is where we walk over Cmm emitting an external representation,
+-- suitable for parsing, in a syntax strongly reminiscent of C--. This
+-- is the "External Core" for the Cmm layer.
+--
+-- As such, this should be a well-defined syntax: we want it to look nice.
+-- Thus, we try wherever possible to use syntax defined in [1],
+-- "The C-- Reference Manual", http://www.cs.tufts.edu/~nr/c--/index.html. We
+-- differ slightly, in some cases. For one, we use I8 .. I64 for types, rather
+-- than C--'s bits8 .. bits64.
+--
+-- We try to ensure that all information available in the abstract
+-- syntax is reproduced, or reproducible, in the concrete syntax.
+-- Data that is not printed out can be reconstructed according to
+-- conventions used in the pretty printer. There are at least two such
+-- cases:
+--   1) if a value has wordRep type, the type is not appended in the
+--      output.
+--   2) MachOps that operate over wordRep type are printed in a
+--      C-style, rather than as their internal MachRep name.
+--
+-- These conventions produce much more readable Cmm output.
+-- +-- A useful example pass over Cmm is in nativeGen/MachCodeGen.hs +-- + +{-# OPTIONS_GHC -fno-warn-orphans #-} +module GHC.Cmm.Ppr.Expr + ( pprExpr, pprLit + ) +where + +import GhcPrelude + +import GHC.Cmm.Expr + +import Outputable +import DynFlags + +import Data.Maybe +import Numeric ( fromRat ) + +----------------------------------------------------------------------------- + +instance Outputable CmmExpr where + ppr e = pprExpr e + +instance Outputable CmmReg where + ppr e = pprReg e + +instance Outputable CmmLit where + ppr l = pprLit l + +instance Outputable LocalReg where + ppr e = pprLocalReg e + +instance Outputable Area where + ppr e = pprArea e + +instance Outputable GlobalReg where + ppr e = pprGlobalReg e + +-- -------------------------------------------------------------------------- +-- Expressions +-- + +pprExpr :: CmmExpr -> SDoc +pprExpr e + = sdocWithDynFlags $ \dflags -> + case e of + CmmRegOff reg i -> + pprExpr (CmmMachOp (MO_Add rep) + [CmmReg reg, CmmLit (CmmInt (fromIntegral i) rep)]) + where rep = typeWidth (cmmRegType dflags reg) + CmmLit lit -> pprLit lit + _other -> pprExpr1 e + +-- Here's the precedence table from GHC.Cmm.Parser: +-- %nonassoc '>=' '>' '<=' '<' '!=' '==' +-- %left '|' +-- %left '^' +-- %left '&' +-- %left '>>' '<<' +-- %left '-' '+' +-- %left '/' '*' '%' +-- %right '~' + +-- We just cope with the common operators for now, the rest will get +-- a default conservative behaviour. + +-- %nonassoc '>=' '>' '<=' '<' '!=' '==' +pprExpr1, pprExpr7, pprExpr8 :: CmmExpr -> SDoc +pprExpr1 (CmmMachOp op [x,y]) | Just doc <- infixMachOp1 op + = pprExpr7 x <+> doc <+> pprExpr7 y +pprExpr1 e = pprExpr7 e + +infixMachOp1, infixMachOp7, infixMachOp8 :: MachOp -> Maybe SDoc + +infixMachOp1 (MO_Eq _) = Just (text "==") +infixMachOp1 (MO_Ne _) = Just (text "!=") +infixMachOp1 (MO_Shl _) = Just (text "<<") +infixMachOp1 (MO_U_Shr _) = Just (text ">>") +infixMachOp1 (MO_U_Ge _) = Just (text ">=") +infixMachOp1 (MO_U_Le _) = Just (text "<=") +infixMachOp1 (MO_U_Gt _) = Just (char '>') +infixMachOp1 (MO_U_Lt _) = Just (char '<') +infixMachOp1 _ = Nothing + +-- %left '-' '+' +pprExpr7 (CmmMachOp (MO_Add rep1) [x, CmmLit (CmmInt i rep2)]) | i < 0 + = pprExpr7 (CmmMachOp (MO_Sub rep1) [x, CmmLit (CmmInt (negate i) rep2)]) +pprExpr7 (CmmMachOp op [x,y]) | Just doc <- infixMachOp7 op + = pprExpr7 x <+> doc <+> pprExpr8 y +pprExpr7 e = pprExpr8 e + +infixMachOp7 (MO_Add _) = Just (char '+') +infixMachOp7 (MO_Sub _) = Just (char '-') +infixMachOp7 _ = Nothing + +-- %left '/' '*' '%' +pprExpr8 (CmmMachOp op [x,y]) | Just doc <- infixMachOp8 op + = pprExpr8 x <+> doc <+> pprExpr9 y +pprExpr8 e = pprExpr9 e + +infixMachOp8 (MO_U_Quot _) = Just (char '/') +infixMachOp8 (MO_Mul _) = Just (char '*') +infixMachOp8 (MO_U_Rem _) = Just (char '%') +infixMachOp8 _ = Nothing + +pprExpr9 :: CmmExpr -> SDoc +pprExpr9 e = + case e of + CmmLit lit -> pprLit1 lit + CmmLoad expr rep -> ppr rep <> brackets (ppr expr) + CmmReg reg -> ppr reg + CmmRegOff reg off -> parens (ppr reg <+> char '+' <+> int off) + CmmStackSlot a off -> parens (ppr a <+> char '+' <+> int off) + CmmMachOp mop args -> genMachOp mop args + +genMachOp :: MachOp -> [CmmExpr] -> SDoc +genMachOp mop args + | Just doc <- infixMachOp mop = case args of + -- dyadic + [x,y] -> pprExpr9 x <+> doc <+> pprExpr9 y + + -- unary + [x] -> doc <> pprExpr9 x + + _ -> pprTrace "GHC.Cmm.Ppr.Expr.genMachOp: machop with strange number of args" + (pprMachOp mop <+> + parens (hcat $ punctuate comma (map pprExpr args))) + empty + + | isJust 
(infixMachOp1 mop)
+    || isJust (infixMachOp7 mop)
+    || isJust (infixMachOp8 mop) = parens (pprExpr (CmmMachOp mop args))
+
+  | otherwise = char '%' <> ppr_op <> parens (commafy (map pprExpr args))
+        where ppr_op = text (map (\c -> if c == ' ' then '_' else c)
+                                 (show mop))
+                -- replace spaces in (show mop) with underscores.
+
+--
+-- Unsigned ops on the word size of the machine get nice symbols.
+-- All else get dumped in their ugly format.
+--
+infixMachOp :: MachOp -> Maybe SDoc
+infixMachOp mop
+        = case mop of
+            MO_And    _ -> Just $ char '&'
+            MO_Or     _ -> Just $ char '|'
+            MO_Xor    _ -> Just $ char '^'
+            MO_Not    _ -> Just $ char '~'
+            MO_S_Neg  _ -> Just $ char '-' -- there is no unsigned neg :)
+            _ -> Nothing
+
+-- --------------------------------------------------------------------------
+-- Literals.
+--  To minimise line noise we adopt the convention that if the literal
+--  has the natural machine word size, we do not append the type
+--
+pprLit :: CmmLit -> SDoc
+pprLit lit = sdocWithDynFlags $ \dflags ->
+             case lit of
+             CmmInt i rep ->
+                 hcat [ (if i < 0 then parens else id)(integer i)
+                      , ppUnless (rep == wordWidth dflags) $
+                        space <> dcolon <+> ppr rep ]
+
+             CmmFloat f rep     -> hsep [ double (fromRat f), dcolon, ppr rep ]
+             CmmVec lits        -> char '<' <> commafy (map pprLit lits) <> char '>'
+             CmmLabel clbl      -> ppr clbl
+             CmmLabelOff clbl i -> ppr clbl <> ppr_offset i
+             CmmLabelDiffOff clbl1 clbl2 i _ -> ppr clbl1 <> char '-'
+                                               <> ppr clbl2 <> ppr_offset i
+             CmmBlock id        -> ppr id
+             CmmHighStackMark   -> text "<highSp>"
+
+pprLit1 :: CmmLit -> SDoc
+pprLit1 lit@(CmmLabelOff {}) = parens (pprLit lit)
+pprLit1 lit                  = pprLit lit
+
+ppr_offset :: Int -> SDoc
+ppr_offset i
+    | i==0      = empty
+    | i>=0      = char '+' <> int i
+    | otherwise = char '-' <> int (-i)
+
+-- --------------------------------------------------------------------------
+-- Registers, whether local (temps) or global
+--
+pprReg :: CmmReg -> SDoc
+pprReg r
+    = case r of
+        CmmLocal  local  -> pprLocalReg  local
+        CmmGlobal global -> pprGlobalReg global
+
+--
+-- We only print the type of the local reg if it isn't wordRep
+--
+pprLocalReg :: LocalReg -> SDoc
+pprLocalReg (LocalReg uniq rep) = sdocWithDynFlags $ \dflags ->
+--   = ppr rep <> char '_' <> ppr uniq
+-- Temp Jan08
+   char '_' <> pprUnique dflags uniq <> dcolon <> ptr <> ppr rep
+   where
+     pprUnique dflags unique =
+        if gopt Opt_SuppressUniques dflags
+        then text "_locVar_"
+        else ppr unique
+     ptr = empty
+         --if isGcPtrType rep
+         --      then doubleQuotes (text "ptr")
+         --      else empty
+
+-- Stack areas
+pprArea :: Area -> SDoc
+pprArea Old        = text "old"
+pprArea (Young id) = hcat [ text "young<", ppr id, text ">" ]
+
+-- needs to be kept in sync with GHC.Cmm.Expr.GlobalReg
+--
+pprGlobalReg :: GlobalReg -> SDoc
+pprGlobalReg gr
+    = case gr of
+        VanillaReg n _ -> char 'R' <> int n
+-- Temp Jan08
+--        VanillaReg n VNonGcPtr -> char 'R' <> int n
+--        VanillaReg n VGcPtr    -> char 'P' <> int n
+        FloatReg   n   -> char 'F' <> int n
+        DoubleReg  n   -> char 'D' <> int n
+        LongReg    n   -> char 'L' <> int n
+        XmmReg     n   -> text "XMM" <> int n
+        YmmReg     n   -> text "YMM" <> int n
+        ZmmReg     n   -> text "ZMM" <> int n
+        Sp             -> text "Sp"
+        SpLim          -> text "SpLim"
+        Hp             -> text "Hp"
+        HpLim          -> text "HpLim"
+        MachSp         -> text "MachSp"
+        UnwindReturnReg -> text "UnwindReturnReg"
+        CCCS           -> text "CCCS"
+        CurrentTSO     -> text "CurrentTSO"
+        CurrentNursery -> text "CurrentNursery"
+        HpAlloc        -> text "HpAlloc"
+        EagerBlackholeInfo -> text
"stg_EAGER_BLACKHOLE_info" + GCEnter1 -> text "stg_gc_enter_1" + GCFun -> text "stg_gc_fun" + BaseReg -> text "BaseReg" + PicBaseReg -> text "PicBaseReg" + +----------------------------------------------------------------------------- + +commafy :: [SDoc] -> SDoc +commafy xs = fsep $ punctuate comma xs diff --git a/compiler/GHC/Cmm/ProcPoint.hs b/compiler/GHC/Cmm/ProcPoint.hs new file mode 100644 index 0000000000..00a7a73d89 --- /dev/null +++ b/compiler/GHC/Cmm/ProcPoint.hs @@ -0,0 +1,496 @@ +{-# LANGUAGE GADTs, DisambiguateRecordFields, BangPatterns #-} + +module GHC.Cmm.ProcPoint + ( ProcPointSet, Status(..) + , callProcPoints, minimalProcPointSet + , splitAtProcPoints, procPointAnalysis + , attachContInfoTables + ) +where + +import GhcPrelude hiding (last, unzip, succ, zip) + +import DynFlags +import GHC.Cmm.BlockId +import GHC.Cmm.CLabel +import GHC.Cmm +import GHC.Cmm.Ppr () -- For Outputable instances +import GHC.Cmm.Utils +import GHC.Cmm.Info +import GHC.Cmm.Liveness +import GHC.Cmm.Switch +import Data.List (sortBy) +import Maybes +import Control.Monad +import Outputable +import GHC.Platform +import UniqSupply +import GHC.Cmm.Dataflow.Block +import GHC.Cmm.Dataflow.Collections +import GHC.Cmm.Dataflow +import GHC.Cmm.Dataflow.Graph +import GHC.Cmm.Dataflow.Label + +-- Compute a minimal set of proc points for a control-flow graph. + +-- Determine a protocol for each proc point (which live variables will +-- be passed as arguments and which will be on the stack). + +{- +A proc point is a basic block that, after CPS transformation, will +start a new function. The entry block of the original function is a +proc point, as is the continuation of each function call. +A third kind of proc point arises if we want to avoid copying code. +Suppose we have code like the following: + + f() { + if (...) { ..1..; call foo(); ..2..} + else { ..3..; call bar(); ..4..} + x = y + z; + return x; + } + +The statement 'x = y + z' can be reached from two different proc +points: the continuations of foo() and bar(). We would prefer not to +put a copy in each continuation; instead we would like 'x = y + z' to +be the start of a new procedure to which the continuations can jump: + + f_cps () { + if (...) { ..1..; push k_foo; jump foo_cps(); } + else { ..3..; push k_bar; jump bar_cps(); } + } + k_foo() { ..2..; jump k_join(y, z); } + k_bar() { ..4..; jump k_join(y, z); } + k_join(y, z) { x = y + z; return x; } + +You might think then that a criterion to make a node a proc point is +that it is directly reached by two distinct proc points. (Note +[Direct reachability].) But this criterion is a bit too simple; for +example, 'return x' is also reached by two proc points, yet there is +no point in pulling it out of k_join. A good criterion would be to +say that a node should be made a proc point if it is reached by a set +of proc points that is different than its immediate dominator. NR +believes this criterion can be shown to produce a minimum set of proc +points, and given a dominator tree, the proc points can be chosen in +time linear in the number of blocks. Lacking a dominator analysis, +however, we turn instead to an iterative solution, starting with no +proc points and adding them according to these rules: + + 1. The entry block is a proc point. + 2. The continuation of a call is a proc point. + 3. A node is a proc point if it is directly reached by more proc + points than one of its predecessors. 
+
+Because we don't understand the problem very well, we apply rule 3 at
+most once per iteration, then recompute the reachability information.
+(See Note [No simple dataflow].)  The choice of the new proc point is
+arbitrary, and I don't know if the choice affects the final solution,
+so I don't know if the number of proc points chosen is the
+minimum---but the set will be minimal.
+
+
+
+Note [Proc-point analysis]
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Given a specified set of proc-points (a set of block-ids), "proc-point
+analysis" figures out, for every block, which proc-point it belongs to.
+All the blocks belonging to proc-point P will constitute a single
+top-level C procedure.
+
+A non-proc-point block B "belongs to" a proc-point P iff B is
+reachable from P without going through another proc-point.
+
+Invariant: a block B should belong to at most one proc-point; if it
+belongs to two, that's a bug.
+
+Note [Non-existing proc-points]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+On some architectures it might happen that the list of proc-points
+computed before the stack layout pass is invalidated by the stack
+layout itself.  This will happen if stack layout removes from the graph
+blocks that were determined to be proc-points.  Later on in the pipeline
+we use the list of proc-points to perform [Proc-point analysis], but
+if a proc-point no longer exists we will get a compiler panic.
+See #8205.
+-}
+
+type ProcPointSet = LabelSet
+
+data Status
+  = ReachedBy ProcPointSet  -- set of proc points that directly reach the block
+  | ProcPoint               -- this block is itself a proc point
+
+instance Outputable Status where
+  ppr (ReachedBy ps)
+      | setNull ps = text "<not-reached>"
+      | otherwise = text "reached by" <+>
+                    (hsep $ punctuate comma $ map ppr $ setElems ps)
+  ppr ProcPoint = text "<procpt>"
+
+--------------------------------------------------
+-- Proc point analysis
+
+-- Once you know what the proc-points are, figure out
+-- what proc-points each block is reachable from
+-- See Note [Proc-point analysis]
+procPointAnalysis :: ProcPointSet -> CmmGraph -> LabelMap Status
+procPointAnalysis procPoints cmmGraph@(CmmGraph {g_graph = graph}) =
+    analyzeCmmFwd procPointLattice procPointTransfer cmmGraph initProcPoints
+  where
+    initProcPoints =
+        mkFactBase
+            procPointLattice
+            [ (id, ProcPoint)
+            | id <- setElems procPoints
+            -- See Note [Non-existing proc-points]
+            , id `setMember` labelsInGraph
+            ]
+    labelsInGraph = labelsDefined graph
+
+procPointTransfer :: TransferFun Status
+procPointTransfer block facts =
+    let label = entryLabel block
+        !fact = case getFact procPointLattice label facts of
+            ProcPoint -> ReachedBy $! setSingleton label
+            f -> f
+        result = map (\id -> (id, fact)) (successors block)
+    in mkFactBase procPointLattice result
+
+procPointLattice :: DataflowLattice Status
+procPointLattice = DataflowLattice unreached add_to
+  where
+    unreached = ReachedBy setEmpty
+    add_to (OldFact ProcPoint) _ = NotChanged ProcPoint
+    add_to _ (NewFact ProcPoint) = Changed ProcPoint -- because of previous case
+    add_to (OldFact (ReachedBy p)) (NewFact (ReachedBy p'))
+        | setSize union > setSize p = Changed (ReachedBy union)
+        | otherwise = NotChanged (ReachedBy p)
+      where
+        union = setUnion p' p
+
+----------------------------------------------------------------------
+
+-- It is worth distinguishing two sets of proc points: those that are
+-- induced by calls in the original graph and those that are
+-- introduced because they're reachable from multiple proc points.
+-- +-- Extract the set of Continuation BlockIds, see Note [Continuation BlockIds]. +callProcPoints :: CmmGraph -> ProcPointSet +callProcPoints g = foldlGraphBlocks add (setSingleton (g_entry g)) g + where add :: LabelSet -> CmmBlock -> LabelSet + add set b = case lastNode b of + CmmCall {cml_cont = Just k} -> setInsert k set + CmmForeignCall {succ=k} -> setInsert k set + _ -> set + +minimalProcPointSet :: Platform -> ProcPointSet -> CmmGraph + -> UniqSM ProcPointSet +-- Given the set of successors of calls (which must be proc-points) +-- figure out the minimal set of necessary proc-points +minimalProcPointSet platform callProcPoints g + = extendPPSet platform g (revPostorder g) callProcPoints + +extendPPSet + :: Platform -> CmmGraph -> [CmmBlock] -> ProcPointSet -> UniqSM ProcPointSet +extendPPSet platform g blocks procPoints = + let env = procPointAnalysis procPoints g + add pps block = let id = entryLabel block + in case mapLookup id env of + Just ProcPoint -> setInsert id pps + _ -> pps + procPoints' = foldlGraphBlocks add setEmpty g + newPoints = mapMaybe ppSuccessor blocks + newPoint = listToMaybe newPoints + ppSuccessor b = + let nreached id = case mapLookup id env `orElse` + pprPanic "no ppt" (ppr id <+> ppr b) of + ProcPoint -> 1 + ReachedBy ps -> setSize ps + block_procpoints = nreached (entryLabel b) + -- | Looking for a successor of b that is reached by + -- more proc points than b and is not already a proc + -- point. If found, it can become a proc point. + newId succ_id = not (setMember succ_id procPoints') && + nreached succ_id > block_procpoints + in listToMaybe $ filter newId $ successors b + + in case newPoint of + Just id -> + if setMember id procPoints' + then panic "added old proc pt" + else extendPPSet platform g blocks (setInsert id procPoints') + Nothing -> return procPoints' + + +-- At this point, we have found a set of procpoints, each of which should be +-- the entry point of a procedure. +-- Now, we create the procedure for each proc point, +-- which requires that we: +-- 1. build a map from proc points to the blocks reachable from the proc point +-- 2. turn each branch to a proc point into a jump +-- 3. turn calls and returns into jumps +-- 4. build info tables for the procedures -- and update the info table for +-- the SRTs in the entry procedure as well. +-- Input invariant: A block should only be reachable from a single ProcPoint. +-- ToDo: use the _ret naming convention that the old code generator +-- used. 
-- EZY +splitAtProcPoints :: DynFlags -> CLabel -> ProcPointSet-> ProcPointSet -> LabelMap Status -> + CmmDecl -> UniqSM [CmmDecl] +splitAtProcPoints dflags entry_label callPPs procPoints procMap + (CmmProc (TopInfo {info_tbls = info_tbls}) + top_l _ g@(CmmGraph {g_entry=entry})) = + do -- Build a map from procpoints to the blocks they reach + let add_block + :: LabelMap (LabelMap CmmBlock) + -> CmmBlock + -> LabelMap (LabelMap CmmBlock) + add_block graphEnv b = + case mapLookup bid procMap of + Just ProcPoint -> add graphEnv bid bid b + Just (ReachedBy set) -> + case setElems set of + [] -> graphEnv + [id] -> add graphEnv id bid b + _ -> panic "Each block should be reachable from only one ProcPoint" + Nothing -> graphEnv + where bid = entryLabel b + add graphEnv procId bid b = mapInsert procId graph' graphEnv + where graph = mapLookup procId graphEnv `orElse` mapEmpty + graph' = mapInsert bid b graph + + let liveness = cmmGlobalLiveness dflags g + let ppLiveness pp = filter isArgReg $ + regSetToList $ + expectJust "ppLiveness" $ mapLookup pp liveness + + graphEnv <- return $ foldlGraphBlocks add_block mapEmpty g + + -- Build a map from proc point BlockId to pairs of: + -- * Labels for their new procedures + -- * Labels for the info tables of their new procedures (only if + -- the proc point is a callPP) + -- Due to common blockification, we may overestimate the set of procpoints. + let add_label map pp = mapInsert pp lbls map + where lbls | pp == entry = (entry_label, fmap cit_lbl (mapLookup entry info_tbls)) + | otherwise = (block_lbl, guard (setMember pp callPPs) >> + Just info_table_lbl) + where block_lbl = blockLbl pp + info_table_lbl = infoTblLbl pp + + procLabels :: LabelMap (CLabel, Maybe CLabel) + procLabels = foldl' add_label mapEmpty + (filter (flip mapMember (toBlockMap g)) (setElems procPoints)) + + -- In each new graph, add blocks jumping off to the new procedures, + -- and replace branches to procpoints with branches to the jump-off blocks + let add_jump_block + :: (LabelMap Label, [CmmBlock]) + -> (Label, CLabel) + -> UniqSM (LabelMap Label, [CmmBlock]) + add_jump_block (env, bs) (pp, l) = + do bid <- liftM mkBlockId getUniqueM + let b = blockJoin (CmmEntry bid GlobalScope) emptyBlock jump + live = ppLiveness pp + jump = CmmCall (CmmLit (CmmLabel l)) Nothing live 0 0 0 + return (mapInsert pp bid env, b : bs) + + add_jumps + :: LabelMap CmmGraph + -> (Label, LabelMap CmmBlock) + -> UniqSM (LabelMap CmmGraph) + add_jumps newGraphEnv (ppId, blockEnv) = + do let needed_jumps = -- find which procpoints we currently branch to + mapFoldr add_if_branch_to_pp [] blockEnv + add_if_branch_to_pp :: CmmBlock -> [(BlockId, CLabel)] -> [(BlockId, CLabel)] + add_if_branch_to_pp block rst = + case lastNode block of + CmmBranch id -> add_if_pp id rst + CmmCondBranch _ ti fi _ -> add_if_pp ti (add_if_pp fi rst) + CmmSwitch _ ids -> foldr add_if_pp rst $ switchTargetsToList ids + _ -> rst + + -- when jumping to a PP that has an info table, if + -- tablesNextToCode is off we must jump to the entry + -- label instead. 
+ jump_label (Just info_lbl) _ + | tablesNextToCode dflags = info_lbl + | otherwise = toEntryLbl info_lbl + jump_label Nothing block_lbl = block_lbl + + add_if_pp id rst = case mapLookup id procLabels of + Just (lbl, mb_info_lbl) -> (id, jump_label mb_info_lbl lbl) : rst + Nothing -> rst + (jumpEnv, jumpBlocks) <- + foldM add_jump_block (mapEmpty, []) needed_jumps + -- update the entry block + let b = expectJust "block in env" $ mapLookup ppId blockEnv + blockEnv' = mapInsert ppId b blockEnv + -- replace branches to procpoints with branches to jumps + blockEnv'' = toBlockMap $ replaceBranches jumpEnv $ ofBlockMap ppId blockEnv' + -- add the jump blocks to the graph + blockEnv''' = foldl' (flip addBlock) blockEnv'' jumpBlocks + let g' = ofBlockMap ppId blockEnv''' + -- pprTrace "g' pre jumps" (ppr g') $ do + return (mapInsert ppId g' newGraphEnv) + + graphEnv <- foldM add_jumps mapEmpty $ mapToList graphEnv + + let to_proc (bid, g) + | bid == entry + = CmmProc (TopInfo {info_tbls = info_tbls, + stack_info = stack_info}) + top_l live g' + | otherwise + = case expectJust "pp label" $ mapLookup bid procLabels of + (lbl, Just info_lbl) + -> CmmProc (TopInfo { info_tbls = mapSingleton (g_entry g) (mkEmptyContInfoTable info_lbl) + , stack_info=stack_info}) + lbl live g' + (lbl, Nothing) + -> CmmProc (TopInfo {info_tbls = mapEmpty, stack_info=stack_info}) + lbl live g' + where + g' = replacePPIds g + live = ppLiveness (g_entry g') + stack_info = StackInfo { arg_space = 0 + , updfr_space = Nothing + , do_layout = True } + -- cannot use panic, this is printed by -ddump-cmm + + -- References to procpoint IDs can now be replaced with the + -- infotable's label + replacePPIds g = {-# SCC "replacePPIds" #-} + mapGraphNodes (id, mapExp repl, mapExp repl) g + where repl e@(CmmLit (CmmBlock bid)) = + case mapLookup bid procLabels of + Just (_, Just info_lbl) -> CmmLit (CmmLabel info_lbl) + _ -> e + repl e = e + + -- The C back end expects to see return continuations before the + -- call sites. Here, we sort them in reverse order -- it gets + -- reversed later. + let (_, block_order) = + foldl' add_block_num (0::Int, mapEmpty :: LabelMap Int) + (revPostorder g) + add_block_num (i, map) block = + (i + 1, mapInsert (entryLabel block) i map) + sort_fn (bid, _) (bid', _) = + compare (expectJust "block_order" $ mapLookup bid block_order) + (expectJust "block_order" $ mapLookup bid' block_order) + procs <- return $ map to_proc $ sortBy sort_fn $ mapToList graphEnv + return -- pprTrace "procLabels" (ppr procLabels) + -- pprTrace "splitting graphs" (ppr procs) + procs +splitAtProcPoints _ _ _ _ _ t@(CmmData _ _) = return [t] + +-- Only called from GHC.Cmm.ProcPoint.splitAtProcPoints. NB. does a +-- recursive lookup, see comment below. +replaceBranches :: LabelMap BlockId -> CmmGraph -> CmmGraph +replaceBranches env cmmg + = {-# SCC "replaceBranches" #-} + ofBlockMap (g_entry cmmg) $ mapMap f $ toBlockMap cmmg + where + f block = replaceLastNode block $ last (lastNode block) + + last :: CmmNode O C -> CmmNode O C + last (CmmBranch id) = CmmBranch (lookup id) + last (CmmCondBranch e ti fi l) = CmmCondBranch e (lookup ti) (lookup fi) l + last (CmmSwitch e ids) = CmmSwitch e (mapSwitchTargets lookup ids) + last l@(CmmCall {}) = l { cml_cont = Nothing } + -- NB. remove the continuation of a CmmCall, since this + -- label will now be in a different CmmProc. Not only + -- is this tidier, it stops CmmLint from complaining. 
+ last l@(CmmForeignCall {}) = l + lookup id = fmap lookup (mapLookup id env) `orElse` id + -- XXX: this is a recursive lookup, it follows chains + -- until the lookup returns Nothing, at which point we + -- return the last BlockId + +-- -------------------------------------------------------------- +-- Not splitting proc points: add info tables for continuations + +attachContInfoTables :: ProcPointSet -> CmmDecl -> CmmDecl +attachContInfoTables call_proc_points (CmmProc top_info top_l live g) + = CmmProc top_info{info_tbls = info_tbls'} top_l live g + where + info_tbls' = mapUnion (info_tbls top_info) $ + mapFromList [ (l, mkEmptyContInfoTable (infoTblLbl l)) + | l <- setElems call_proc_points + , l /= g_entry g ] +attachContInfoTables _ other_decl + = other_decl + +---------------------------------------------------------------- + +{- +Note [Direct reachability] + +Block B is directly reachable from proc point P iff control can flow +from P to B without passing through an intervening proc point. +-} + +---------------------------------------------------------------- + +{- +Note [No simple dataflow] + +Sadly, it seems impossible to compute the proc points using a single +dataflow pass. One might attempt to use this simple lattice: + + data Location = Unknown + | InProc BlockId -- node is in procedure headed by the named proc point + | ProcPoint -- node is itself a proc point + +At a join, a node in two different blocks becomes a proc point. +The difficulty is that the change of information during iterative +computation may promote a node prematurely. Here's a program that +illustrates the difficulty: + + f () { + entry: + .... + L1: + if (...) { ... } + else { ... } + + L2: if (...) { g(); goto L1; } + return x + y; + } + +The only proc-point needed (besides the entry) is L1. But in an +iterative analysis, consider what happens to L2. On the first pass +through, it rises from Unknown to 'InProc entry', but when L1 is +promoted to a proc point (because it's the successor of g()), L1's +successors will be promoted to 'InProc L1'. The problem hits when the +new fact 'InProc L1' flows into L2 which is already bound to 'InProc entry'. +The join operation makes it a proc point when in fact it needn't be, +because its immediate dominator L1 is already a proc point and there +are no other proc points that directly reach L2. +-} + + + +{- Note [Separate Adams optimization] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +It may be worthwhile to attempt the Adams optimization by rewriting +the graph before the assignment of proc-point protocols. Here are a +couple of rules: + + g() returns to k; g() returns to L; + k: CopyIn c ress; goto L: + ... ==> ... + L: // no CopyIn node here L: CopyIn c ress; + + +And when c == c' and ress == ress', this also: + + g() returns to k; g() returns to L; + k: CopyIn c ress; goto L: + ... ==> ... + L: CopyIn c' ress' L: CopyIn c' ress' ; + +In both cases the goal is to eliminate k. 
+-} diff --git a/compiler/GHC/Cmm/Sink.hs b/compiler/GHC/Cmm/Sink.hs new file mode 100644 index 0000000000..8e231df300 --- /dev/null +++ b/compiler/GHC/Cmm/Sink.hs @@ -0,0 +1,854 @@ +{-# LANGUAGE GADTs #-} +module GHC.Cmm.Sink ( + cmmSink + ) where + +import GhcPrelude + +import GHC.Cmm +import GHC.Cmm.Opt +import GHC.Cmm.Liveness +import GHC.Cmm.Utils +import GHC.Cmm.Dataflow.Block +import GHC.Cmm.Dataflow.Label +import GHC.Cmm.Dataflow.Collections +import GHC.Cmm.Dataflow.Graph +import GHC.Platform.Regs +import GHC.Platform (isARM, platformArch) + +import DynFlags +import Unique +import UniqFM + +import qualified Data.IntSet as IntSet +import Data.List (partition) +import qualified Data.Set as Set +import Data.Maybe + +-- Compact sets for membership tests of local variables. + +type LRegSet = IntSet.IntSet + +emptyLRegSet :: LRegSet +emptyLRegSet = IntSet.empty + +nullLRegSet :: LRegSet -> Bool +nullLRegSet = IntSet.null + +insertLRegSet :: LocalReg -> LRegSet -> LRegSet +insertLRegSet l = IntSet.insert (getKey (getUnique l)) + +elemLRegSet :: LocalReg -> LRegSet -> Bool +elemLRegSet l = IntSet.member (getKey (getUnique l)) + +-- ----------------------------------------------------------------------------- +-- Sinking and inlining + +-- This is an optimisation pass that +-- (a) moves assignments closer to their uses, to reduce register pressure +-- (b) pushes assignments into a single branch of a conditional if possible +-- (c) inlines assignments to registers that are mentioned only once +-- (d) discards dead assignments +-- +-- This tightens up lots of register-heavy code. It is particularly +-- helpful in the Cmm generated by the Stg->Cmm code generator, in +-- which every function starts with a copyIn sequence like: +-- +-- x1 = R1 +-- x2 = Sp[8] +-- x3 = Sp[16] +-- if (Sp - 32 < SpLim) then L1 else L2 +-- +-- we really want to push the x1..x3 assignments into the L2 branch. +-- +-- Algorithm: +-- +-- * Start by doing liveness analysis. +-- +-- * Keep a list of assignments A; earlier ones may refer to later ones. +-- Currently we only sink assignments to local registers, because we don't +-- have liveness information about global registers. +-- +-- * Walk forwards through the graph, look at each node N: +-- +-- * If it is a dead assignment, i.e. assignment to a register that is +-- not used after N, discard it. +-- +-- * Try to inline based on current list of assignments +-- * If any assignments in A (1) occur only once in N, and (2) are +-- not live after N, inline the assignment and remove it +-- from A. +-- +-- * If an assignment in A is cheap (RHS is local register), then +-- inline the assignment and keep it in A in case it is used afterwards. +-- +-- * Otherwise don't inline. +-- +-- * If N is assignment to a local register pick up the assignment +-- and add it to A. +-- +-- * If N is not an assignment to a local register: +-- * remove any assignments from A that conflict with N, and +-- place them before N in the current block. We call this +-- "dropping" the assignments. +-- +-- * An assignment conflicts with N if it: +-- - assigns to a register mentioned in N +-- - mentions a register assigned by N +-- - reads from memory written by N +-- * do this recursively, dropping dependent assignments +-- +-- * At an exit node: +-- * drop any assignments that are live on more than one successor +-- and are not trivial +-- * if any successor has more than one predecessor (a join-point), +-- drop everything live in that successor. 
Since we only propagate
+--       assignments that are not dead at the successor, we will
+--       eliminate all assignments dead at this point. Thus analysis of a
+--       join-point will always begin with an empty list of assignments.
+--
+--
+-- As a result of the above algorithm, sinking deletes some dead assignments
+-- (transitively, even).  This isn't as good as removeDeadAssignments,
+-- but it's much cheaper.
+
+-- -----------------------------------------------------------------------------
+-- things that we aren't optimising very well yet.
+--
+-- -----------
+-- (1) From GHC's FastString.hashStr:
+--
+--  s2ay:
+--      if ((_s2an::I64 == _s2ao::I64) >= 1) goto c2gn; else goto c2gp;
+--  c2gn:
+--      R1 = _s2au::I64;
+--      call (I64[Sp])(R1) args: 8, res: 0, upd: 8;
+--  c2gp:
+--      _s2cO::I64 = %MO_S_Rem_W64(%MO_UU_Conv_W8_W64(I8[_s2aq::I64 + (_s2an::I64 << 0)]) + _s2au::I64 * 128,
+--                                 4091);
+--      _s2an::I64 = _s2an::I64 + 1;
+--      _s2au::I64 = _s2cO::I64;
+--      goto s2ay;
+--
+-- a nice loop, but we didn't eliminate the silly assignment at the end.
+-- See Note [dependent assignments], which would probably fix this.
+-- This is #8336.
+--
+-- -----------
+-- (2) From stg_atomically_frame in PrimOps.cmm
+--
+-- We have a diamond control flow:
+--
+--     x = ...
+--       |
+--      / \
+--     A   B
+--      \ /
+--       |
+--    use of x
+--
+-- Now x won't be sunk down to its use, because we won't push it into
+-- both branches of the conditional.  We certainly do have to check
+-- that we can sink it past all the code in both A and B, but having
+-- discovered that, we could sink it to its use.
+--
+
+-- -----------------------------------------------------------------------------
+
+type Assignment = (LocalReg, CmmExpr, AbsMem)
+  -- Assignment caches AbsMem, an abstraction of the memory read by
+  -- the RHS of the assignment.
+
+type Assignments = [Assignment]
+  -- A sequence of assignments; kept in *reverse* order
+  -- So the list [ x=e1, y=e2 ] means the sequence of assignments
+  --     y = e2
+  --     x = e1
+
+cmmSink :: DynFlags -> CmmGraph -> CmmGraph
+cmmSink dflags graph = ofBlockList (g_entry graph) $ sink mapEmpty $ blocks
+  where
+  liveness = cmmLocalLiveness dflags graph
+  getLive l = mapFindWithDefault Set.empty l liveness
+
+  blocks = revPostorder graph
+
+  join_pts = findJoinPoints blocks
+
+  sink :: LabelMap Assignments -> [CmmBlock] -> [CmmBlock]
+  sink _ [] = []
+  sink sunk (b:bs) =
+    -- pprTrace "sink" (ppr lbl) $
+    blockJoin first final_middle final_last : sink sunk' bs
+    where
+      lbl = entryLabel b
+      (first, middle, last) = blockSplit b
+
+      succs = successors last
+
+      -- Annotate the middle nodes with the registers live *after*
+      -- the node.  This will help us decide whether we can inline
+      -- an assignment in the current node or not.
+      live = Set.unions (map getLive succs)
+      live_middle = gen_kill dflags last live
+      ann_middles = annotate dflags live_middle (blockToList middle)
+
+      -- Now sink and inline in this block
+      (middle', assigs) = walk dflags ann_middles (mapFindWithDefault [] lbl sunk)
+      fold_last = constantFoldNode dflags last
+      (final_last, assigs') = tryToInline dflags live fold_last assigs
+
+      -- We cannot sink into join points (successors with more than
+      -- one predecessor), so identify the join points and the set
+      -- of registers live in them.
+      (joins, nonjoins) = partition (`mapMember` join_pts) succs
+      live_in_joins = Set.unions (map getLive joins)
+
+      -- We do not want to sink an assignment into multiple branches,
+      -- so identify the set of registers live in multiple successors.
+ -- This is made more complicated because when we sink an assignment + -- into one branch, this might change the set of registers that are + -- now live in multiple branches. + init_live_sets = map getLive nonjoins + live_in_multi live_sets r = + case filter (Set.member r) live_sets of + (_one:_two:_) -> True + _ -> False + + -- Now, drop any assignments that we will not sink any further. + (dropped_last, assigs'') = dropAssignments dflags drop_if init_live_sets assigs' + + drop_if a@(r,rhs,_) live_sets = (should_drop, live_sets') + where + should_drop = conflicts dflags a final_last + || not (isTrivial dflags rhs) && live_in_multi live_sets r + || r `Set.member` live_in_joins + + live_sets' | should_drop = live_sets + | otherwise = map upd live_sets + + upd set | r `Set.member` set = set `Set.union` live_rhs + | otherwise = set + + live_rhs = foldRegsUsed dflags extendRegSet emptyRegSet rhs + + final_middle = foldl' blockSnoc middle' dropped_last + + sunk' = mapUnion sunk $ + mapFromList [ (l, filterAssignments dflags (getLive l) assigs'') + | l <- succs ] + +{- TODO: enable this later, when we have some good tests in place to + measure the effect and tune it. + +-- small: an expression we don't mind duplicating +isSmall :: CmmExpr -> Bool +isSmall (CmmReg (CmmLocal _)) = True -- +isSmall (CmmLit _) = True +isSmall (CmmMachOp (MO_Add _) [x,y]) = isTrivial x && isTrivial y +isSmall (CmmRegOff (CmmLocal _) _) = True +isSmall _ = False +-} + +-- +-- We allow duplication of trivial expressions: registers (both local and +-- global) and literals. +-- +isTrivial :: DynFlags -> CmmExpr -> Bool +isTrivial _ (CmmReg (CmmLocal _)) = True +isTrivial dflags (CmmReg (CmmGlobal r)) = -- see Note [Inline GlobalRegs?] + if isARM (platformArch (targetPlatform dflags)) + then True -- CodeGen.Platform.ARM does not have globalRegMaybe + else isJust (globalRegMaybe (targetPlatform dflags) r) + -- GlobalRegs that are loads from BaseReg are not trivial +isTrivial _ (CmmLit _) = True +isTrivial _ _ = False + +-- +-- annotate each node with the set of registers live *after* the node +-- +annotate :: DynFlags -> LocalRegSet -> [CmmNode O O] -> [(LocalRegSet, CmmNode O O)] +annotate dflags live nodes = snd $ foldr ann (live,[]) nodes + where ann n (live,nodes) = (gen_kill dflags n live, (live,n) : nodes) + +-- +-- Find the blocks that have multiple successors (join points) +-- +findJoinPoints :: [CmmBlock] -> LabelMap Int +findJoinPoints blocks = mapFilter (>1) succ_counts + where + all_succs = concatMap successors blocks + + succ_counts :: LabelMap Int + succ_counts = foldr (\l -> mapInsertWith (+) l 1) mapEmpty all_succs + +-- +-- filter the list of assignments to remove any assignments that +-- are not live in a continuation. +-- +filterAssignments :: DynFlags -> LocalRegSet -> Assignments -> Assignments +filterAssignments dflags live assigs = reverse (go assigs []) + where go [] kept = kept + go (a@(r,_,_):as) kept | needed = go as (a:kept) + | otherwise = go as kept + where + needed = r `Set.member` live + || any (conflicts dflags a) (map toNode kept) + -- Note that we must keep assignments that are + -- referred to by other assignments we have + -- already kept. + +-- ----------------------------------------------------------------------------- +-- Walk through the nodes of a block, sinking and inlining assignments +-- as we go. +-- +-- On input we pass in a: +-- * list of nodes in the block +-- * a list of assignments that appeared *before* this block and +-- that are being sunk. 
+-- +-- On output we get: +-- * a new block +-- * a list of assignments that will be placed *after* that block. +-- + +walk :: DynFlags + -> [(LocalRegSet, CmmNode O O)] -- nodes of the block, annotated with + -- the set of registers live *after* + -- this node. + + -> Assignments -- The current list of + -- assignments we are sinking. + -- Earlier assignments may refer + -- to later ones. + + -> ( Block CmmNode O O -- The new block + , Assignments -- Assignments to sink further + ) + +walk dflags nodes assigs = go nodes emptyBlock assigs + where + go [] block as = (block, as) + go ((live,node):ns) block as + | shouldDiscard node live = go ns block as + -- discard dead assignment + | Just a <- shouldSink dflags node2 = go ns block (a : as1) + | otherwise = go ns block' as' + where + node1 = constantFoldNode dflags node + + (node2, as1) = tryToInline dflags live node1 as + + (dropped, as') = dropAssignmentsSimple dflags + (\a -> conflicts dflags a node2) as1 + + block' = foldl' blockSnoc block dropped `blockSnoc` node2 + + +-- +-- Heuristic to decide whether to pick up and sink an assignment +-- Currently we pick up all assignments to local registers. It might +-- be profitable to sink assignments to global regs too, but the +-- liveness analysis doesn't track those (yet) so we can't. +-- +shouldSink :: DynFlags -> CmmNode e x -> Maybe Assignment +shouldSink dflags (CmmAssign (CmmLocal r) e) | no_local_regs = Just (r, e, exprMem dflags e) + where no_local_regs = True -- foldRegsUsed (\_ _ -> False) True e +shouldSink _ _other = Nothing + +-- +-- discard dead assignments. This doesn't do as good a job as +-- removeDeadAssignments, because it would need multiple passes +-- to get all the dead code, but it catches the common case of +-- superfluous reloads from the stack that the stack allocator +-- leaves behind. +-- +-- Also we catch "r = r" here. You might think it would fall +-- out of inlining, but the inliner will see that r is live +-- after the instruction and choose not to inline r in the rhs. +-- +shouldDiscard :: CmmNode e x -> LocalRegSet -> Bool +shouldDiscard node live + = case node of + CmmAssign r (CmmReg r') | r == r' -> True + CmmAssign (CmmLocal r) _ -> not (r `Set.member` live) + _otherwise -> False + + +toNode :: Assignment -> CmmNode O O +toNode (r,rhs,_) = CmmAssign (CmmLocal r) rhs + +dropAssignmentsSimple :: DynFlags -> (Assignment -> Bool) -> Assignments + -> ([CmmNode O O], Assignments) +dropAssignmentsSimple dflags f = dropAssignments dflags (\a _ -> (f a, ())) () + +dropAssignments :: DynFlags -> (Assignment -> s -> (Bool, s)) -> s -> Assignments + -> ([CmmNode O O], Assignments) +dropAssignments dflags should_drop state assigs + = (dropped, reverse kept) + where + (dropped,kept) = go state assigs [] [] + + go _ [] dropped kept = (dropped, kept) + go state (assig : rest) dropped kept + | conflict = go state' rest (toNode assig : dropped) kept + | otherwise = go state' rest dropped (assig:kept) + where + (dropit, state') = should_drop assig state + conflict = dropit || any (conflicts dflags assig) dropped + + +-- ----------------------------------------------------------------------------- +-- Try to inline assignments into a node. +-- This also does constant folding for primpops, since +-- inlining opens up opportunities for doing so. + +tryToInline + :: DynFlags + -> LocalRegSet -- set of registers live after this + -- node. We cannot inline anything + -- that is live after the node, unless + -- it is small enough to duplicate. 
+   -> CmmNode O x       -- The node to inline into
+   -> Assignments       -- Assignments to inline
+   -> (
+        CmmNode O x     -- New node
+      , Assignments     -- Remaining assignments
+      )
+
+tryToInline dflags live node assigs = go usages node emptyLRegSet assigs
+ where
+  usages :: UniqFM Int -- Maps each LocalReg to a count of how often it is used
+  usages = foldLocalRegsUsed dflags addUsage emptyUFM node
+
+  go _usages node _skipped [] = (node, [])
+
+  go usages node skipped (a@(l,rhs,_) : rest)
+   | cannot_inline           = dont_inline
+   | occurs_none             = discard  -- Note [discard during inlining]
+   | occurs_once             = inline_and_discard
+   | isTrivial dflags rhs    = inline_and_keep
+   | otherwise               = dont_inline
+   where
+        inline_and_discard = go usages' inl_node skipped rest
+          where usages' = foldLocalRegsUsed dflags addUsage usages rhs
+
+        discard = go usages node skipped rest
+
+        dont_inline        = keep node  -- don't inline the assignment, keep it
+        inline_and_keep    = keep inl_node -- inline the assignment, keep it
+
+        keep node' = (final_node, a : rest')
+          where (final_node, rest') = go usages' node' (insertLRegSet l skipped) rest
+                usages' = foldLocalRegsUsed dflags (\m r -> addToUFM m r 2)
+                                            usages rhs
+                -- we must not inline anything that is mentioned in the RHS
+                -- of a binding that we have already skipped, so we set the
+                -- usages of the regs on the RHS to 2.
+
+        cannot_inline = skipped `regsUsedIn` rhs -- Note [dependent assignments]
+                        || l `elemLRegSet` skipped
+                        || not (okToInline dflags rhs node)
+
+        l_usages = lookupUFM usages l
+        l_live   = l `elemRegSet` live
+
+        occurs_once = not l_live && l_usages == Just 1
+        occurs_none = not l_live && l_usages == Nothing
+
+        inl_node = improveConditional (mapExpDeep inl_exp node)
+
+        inl_exp :: CmmExpr -> CmmExpr
+        -- inl_exp is where the inlining actually takes place!
+        inl_exp (CmmReg    (CmmLocal l'))     | l == l' = rhs
+        inl_exp (CmmRegOff (CmmLocal l') off) | l == l'
+                    = cmmOffset dflags rhs off
+                    -- re-constant fold after inlining
+        inl_exp (CmmMachOp op args) = cmmMachOpFold dflags op args
+        inl_exp other = other
+
+
+{- Note [improveConditional]
+
+cmmMachOpFold tries to simplify conditionals to turn things like
+  (a == b) != 1
+into
+  (a != b)
+but there's one case it can't handle: when the comparison is over
+floating-point values, we can't invert it, because floating-point
+comparisons aren't invertible (because of NaNs).
+
+But we *can* optimise this conditional by swapping the true and false
+branches.  Given
+  CmmCondBranch ((a >## b) != 1) t f
+we can turn it into
+  CmmCondBranch (a >## b) f t
+
+So here we catch conditionals that weren't optimised by cmmMachOpFold,
+and apply the above transformation to eliminate the comparison against 1.
+
+It's tempting to just turn every != into == and then let cmmMachOpFold
+do its thing, but that risks changing a nice fall-through conditional
+into one that requires two jumps (see swapcond_last in
+GHC.Cmm.ContFlowOpt), so instead we carefully look for just the cases
+where we can eliminate a comparison.
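+
+As a quick sanity check of the neLike cases below (a standalone
+illustration, not part of the compiler): a comparison result b is always
+0 or 1, so testing it with (< 1) agrees with testing it with (/= 1):
+
+    and [ (b < 1) == (b /= 1) | b <- [0, 1 :: Int] ]    -- evaluates to True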
+-} +improveConditional :: CmmNode O x -> CmmNode O x +improveConditional + (CmmCondBranch (CmmMachOp mop [x, CmmLit (CmmInt 1 _)]) t f l) + | neLike mop, isComparisonExpr x + = CmmCondBranch x f t (fmap not l) + where + neLike (MO_Ne _) = True + neLike (MO_U_Lt _) = True -- (x<y) < 1 behaves like (x<y) != 1 + neLike (MO_S_Lt _) = True -- (x<y) < 1 behaves like (x<y) != 1 + neLike _ = False +improveConditional other = other + +-- Note [dependent assignments] +-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +-- +-- If our assignment list looks like +-- +-- [ y = e, x = ... y ... ] +-- +-- We cannot inline x. Remember this list is really in reverse order, +-- so it means x = ... y ...; y = e +-- +-- Hence if we inline x, the outer assignment to y will capture the +-- reference in x's right hand side. +-- +-- In this case we should rename the y in x's right-hand side, +-- i.e. change the list to [ y = e, x = ... y1 ..., y1 = y ] +-- Now we can go ahead and inline x. +-- +-- For now we do nothing, because this would require putting +-- everything inside UniqSM. +-- +-- One more variant of this (#7366): +-- +-- [ y = e, y = z ] +-- +-- If we don't want to inline y = e, because y is used many times, we +-- might still be tempted to inline y = z (because we always inline +-- trivial rhs's). But of course we can't, because y is equal to e, +-- not z. + +-- Note [discard during inlining] +-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +-- Opportunities to discard assignments sometimes appear after we've +-- done some inlining. Here's an example: +-- +-- x = R1; +-- y = P64[x + 7]; +-- z = P64[x + 15]; +-- /* z is dead */ +-- R1 = y & (-8); +-- +-- The x assignment is trivial, so we inline it in the RHS of y, and +-- keep both x and y. z gets dropped because it is dead, then we +-- inline y, and we have a dead assignment to x. If we don't notice +-- that x is dead in tryToInline, we end up retaining it. + +addUsage :: UniqFM Int -> LocalReg -> UniqFM Int +addUsage m r = addToUFM_C (+) m r 1 + +regsUsedIn :: LRegSet -> CmmExpr -> Bool +regsUsedIn ls _ | nullLRegSet ls = False +regsUsedIn ls e = wrapRecExpf f e False + where f (CmmReg (CmmLocal l)) _ | l `elemLRegSet` ls = True + f (CmmRegOff (CmmLocal l) _) _ | l `elemLRegSet` ls = True + f _ z = z + +-- we don't inline into CmmUnsafeForeignCall if the expression refers +-- to global registers. This is a HACK to avoid global registers +-- clashing with C argument-passing registers, really the back-end +-- ought to be able to handle it properly, but currently neither PprC +-- nor the NCG can do it. See Note [Register parameter passing] +-- See also GHC.StgToCmm.Foreign.load_args_into_temps. +okToInline :: DynFlags -> CmmExpr -> CmmNode e x -> Bool +okToInline dflags expr node@(CmmUnsafeForeignCall{}) = + not (globalRegistersConflict dflags expr node) +okToInline _ _ _ = True + +-- ----------------------------------------------------------------------------- + +-- | @conflicts (r,e) node@ is @False@ if and only if the assignment +-- @r = e@ can be safely commuted past statement @node@. +conflicts :: DynFlags -> Assignment -> CmmNode O x -> Bool +conflicts dflags (r, rhs, addr) node + + -- (1) node defines registers used by rhs of assignment. This catches + -- assignments and all three kinds of calls. 
See Note [Sinking and calls]
+  | globalRegistersConflict dflags rhs node                = True
+  | localRegistersConflict  dflags rhs node                = True
+
+  -- (2) node uses register defined by assignment
+  | foldRegsUsed dflags (\b r' -> r == r' || b) False node = True
+
+  -- (3) a store to an address conflicts with a read of the same memory
+  | CmmStore addr' e <- node
+  , memConflicts addr (loadAddr dflags addr' (cmmExprWidth dflags e)) = True
+
+  -- (4) an assignment to Hp/Sp conflicts with a heap/stack read respectively
+  | HeapMem  <- addr, CmmAssign (CmmGlobal Hp) _ <- node   = True
+  | StackMem <- addr, CmmAssign (CmmGlobal Sp) _ <- node   = True
+  | SpMem{}  <- addr, CmmAssign (CmmGlobal Sp) _ <- node   = True
+
+  -- (5) foreign calls clobber heap: see Note [Foreign calls clobber heap]
+  | CmmUnsafeForeignCall{} <- node, memConflicts addr AnyMem = True
+
+  -- (6) native calls clobber any memory
+  | CmmCall{} <- node, memConflicts addr AnyMem            = True
+
+  -- (7) otherwise, no conflict
+  | otherwise = False
+
+-- Returns True if node defines any global registers that are used in the
+-- Cmm expression
+globalRegistersConflict :: DynFlags -> CmmExpr -> CmmNode e x -> Bool
+globalRegistersConflict dflags expr node =
+    foldRegsDefd dflags (\b r -> b || regUsedIn dflags (CmmGlobal r) expr)
+                 False node
+
+-- Returns True if node defines any local registers that are used in the
+-- Cmm expression
+localRegistersConflict :: DynFlags -> CmmExpr -> CmmNode e x -> Bool
+localRegistersConflict dflags expr node =
+    foldRegsDefd dflags (\b r -> b || regUsedIn dflags (CmmLocal r) expr)
+                 False node
+
+-- Note [Sinking and calls]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~
+--
+-- We have three kinds of calls: normal (CmmCall), safe foreign (CmmForeignCall)
+-- and unsafe foreign (CmmUnsafeForeignCall). We perform the sinking pass after
+-- stack layout (see Note [Sinking after stack layout]), which leads to two
+-- invariants related to calls:
+--
+--   a) during the stack layout phase all safe foreign calls are turned into
+--      unsafe foreign calls (see Note [Lower safe foreign calls]). This
+--      means that we will never encounter a CmmForeignCall node when running
+--      sinking after stack layout
+--
+--   b) stack layout saves all variables live across a call on the stack
+--      just before making a call (remember that we are not sinking
+--      assignments to the stack):
+--
+--       L1:
+--          x = R1
+--          P64[Sp - 16] = L2
+--          P64[Sp -  8] = x
+--          Sp = Sp - 16
+--          call f() returns L2
+--       L2:
+--
+--      We will attempt to sink { x = R1 } but we will detect a conflict with
+--      { P64[Sp - 8] = x } and hence we will drop { x = R1 } without even
+--      checking whether it conflicts with { call f() }. In this way we will
+--      never need to check any assignment conflicts with CmmCall. Remember
+--      that we still need to check for potential memory conflicts.
+--
+-- So the result is that we only need to worry about CmmUnsafeForeignCall nodes
+-- when checking conflicts (see Note [Unsafe foreign calls clobber caller-save registers]).
+-- This assumption holds only when we do sinking after stack layout. If we run
+-- it before stack layout we need to check for possible conflicts with all three
+-- kinds of calls. Our `conflicts` function does that by using the generic
+-- foldRegsDefd and foldRegsUsed functions defined in the DefinerOfRegs and
+-- UserOfRegs typeclasses.
+--
+
+-- An abstraction of memory read or written.
+data AbsMem + = NoMem -- no memory accessed + | AnyMem -- arbitrary memory + | HeapMem -- definitely heap memory + | StackMem -- definitely stack memory + | SpMem -- <size>[Sp+n] + {-# UNPACK #-} !Int + {-# UNPACK #-} !Int + +-- Having SpMem is important because it lets us float loads from Sp +-- past stores to Sp as long as they don't overlap, and this helps to +-- unravel some long sequences of +-- x1 = [Sp + 8] +-- x2 = [Sp + 16] +-- ... +-- [Sp + 8] = xi +-- [Sp + 16] = xj +-- +-- Note that SpMem is invalidated if Sp is changed, but the definition +-- of 'conflicts' above handles that. + +-- ToDo: this won't currently fix the following commonly occurring code: +-- x1 = [R1 + 8] +-- x2 = [R1 + 16] +-- .. +-- [Hp - 8] = x1 +-- [Hp - 16] = x2 +-- .. + +-- because [R1 + 8] and [Hp - 8] are both HeapMem. We know that +-- assignments to [Hp + n] do not conflict with any other heap memory, +-- but this is tricky to nail down. What if we had +-- +-- x = Hp + n +-- [x] = ... +-- +-- the store to [x] should be "new heap", not "old heap". +-- Furthermore, you could imagine that if we started inlining +-- functions in Cmm then there might well be reads of heap memory +-- that was written in the same basic block. To take advantage of +-- non-aliasing of heap memory we will have to be more clever. + +-- Note [Foreign calls clobber heap] +-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +-- +-- It is tempting to say that foreign calls clobber only +-- non-heap/stack memory, but unfortunately we break this invariant in +-- the RTS. For example, in stg_catch_retry_frame we call +-- stmCommitNestedTransaction() which modifies the contents of the +-- TRec it is passed (this actually caused incorrect code to be +-- generated). +-- +-- Since the invariant is true for the majority of foreign calls, +-- perhaps we ought to have a special annotation for calls that can +-- modify heap/stack memory. For now we just use the conservative +-- definition here. +-- +-- Some CallishMachOp imply a memory barrier e.g. AtomicRMW and +-- therefore we should never float any memory operations across one of +-- these calls. 
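+
+{- A standalone illustration of the SpMem overlap rule implemented by
+   memConflicts below (toy code over plain pairs, not this module's types):
+   two stack slots conflict exactly when their byte ranges [o, o+w)
+   intersect.
+
+       overlaps :: (Int, Int) -> (Int, Int) -> Bool    -- (offset, width)
+       overlaps (o1, w1) (o2, w2)
+         | o1 < o2   = o1 + w1 > o2
+         | otherwise = o2 + w2 > o1
+
+   For example, overlaps (8,8) (16,8) == False, so a load from Sp[8..15]
+   may float past a store to Sp[16..23]; overlaps (8,8) (12,4) == True,
+   so those accesses must keep their order.
+-}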
+ + +bothMems :: AbsMem -> AbsMem -> AbsMem +bothMems NoMem x = x +bothMems x NoMem = x +bothMems HeapMem HeapMem = HeapMem +bothMems StackMem StackMem = StackMem +bothMems (SpMem o1 w1) (SpMem o2 w2) + | o1 == o2 = SpMem o1 (max w1 w2) + | otherwise = StackMem +bothMems SpMem{} StackMem = StackMem +bothMems StackMem SpMem{} = StackMem +bothMems _ _ = AnyMem + +memConflicts :: AbsMem -> AbsMem -> Bool +memConflicts NoMem _ = False +memConflicts _ NoMem = False +memConflicts HeapMem StackMem = False +memConflicts StackMem HeapMem = False +memConflicts SpMem{} HeapMem = False +memConflicts HeapMem SpMem{} = False +memConflicts (SpMem o1 w1) (SpMem o2 w2) + | o1 < o2 = o1 + w1 > o2 + | otherwise = o2 + w2 > o1 +memConflicts _ _ = True + +exprMem :: DynFlags -> CmmExpr -> AbsMem +exprMem dflags (CmmLoad addr w) = bothMems (loadAddr dflags addr (typeWidth w)) (exprMem dflags addr) +exprMem dflags (CmmMachOp _ es) = foldr bothMems NoMem (map (exprMem dflags) es) +exprMem _ _ = NoMem + +loadAddr :: DynFlags -> CmmExpr -> Width -> AbsMem +loadAddr dflags e w = + case e of + CmmReg r -> regAddr dflags r 0 w + CmmRegOff r i -> regAddr dflags r i w + _other | regUsedIn dflags spReg e -> StackMem + | otherwise -> AnyMem + +regAddr :: DynFlags -> CmmReg -> Int -> Width -> AbsMem +regAddr _ (CmmGlobal Sp) i w = SpMem i (widthInBytes w) +regAddr _ (CmmGlobal Hp) _ _ = HeapMem +regAddr _ (CmmGlobal CurrentTSO) _ _ = HeapMem -- important for PrimOps +regAddr dflags r _ _ | isGcPtrType (cmmRegType dflags r) = HeapMem -- yay! GCPtr pays for itself +regAddr _ _ _ _ = AnyMem + +{- +Note [Inline GlobalRegs?] + +Should we freely inline GlobalRegs? + +Actually it doesn't make a huge amount of difference either way, so we +*do* currently treat GlobalRegs as "trivial" and inline them +everywhere, but for what it's worth, here is what I discovered when I +(SimonM) looked into this: + +Common sense says we should not inline GlobalRegs, because when we +have + + x = R1 + +the register allocator will coalesce this assignment, generating no +code, and simply record the fact that x is bound to $rbx (or +whatever). Furthermore, if we were to sink this assignment, then the +range of code over which R1 is live increases, and the range of code +over which x is live decreases. All things being equal, it is better +for x to be live than R1, because R1 is a fixed register whereas x can +live in any register. So we should neither sink nor inline 'x = R1'. + +However, not inlining GlobalRegs can have surprising +consequences. e.g. (cgrun020) + + c3EN: + _s3DB::P64 = R1; + _c3ES::P64 = _s3DB::P64 & 7; + if (_c3ES::P64 >= 2) goto c3EU; else goto c3EV; + c3EU: + _s3DD::P64 = P64[_s3DB::P64 + 6]; + _s3DE::P64 = P64[_s3DB::P64 + 14]; + I64[Sp - 8] = c3F0; + R1 = _s3DE::P64; + P64[Sp] = _s3DD::P64; + +inlining the GlobalReg gives: + + c3EN: + if (R1 & 7 >= 2) goto c3EU; else goto c3EV; + c3EU: + I64[Sp - 8] = c3F0; + _s3DD::P64 = P64[R1 + 6]; + R1 = P64[R1 + 14]; + P64[Sp] = _s3DD::P64; + +but if we don't inline the GlobalReg, instead we get: + + _s3DB::P64 = R1; + if (_s3DB::P64 & 7 >= 2) goto c3EU; else goto c3EV; + c3EU: + I64[Sp - 8] = c3F0; + R1 = P64[_s3DB::P64 + 14]; + P64[Sp] = P64[_s3DB::P64 + 6]; + +This looks better - we managed to inline _s3DD - but in fact it +generates an extra reg-reg move: + +.Lc3EU: + movq $c3F0_info,-8(%rbp) + movq %rbx,%rax + movq 14(%rbx),%rbx + movq 6(%rax),%rax + movq %rax,(%rbp) + +because _s3DB is now live across the R1 assignment, we lost the +benefit of coalescing. + +Who is at fault here? 
Perhaps if we knew that _s3DB was an alias for
+R1, then we would not sink a reference to _s3DB past the R1
+assignment.  Or perhaps we *should* do that - we might gain by sinking
+it, despite losing the coalescing opportunity.
+
+Sometimes not inlining global registers wins by virtue of the rule
+about not inlining into arguments of a foreign call, e.g. (T7163) this
+is what happens when we inline F1:
+
+  _s3L2::F32 = F1;
+  _c3O3::F32 = %MO_F_Mul_W32(F1, 10.0 :: W32);
+  (_s3L7::F32) = call "ccall" arg hints: [] result hints: [] rintFloat(_c3O3::F32);
+
+but if we don't inline F1:
+
+  (_s3L7::F32) = call "ccall" arg hints: [] result hints: [] rintFloat(%MO_F_Mul_W32(_s3L2::F32,
+                                                                                     10.0 :: W32));
+-}
diff --git a/compiler/GHC/Cmm/Switch.hs b/compiler/GHC/Cmm/Switch.hs
new file mode 100644
index 0000000000..e89fadfd2e
--- /dev/null
+++ b/compiler/GHC/Cmm/Switch.hs
@@ -0,0 +1,502 @@
+{-# LANGUAGE GADTs #-}
+module GHC.Cmm.Switch (
+     SwitchTargets,
+     mkSwitchTargets,
+     switchTargetsCases, switchTargetsDefault, switchTargetsRange, switchTargetsSigned,
+     mapSwitchTargets, switchTargetsToTable, switchTargetsFallThrough,
+     switchTargetsToList, eqSwitchTargetWith,
+
+     SwitchPlan(..),
+     targetSupportsSwitch,
+     createSwitchPlan,
+  ) where
+
+import GhcPrelude
+
+import Outputable
+import DynFlags
+import GHC.Cmm.Dataflow.Label (Label)
+
+import Data.Maybe
+import Data.List (groupBy)
+import Data.Function (on)
+import qualified Data.Map as M
+
+-- Note [Cmm Switches, the general plan]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+--
+-- Compiling a high-level switch statement, as it comes out of an STG case
+-- expression, for example, allows for a surprising number of design decisions.
+-- Therefore, we cleanly separated this from the Stg → Cmm transformation, as
+-- well as from the actual code generation.
+--
+-- The overall plan is:
+--  * The Stg → Cmm transformation creates a single `SwitchTargets` in
+--    emitSwitch and emitCmmLitSwitch in GHC.StgToCmm.Utils.
+--    At this stage, they are unsuitable for code generation.
+--  * A dedicated Cmm transformation (GHC.Cmm.Switch.Implement) replaces these
+--    switch statements with code that is suitable for code generation, i.e.
+--    a nice balanced tree of decisions with dense jump tables in the leaves.
+--    The actual planning of this tree is performed in pure code in
+--    createSwitchPlan in this module. See Note [createSwitchPlan].
+--  * The actual code generation does no further processing and implements
+--    each CmmSwitch with a jump table.
+--
+-- When compiling to LLVM or C, GHC.Cmm.Switch.Implement leaves the switch
+-- statements alone, as we can turn a SwitchTargets value into a nice
+-- switch statement in LLVM and C respectively, and leave the rest to the
+-- compiler.
+--
+-- See Note [GHC.Cmm.Switch vs. GHC.Cmm.Switch.Implement] for why the two
+-- modules are separated.
+
+-----------------------------------------------------------------------------
+-- Note [Magic Constants in GHC.Cmm.Switch]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+--
+-- There are a lot of heuristics here that depend on magic values where it is
+-- hard to determine the "best" value (for whatever that means). These are the
+-- magic values:
+
+-- | Number of consecutive default values allowed in a jump table. If there are
+-- more of them, the jump tables are split.
+--
+-- Currently 7, as it costs 7 words of additional code when a jump table is
+-- split (at least on x64, determined experimentally).
+maxJumpTableHole :: Integer
+maxJumpTableHole = 7
+
+-- | Minimum size of a jump table. If the number is smaller, the switch is
+-- implemented using conditionals.
+-- Currently 5, because an if-then-else tree of 4 values is nice and compact.
+minJumpTableSize :: Int
+minJumpTableSize = 5
+
+-- | Minimum non-zero offset for a jump table. See Note [Jump Table Offset].
+minJumpTableOffset :: Integer
+minJumpTableOffset = 2
+
+
+-----------------------------------------------------------------------------
+-- Switch Targets
+
+-- Note [SwitchTargets]
+-- ~~~~~~~~~~~~~~~~~~~~
+--
+-- The branches of a switch are stored in a SwitchTargets, which consists of an
+-- (optional) default jump target, and a map from values to jump targets.
+--
+-- If the default jump target is absent, the behaviour of the switch outside the
+-- values of the map is undefined.
+--
+-- We use an Integer for the keys of the map so that it can be used in switches
+-- on unsigned as well as signed integers.
+--
+-- The map may be empty (we prune out-of-range branches here, so the pruning
+-- itself can empty it).
+--
+-- Before code generation, the table needs to be brought into a form where all
+-- entries are non-negative, so that it can be compiled into a jump table.
+-- See switchTargetsToTable.
+
+
+-- | A value of type SwitchTargets contains the alternatives for a 'CmmSwitch'
+-- value, and knows whether the value is signed, the possible range, an
+-- optional default value and a map from values to jump labels.
+data SwitchTargets =
+    SwitchTargets
+        Bool                       -- Signed values
+        (Integer, Integer)         -- Range
+        (Maybe Label)              -- Default value
+        (M.Map Integer Label)      -- The branches
+    deriving (Show, Eq)
+
+-- | The smart constructor mkSwitchTargets normalises the map a bit:
+--  * No entries outside the range
+--  * No entries equal to the default
+--  * No default if all elements have explicit values
+mkSwitchTargets :: Bool -> (Integer, Integer) -> Maybe Label -> M.Map Integer Label -> SwitchTargets
+mkSwitchTargets signed range@(lo,hi) mbdef ids
+    = SwitchTargets signed range mbdef' ids'
+  where
+    ids' = dropDefault $ restrict ids
+    mbdef' | defaultNeeded = mbdef
+           | otherwise     = Nothing
+
+    -- Drop entries outside the range, if there is a range
+    restrict = restrictMap (lo,hi)
+
+    -- Drop entries that equal the default, if there is a default
+    dropDefault | Just l <- mbdef = M.filter (/= l)
+                | otherwise       = id
+
+    -- Check if the default is still needed
+    defaultNeeded = fromIntegral (M.size ids') /= hi-lo+1
+
+
+-- | Changes all labels mentioned in the SwitchTargets value
+mapSwitchTargets :: (Label -> Label) -> SwitchTargets -> SwitchTargets
+mapSwitchTargets f (SwitchTargets signed range mbdef branches)
+    = SwitchTargets signed range (fmap f mbdef) (fmap f branches)
+
+-- | Returns the list of non-default branches of the SwitchTargets value
+switchTargetsCases :: SwitchTargets -> [(Integer, Label)]
+switchTargetsCases (SwitchTargets _ _ _ branches) = M.toList branches
+
+-- | Return the default label of the SwitchTargets value
+switchTargetsDefault :: SwitchTargets -> Maybe Label
+switchTargetsDefault (SwitchTargets _ _ mbdef _) = mbdef
+
+-- | Return the range of the SwitchTargets value
+switchTargetsRange :: SwitchTargets -> (Integer, Integer)
+switchTargetsRange (SwitchTargets _ range _ _) = range
+
+-- | Return whether this is used for a signed value
+switchTargetsSigned :: SwitchTargets -> Bool
+switchTargetsSigned (SwitchTargets signed _ _ _) = signed
+
+-- | switchTargetsToTable creates a dense jump table, usable for code
generation. +-- +-- Also returns an offset to add to the value; the list is 0-based on the +-- result of that addition. +-- +-- The conversion from Integer to Int is a bit of a wart, as the actual +-- scrutinee might be an unsigned word, but it just works, due to wrap-around +-- arithmetic (as verified by the CmmSwitchTest test case). +switchTargetsToTable :: SwitchTargets -> (Int, [Maybe Label]) +switchTargetsToTable (SwitchTargets _ (lo,hi) mbdef branches) + = (fromIntegral (-start), [ labelFor i | i <- [start..hi] ]) + where + labelFor i = case M.lookup i branches of Just l -> Just l + Nothing -> mbdef + start | lo >= 0 && lo < minJumpTableOffset = 0 -- See Note [Jump Table Offset] + | otherwise = lo + +-- Note [Jump Table Offset] +-- ~~~~~~~~~~~~~~~~~~~~~~~~ +-- +-- Usually, the code for a jump table starting at x will first subtract x from +-- the value, to avoid a large amount of empty entries. But if x is very small, +-- the extra entries are no worse than the subtraction in terms of code size, and +-- not having to do the subtraction is quicker. +-- +-- I.e. instead of +-- _u20N: +-- leaq -1(%r14),%rax +-- jmp *_n20R(,%rax,8) +-- _n20R: +-- .quad _c20p +-- .quad _c20q +-- do +-- _u20N: +-- jmp *_n20Q(,%r14,8) +-- +-- _n20Q: +-- .quad 0 +-- .quad _c20p +-- .quad _c20q +-- .quad _c20r + +-- | The list of all labels occurring in the SwitchTargets value. +switchTargetsToList :: SwitchTargets -> [Label] +switchTargetsToList (SwitchTargets _ _ mbdef branches) + = maybeToList mbdef ++ M.elems branches + +-- | Groups cases with equal targets, suitable for pretty-printing to a +-- c-like switch statement with fall-through semantics. +switchTargetsFallThrough :: SwitchTargets -> ([([Integer], Label)], Maybe Label) +switchTargetsFallThrough (SwitchTargets _ _ mbdef branches) = (groups, mbdef) + where + groups = map (\xs -> (map fst xs, snd (head xs))) $ + groupBy ((==) `on` snd) $ + M.toList branches + +-- | Custom equality helper, needed for "GHC.Cmm.CommonBlockElim" +eqSwitchTargetWith :: (Label -> Label -> Bool) -> SwitchTargets -> SwitchTargets -> Bool +eqSwitchTargetWith eq (SwitchTargets signed1 range1 mbdef1 ids1) (SwitchTargets signed2 range2 mbdef2 ids2) = + signed1 == signed2 && range1 == range2 && goMB mbdef1 mbdef2 && goList (M.toList ids1) (M.toList ids2) + where + goMB Nothing Nothing = True + goMB (Just l1) (Just l2) = l1 `eq` l2 + goMB _ _ = False + goList [] [] = True + goList ((i1,l1):ls1) ((i2,l2):ls2) = i1 == i2 && l1 `eq` l2 && goList ls1 ls2 + goList _ _ = False + +----------------------------------------------------------------------------- +-- Code generation for Switches + + +-- | A SwitchPlan abstractly describes how a Switch statement ought to be +-- implemented. See Note [createSwitchPlan] +data SwitchPlan + = Unconditionally Label + | IfEqual Integer Label SwitchPlan + | IfLT Bool Integer SwitchPlan SwitchPlan + | JumpTable SwitchTargets + deriving Show +-- +-- Note [createSwitchPlan] +-- ~~~~~~~~~~~~~~~~~~~~~~~ +-- +-- A SwitchPlan describes how a Switch statement is to be broken down into +-- smaller pieces suitable for code generation. +-- +-- createSwitchPlan creates such a switch plan, in these steps: +-- 1. It splits the switch statement at segments of non-default values that +-- are too large. See splitAtHoles and Note [Magic Constants in GHC.Cmm.Switch] +-- 2. Too small jump tables should be avoided, so we break up smaller pieces +-- in breakTooSmall. +-- 3. 
We fill in the segments between those pieces with a jump to the default
+--    label (if there is one), returning a SeparatedList in mkFlatSwitchPlan
+-- 4. We find and replace two less-than branches by a single equal-to-test in
+--    findSingleValues
+-- 5. The pieces thus collected are assembled into a balanced binary tree.
+
+{-
+  Note [Two alts + default]
+  ~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Discussion and a bit more info at #14644
+
+When dealing with a switch of the form:
+switch(e) {
+  case 1: goto l1;
+  case 3000: goto l2;
+  default: goto ldef;
+}
+
+If we treat it as a sparse jump table we would generate:
+
+if (e > 3000) //Check if value is outside of the jump table.
+    goto ldef;
+else {
+    if (e < 3000) { //Compare to upper value
+        if(e != 1) //Compare to remaining value
+            goto ldef;
+          else
+            goto l1;
+    }
+    else
+        goto l2;
+}
+
+Instead we special-case this to:
+
+if (e==1) goto l1;
+else if (e==3000) goto l2;
+else goto ldef;
+
+This means we have:
+* Fewer comparisons for 1 and <3000
+* Unchanged for 3000
+* One more for >3000
+
+This improves code in a few ways:
+* One comparison fewer means smaller code, which helps with the cache.
+* It exchanges a taken jump for two jumps not taken in the >range case.
+  Jumps not taken are cheaper (see Agner's guides), making this about as fast.
+* For all other cases the first range check is removed, making it faster.
+
+The end result is that the change is not measurably slower for the case
+>3000 and faster for the other cases.
+
+This makes running this kind of match in an inner loop cheaper by 10-20%
+depending on the data.
+In nofib this improves wheel-sieve1 by 4-9% depending on problem
+size.
+
+We could also add a second conditional jump after the comparison to
+keep the range check, like this:
+    cmp 3000, rArgument
+    jg  <default>
+    je  <branch 2>
+While this is fairly cheap, it made no big difference for the >3000 case
+and slowed down all other cases, making it not worthwhile.
+-}
+
+
+-- | Does the target support switches out of the box? Then leave this to the
+-- target!
+targetSupportsSwitch :: HscTarget -> Bool
+targetSupportsSwitch HscC    = True
+targetSupportsSwitch HscLlvm = True
+targetSupportsSwitch _       = False
+
+-- | This function creates a SwitchPlan from a SwitchTargets value, breaking it
+-- down into smaller pieces suitable for code generation.
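+--
+-- To give a feel for the result (an illustrative sketch, not a verbatim
+-- trace of this implementation): six consecutive branches 1..6 with
+-- default label ldef and range (0,10) come out as a plan of roughly this
+-- shape, i.e. range checks around a dense jump table:
+--
+-- >  IfLT False 7
+-- >    (IfLT False 1 (Unconditionally ldef) (JumpTable <branches 1..6>))
+-- >    (Unconditionally ldef)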
+createSwitchPlan :: SwitchTargets -> SwitchPlan
+-- Let's do the common case of a singleton map quickly and efficiently (#10677)
+createSwitchPlan (SwitchTargets _signed _range (Just defLabel) m)
+  | [(x, l)] <- M.toList m
+  = IfEqual x l (Unconditionally defLabel)
+-- And another common case, matching "booleans"
+createSwitchPlan (SwitchTargets _signed (lo,hi) Nothing m)
+  | [(x1, l1), (_x2,l2)] <- M.toAscList m
+  -- Checking if |range| = 2 is enough when we have two unique literals
+  , hi - lo == 1
+  = IfEqual x1 l1 (Unconditionally l2)
+-- See Note [Two alts + default]
+createSwitchPlan (SwitchTargets _signed _range (Just defLabel) m)
+  | [(x1, l1), (x2,l2)] <- M.toAscList m
+  = IfEqual x1 l1 (IfEqual x2 l2 (Unconditionally defLabel))
+createSwitchPlan (SwitchTargets signed range mbdef m) =
+  -- pprTrace "createSwitchPlan" (text (show ids) $$ text (show (range,m)) $$ text (show pieces) $$ text (show flatPlan) $$ text (show plan)) $
+  plan
+  where
+    pieces = concatMap breakTooSmall $ splitAtHoles maxJumpTableHole m
+    flatPlan = findSingleValues $ mkFlatSwitchPlan signed mbdef range pieces
+    plan = buildTree signed $ flatPlan
+
+
+---
+--- Step 1: Splitting at large holes
+---
+splitAtHoles :: Integer -> M.Map Integer a -> [M.Map Integer a]
+splitAtHoles _ m | M.null m = []
+splitAtHoles holeSize m = map (\range -> restrictMap range m) nonHoles
+  where
+    holes = filter (\(l,h) -> h - l > holeSize) $ zip (M.keys m) (tail (M.keys m))
+    nonHoles = reassocTuples lo holes hi
+
+    (lo,_) = M.findMin m
+    (hi,_) = M.findMax m
+
+---
+--- Step 2: Avoid small jump tables
+---
+-- We do not want jump tables below a certain size. This breaks them up
+-- (into singleton maps, for now).
+breakTooSmall :: M.Map Integer a -> [M.Map Integer a]
+breakTooSmall m
+  | M.size m > minJumpTableSize = [m]
+  | otherwise = [M.singleton k v | (k,v) <- M.toList m]
+
+---
+--- Step 3: Fill in the blanks
+---
+
+-- | A FlatSwitchPlan is a list of SwitchPlans, with an integer in between every
+-- two entries, dividing the range.
+-- So if we have (abusing list syntax) [plan1,n,plan2], then we use plan1 if
+-- the expression is < n, and plan2 otherwise.
+
+type FlatSwitchPlan = SeparatedList Integer SwitchPlan
+
+mkFlatSwitchPlan :: Bool -> Maybe Label -> (Integer, Integer) -> [M.Map Integer Label] -> FlatSwitchPlan
+
+-- If we have no default (i.e.
undefined where there is no entry), we can +-- branch at the minimum of each map +mkFlatSwitchPlan _ Nothing _ [] = pprPanic "mkFlatSwitchPlan with nothing left to do" empty +mkFlatSwitchPlan signed Nothing _ (m:ms) + = (mkLeafPlan signed Nothing m , [ (fst (M.findMin m'), mkLeafPlan signed Nothing m') | m' <- ms ]) + +-- If we have a default, we have to interleave segments that jump +-- to the default between the maps +mkFlatSwitchPlan signed (Just l) r ms = let ((_,p1):ps) = go r ms in (p1, ps) + where + go (lo,hi) [] + | lo > hi = [] + | otherwise = [(lo, Unconditionally l)] + go (lo,hi) (m:ms) + | lo < min + = (lo, Unconditionally l) : go (min,hi) (m:ms) + | lo == min + = (lo, mkLeafPlan signed (Just l) m) : go (max+1,hi) ms + | otherwise + = pprPanic "mkFlatSwitchPlan" (integer lo <+> integer min) + where + min = fst (M.findMin m) + max = fst (M.findMax m) + + +mkLeafPlan :: Bool -> Maybe Label -> M.Map Integer Label -> SwitchPlan +mkLeafPlan signed mbdef m + | [(_,l)] <- M.toList m -- singleton map + = Unconditionally l + | otherwise + = JumpTable $ mkSwitchTargets signed (min,max) mbdef m + where + min = fst (M.findMin m) + max = fst (M.findMax m) + +--- +--- Step 4: Reduce the number of branches using == +--- + +-- A sequence of three unconditional jumps, with the outer two pointing to the +-- same value and the bounds off by exactly one can be improved +findSingleValues :: FlatSwitchPlan -> FlatSwitchPlan +findSingleValues (Unconditionally l, (i, Unconditionally l2) : (i', Unconditionally l3) : xs) + | l == l3 && i + 1 == i' + = findSingleValues (IfEqual i l2 (Unconditionally l), xs) +findSingleValues (p, (i,p'):xs) + = (p,i) `consSL` findSingleValues (p', xs) +findSingleValues (p, []) + = (p, []) + +--- +--- Step 5: Actually build the tree +--- + +-- Build a balanced tree from a separated list +buildTree :: Bool -> FlatSwitchPlan -> SwitchPlan +buildTree _ (p,[]) = p +buildTree signed sl = IfLT signed m (buildTree signed sl1) (buildTree signed sl2) + where + (sl1, m, sl2) = divideSL sl + + + +-- +-- Utility data type: Non-empty lists with extra markers in between each +-- element: +-- + +type SeparatedList b a = (a, [(b,a)]) + +consSL :: (a, b) -> SeparatedList b a -> SeparatedList b a +consSL (a, b) (a', xs) = (a, (b,a'):xs) + +divideSL :: SeparatedList b a -> (SeparatedList b a, b, SeparatedList b a) +divideSL (_,[]) = error "divideSL: Singleton SeparatedList" +divideSL (p,xs) = ((p, xs1), m, (p', xs2)) + where + (xs1, (m,p'):xs2) = splitAt (length xs `div` 2) xs + +-- +-- Other Utilities +-- + +restrictMap :: (Integer,Integer) -> M.Map Integer b -> M.Map Integer b +restrictMap (lo,hi) m = mid + where (_, mid_hi) = M.split (lo-1) m + (mid, _) = M.split (hi+1) mid_hi + +-- for example: reassocTuples a [(b,c),(d,e)] f == [(a,b),(c,d),(e,f)] +reassocTuples :: a -> [(a,a)] -> a -> [(a,a)] +reassocTuples initial [] last + = [(initial,last)] +reassocTuples initial ((a,b):tuples) last + = (initial,a) : reassocTuples b tuples last + +-- Note [GHC.Cmm.Switch vs. GHC.Cmm.Switch.Implement] +-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +-- I (Joachim) separated the two somewhat closely related modules +-- +-- - GHC.Cmm.Switch, which provides the CmmSwitchTargets type and contains the strategy +-- for implementing a Cmm switch (createSwitchPlan), and +-- - GHC.Cmm.Switch.Implement, which contains the actual Cmm graph modification, +-- +-- for these reasons: +-- +-- * GHC.Cmm.Switch is very low in the dependency tree, i.e. 
does not depend on any
+--   GHC-specific modules at all (with the exception of Outputable and
+--   GHC.Cmm.Dataflow (Label)).
+-- * GHC.Cmm.Switch.Implement is the Cmm transformation and hence very high in
+--   the dependency tree.
+-- * GHC.Cmm.Switch provides the SwitchTargets data type, which is abstract, but
+--   used in GHC.Cmm.Node.
+-- * Because GHC.Cmm.Switch is low in the dependency tree, the separation allows
+--   for more parallelism when building GHC.
+-- * The interaction between the modules is very explicit and easy to
+--   understand, due to the small and simple interface.
diff --git a/compiler/GHC/Cmm/Switch/Implement.hs b/compiler/GHC/Cmm/Switch/Implement.hs
new file mode 100644
index 0000000000..dfac116764
--- /dev/null
+++ b/compiler/GHC/Cmm/Switch/Implement.hs
@@ -0,0 +1,116 @@
+{-# LANGUAGE GADTs #-}
+module GHC.Cmm.Switch.Implement
+  ( cmmImplementSwitchPlans
+  )
+where
+
+import GhcPrelude
+
+import GHC.Cmm.Dataflow.Block
+import GHC.Cmm.BlockId
+import GHC.Cmm
+import GHC.Cmm.Utils
+import GHC.Cmm.Switch
+import UniqSupply
+import DynFlags
+
+--
+-- This module replaces Switch statements as generated by the Stg -> Cmm
+-- transformation, which might be huge and sparse and hence unsuitable for
+-- assembly code, with proper constructs (if-then-else trees, dense jump
+-- tables).
+--
+-- The actual, abstract strategy is determined by createSwitchPlan in
+-- GHC.Cmm.Switch and returned as a SwitchPlan; this module merely implements
+-- that plan in terms of Cmm code. See Note [Cmm Switches, the general plan]
+-- in GHC.Cmm.Switch.
+--
+-- This division into different modules keeps concerns separate, and is also
+-- necessary because createSwitchPlan needs access to the constructors of
+-- SwitchTargets, a data type exported abstractly by GHC.Cmm.Switch.
+--
+
+-- | Traverses the 'CmmGraph', making sure that 'CmmSwitch' nodes are suitable
+-- for code generation.
+cmmImplementSwitchPlans :: DynFlags -> CmmGraph -> UniqSM CmmGraph
+cmmImplementSwitchPlans dflags g
+    -- Switch generation done by backend (LLVM/C)
+    | targetSupportsSwitch (hscTarget dflags) = return g
+    | otherwise = do
+    blocks' <- concat `fmap` mapM (visitSwitches dflags) (toBlockList g)
+    return $ ofBlockList (g_entry g) blocks'
+
+visitSwitches :: DynFlags -> CmmBlock -> UniqSM [CmmBlock]
+visitSwitches dflags block
+  | (entry@(CmmEntry _ scope), middle, CmmSwitch vanillaExpr ids) <- blockSplit block
+  = do
+    let plan = createSwitchPlan ids
+    -- See Note [Floating switch expressions]
+    (assignSimple, simpleExpr) <- floatSwitchExpr dflags vanillaExpr
+
+    (newTail, newBlocks) <- implementSwitchPlan dflags scope simpleExpr plan
+
+    let block' = entry `blockJoinHead` middle `blockAppend` assignSimple `blockAppend` newTail
+
+    return $ block' : newBlocks
+
+  | otherwise
+  = return [block]
+
+-- Note [Floating switch expressions]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+-- When we translate a sparse switch into a search tree we would like
+-- to compute the value we compare against only once.
+
+-- For this purpose we assign the switch expression to a local register
+-- and then use this register when constructing the actual binary tree.
+
+-- This is important as the expression could contain expensive code like
+-- memory loads or divisions which we REALLY don't want to duplicate.
+
+-- This happened in parts of the handwritten RTS Cmm code.
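+
+-- As a rough illustration (Cmm-ish concrete syntax, not verbatim compiler
+-- output), a switch scrutinising a memory load such as
+--
+--   switch [1 .. 10] I64[Sp + 8] { ... }
+--
+-- is first rewritten to
+--
+--   _scrut::I64 = I64[Sp + 8];
+--   switch [1 .. 10] _scrut::I64 { ... }
+--
+-- (the name _scrut is made up here) so that the if-then-else tree built
+-- below re-reads a register rather than re-loading from memory.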
See also #16933 + +-- See Note [Floating switch expressions] +floatSwitchExpr :: DynFlags -> CmmExpr -> UniqSM (Block CmmNode O O, CmmExpr) +floatSwitchExpr _ reg@(CmmReg {}) = return (emptyBlock, reg) +floatSwitchExpr dflags expr = do + (assign, expr') <- cmmMkAssign dflags expr <$> getUniqueM + return (BMiddle assign, expr') + + +-- Implementing a switch plan (returning a tail block) +implementSwitchPlan :: DynFlags -> CmmTickScope -> CmmExpr -> SwitchPlan -> UniqSM (Block CmmNode O C, [CmmBlock]) +implementSwitchPlan dflags scope expr = go + where + go (Unconditionally l) + = return (emptyBlock `blockJoinTail` CmmBranch l, []) + go (JumpTable ids) + = return (emptyBlock `blockJoinTail` CmmSwitch expr ids, []) + go (IfLT signed i ids1 ids2) + = do + (bid1, newBlocks1) <- go' ids1 + (bid2, newBlocks2) <- go' ids2 + + let lt | signed = cmmSLtWord + | otherwise = cmmULtWord + scrut = lt dflags expr $ CmmLit $ mkWordCLit dflags i + lastNode = CmmCondBranch scrut bid1 bid2 Nothing + lastBlock = emptyBlock `blockJoinTail` lastNode + return (lastBlock, newBlocks1++newBlocks2) + go (IfEqual i l ids2) + = do + (bid2, newBlocks2) <- go' ids2 + + let scrut = cmmNeWord dflags expr $ CmmLit $ mkWordCLit dflags i + lastNode = CmmCondBranch scrut bid2 l Nothing + lastBlock = emptyBlock `blockJoinTail` lastNode + return (lastBlock, newBlocks2) + + -- Same but returning a label to branch to + go' (Unconditionally l) + = return (l, []) + go' p + = do + bid <- mkBlockId `fmap` getUniqueM + (last, newBlocks) <- go p + let block = CmmEntry bid scope `blockJoinHead` last + return (bid, block: newBlocks) diff --git a/compiler/GHC/Cmm/Type.hs b/compiler/GHC/Cmm/Type.hs new file mode 100644 index 0000000000..867a260078 --- /dev/null +++ b/compiler/GHC/Cmm/Type.hs @@ -0,0 +1,432 @@ +module GHC.Cmm.Type + ( CmmType -- Abstract + , b8, b16, b32, b64, b128, b256, b512, f32, f64, bWord, bHalfWord, gcWord + , cInt + , cmmBits, cmmFloat + , typeWidth, cmmEqType, cmmEqType_ignoring_ptrhood + , isFloatType, isGcPtrType, isBitsType + , isWord32, isWord64, isFloat64, isFloat32 + + , Width(..) + , widthInBits, widthInBytes, widthInLog, widthFromBytes + , wordWidth, halfWordWidth, cIntWidth + , halfWordMask + , narrowU, narrowS + , rEP_CostCentreStack_mem_alloc + , rEP_CostCentreStack_scc_count + , rEP_StgEntCounter_allocs + , rEP_StgEntCounter_allocd + + , ForeignHint(..) + + , Length + , vec, vec2, vec4, vec8, vec16 + , vec2f64, vec2b64, vec4f32, vec4b32, vec8b16, vec16b8 + , cmmVec + , vecLength, vecElemType + , isVecType + ) +where + + +import GhcPrelude + +import DynFlags +import FastString +import Outputable + +import Data.Word +import Data.Int + +----------------------------------------------------------------------------- +-- CmmType +----------------------------------------------------------------------------- + + -- NOTE: CmmType is an abstract type, not exported from this + -- module so you can easily change its representation + -- + -- However Width is exported in a concrete way, + -- and is used extensively in pattern-matching + +data CmmType -- The important one! 
+  = CmmType CmmCat Width
+
+data CmmCat                -- "Category" (not exported)
+   = GcPtrCat              -- GC pointer
+   | BitsCat               -- Non-pointer
+   | FloatCat              -- Float
+   | VecCat Length CmmCat  -- Vector
+   deriving( Eq )
+        -- See Note [Signed vs unsigned] at the end
+
+instance Outputable CmmType where
+  ppr (CmmType cat wid) = ppr cat <> ppr (widthInBits wid)
+
+instance Outputable CmmCat where
+  ppr FloatCat       = text "F"
+  ppr GcPtrCat       = text "P"
+  ppr BitsCat        = text "I"
+  ppr (VecCat n cat) = ppr cat <> text "x" <> ppr n <> text "V"
+
+-- Why is CmmType stratified? For native code generation,
+-- most of the time you just want to know what sort of register
+-- to put the thing in, and for this you need to know how
+-- many bits the thing has, and whether it goes in a floating-point
+-- register. By contrast, the distinction between GcPtr and
+-- GcNonPtr is of interest to only a few parts of the code generator.
+
+-------- Equality on CmmType --------------
+-- CmmType is *not* an instance of Eq; sometimes we care about the
+-- Gc/NonGc distinction, and sometimes we don't.
+-- So we use an explicit function to force you to think about it
+cmmEqType :: CmmType -> CmmType -> Bool -- Exact equality
+cmmEqType (CmmType c1 w1) (CmmType c2 w2) = c1==c2 && w1==w2
+
+cmmEqType_ignoring_ptrhood :: CmmType -> CmmType -> Bool
+  -- This equality is temporary; used in CmmLint
+  -- but the RTS files are not yet well-typed wrt pointers
+cmmEqType_ignoring_ptrhood (CmmType c1 w1) (CmmType c2 w2)
+   = c1 `weak_eq` c2 && w1==w2
+   where
+     weak_eq :: CmmCat -> CmmCat -> Bool
+     FloatCat         `weak_eq` FloatCat         = True
+     FloatCat         `weak_eq` _other           = False
+     _other           `weak_eq` FloatCat         = False
+     (VecCat l1 cat1) `weak_eq` (VecCat l2 cat2) = l1 == l2
+                                                   && cat1 `weak_eq` cat2
+     (VecCat {})      `weak_eq` _other           = False
+     _other           `weak_eq` (VecCat {})      = False
+     _word1           `weak_eq` _word2           = True   -- Ignores GcPtr
+
+--- Simple operations on CmmType -----
+typeWidth :: CmmType -> Width
+typeWidth (CmmType _ w) = w
+
+cmmBits, cmmFloat :: Width -> CmmType
+cmmBits  = CmmType BitsCat
+cmmFloat = CmmType FloatCat
+
+-------- Common CmmTypes ------------
+-- Floats and words of specific widths
+b8, b16, b32, b64, b128, b256, b512, f32, f64 :: CmmType
+b8   = cmmBits W8
+b16  = cmmBits W16
+b32  = cmmBits W32
+b64  = cmmBits W64
+b128 = cmmBits W128
+b256 = cmmBits W256
+b512 = cmmBits W512
+f32  = cmmFloat W32
+f64  = cmmFloat W64
+
+-- CmmTypes of native word widths
+bWord :: DynFlags -> CmmType
+bWord dflags = cmmBits (wordWidth dflags)
+
+bHalfWord :: DynFlags -> CmmType
+bHalfWord dflags = cmmBits (halfWordWidth dflags)
+
+gcWord :: DynFlags -> CmmType
+gcWord dflags = CmmType GcPtrCat (wordWidth dflags)
+
+cInt :: DynFlags -> CmmType
+cInt dflags = cmmBits (cIntWidth dflags)
+
+------------ Predicates ----------------
+isFloatType, isGcPtrType, isBitsType :: CmmType -> Bool
+isFloatType (CmmType FloatCat _) = True
+isFloatType _other               = False
+
+isGcPtrType (CmmType GcPtrCat _) = True
+isGcPtrType _other               = False
+
+isBitsType (CmmType BitsCat _) = True
+isBitsType _                   = False
+
+isWord32, isWord64, isFloat32, isFloat64 :: CmmType -> Bool
+-- isWord64 is true of 64-bit non-floats (both gc-ptrs and otherwise)
+-- isFloat32 and 64 are obvious
+
+isWord64 (CmmType BitsCat  W64) = True
+isWord64 (CmmType GcPtrCat W64) = True
+isWord64 _other                 = False
+
+isWord32 (CmmType BitsCat  W32) = True
+isWord32 (CmmType GcPtrCat W32) = True
+isWord32 _other                 = False
+
+isFloat32 (CmmType FloatCat W32) = True
+isFloat32 _other                 = False
+
+isFloat64 (CmmType FloatCat W64) = True
+isFloat64
_other = False
+
+-----------------------------------------------------------------------------
+--              Width
+-----------------------------------------------------------------------------
+
+data Width = W8 | W16 | W32 | W64
+           | W128
+           | W256
+           | W512
+           deriving (Eq, Ord, Show)
+
+instance Outputable Width where
+  ppr rep = ptext (mrStr rep)
+
+mrStr :: Width -> PtrString
+mrStr = sLit . show
+
+
+-------- Common Widths ------------
+wordWidth :: DynFlags -> Width
+wordWidth dflags
+  | wORD_SIZE dflags == 4 = W32
+  | wORD_SIZE dflags == 8 = W64
+  | otherwise             = panic "MachOp.wordRep: Unknown word size"
+
+halfWordWidth :: DynFlags -> Width
+halfWordWidth dflags
+  | wORD_SIZE dflags == 4 = W16
+  | wORD_SIZE dflags == 8 = W32
+  | otherwise             = panic "MachOp.halfWordRep: Unknown word size"
+
+halfWordMask :: DynFlags -> Integer
+halfWordMask dflags
+  | wORD_SIZE dflags == 4 = 0xFFFF
+  | wORD_SIZE dflags == 8 = 0xFFFFFFFF
+  | otherwise             = panic "MachOp.halfWordMask: Unknown word size"
+
+-- cIntWidth is the Width for a C-language 'int'
+cIntWidth :: DynFlags -> Width
+cIntWidth dflags = case cINT_SIZE dflags of
+                   4 -> W32
+                   8 -> W64
+                   s -> panic ("cIntWidth: Unknown cINT_SIZE: " ++ show s)
+
+widthInBits :: Width -> Int
+widthInBits W8   = 8
+widthInBits W16  = 16
+widthInBits W32  = 32
+widthInBits W64  = 64
+widthInBits W128 = 128
+widthInBits W256 = 256
+widthInBits W512 = 512
+
+
+widthInBytes :: Width -> Int
+widthInBytes W8   = 1
+widthInBytes W16  = 2
+widthInBytes W32  = 4
+widthInBytes W64  = 8
+widthInBytes W128 = 16
+widthInBytes W256 = 32
+widthInBytes W512 = 64
+
+
+widthFromBytes :: Int -> Width
+widthFromBytes 1  = W8
+widthFromBytes 2  = W16
+widthFromBytes 4  = W32
+widthFromBytes 8  = W64
+widthFromBytes 16 = W128
+widthFromBytes 32 = W256
+widthFromBytes 64 = W512
+
+widthFromBytes n  = pprPanic "no width for given number of bytes" (ppr n)
+
+-- log_2 of the width in bytes, useful for generating shifts.
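+-- For example (illustrative): widthInLog W64 == 3, so an index into an
+-- array of W64 elements is turned into a byte offset by shifting it left
+-- by 3; cmmIndexExpr in GHC.Cmm.Utils uses it for exactly that.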
+widthInLog :: Width -> Int +widthInLog W8 = 0 +widthInLog W16 = 1 +widthInLog W32 = 2 +widthInLog W64 = 3 +widthInLog W128 = 4 +widthInLog W256 = 5 +widthInLog W512 = 6 + + +-- widening / narrowing + +narrowU :: Width -> Integer -> Integer +narrowU W8 x = fromIntegral (fromIntegral x :: Word8) +narrowU W16 x = fromIntegral (fromIntegral x :: Word16) +narrowU W32 x = fromIntegral (fromIntegral x :: Word32) +narrowU W64 x = fromIntegral (fromIntegral x :: Word64) +narrowU _ _ = panic "narrowTo" + +narrowS :: Width -> Integer -> Integer +narrowS W8 x = fromIntegral (fromIntegral x :: Int8) +narrowS W16 x = fromIntegral (fromIntegral x :: Int16) +narrowS W32 x = fromIntegral (fromIntegral x :: Int32) +narrowS W64 x = fromIntegral (fromIntegral x :: Int64) +narrowS _ _ = panic "narrowTo" + +----------------------------------------------------------------------------- +-- SIMD +----------------------------------------------------------------------------- + +type Length = Int + +vec :: Length -> CmmType -> CmmType +vec l (CmmType cat w) = CmmType (VecCat l cat) vecw + where + vecw :: Width + vecw = widthFromBytes (l*widthInBytes w) + +vec2, vec4, vec8, vec16 :: CmmType -> CmmType +vec2 = vec 2 +vec4 = vec 4 +vec8 = vec 8 +vec16 = vec 16 + +vec2f64, vec2b64, vec4f32, vec4b32, vec8b16, vec16b8 :: CmmType +vec2f64 = vec 2 f64 +vec2b64 = vec 2 b64 +vec4f32 = vec 4 f32 +vec4b32 = vec 4 b32 +vec8b16 = vec 8 b16 +vec16b8 = vec 16 b8 + +cmmVec :: Int -> CmmType -> CmmType +cmmVec n (CmmType cat w) = + CmmType (VecCat n cat) (widthFromBytes (n*widthInBytes w)) + +vecLength :: CmmType -> Length +vecLength (CmmType (VecCat l _) _) = l +vecLength _ = panic "vecLength: not a vector" + +vecElemType :: CmmType -> CmmType +vecElemType (CmmType (VecCat l cat) w) = CmmType cat scalw + where + scalw :: Width + scalw = widthFromBytes (widthInBytes w `div` l) +vecElemType _ = panic "vecElemType: not a vector" + +isVecType :: CmmType -> Bool +isVecType (CmmType (VecCat {}) _) = True +isVecType _ = False + +------------------------------------------------------------------------- +-- Hints + +-- Hints are extra type information we attach to the arguments and +-- results of a foreign call, where more type information is sometimes +-- needed by the ABI to make the correct kind of call. + +data ForeignHint + = NoHint | AddrHint | SignedHint + deriving( Eq ) + -- Used to give extra per-argument or per-result + -- information needed by foreign calling conventions + +------------------------------------------------------------------------- + +-- These don't really belong here, but I don't know where is best to +-- put them. 
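+-- They give the in-memory representation, as a CmmType, of particular
+-- fields of the RTS's CostCentreStack and StgEntCounter structures, with
+-- the field sizes taken from platformConstants.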
+
+rEP_CostCentreStack_mem_alloc :: DynFlags -> CmmType
+rEP_CostCentreStack_mem_alloc dflags
+    = cmmBits (widthFromBytes (pc_REP_CostCentreStack_mem_alloc pc))
+    where pc = platformConstants dflags
+
+rEP_CostCentreStack_scc_count :: DynFlags -> CmmType
+rEP_CostCentreStack_scc_count dflags
+    = cmmBits (widthFromBytes (pc_REP_CostCentreStack_scc_count pc))
+    where pc = platformConstants dflags
+
+rEP_StgEntCounter_allocs :: DynFlags -> CmmType
+rEP_StgEntCounter_allocs dflags
+    = cmmBits (widthFromBytes (pc_REP_StgEntCounter_allocs pc))
+    where pc = platformConstants dflags
+
+rEP_StgEntCounter_allocd :: DynFlags -> CmmType
+rEP_StgEntCounter_allocd dflags
+    = cmmBits (widthFromBytes (pc_REP_StgEntCounter_allocd pc))
+    where pc = platformConstants dflags
+
+-------------------------------------------------------------------------
+{- Note [Signed vs unsigned]
+   ~~~~~~~~~~~~~~~~~~~~~~~~~
+Should a CmmType include a signed vs. unsigned distinction?
+
+This is very much like a "hint" in C-- terminology: it isn't necessary
+in order to generate correct code, but it might be useful in that the
+compiler can generate better code if it has access to higher-level
+hints about data. This is important at call boundaries, because the
+definition of a function is not visible at all of its call sites, so
+the compiler cannot infer the hints.
+
+Here in Cmm, we're taking a slightly different approach. We include
+the int vs. float hint in the CmmType, because (a) the majority of
+platforms have a strong distinction between float and int registers,
+and (b) we don't want to do any heavyweight hint-inference in the
+native code backend in order to get good code. We're treating the
+hint more like a type: our Cmm is always completely consistent with
+respect to hints. All coercions between float and int are explicit.
+
+What about the signed vs. unsigned hint? This information might be
+useful if we want to keep sub-word-sized values in word-size
+registers, which we must do if we only have word-sized registers.
+
+On such a system, there are two straightforward conventions for
+representing sub-word-sized values:
+
+(a) Leave the upper bits undefined. Comparison operations must
+    sign- or zero-extend both operands before comparing them,
+    depending on whether the comparison is signed or unsigned.
+
+(b) Always keep the values sign- or zero-extended as appropriate.
+    Arithmetic operations must narrow the result to the appropriate
+    size.
+
+A clever compiler might not use either (a) or (b) exclusively; instead
+it would attempt to minimize the coercions by analysis: the same kind
+of analysis that propagates hints around. In Cmm we don't want to
+have to do this, so we plump for having richer types and keeping the
+type information consistent.
+
+If signed/unsigned hints are missing from CmmType, then the only
+choice we have is (a), because we don't know whether the result of an
+operation should be sign- or zero-extended.
+
+Many architectures have extending load operations, which work well
+with (b). To make use of them with (a), you need to know whether the
+value is going to be sign- or zero-extended by an enclosing comparison
+(for example), which involves knowing about the context. This is
+doable but more complex.
+
+Further complicating the issue is foreign calls: a foreign calling
+convention can specify that signed 8-bit quantities are passed as
+sign-extended 32 bit quantities, for example (this is the case on the
+PowerPC). So we *do* need sign information on foreign call arguments.
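+
+To make the two conventions concrete, here is a small illustration in
+terms of the narrowing helpers defined above (a sketch, not normative):
+under convention (b) an 8-bit unsigned addition of 255 and 1 must be
+narrowed before the result is stored,
+
+    narrowU W8 (255 + 1)  ==  0
+    narrowS W8 128        ==  -128
+
+whereas under convention (a) the upper bits of the register may simply be
+left holding 256 until a comparison forces an extension.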
+ +Pros for adding signed vs. unsigned to CmmType: + + - It would let us use convention (b) above, and get easier + code generation for extending loads. + + - Less information required on foreign calls. + + - MachOp type would be simpler + +Cons: + + - More complexity + + - What is the CmmType for a VanillaReg? Currently it is + always wordRep, but now we have to decide whether it is + signed or unsigned. The same VanillaReg can thus have + different CmmType in different parts of the program. + + - Extra coercions cluttering up expressions. + +Currently for GHC, the foreign call point is moot, because we do our +own promotion of sub-word-sized values to word-sized values. The Int8 +type is represented by an Int# which is kept sign-extended at all times +(this is slightly naughty, because we're making assumptions about the +C calling convention rather early on in the compiler). However, given +this, the cons outweigh the pros. + +-} + diff --git a/compiler/GHC/Cmm/Utils.hs b/compiler/GHC/Cmm/Utils.hs new file mode 100644 index 0000000000..d879c7b82f --- /dev/null +++ b/compiler/GHC/Cmm/Utils.hs @@ -0,0 +1,607 @@ +{-# LANGUAGE GADTs, RankNTypes #-} +{-# LANGUAGE BangPatterns #-} + +----------------------------------------------------------------------------- +-- +-- Cmm utilities. +-- +-- (c) The University of Glasgow 2004-2006 +-- +----------------------------------------------------------------------------- + +module GHC.Cmm.Utils( + -- CmmType + primRepCmmType, slotCmmType, slotForeignHint, + typeCmmType, typeForeignHint, primRepForeignHint, + + -- CmmLit + zeroCLit, mkIntCLit, + mkWordCLit, packHalfWordsCLit, + mkByteStringCLit, + mkDataLits, mkRODataLits, + mkStgWordCLit, + + -- CmmExpr + mkIntExpr, zeroExpr, + mkLblExpr, + cmmRegOff, cmmOffset, cmmLabelOff, cmmOffsetLit, cmmOffsetExpr, + cmmRegOffB, cmmOffsetB, cmmLabelOffB, cmmOffsetLitB, cmmOffsetExprB, + cmmRegOffW, cmmOffsetW, cmmLabelOffW, cmmOffsetLitW, cmmOffsetExprW, + cmmIndex, cmmIndexExpr, cmmLoadIndex, cmmLoadIndexW, + cmmNegate, + cmmULtWord, cmmUGeWord, cmmUGtWord, cmmUShrWord, + cmmSLtWord, + cmmNeWord, cmmEqWord, + cmmOrWord, cmmAndWord, + cmmSubWord, cmmAddWord, cmmMulWord, cmmQuotWord, + cmmToWord, + + cmmMkAssign, + + isTrivialCmmExpr, hasNoGlobalRegs, isLit, isComparisonExpr, + + baseExpr, spExpr, hpExpr, spLimExpr, hpLimExpr, + currentTSOExpr, currentNurseryExpr, cccsExpr, + + -- Statics + blankWord, + + -- Tagging + cmmTagMask, cmmPointerMask, cmmUntag, cmmIsTagged, + cmmConstrTag1, + + -- Overlap and usage + regsOverlap, regUsedIn, + + -- Liveness and bitmaps + mkLiveness, + + -- * Operations that probably don't belong here + modifyGraph, + + ofBlockMap, toBlockMap, + ofBlockList, toBlockList, bodyToBlockList, + toBlockListEntryFirst, toBlockListEntryFirstFalseFallthrough, + foldlGraphBlocks, mapGraphNodes, revPostorder, mapGraphNodes1, + + -- * Ticks + blockTicks + ) where + +import GhcPrelude + +import TyCon ( PrimRep(..), PrimElemRep(..) 
) +import GHC.Types.RepType ( UnaryType, SlotTy (..), typePrimRep1 ) + +import GHC.Runtime.Layout +import GHC.Cmm +import GHC.Cmm.BlockId +import GHC.Cmm.CLabel +import Outputable +import DynFlags +import Unique +import GHC.Platform.Regs + +import Data.ByteString (ByteString) +import qualified Data.ByteString as BS +import Data.Bits +import GHC.Cmm.Dataflow.Graph +import GHC.Cmm.Dataflow.Label +import GHC.Cmm.Dataflow.Block +import GHC.Cmm.Dataflow.Collections + +--------------------------------------------------- +-- +-- CmmTypes +-- +--------------------------------------------------- + +primRepCmmType :: DynFlags -> PrimRep -> CmmType +primRepCmmType _ VoidRep = panic "primRepCmmType:VoidRep" +primRepCmmType dflags LiftedRep = gcWord dflags +primRepCmmType dflags UnliftedRep = gcWord dflags +primRepCmmType dflags IntRep = bWord dflags +primRepCmmType dflags WordRep = bWord dflags +primRepCmmType _ Int8Rep = b8 +primRepCmmType _ Word8Rep = b8 +primRepCmmType _ Int16Rep = b16 +primRepCmmType _ Word16Rep = b16 +primRepCmmType _ Int32Rep = b32 +primRepCmmType _ Word32Rep = b32 +primRepCmmType _ Int64Rep = b64 +primRepCmmType _ Word64Rep = b64 +primRepCmmType dflags AddrRep = bWord dflags +primRepCmmType _ FloatRep = f32 +primRepCmmType _ DoubleRep = f64 +primRepCmmType _ (VecRep len rep) = vec len (primElemRepCmmType rep) + +slotCmmType :: DynFlags -> SlotTy -> CmmType +slotCmmType dflags PtrSlot = gcWord dflags +slotCmmType dflags WordSlot = bWord dflags +slotCmmType _ Word64Slot = b64 +slotCmmType _ FloatSlot = f32 +slotCmmType _ DoubleSlot = f64 + +primElemRepCmmType :: PrimElemRep -> CmmType +primElemRepCmmType Int8ElemRep = b8 +primElemRepCmmType Int16ElemRep = b16 +primElemRepCmmType Int32ElemRep = b32 +primElemRepCmmType Int64ElemRep = b64 +primElemRepCmmType Word8ElemRep = b8 +primElemRepCmmType Word16ElemRep = b16 +primElemRepCmmType Word32ElemRep = b32 +primElemRepCmmType Word64ElemRep = b64 +primElemRepCmmType FloatElemRep = f32 +primElemRepCmmType DoubleElemRep = f64 + +typeCmmType :: DynFlags -> UnaryType -> CmmType +typeCmmType dflags ty = primRepCmmType dflags (typePrimRep1 ty) + +primRepForeignHint :: PrimRep -> ForeignHint +primRepForeignHint VoidRep = panic "primRepForeignHint:VoidRep" +primRepForeignHint LiftedRep = AddrHint +primRepForeignHint UnliftedRep = AddrHint +primRepForeignHint IntRep = SignedHint +primRepForeignHint Int8Rep = SignedHint +primRepForeignHint Int16Rep = SignedHint +primRepForeignHint Int32Rep = SignedHint +primRepForeignHint Int64Rep = SignedHint +primRepForeignHint WordRep = NoHint +primRepForeignHint Word8Rep = NoHint +primRepForeignHint Word16Rep = NoHint +primRepForeignHint Word32Rep = NoHint +primRepForeignHint Word64Rep = NoHint +primRepForeignHint AddrRep = AddrHint -- NB! AddrHint, but NonPtrArg +primRepForeignHint FloatRep = NoHint +primRepForeignHint DoubleRep = NoHint +primRepForeignHint (VecRep {}) = NoHint + +slotForeignHint :: SlotTy -> ForeignHint +slotForeignHint PtrSlot = AddrHint +slotForeignHint WordSlot = NoHint +slotForeignHint Word64Slot = NoHint +slotForeignHint FloatSlot = NoHint +slotForeignHint DoubleSlot = NoHint + +typeForeignHint :: UnaryType -> ForeignHint +typeForeignHint = primRepForeignHint . typePrimRep1 + +--------------------------------------------------- +-- +-- CmmLit +-- +--------------------------------------------------- + +-- XXX: should really be Integer, since Int doesn't necessarily cover +-- the full range of target Ints. 
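+-- (For example, when cross-compiling from a 32-bit host to a 64-bit
+-- target, a host Int cannot express every value of a target word.)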
+mkIntCLit :: DynFlags -> Int -> CmmLit +mkIntCLit dflags i = CmmInt (toInteger i) (wordWidth dflags) + +mkIntExpr :: DynFlags -> Int -> CmmExpr +mkIntExpr dflags i = CmmLit $! mkIntCLit dflags i + +zeroCLit :: DynFlags -> CmmLit +zeroCLit dflags = CmmInt 0 (wordWidth dflags) + +zeroExpr :: DynFlags -> CmmExpr +zeroExpr dflags = CmmLit (zeroCLit dflags) + +mkWordCLit :: DynFlags -> Integer -> CmmLit +mkWordCLit dflags wd = CmmInt wd (wordWidth dflags) + +mkByteStringCLit + :: CLabel -> ByteString -> (CmmLit, GenCmmDecl CmmStatics info stmt) +-- We have to make a top-level decl for the string, +-- and return a literal pointing to it +mkByteStringCLit lbl bytes + = (CmmLabel lbl, CmmData (Section sec lbl) $ Statics lbl [CmmString bytes]) + where + -- This can not happen for String literals (as there \NUL is replaced by + -- C0 80). However, it can happen with Addr# literals. + sec = if 0 `BS.elem` bytes then ReadOnlyData else CString + +mkDataLits :: Section -> CLabel -> [CmmLit] -> GenCmmDecl CmmStatics info stmt +-- Build a data-segment data block +mkDataLits section lbl lits + = CmmData section (Statics lbl $ map CmmStaticLit lits) + +mkRODataLits :: CLabel -> [CmmLit] -> GenCmmDecl CmmStatics info stmt +-- Build a read-only data block +mkRODataLits lbl lits + = mkDataLits section lbl lits + where + section | any needsRelocation lits = Section RelocatableReadOnlyData lbl + | otherwise = Section ReadOnlyData lbl + needsRelocation (CmmLabel _) = True + needsRelocation (CmmLabelOff _ _) = True + needsRelocation _ = False + +mkStgWordCLit :: DynFlags -> StgWord -> CmmLit +mkStgWordCLit dflags wd = CmmInt (fromStgWord wd) (wordWidth dflags) + +packHalfWordsCLit :: DynFlags -> StgHalfWord -> StgHalfWord -> CmmLit +-- Make a single word literal in which the lower_half_word is +-- at the lower address, and the upper_half_word is at the +-- higher address +-- ToDo: consider using half-word lits instead +-- but be careful: that's vulnerable when reversed +packHalfWordsCLit dflags lower_half_word upper_half_word + = if wORDS_BIGENDIAN dflags + then mkWordCLit dflags ((l `shiftL` halfWordSizeInBits dflags) .|. u) + else mkWordCLit dflags (l .|. 
(u `shiftL` halfWordSizeInBits dflags)) + where l = fromStgHalfWord lower_half_word + u = fromStgHalfWord upper_half_word + +--------------------------------------------------- +-- +-- CmmExpr +-- +--------------------------------------------------- + +mkLblExpr :: CLabel -> CmmExpr +mkLblExpr lbl = CmmLit (CmmLabel lbl) + +cmmOffsetExpr :: DynFlags -> CmmExpr -> CmmExpr -> CmmExpr +-- assumes base and offset have the same CmmType +cmmOffsetExpr dflags e (CmmLit (CmmInt n _)) = cmmOffset dflags e (fromInteger n) +cmmOffsetExpr dflags e byte_off = CmmMachOp (MO_Add (cmmExprWidth dflags e)) [e, byte_off] + +cmmOffset :: DynFlags -> CmmExpr -> Int -> CmmExpr +cmmOffset _ e 0 = e +cmmOffset _ (CmmReg reg) byte_off = cmmRegOff reg byte_off +cmmOffset _ (CmmRegOff reg m) byte_off = cmmRegOff reg (m+byte_off) +cmmOffset _ (CmmLit lit) byte_off = CmmLit (cmmOffsetLit lit byte_off) +cmmOffset _ (CmmStackSlot area off) byte_off + = CmmStackSlot area (off - byte_off) + -- note stack area offsets increase towards lower addresses +cmmOffset _ (CmmMachOp (MO_Add rep) [expr, CmmLit (CmmInt byte_off1 _rep)]) byte_off2 + = CmmMachOp (MO_Add rep) + [expr, CmmLit (CmmInt (byte_off1 + toInteger byte_off2) rep)] +cmmOffset dflags expr byte_off + = CmmMachOp (MO_Add width) [expr, CmmLit (CmmInt (toInteger byte_off) width)] + where + width = cmmExprWidth dflags expr + +-- Smart constructor for CmmRegOff. Same caveats as cmmOffset above. +cmmRegOff :: CmmReg -> Int -> CmmExpr +cmmRegOff reg 0 = CmmReg reg +cmmRegOff reg byte_off = CmmRegOff reg byte_off + +cmmOffsetLit :: CmmLit -> Int -> CmmLit +cmmOffsetLit (CmmLabel l) byte_off = cmmLabelOff l byte_off +cmmOffsetLit (CmmLabelOff l m) byte_off = cmmLabelOff l (m+byte_off) +cmmOffsetLit (CmmLabelDiffOff l1 l2 m w) byte_off + = CmmLabelDiffOff l1 l2 (m+byte_off) w +cmmOffsetLit (CmmInt m rep) byte_off = CmmInt (m + fromIntegral byte_off) rep +cmmOffsetLit _ byte_off = pprPanic "cmmOffsetLit" (ppr byte_off) + +cmmLabelOff :: CLabel -> Int -> CmmLit +-- Smart constructor for CmmLabelOff +cmmLabelOff lbl 0 = CmmLabel lbl +cmmLabelOff lbl byte_off = CmmLabelOff lbl byte_off + +-- | Useful for creating an index into an array, with a statically known offset. +-- The type is the element type; used for making the multiplier +cmmIndex :: DynFlags + -> Width -- Width w + -> CmmExpr -- Address of vector of items of width w + -> Int -- Which element of the vector (0 based) + -> CmmExpr -- Address of i'th element +cmmIndex dflags width base idx = cmmOffset dflags base (idx * widthInBytes width) + +-- | Useful for creating an index into an array, with an unknown offset. 
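+--
+-- For instance (a sketch of the generated shape): with width 'W64' and a
+-- dynamic index @i@, @cmmIndexExpr dflags W64 base i@ produces, in effect,
+-- @base + (i << 3)@, using 'widthInLog' for the shift amount.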
+cmmIndexExpr :: DynFlags + -> Width -- Width w + -> CmmExpr -- Address of vector of items of width w + -> CmmExpr -- Which element of the vector (0 based) + -> CmmExpr -- Address of i'th element +cmmIndexExpr dflags width base (CmmLit (CmmInt n _)) = cmmIndex dflags width base (fromInteger n) +cmmIndexExpr dflags width base idx = + cmmOffsetExpr dflags base byte_off + where + idx_w = cmmExprWidth dflags idx + byte_off = CmmMachOp (MO_Shl idx_w) [idx, mkIntExpr dflags (widthInLog width)] + +cmmLoadIndex :: DynFlags -> CmmType -> CmmExpr -> Int -> CmmExpr +cmmLoadIndex dflags ty expr ix = CmmLoad (cmmIndex dflags (typeWidth ty) expr ix) ty + +-- The "B" variants take byte offsets +cmmRegOffB :: CmmReg -> ByteOff -> CmmExpr +cmmRegOffB = cmmRegOff + +cmmOffsetB :: DynFlags -> CmmExpr -> ByteOff -> CmmExpr +cmmOffsetB = cmmOffset + +cmmOffsetExprB :: DynFlags -> CmmExpr -> CmmExpr -> CmmExpr +cmmOffsetExprB = cmmOffsetExpr + +cmmLabelOffB :: CLabel -> ByteOff -> CmmLit +cmmLabelOffB = cmmLabelOff + +cmmOffsetLitB :: CmmLit -> ByteOff -> CmmLit +cmmOffsetLitB = cmmOffsetLit + +----------------------- +-- The "W" variants take word offsets + +cmmOffsetExprW :: DynFlags -> CmmExpr -> CmmExpr -> CmmExpr +-- The second arg is a *word* offset; need to change it to bytes +cmmOffsetExprW dflags e (CmmLit (CmmInt n _)) = cmmOffsetW dflags e (fromInteger n) +cmmOffsetExprW dflags e wd_off = cmmIndexExpr dflags (wordWidth dflags) e wd_off + +cmmOffsetW :: DynFlags -> CmmExpr -> WordOff -> CmmExpr +cmmOffsetW dflags e n = cmmOffsetB dflags e (wordsToBytes dflags n) + +cmmRegOffW :: DynFlags -> CmmReg -> WordOff -> CmmExpr +cmmRegOffW dflags reg wd_off = cmmRegOffB reg (wordsToBytes dflags wd_off) + +cmmOffsetLitW :: DynFlags -> CmmLit -> WordOff -> CmmLit +cmmOffsetLitW dflags lit wd_off = cmmOffsetLitB lit (wordsToBytes dflags wd_off) + +cmmLabelOffW :: DynFlags -> CLabel -> WordOff -> CmmLit +cmmLabelOffW dflags lbl wd_off = cmmLabelOffB lbl (wordsToBytes dflags wd_off) + +cmmLoadIndexW :: DynFlags -> CmmExpr -> Int -> CmmType -> CmmExpr +cmmLoadIndexW dflags base off ty = CmmLoad (cmmOffsetW dflags base off) ty + +----------------------- +cmmULtWord, cmmUGeWord, cmmUGtWord, cmmUShrWord, + cmmSLtWord, + cmmNeWord, cmmEqWord, + cmmOrWord, cmmAndWord, + cmmSubWord, cmmAddWord, cmmMulWord, cmmQuotWord + :: DynFlags -> CmmExpr -> CmmExpr -> CmmExpr +cmmOrWord dflags e1 e2 = CmmMachOp (mo_wordOr dflags) [e1, e2] +cmmAndWord dflags e1 e2 = CmmMachOp (mo_wordAnd dflags) [e1, e2] +cmmNeWord dflags e1 e2 = CmmMachOp (mo_wordNe dflags) [e1, e2] +cmmEqWord dflags e1 e2 = CmmMachOp (mo_wordEq dflags) [e1, e2] +cmmULtWord dflags e1 e2 = CmmMachOp (mo_wordULt dflags) [e1, e2] +cmmUGeWord dflags e1 e2 = CmmMachOp (mo_wordUGe dflags) [e1, e2] +cmmUGtWord dflags e1 e2 = CmmMachOp (mo_wordUGt dflags) [e1, e2] +cmmSLtWord dflags e1 e2 = CmmMachOp (mo_wordSLt dflags) [e1, e2] +cmmUShrWord dflags e1 e2 = CmmMachOp (mo_wordUShr dflags) [e1, e2] +cmmAddWord dflags e1 e2 = CmmMachOp (mo_wordAdd dflags) [e1, e2] +cmmSubWord dflags e1 e2 = CmmMachOp (mo_wordSub dflags) [e1, e2] +cmmMulWord dflags e1 e2 = CmmMachOp (mo_wordMul dflags) [e1, e2] +cmmQuotWord dflags e1 e2 = CmmMachOp (mo_wordUQuot dflags) [e1, e2] + +cmmNegate :: DynFlags -> CmmExpr -> CmmExpr +cmmNegate _ (CmmLit (CmmInt n rep)) = CmmLit (CmmInt (-n) rep) +cmmNegate dflags e = CmmMachOp (MO_S_Neg (cmmExprWidth dflags e)) [e] + +blankWord :: DynFlags -> CmmStatic +blankWord dflags = CmmUninitialised (wORD_SIZE dflags) + +cmmToWord :: DynFlags -> CmmExpr -> CmmExpr 
+cmmToWord dflags e
+  | w == word = e
+  | otherwise = CmmMachOp (MO_UU_Conv w word) [e]
+  where
+    w = cmmExprWidth dflags e
+    word = wordWidth dflags
+
+cmmMkAssign :: DynFlags -> CmmExpr -> Unique -> (CmmNode O O, CmmExpr)
+cmmMkAssign dflags expr uq =
+  let !ty = cmmExprType dflags expr
+      reg = (CmmLocal (LocalReg uq ty))
+  in  (CmmAssign reg expr, CmmReg reg)
+
+
+---------------------------------------------------
+--
+--      CmmExpr predicates
+--
+---------------------------------------------------
+
+isTrivialCmmExpr :: CmmExpr -> Bool
+isTrivialCmmExpr (CmmLoad _ _)      = False
+isTrivialCmmExpr (CmmMachOp _ _)    = False
+isTrivialCmmExpr (CmmLit _)         = True
+isTrivialCmmExpr (CmmReg _)         = True
+isTrivialCmmExpr (CmmRegOff _ _)    = True
+isTrivialCmmExpr (CmmStackSlot _ _) = panic "isTrivialCmmExpr CmmStackSlot"
+
+hasNoGlobalRegs :: CmmExpr -> Bool
+hasNoGlobalRegs (CmmLoad e _)              = hasNoGlobalRegs e
+hasNoGlobalRegs (CmmMachOp _ es)           = all hasNoGlobalRegs es
+hasNoGlobalRegs (CmmLit _)                 = True
+hasNoGlobalRegs (CmmReg (CmmLocal _))      = True
+hasNoGlobalRegs (CmmRegOff (CmmLocal _) _) = True
+hasNoGlobalRegs _ = False
+
+isLit :: CmmExpr -> Bool
+isLit (CmmLit _) = True
+isLit _          = False
+
+isComparisonExpr :: CmmExpr -> Bool
+isComparisonExpr (CmmMachOp op _) = isComparisonMachOp op
+isComparisonExpr _                = False
+
+---------------------------------------------------
+--
+--      Tagging
+--
+---------------------------------------------------
+
+-- Tag bits mask
+cmmTagMask, cmmPointerMask :: DynFlags -> CmmExpr
+cmmTagMask dflags = mkIntExpr dflags (tAG_MASK dflags)
+cmmPointerMask dflags = mkIntExpr dflags (complement (tAG_MASK dflags))
+
+-- Used to untag a possibly tagged pointer
+-- A static label need not be untagged
+cmmUntag, cmmIsTagged, cmmConstrTag1 :: DynFlags -> CmmExpr -> CmmExpr
+cmmUntag _ e@(CmmLit (CmmLabel _)) = e
+-- Default case
+cmmUntag dflags e = cmmAndWord dflags e (cmmPointerMask dflags)
+
+-- Test if a closure pointer is tagged
+cmmIsTagged dflags e = cmmNeWord dflags (cmmAndWord dflags e (cmmTagMask dflags))
+                                        (zeroExpr dflags)
+
+-- Get the constructor tag, one-based.
+cmmConstrTag1 dflags e = cmmAndWord dflags e (cmmTagMask dflags)
+
+
+-----------------------------------------------------------------------------
+-- Overlap and usage
+
+-- | Returns True if the two STG registers overlap on the specified
+-- platform, in the sense that writing to one will clobber the
+-- other. This includes the case that the two registers are the same
+-- STG register. See Note [Overlapping global registers] for details.
+regsOverlap :: DynFlags -> CmmReg -> CmmReg -> Bool
+regsOverlap dflags (CmmGlobal g) (CmmGlobal g')
+  | Just real  <- globalRegMaybe (targetPlatform dflags) g,
+    Just real' <- globalRegMaybe (targetPlatform dflags) g',
+    real == real'
+  = True
+regsOverlap _ reg reg' = reg == reg'
+
+-- | Returns True if the STG register is used by the expression, in
+-- the sense that a store to the register might affect the value of
+-- the expression.
+--
+-- We must check for overlapping registers and not just equal
+-- registers here, otherwise CmmSink may incorrectly reorder
+-- assignments that conflict due to overlap. See #10521 and Note
+-- [Overlapping global registers].
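+--
+-- For instance (assuming a platform mapping where such an overlap exists,
+-- purely for illustration): if F1 and D1 are backed by the same physical
+-- register, then
+--
+-- >  regUsedIn dflags (CmmGlobal (DoubleReg 1)) (CmmReg (CmmGlobal (FloatReg 1)))
+--
+-- must be True, even though the two 'CmmReg' values are not equal.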
+regUsedIn :: DynFlags -> CmmReg -> CmmExpr -> Bool +regUsedIn dflags = regUsedIn_ where + _ `regUsedIn_` CmmLit _ = False + reg `regUsedIn_` CmmLoad e _ = reg `regUsedIn_` e + reg `regUsedIn_` CmmReg reg' = regsOverlap dflags reg reg' + reg `regUsedIn_` CmmRegOff reg' _ = regsOverlap dflags reg reg' + reg `regUsedIn_` CmmMachOp _ es = any (reg `regUsedIn_`) es + _ `regUsedIn_` CmmStackSlot _ _ = False + +-------------------------------------------- +-- +-- mkLiveness +-- +--------------------------------------------- + +mkLiveness :: DynFlags -> [LocalReg] -> Liveness +mkLiveness _ [] = [] +mkLiveness dflags (reg:regs) + = bits ++ mkLiveness dflags regs + where + sizeW = (widthInBytes (typeWidth (localRegType reg)) + wORD_SIZE dflags - 1) + `quot` wORD_SIZE dflags + -- number of words, rounded up + bits = replicate sizeW is_non_ptr -- True <=> Non Ptr + + is_non_ptr = not $ isGcPtrType (localRegType reg) + + +-- ============================================== - +-- ============================================== - +-- ============================================== - + +--------------------------------------------------- +-- +-- Manipulating CmmGraphs +-- +--------------------------------------------------- + +modifyGraph :: (Graph n C C -> Graph n' C C) -> GenCmmGraph n -> GenCmmGraph n' +modifyGraph f g = CmmGraph {g_entry=g_entry g, g_graph=f (g_graph g)} + +toBlockMap :: CmmGraph -> LabelMap CmmBlock +toBlockMap (CmmGraph {g_graph=GMany NothingO body NothingO}) = body + +ofBlockMap :: BlockId -> LabelMap CmmBlock -> CmmGraph +ofBlockMap entry bodyMap = CmmGraph {g_entry=entry, g_graph=GMany NothingO bodyMap NothingO} + +toBlockList :: CmmGraph -> [CmmBlock] +toBlockList g = mapElems $ toBlockMap g + +-- | like 'toBlockList', but the entry block always comes first +toBlockListEntryFirst :: CmmGraph -> [CmmBlock] +toBlockListEntryFirst g + | mapNull m = [] + | otherwise = entry_block : others + where + m = toBlockMap g + entry_id = g_entry g + Just entry_block = mapLookup entry_id m + others = filter ((/= entry_id) . entryLabel) (mapElems m) + +-- | Like 'toBlockListEntryFirst', but we strive to ensure that we order blocks +-- so that the false case of a conditional jumps to the next block in the output +-- list of blocks. This matches the way OldCmm blocks were output since in +-- OldCmm the false case was a fallthrough, whereas in Cmm conditional branches +-- have both true and false successors. Block ordering can make a big difference +-- in performance in the LLVM backend. Note that we rely crucially on the order +-- of successors returned for CmmCondBranch by the NonLocal instance for CmmNode +-- defined in cmm/CmmNode.hs. 
-GBM +toBlockListEntryFirstFalseFallthrough :: CmmGraph -> [CmmBlock] +toBlockListEntryFirstFalseFallthrough g + | mapNull m = [] + | otherwise = dfs setEmpty [entry_block] + where + m = toBlockMap g + entry_id = g_entry g + Just entry_block = mapLookup entry_id m + + dfs :: LabelSet -> [CmmBlock] -> [CmmBlock] + dfs _ [] = [] + dfs visited (block:bs) + | id `setMember` visited = dfs visited bs + | otherwise = block : dfs (setInsert id visited) bs' + where id = entryLabel block + bs' = foldr add_id bs (successors block) + add_id id bs = case mapLookup id m of + Just b -> b : bs + Nothing -> bs + +ofBlockList :: BlockId -> [CmmBlock] -> CmmGraph +ofBlockList entry blocks = CmmGraph { g_entry = entry + , g_graph = GMany NothingO body NothingO } + where body = foldr addBlock emptyBody blocks + +bodyToBlockList :: Body CmmNode -> [CmmBlock] +bodyToBlockList body = mapElems body + +mapGraphNodes :: ( CmmNode C O -> CmmNode C O + , CmmNode O O -> CmmNode O O + , CmmNode O C -> CmmNode O C) + -> CmmGraph -> CmmGraph +mapGraphNodes funs@(mf,_,_) g = + ofBlockMap (entryLabel $ mf $ CmmEntry (g_entry g) GlobalScope) $ + mapMap (mapBlock3' funs) $ toBlockMap g + +mapGraphNodes1 :: (forall e x. CmmNode e x -> CmmNode e x) -> CmmGraph -> CmmGraph +mapGraphNodes1 f = modifyGraph (mapGraph f) + + +foldlGraphBlocks :: (a -> CmmBlock -> a) -> a -> CmmGraph -> a +foldlGraphBlocks k z g = mapFoldl k z $ toBlockMap g + +revPostorder :: CmmGraph -> [CmmBlock] +revPostorder g = {-# SCC "revPostorder" #-} + revPostorderFrom (toBlockMap g) (g_entry g) + +------------------------------------------------- +-- Tick utilities + +-- | Extract all tick annotations from the given block +blockTicks :: Block CmmNode C C -> [CmmTickish] +blockTicks b = reverse $ foldBlockNodesF goStmt b [] + where goStmt :: CmmNode e x -> [CmmTickish] -> [CmmTickish] + goStmt (CmmTick t) ts = t:ts + goStmt _other ts = ts + + +-- ----------------------------------------------------------------------------- +-- Access to common global registers + +baseExpr, spExpr, hpExpr, currentTSOExpr, currentNurseryExpr, + spLimExpr, hpLimExpr, cccsExpr :: CmmExpr +baseExpr = CmmReg baseReg +spExpr = CmmReg spReg +spLimExpr = CmmReg spLimReg +hpExpr = CmmReg hpReg +hpLimExpr = CmmReg hpLimReg +currentTSOExpr = CmmReg currentTSOReg +currentNurseryExpr = CmmReg currentNurseryReg +cccsExpr = CmmReg cccsReg diff --git a/compiler/GHC/Cmm/cmm-notes b/compiler/GHC/Cmm/cmm-notes new file mode 100644 index 0000000000..d664a195b7 --- /dev/null +++ b/compiler/GHC/Cmm/cmm-notes @@ -0,0 +1,184 @@ +More notes (Aug 11) +~~~~~~~~~~~~~~~~~~ +* CmmInfo.cmmToRawCmm expands info tables to their representations + (needed for .cmm files as well as the code generators) + +* Why is FCode a lazy monad? That makes it inefficient. + We want laziness to get code out one procedure at a time, + but not at the instruction level. + UPDATE (31/5/2016): FCode is strict since 09afcc9b. + +Things we did + * Remove CmmCvt.graphToZgraph (Conversion from old to new Cmm reps) + * Remove HscMain.optionallyConvertAndOrCPS (converted old Cmm to + new, ran pipeline, and converted back) + * Remove CmmDecl. Put its types in Cmm. Import Cmm into OldCmm + so it can get those types. + + +More notes (June 11) +~~~~~~~~~~~~~~~~~~~~ + +* In CmmContFlowOpt.branchChainElim, can a single block be the + successor of two calls? 
+ +* Check in ClosureInfo: + -- NB: Results here should line up with the results of SMRep.rtsClosureType + +More notes (May 11) +~~~~~~~~~~~~~~~~~~~ +In CmmNode, consider splitting CmmCall into two: call and jump + +Notes on new codegen (Aug 10) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Things to do: + - Proc points pass all arguments on the stack, adding more code and + slowing down things a lot. We either need to fix this or even better + would be to get rid of proc points. + + - Sort out Label, LabelMap, LabelSet versus BlockId, BlockEnv, BlockSet + dichotomy. Mostly this means global replace, but we also need to make + Label an instance of Outputable (probably in the Outputable module). + + EZY: We should use Label, since that's the terminology Hoopl uses. + + - AsmCodeGen has a generic Cmm optimiser; move this into new pipeline + EZY (2011-04-16): The mini-inliner has been generalized and ported, + but the constant folding and other optimizations need to still be + ported. + + - AsmCodeGen has post-native-cg branch eliminator (shortCutBranches); + we ultimately want to share this with the Cmm branch eliminator. + + - At the moment, references to global registers like Hp are "lowered" + late (in CgUtils.fixStgRegisters). We should do this early, in the + new native codegen, much in the way that we lower calling conventions. + Might need to be a bit sophisticated about aliasing. + + - Move to new Cmm rep: + * Make native CG consume New Cmm; + * Convert Old Cmm->New Cmm to keep old path alive + * Produce New Cmm when reading in .cmm files + + - Top-level SRT threading is a bit ugly + + - See "CAFs" below; we want to totally refactor the way SRTs are calculated + + - Garbage-collect https://gitlab.haskell.org/ghc/ghc/wikis/commentary/compiler/cps + moving good stuff into + https://gitlab.haskell.org/ghc/ghc/wikis/commentary/compiler/new-code-gen-pipeline + + - Currently AsmCodeGen top level calls AsmCodeGen.cmmToCmm, which is a small + C-- optimiser. It has quite a lot of boilerplate folding code in AsmCodeGen + (cmmBlockConFold, cmmStmtConFold, cmmExprConFold), before calling out to + CmmOpt. ToDo: see what optimisations are being done; and do them before + AsmCodeGen. + + - If we stick CAF and stack liveness info on a LastCall node (not LastRet/Jump) + then all CAF and stack liveness stuff be completed before we split + into separate C procedures. + + Short term: + compute and attach liveness into LastCall + right at end, split, cvt to old rep + [must split before cvt, because old rep is not expressive enough] + + Longer term: + when old rep disappears, + move the whole splitting game into the C back end *only* + (guided by the procpoint set) + +---------------------------------------------------- + Proc-points +---------------------------------------------------- + +Consider this program, which has a diamond control flow, +with a call on one branch + fn(p,x) { + h() + if b then { ... f(x) ...; q=5; goto J } + else { ...; q=7; goto J } + J: ..p...q... + } +then the join point J is a "proc-point". So, is 'p' passed to J +as a parameter? Or, if 'p' was saved on the stack anyway, perhaps +to keep it alive across the call to h(), maybe 'p' gets communicated +to J that way. This is an awkward choice. (We think that we currently +never pass variables to join points via arguments.) + +Furthermore, there is *no way* to pass q to J in a register (other +than a parameter register). + +What we want is to do register allocation across the whole caboodle. 
+Then we could drop all the code that deals with the above awkward
+decisions about spilling variables across proc-points.
+
+Note that J doesn't need an info table.
+
+What we really want is for each LastCall (not LastJump/Ret)
+to have an info table. Note that ProcPoints that are not successors
+of calls don't need an info table.
+
+Figuring out proc-points
+~~~~~~~~~~~~~~~~~~~~~~~~
+Proc-points are identified by
+GHC.Cmm.ProcPoint.minimalProcPointSet/extendPPSet. Although there isn't
+that much code, JD thinks that it could be done much more nicely using
+a dominator analysis with the Dataflow Engine.
+
+----------------------------------------------------
+       CAFs
+----------------------------------------------------
+
+* The code for a procedure f may refer to either the *closure*
+  or the *entry point* of another top-level procedure g.
+  If f is live, then so is g. f's SRT must include g's closure.
+
+* The CLabel for the entry-point/closure reveals whether g is
+  a CAF (or refers to CAFs). See the IdLabel constructor of CLabel.
+
+* The CAF-ness of the original top-level definitions is figured out
+  (by GHC.Iface.Tidy) before we generate C--. This CafInfo is only set for
+  top-level Ids; nested bindings stay with MayHaveCafRefs.
+
+* Currently an SRT contains (only) pointers to (top-level) closures.
+
+* Consider this Core code
+     f = \x -> let g = \y -> ...x...y...h1...
+               in ...h2...g...
+  and suppose that h1, h2 have IdInfo of MayHaveCafRefs.
+  Therefore, so will f, but g will not (since it's nested).
+
+  This generates C-- roughly like this:
+     f_closure: .word f_entry
+     f_entry() [info-tbl-for-f] { ...jump g_entry...jump h2... }
+     g_entry() [info-tbl-for-g] { ...jump h1... }
+
+  Note that there is no top-level closure for g (only an info table).
+  This fact (whether or not there is a top-level closure) is recorded
+  in the InfoTable attached to the CmmProc for f, g
+  INVARIANT:
+     Any out-of-Group references to an IdLabel go to
+     a Proc whose InfoTable says "I have a top-level closure".
+  Equivalently:
+     A CmmProc whose InfoTable says "I do not have a top-level
+     closure" is referred to only from its own Group.
+
+* So: info-tbl-for-f must have an SRT that keeps h1,h2 alive
+      info-tbl-for-g must have an SRT that keeps h1 (only) alive
+
+  But if we just look for the free CAF refs, we get:
+      f   h2 (only)
+      g   h1
+
+  So we need to do a transitive closure thing to flesh out
+  f's keep-alive refs to include h1.
+
+* The SRT info is the C_SRT field of Cmm.ClosureTypeInfo in a
+  CmmInfoTable attached to each CmmProc. CmmPipeline.toTops actually does
+  the attaching, right at the end of the pipeline. The C_SRT part
+  gives offsets within a single, shared table of closure pointers.
+
+* DECIDED: we can generate SRTs based on the final Cmm program
+  without knowledge of how it is generated.
+-- +-- This code generator is only supported in unregisterised mode. +-- +----------------------------------------------------------------------------- + +module GHC.CmmToC ( + writeC + ) where + +#include "HsVersions.h" + +-- Cmm stuff +import GhcPrelude + +import GHC.Cmm.BlockId +import GHC.Cmm.CLabel +import ForeignCall +import GHC.Cmm hiding (pprBBlock) +import GHC.Cmm.Ppr () -- For Outputable instances +import GHC.Cmm.Dataflow.Block +import GHC.Cmm.Dataflow.Collections +import GHC.Cmm.Dataflow.Graph +import GHC.Cmm.Utils +import GHC.Cmm.Switch + +-- Utils +import CPrim +import DynFlags +import FastString +import Outputable +import GHC.Platform +import UniqSet +import UniqFM +import Unique +import Util + +-- The rest +import Data.ByteString (ByteString) +import qualified Data.ByteString as BS +import Control.Monad.ST +import Data.Bits +import Data.Char +import Data.List +import Data.Map (Map) +import Data.Word +import System.IO +import qualified Data.Map as Map +import Control.Monad (ap) +import qualified Data.Array.Unsafe as U ( castSTUArray ) +import Data.Array.ST + +-- -------------------------------------------------------------------------- +-- Top level + +writeC :: DynFlags -> Handle -> RawCmmGroup -> IO () +writeC dflags handle cmm = printForC dflags handle (pprC cmm $$ blankLine) + +-- -------------------------------------------------------------------------- +-- Now do some real work +-- +-- for fun, we could call cmmToCmm over the tops... +-- + +pprC :: RawCmmGroup -> SDoc +pprC tops = vcat $ intersperse blankLine $ map pprTop tops + +-- +-- top level procs +-- +pprTop :: RawCmmDecl -> SDoc +pprTop (CmmProc infos clbl _in_live_regs graph) = + + (case mapLookup (g_entry graph) infos of + Nothing -> empty + Just (Statics info_clbl info_dat) -> + pprDataExterns info_dat $$ + pprWordArray info_is_in_rodata info_clbl info_dat) $$ + (vcat [ + blankLine, + extern_decls, + (if (externallyVisibleCLabel clbl) + then mkFN_ else mkIF_) (ppr clbl) <+> lbrace, + nest 8 temp_decls, + vcat (map pprBBlock blocks), + rbrace ] + ) + where + -- info tables are always in .rodata + info_is_in_rodata = True + blocks = toBlockListEntryFirst graph + (temp_decls, extern_decls) = pprTempAndExternDecls blocks + + +-- Chunks of static data. + +-- We only handle (a) arrays of word-sized things and (b) strings. + +pprTop (CmmData section (Statics lbl [CmmString str])) = + pprExternDecl lbl $$ + hcat [ + pprLocalness lbl, pprConstness (isSecConstant section), text "char ", ppr lbl, + text "[] = ", pprStringInCStyle str, semi + ] + +pprTop (CmmData section (Statics lbl [CmmUninitialised size])) = + pprExternDecl lbl $$ + hcat [ + pprLocalness lbl, pprConstness (isSecConstant section), text "char ", ppr lbl, + brackets (int size), semi + ] + +pprTop (CmmData section (Statics lbl lits)) = + pprDataExterns lits $$ + pprWordArray (isSecConstant section) lbl lits + +-- -------------------------------------------------------------------------- +-- BasicBlocks are self-contained entities: they always end in a jump. +-- +-- Like nativeGen/AsmCodeGen, we could probably reorder blocks to turn +-- as many jumps as possible into fall throughs. +-- + +pprBBlock :: CmmBlock -> SDoc +pprBBlock block = + nest 4 (pprBlockId (entryLabel block) <> colon) $$ + nest 8 (vcat (map pprStmt (blockToList nodes)) $$ pprStmt last) + where + (_, nodes, last) = blockSplit block + +-- -------------------------------------------------------------------------- +-- Info tables. Just arrays of words. 
+-- See codeGen/ClosureInfo, and nativeGen/PprMach
+
+pprWordArray :: Bool -> CLabel -> [CmmStatic] -> SDoc
+pprWordArray is_ro lbl ds
+  = sdocWithDynFlags $ \dflags ->
+    -- TODO: align closures only
+    pprExternDecl lbl $$
+    hcat [ pprLocalness lbl, pprConstness is_ro, text "StgWord"
+         , space, ppr lbl, text "[]"
+         -- See Note [StgWord alignment]
+         , pprAlignment (wordWidth dflags)
+         , text "= {" ]
+    $$ nest 8 (commafy (pprStatics dflags ds))
+    $$ text "};"
+
+pprAlignment :: Width -> SDoc
+pprAlignment words =
+     text "__attribute__((aligned(" <> int (widthInBytes words) <> text ")))"
+
+-- Note [StgWord alignment]
+-- C codegen builds static closures as StgWord C arrays (pprWordArray).
+-- Their real C type is 'StgClosure'. Macros like UNTAG_CLOSURE assume
+-- pointers to 'StgClosure' are aligned at a pointer-size boundary:
+--   a 4 byte boundary on 32-bit systems
+--   and an 8 byte boundary on 64-bit systems
+-- see TAG_MASK and TAG_BITS definition and usage.
+--
+-- It's a reasonable assumption, also known as natural alignment,
+-- although some architectures have different alignment rules.
+-- One known exception is m68k (#11395, comment:16) where:
+--   __alignof__(StgWord) == 2, sizeof(StgWord) == 4
+--
+-- Thus we explicitly increase the alignment by using an
+--   __attribute__((aligned(4)))
+-- declaration.
+
+--
+-- has to be static, if it isn't globally visible
+--
+pprLocalness :: CLabel -> SDoc
+pprLocalness lbl | not $ externallyVisibleCLabel lbl = text "static "
+                 | otherwise = empty
+
+pprConstness :: Bool -> SDoc
+pprConstness is_ro | is_ro = text "const "
+                   | otherwise = empty
+
+-- --------------------------------------------------------------------------
+-- Statements.
+--
+
+pprStmt :: CmmNode e x -> SDoc
+
+pprStmt stmt =
+    sdocWithDynFlags $ \dflags ->
+    case stmt of
+    CmmEntry{}   -> empty
+    CmmComment _ -> empty -- (hang (text "/*") 3 (ftext s)) $$ ptext (sLit "*/")
+                          -- XXX if the string contains "*/", we need to fix it
+                          -- XXX we probably want to emit these comments when
+                          -- some debugging option is on. They can get quite
+                          -- large.
+
+    CmmTick _ -> empty
+    CmmUnwind{} -> empty
+
+    CmmAssign dest src -> pprAssign dflags dest src
+
+    CmmStore dest src
+        | typeWidth rep == W64 && wordWidth dflags /= W64
+        -> (if isFloatType rep then text "ASSIGN_DBL"
+                               else ptext (sLit ("ASSIGN_Word64"))) <>
+           parens (mkP_ <> pprExpr1 dest <> comma <> pprExpr src) <> semi
+
+        | otherwise
+        -> hsep [ pprExpr (CmmLoad dest rep), equals, pprExpr src <> semi ]
+        where
+          rep = cmmExprType dflags src
+
+    CmmUnsafeForeignCall target@(ForeignTarget fn conv) results args ->
+        fnCall
+        where
+        (res_hints, arg_hints) = foreignTargetHints target
+        hresults = zip results res_hints
+        hargs = zip args arg_hints
+
+        ForeignConvention cconv _ _ ret = conv
+
+        cast_fn = parens (cCast (pprCFunType (char '*') cconv hresults hargs) fn)
+
+        -- See wiki:commentary/compiler/backends/ppr-c#prototypes
+        fnCall =
+            case fn of
+              CmmLit (CmmLabel lbl)
+                | StdCallConv <- cconv ->
+                    pprCall (ppr lbl) cconv hresults hargs
+                        -- stdcall functions must be declared with
+                        -- a function type, otherwise the C compiler
+                        -- doesn't add the @n suffix to the label. We
+                        -- can't add the @n suffix ourselves, because
+                        -- it isn't valid C.
+                | CmmNeverReturns <- ret ->
+                    pprCall cast_fn cconv hresults hargs <> semi
+                | not (isMathFun lbl) ->
+                    pprForeignCall (ppr lbl) cconv hresults hargs
+              _ ->
+                    pprCall cast_fn cconv hresults hargs <> semi
+                        -- for a dynamic call, no declaration is necessary.
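-- A rough sketch of the output shape (names invented, not verbatim
-- output): for a dynamic target the function expression is cast to the
-- matching C function-pointer type before being applied, so a
-- word-sized call r = f(x) prints along the lines of
--     _r = ((W_ (*)(W_))_f)(_x);
-- while a known, non-math C function instead goes through
-- pprForeignCall below, via a local ghcFunPtr temporary.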
+ + CmmUnsafeForeignCall (PrimTarget MO_Touch) _results _args -> empty + CmmUnsafeForeignCall (PrimTarget (MO_Prefetch_Data _)) _results _args -> empty + + CmmUnsafeForeignCall target@(PrimTarget op) results args -> + fn_call + where + cconv = CCallConv + fn = pprCallishMachOp_for_C op + + (res_hints, arg_hints) = foreignTargetHints target + hresults = zip results res_hints + hargs = zip args arg_hints + + fn_call + -- The mem primops carry an extra alignment arg. + -- We could maybe emit an alignment directive using this info. + -- We also need to cast mem primops to prevent conflicts with GCC + -- builtins (see bug #5967). + | Just _align <- machOpMemcpyishAlign op + = (text ";EFF_(" <> fn <> char ')' <> semi) $$ + pprForeignCall fn cconv hresults hargs + | otherwise + = pprCall fn cconv hresults hargs + + CmmBranch ident -> pprBranch ident + CmmCondBranch expr yes no _ -> pprCondBranch expr yes no + CmmCall { cml_target = expr } -> mkJMP_ (pprExpr expr) <> semi + CmmSwitch arg ids -> sdocWithDynFlags $ \dflags -> + pprSwitch dflags arg ids + + _other -> pprPanic "PprC.pprStmt" (ppr stmt) + +type Hinted a = (a, ForeignHint) + +pprForeignCall :: SDoc -> CCallConv -> [Hinted CmmFormal] -> [Hinted CmmActual] + -> SDoc +pprForeignCall fn cconv results args = fn_call + where + fn_call = braces ( + pprCFunType (char '*' <> text "ghcFunPtr") cconv results args <> semi + $$ text "ghcFunPtr" <+> equals <+> cast_fn <> semi + $$ pprCall (text "ghcFunPtr") cconv results args <> semi + ) + cast_fn = parens (parens (pprCFunType (char '*') cconv results args) <> fn) + +pprCFunType :: SDoc -> CCallConv -> [Hinted CmmFormal] -> [Hinted CmmActual] -> SDoc +pprCFunType ppr_fn cconv ress args + = sdocWithDynFlags $ \dflags -> + let res_type [] = text "void" + res_type [(one, hint)] = machRepHintCType (localRegType one) hint + res_type _ = panic "pprCFunType: only void or 1 return value supported" + + arg_type (expr, hint) = machRepHintCType (cmmExprType dflags expr) hint + in res_type ress <+> + parens (ccallConvAttribute cconv <> ppr_fn) <> + parens (commafy (map arg_type args)) + +-- --------------------------------------------------------------------- +-- unconditional branches +pprBranch :: BlockId -> SDoc +pprBranch ident = text "goto" <+> pprBlockId ident <> semi + + +-- --------------------------------------------------------------------- +-- conditional branches to local labels +pprCondBranch :: CmmExpr -> BlockId -> BlockId -> SDoc +pprCondBranch expr yes no + = hsep [ text "if" , parens(pprExpr expr) , + text "goto", pprBlockId yes <> semi, + text "else goto", pprBlockId no <> semi ] + +-- --------------------------------------------------------------------- +-- a local table branch +-- +-- we find the fall-through cases +-- +pprSwitch :: DynFlags -> CmmExpr -> SwitchTargets -> SDoc +pprSwitch dflags e ids + = (hang (text "switch" <+> parens ( pprExpr e ) <+> lbrace) + 4 (vcat ( map caseify pairs ) $$ def)) $$ rbrace + where + (pairs, mbdef) = switchTargetsFallThrough ids + + -- fall through case + caseify (ix:ixs, ident) = vcat (map do_fallthrough ixs) $$ final_branch ix + where + do_fallthrough ix = + hsep [ text "case" , pprHexVal ix (wordWidth dflags) <> colon , + text "/* fall through */" ] + + final_branch ix = + hsep [ text "case" , pprHexVal ix (wordWidth dflags) <> colon , + text "goto" , (pprBlockId ident) <> semi ] + + caseify (_ , _ ) = panic "pprSwitch: switch with no cases!" 
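-- A sketch of the emitted shape (block ids invented): a group such as
-- ([0x1, 0x2], ident) prints one goto preceded by fall-through cases,
-- e.g.
--     case 0x2U: /* fall through */
--     case 0x1U: goto _c8;
-- and any default from switchTargetsFallThrough becomes the C default
-- branch handled just below.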
+ + def | Just l <- mbdef = text "default: goto" <+> pprBlockId l <> semi + | otherwise = empty + +-- --------------------------------------------------------------------- +-- Expressions. +-- + +-- C Types: the invariant is that the C expression generated by +-- +-- pprExpr e +-- +-- has a type in C which is also given by +-- +-- machRepCType (cmmExprType e) +-- +-- (similar invariants apply to the rest of the pretty printer). + +pprExpr :: CmmExpr -> SDoc +pprExpr e = case e of + CmmLit lit -> pprLit lit + + + CmmLoad e ty -> sdocWithDynFlags $ \dflags -> pprLoad dflags e ty + CmmReg reg -> pprCastReg reg + CmmRegOff reg 0 -> pprCastReg reg + + -- CmmRegOff is an alias of MO_Add + CmmRegOff reg i -> sdocWithDynFlags $ \dflags -> + pprCastReg reg <> char '+' <> + pprHexVal (fromIntegral i) (wordWidth dflags) + + CmmMachOp mop args -> pprMachOpApp mop args + + CmmStackSlot _ _ -> panic "pprExpr: CmmStackSlot not supported!" + + +pprLoad :: DynFlags -> CmmExpr -> CmmType -> SDoc +pprLoad dflags e ty + | width == W64, wordWidth dflags /= W64 + = (if isFloatType ty then text "PK_DBL" + else text "PK_Word64") + <> parens (mkP_ <> pprExpr1 e) + + | otherwise + = case e of + CmmReg r | isPtrReg r && width == wordWidth dflags && not (isFloatType ty) + -> char '*' <> pprAsPtrReg r + + CmmRegOff r 0 | isPtrReg r && width == wordWidth dflags && not (isFloatType ty) + -> char '*' <> pprAsPtrReg r + + CmmRegOff r off | isPtrReg r && width == wordWidth dflags + , off `rem` wORD_SIZE dflags == 0 && not (isFloatType ty) + -- ToDo: check that the offset is a word multiple? + -- (For tagging to work, I had to avoid unaligned loads. --ARY) + -> pprAsPtrReg r <> brackets (ppr (off `shiftR` wordShift dflags)) + + _other -> cLoad e ty + where + width = typeWidth ty + +pprExpr1 :: CmmExpr -> SDoc +pprExpr1 (CmmLit lit) = pprLit1 lit +pprExpr1 e@(CmmReg _reg) = pprExpr e +pprExpr1 other = parens (pprExpr other) + +-- -------------------------------------------------------------------------- +-- MachOp applications + +pprMachOpApp :: MachOp -> [CmmExpr] -> SDoc + +pprMachOpApp op args + | isMulMayOfloOp op + = text "mulIntMayOflo" <> parens (commafy (map pprExpr args)) + where isMulMayOfloOp (MO_U_MulMayOflo _) = True + isMulMayOfloOp (MO_S_MulMayOflo _) = True + isMulMayOfloOp _ = False + +pprMachOpApp mop args + | Just ty <- machOpNeedsCast mop + = ty <> parens (pprMachOpApp' mop args) + | otherwise + = pprMachOpApp' mop args + +-- Comparisons in C have type 'int', but we want type W_ (this is what +-- resultRepOfMachOp says). The other C operations inherit their type +-- from their operands, so no casting is required. 
+machOpNeedsCast :: MachOp -> Maybe SDoc +machOpNeedsCast mop + | isComparisonMachOp mop = Just mkW_ + | otherwise = Nothing + +pprMachOpApp' :: MachOp -> [CmmExpr] -> SDoc +pprMachOpApp' mop args + = case args of + -- dyadic + [x,y] -> pprArg x <+> pprMachOp_for_C mop <+> pprArg y + + -- unary + [x] -> pprMachOp_for_C mop <> parens (pprArg x) + + _ -> panic "PprC.pprMachOp : machop with wrong number of args" + + where + -- Cast needed for signed integer ops + pprArg e | signedOp mop = sdocWithDynFlags $ \dflags -> + cCast (machRep_S_CType (typeWidth (cmmExprType dflags e))) e + | needsFCasts mop = sdocWithDynFlags $ \dflags -> + cCast (machRep_F_CType (typeWidth (cmmExprType dflags e))) e + | otherwise = pprExpr1 e + needsFCasts (MO_F_Eq _) = False + needsFCasts (MO_F_Ne _) = False + needsFCasts (MO_F_Neg _) = True + needsFCasts (MO_F_Quot _) = True + needsFCasts mop = floatComparison mop + +-- -------------------------------------------------------------------------- +-- Literals + +pprLit :: CmmLit -> SDoc +pprLit lit = case lit of + CmmInt i rep -> pprHexVal i rep + + CmmFloat f w -> parens (machRep_F_CType w) <> str + where d = fromRational f :: Double + str | isInfinite d && d < 0 = text "-INFINITY" + | isInfinite d = text "INFINITY" + | isNaN d = text "NAN" + | otherwise = text (show d) + -- these constants come from <math.h> + -- see #1861 + + CmmVec {} -> panic "PprC printing vector literal" + + CmmBlock bid -> mkW_ <> pprCLabelAddr (infoTblLbl bid) + CmmHighStackMark -> panic "PprC printing high stack mark" + CmmLabel clbl -> mkW_ <> pprCLabelAddr clbl + CmmLabelOff clbl i -> mkW_ <> pprCLabelAddr clbl <> char '+' <> int i + CmmLabelDiffOff clbl1 _ i _ -- non-word widths not supported via C + -- WARNING: + -- * the lit must occur in the info table clbl2 + -- * clbl1 must be an SRT, a slow entry point or a large bitmap + -> mkW_ <> pprCLabelAddr clbl1 <> char '+' <> int i + + where + pprCLabelAddr lbl = char '&' <> ppr lbl + +pprLit1 :: CmmLit -> SDoc +pprLit1 lit@(CmmLabelOff _ _) = parens (pprLit lit) +pprLit1 lit@(CmmLabelDiffOff _ _ _ _) = parens (pprLit lit) +pprLit1 lit@(CmmFloat _ _) = parens (pprLit lit) +pprLit1 other = pprLit other + +-- --------------------------------------------------------------------------- +-- Static data + +pprStatics :: DynFlags -> [CmmStatic] -> [SDoc] +pprStatics _ [] = [] +pprStatics dflags (CmmStaticLit (CmmFloat f W32) : rest) + -- odd numbers of floats are padded to a word by mkVirtHeapOffsetsWithPadding + | wORD_SIZE dflags == 8, CmmStaticLit (CmmInt 0 W32) : rest' <- rest + = pprLit1 (floatToWord dflags f) : pprStatics dflags rest' + -- adjacent floats aren't padded but combined into a single word + | wORD_SIZE dflags == 8, CmmStaticLit (CmmFloat g W32) : rest' <- rest + = pprLit1 (floatPairToWord dflags f g) : pprStatics dflags rest' + | wORD_SIZE dflags == 4 + = pprLit1 (floatToWord dflags f) : pprStatics dflags rest + | otherwise + = pprPanic "pprStatics: float" (vcat (map ppr' rest)) + where ppr' (CmmStaticLit l) = sdocWithDynFlags $ \dflags -> + ppr (cmmLitType dflags l) + ppr' _other = text "bad static!" 
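-- A worked example of the guards above (a sketch): on a 64-bit
-- little-endian target, the adjacent literals
--     [CmmStaticLit (CmmFloat 1.0 W32), CmmStaticLit (CmmFloat 2.0 W32)]
-- take the second guard and are combined by floatPairToWord into the
-- single word 0x400000003F800000 (2.0f in the high half, 1.0f in the
-- low half).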
+pprStatics dflags (CmmStaticLit (CmmFloat f W64) : rest) + = map pprLit1 (doubleToWords dflags f) ++ pprStatics dflags rest + +pprStatics dflags (CmmStaticLit (CmmInt i W64) : rest) + | wordWidth dflags == W32 + = if wORDS_BIGENDIAN dflags + then pprStatics dflags (CmmStaticLit (CmmInt q W32) : + CmmStaticLit (CmmInt r W32) : rest) + else pprStatics dflags (CmmStaticLit (CmmInt r W32) : + CmmStaticLit (CmmInt q W32) : rest) + where r = i .&. 0xffffffff + q = i `shiftR` 32 +pprStatics dflags (CmmStaticLit (CmmInt a W32) : + CmmStaticLit (CmmInt b W32) : rest) + | wordWidth dflags == W64 + = if wORDS_BIGENDIAN dflags + then pprStatics dflags (CmmStaticLit (CmmInt ((shiftL a 32) .|. b) W64) : + rest) + else pprStatics dflags (CmmStaticLit (CmmInt ((shiftL b 32) .|. a) W64) : + rest) +pprStatics dflags (CmmStaticLit (CmmInt a W16) : + CmmStaticLit (CmmInt b W16) : rest) + | wordWidth dflags == W32 + = if wORDS_BIGENDIAN dflags + then pprStatics dflags (CmmStaticLit (CmmInt ((shiftL a 16) .|. b) W32) : + rest) + else pprStatics dflags (CmmStaticLit (CmmInt ((shiftL b 16) .|. a) W32) : + rest) +pprStatics dflags (CmmStaticLit (CmmInt _ w) : _) + | w /= wordWidth dflags + = pprPanic "pprStatics: cannot emit a non-word-sized static literal" (ppr w) +pprStatics dflags (CmmStaticLit lit : rest) + = pprLit1 lit : pprStatics dflags rest +pprStatics _ (other : _) + = pprPanic "pprStatics: other" (pprStatic other) + +pprStatic :: CmmStatic -> SDoc +pprStatic s = case s of + + CmmStaticLit lit -> nest 4 (pprLit lit) + CmmUninitialised i -> nest 4 (mkC_ <> brackets (int i)) + + -- these should be inlined, like the old .hc + CmmString s' -> nest 4 (mkW_ <> parens(pprStringInCStyle s')) + + +-- --------------------------------------------------------------------------- +-- Block Ids + +pprBlockId :: BlockId -> SDoc +pprBlockId b = char '_' <> ppr (getUnique b) + +-- -------------------------------------------------------------------------- +-- Print a MachOp in a way suitable for emitting via C. +-- + +pprMachOp_for_C :: MachOp -> SDoc + +pprMachOp_for_C mop = case mop of + + -- Integer operations + MO_Add _ -> char '+' + MO_Sub _ -> char '-' + MO_Eq _ -> text "==" + MO_Ne _ -> text "!=" + MO_Mul _ -> char '*' + + MO_S_Quot _ -> char '/' + MO_S_Rem _ -> char '%' + MO_S_Neg _ -> char '-' + + MO_U_Quot _ -> char '/' + MO_U_Rem _ -> char '%' + + -- & Floating-point operations + MO_F_Add _ -> char '+' + MO_F_Sub _ -> char '-' + MO_F_Neg _ -> char '-' + MO_F_Mul _ -> char '*' + MO_F_Quot _ -> char '/' + + -- Signed comparisons + MO_S_Ge _ -> text ">=" + MO_S_Le _ -> text "<=" + MO_S_Gt _ -> char '>' + MO_S_Lt _ -> char '<' + + -- & Unsigned comparisons + MO_U_Ge _ -> text ">=" + MO_U_Le _ -> text "<=" + MO_U_Gt _ -> char '>' + MO_U_Lt _ -> char '<' + + -- & Floating-point comparisons + MO_F_Eq _ -> text "==" + MO_F_Ne _ -> text "!=" + MO_F_Ge _ -> text ">=" + MO_F_Le _ -> text "<=" + MO_F_Gt _ -> char '>' + MO_F_Lt _ -> char '<' + + -- Bitwise operations. Not all of these may be supported at all + -- sizes, and only integral MachReps are valid. + MO_And _ -> char '&' + MO_Or _ -> char '|' + MO_Xor _ -> char '^' + MO_Not _ -> char '~' + MO_Shl _ -> text "<<" + MO_U_Shr _ -> text ">>" -- unsigned shift right + MO_S_Shr _ -> text ">>" -- signed shift right + +-- Conversions. Some of these will be NOPs, but never those that convert +-- between ints and floats. +-- Floating-point conversions use the signed variant. 
+-- We won't know to generate (void*) casts here, but maybe from +-- context elsewhere + +-- noop casts + MO_UU_Conv from to | from == to -> empty + MO_UU_Conv _from to -> parens (machRep_U_CType to) + + MO_SS_Conv from to | from == to -> empty + MO_SS_Conv _from to -> parens (machRep_S_CType to) + + MO_XX_Conv from to | from == to -> empty + MO_XX_Conv _from to -> parens (machRep_U_CType to) + + MO_FF_Conv from to | from == to -> empty + MO_FF_Conv _from to -> parens (machRep_F_CType to) + + MO_SF_Conv _from to -> parens (machRep_F_CType to) + MO_FS_Conv _from to -> parens (machRep_S_CType to) + + MO_S_MulMayOflo _ -> pprTrace "offending mop:" + (text "MO_S_MulMayOflo") + (panic $ "PprC.pprMachOp_for_C: MO_S_MulMayOflo" + ++ " should have been handled earlier!") + MO_U_MulMayOflo _ -> pprTrace "offending mop:" + (text "MO_U_MulMayOflo") + (panic $ "PprC.pprMachOp_for_C: MO_U_MulMayOflo" + ++ " should have been handled earlier!") + + MO_V_Insert {} -> pprTrace "offending mop:" + (text "MO_V_Insert") + (panic $ "PprC.pprMachOp_for_C: MO_V_Insert" + ++ " should have been handled earlier!") + MO_V_Extract {} -> pprTrace "offending mop:" + (text "MO_V_Extract") + (panic $ "PprC.pprMachOp_for_C: MO_V_Extract" + ++ " should have been handled earlier!") + + MO_V_Add {} -> pprTrace "offending mop:" + (text "MO_V_Add") + (panic $ "PprC.pprMachOp_for_C: MO_V_Add" + ++ " should have been handled earlier!") + MO_V_Sub {} -> pprTrace "offending mop:" + (text "MO_V_Sub") + (panic $ "PprC.pprMachOp_for_C: MO_V_Sub" + ++ " should have been handled earlier!") + MO_V_Mul {} -> pprTrace "offending mop:" + (text "MO_V_Mul") + (panic $ "PprC.pprMachOp_for_C: MO_V_Mul" + ++ " should have been handled earlier!") + + MO_VS_Quot {} -> pprTrace "offending mop:" + (text "MO_VS_Quot") + (panic $ "PprC.pprMachOp_for_C: MO_VS_Quot" + ++ " should have been handled earlier!") + MO_VS_Rem {} -> pprTrace "offending mop:" + (text "MO_VS_Rem") + (panic $ "PprC.pprMachOp_for_C: MO_VS_Rem" + ++ " should have been handled earlier!") + MO_VS_Neg {} -> pprTrace "offending mop:" + (text "MO_VS_Neg") + (panic $ "PprC.pprMachOp_for_C: MO_VS_Neg" + ++ " should have been handled earlier!") + + MO_VU_Quot {} -> pprTrace "offending mop:" + (text "MO_VU_Quot") + (panic $ "PprC.pprMachOp_for_C: MO_VU_Quot" + ++ " should have been handled earlier!") + MO_VU_Rem {} -> pprTrace "offending mop:" + (text "MO_VU_Rem") + (panic $ "PprC.pprMachOp_for_C: MO_VU_Rem" + ++ " should have been handled earlier!") + + MO_VF_Insert {} -> pprTrace "offending mop:" + (text "MO_VF_Insert") + (panic $ "PprC.pprMachOp_for_C: MO_VF_Insert" + ++ " should have been handled earlier!") + MO_VF_Extract {} -> pprTrace "offending mop:" + (text "MO_VF_Extract") + (panic $ "PprC.pprMachOp_for_C: MO_VF_Extract" + ++ " should have been handled earlier!") + + MO_VF_Add {} -> pprTrace "offending mop:" + (text "MO_VF_Add") + (panic $ "PprC.pprMachOp_for_C: MO_VF_Add" + ++ " should have been handled earlier!") + MO_VF_Sub {} -> pprTrace "offending mop:" + (text "MO_VF_Sub") + (panic $ "PprC.pprMachOp_for_C: MO_VF_Sub" + ++ " should have been handled earlier!") + MO_VF_Neg {} -> pprTrace "offending mop:" + (text "MO_VF_Neg") + (panic $ "PprC.pprMachOp_for_C: MO_VF_Neg" + ++ " should have been handled earlier!") + MO_VF_Mul {} -> pprTrace "offending mop:" + (text "MO_VF_Mul") + (panic $ "PprC.pprMachOp_for_C: MO_VF_Mul" + ++ " should have been handled earlier!") + MO_VF_Quot {} -> pprTrace "offending mop:" + (text "MO_VF_Quot") + (panic $ "PprC.pprMachOp_for_C: MO_VF_Quot" + ++ 
" should have been handled earlier!") + + MO_AlignmentCheck {} -> panic "-falignment-santisation not supported by unregisterised backend" + +signedOp :: MachOp -> Bool -- Argument type(s) are signed ints +signedOp (MO_S_Quot _) = True +signedOp (MO_S_Rem _) = True +signedOp (MO_S_Neg _) = True +signedOp (MO_S_Ge _) = True +signedOp (MO_S_Le _) = True +signedOp (MO_S_Gt _) = True +signedOp (MO_S_Lt _) = True +signedOp (MO_S_Shr _) = True +signedOp (MO_SS_Conv _ _) = True +signedOp (MO_SF_Conv _ _) = True +signedOp _ = False + +floatComparison :: MachOp -> Bool -- comparison between float args +floatComparison (MO_F_Eq _) = True +floatComparison (MO_F_Ne _) = True +floatComparison (MO_F_Ge _) = True +floatComparison (MO_F_Le _) = True +floatComparison (MO_F_Gt _) = True +floatComparison (MO_F_Lt _) = True +floatComparison _ = False + +-- --------------------------------------------------------------------- +-- tend to be implemented by foreign calls + +pprCallishMachOp_for_C :: CallishMachOp -> SDoc + +pprCallishMachOp_for_C mop + = case mop of + MO_F64_Pwr -> text "pow" + MO_F64_Sin -> text "sin" + MO_F64_Cos -> text "cos" + MO_F64_Tan -> text "tan" + MO_F64_Sinh -> text "sinh" + MO_F64_Cosh -> text "cosh" + MO_F64_Tanh -> text "tanh" + MO_F64_Asin -> text "asin" + MO_F64_Acos -> text "acos" + MO_F64_Atanh -> text "atanh" + MO_F64_Asinh -> text "asinh" + MO_F64_Acosh -> text "acosh" + MO_F64_Atan -> text "atan" + MO_F64_Log -> text "log" + MO_F64_Log1P -> text "log1p" + MO_F64_Exp -> text "exp" + MO_F64_ExpM1 -> text "expm1" + MO_F64_Sqrt -> text "sqrt" + MO_F64_Fabs -> text "fabs" + MO_F32_Pwr -> text "powf" + MO_F32_Sin -> text "sinf" + MO_F32_Cos -> text "cosf" + MO_F32_Tan -> text "tanf" + MO_F32_Sinh -> text "sinhf" + MO_F32_Cosh -> text "coshf" + MO_F32_Tanh -> text "tanhf" + MO_F32_Asin -> text "asinf" + MO_F32_Acos -> text "acosf" + MO_F32_Atan -> text "atanf" + MO_F32_Asinh -> text "asinhf" + MO_F32_Acosh -> text "acoshf" + MO_F32_Atanh -> text "atanhf" + MO_F32_Log -> text "logf" + MO_F32_Log1P -> text "log1pf" + MO_F32_Exp -> text "expf" + MO_F32_ExpM1 -> text "expm1f" + MO_F32_Sqrt -> text "sqrtf" + MO_F32_Fabs -> text "fabsf" + MO_ReadBarrier -> text "load_load_barrier" + MO_WriteBarrier -> text "write_barrier" + MO_Memcpy _ -> text "memcpy" + MO_Memset _ -> text "memset" + MO_Memmove _ -> text "memmove" + MO_Memcmp _ -> text "memcmp" + (MO_BSwap w) -> ptext (sLit $ bSwapLabel w) + (MO_BRev w) -> ptext (sLit $ bRevLabel w) + (MO_PopCnt w) -> ptext (sLit $ popCntLabel w) + (MO_Pext w) -> ptext (sLit $ pextLabel w) + (MO_Pdep w) -> ptext (sLit $ pdepLabel w) + (MO_Clz w) -> ptext (sLit $ clzLabel w) + (MO_Ctz w) -> ptext (sLit $ ctzLabel w) + (MO_AtomicRMW w amop) -> ptext (sLit $ atomicRMWLabel w amop) + (MO_Cmpxchg w) -> ptext (sLit $ cmpxchgLabel w) + (MO_AtomicRead w) -> ptext (sLit $ atomicReadLabel w) + (MO_AtomicWrite w) -> ptext (sLit $ atomicWriteLabel w) + (MO_UF_Conv w) -> ptext (sLit $ word2FloatLabel w) + + MO_S_Mul2 {} -> unsupported + MO_S_QuotRem {} -> unsupported + MO_U_QuotRem {} -> unsupported + MO_U_QuotRem2 {} -> unsupported + MO_Add2 {} -> unsupported + MO_AddWordC {} -> unsupported + MO_SubWordC {} -> unsupported + MO_AddIntC {} -> unsupported + MO_SubIntC {} -> unsupported + MO_U_Mul2 {} -> unsupported + MO_Touch -> unsupported + (MO_Prefetch_Data _ ) -> unsupported + --- we could support prefetch via "__builtin_prefetch" + --- Not adding it for now + where unsupported = panic ("pprCallishMachOp_for_C: " ++ show mop + ++ " not supported!") + +-- 
--------------------------------------------------------------------- +-- Useful #defines +-- + +mkJMP_, mkFN_, mkIF_ :: SDoc -> SDoc + +mkJMP_ i = text "JMP_" <> parens i +mkFN_ i = text "FN_" <> parens i -- externally visible function +mkIF_ i = text "IF_" <> parens i -- locally visible + +-- from includes/Stg.h +-- +mkC_,mkW_,mkP_ :: SDoc + +mkC_ = text "(C_)" -- StgChar +mkW_ = text "(W_)" -- StgWord +mkP_ = text "(P_)" -- StgWord* + +-- --------------------------------------------------------------------- +-- +-- Assignments +-- +-- Generating assignments is what we're all about, here +-- +pprAssign :: DynFlags -> CmmReg -> CmmExpr -> SDoc + +-- dest is a reg, rhs is a reg +pprAssign _ r1 (CmmReg r2) + | isPtrReg r1 && isPtrReg r2 + = hcat [ pprAsPtrReg r1, equals, pprAsPtrReg r2, semi ] + +-- dest is a reg, rhs is a CmmRegOff +pprAssign dflags r1 (CmmRegOff r2 off) + | isPtrReg r1 && isPtrReg r2 && (off `rem` wORD_SIZE dflags == 0) + = hcat [ pprAsPtrReg r1, equals, pprAsPtrReg r2, op, int off', semi ] + where + off1 = off `shiftR` wordShift dflags + + (op,off') | off >= 0 = (char '+', off1) + | otherwise = (char '-', -off1) + +-- dest is a reg, rhs is anything. +-- We can't cast the lvalue, so we have to cast the rhs if necessary. Casting +-- the lvalue elicits a warning from new GCC versions (3.4+). +pprAssign _ r1 r2 + | isFixedPtrReg r1 = mkAssign (mkP_ <> pprExpr1 r2) + | Just ty <- strangeRegType r1 = mkAssign (parens ty <> pprExpr1 r2) + | otherwise = mkAssign (pprExpr r2) + where mkAssign x = if r1 == CmmGlobal BaseReg + then text "ASSIGN_BaseReg" <> parens x <> semi + else pprReg r1 <> text " = " <> x <> semi + +-- --------------------------------------------------------------------- +-- Registers + +pprCastReg :: CmmReg -> SDoc +pprCastReg reg + | isStrangeTypeReg reg = mkW_ <> pprReg reg + | otherwise = pprReg reg + +-- True if (pprReg reg) will give an expression with type StgPtr. We +-- need to take care with pointer arithmetic on registers with type +-- StgPtr. +isFixedPtrReg :: CmmReg -> Bool +isFixedPtrReg (CmmLocal _) = False +isFixedPtrReg (CmmGlobal r) = isFixedPtrGlobalReg r + +-- True if (pprAsPtrReg reg) will give an expression with type StgPtr +-- JD: THIS IS HORRIBLE AND SHOULD BE RENAMED, AT THE VERY LEAST. +-- THE GARBAGE WITH THE VNonGcPtr HELPS MATCH THE OLD CODE GENERATOR'S OUTPUT; +-- I'M NOT SURE IF IT SHOULD REALLY STAY THAT WAY. +isPtrReg :: CmmReg -> Bool +isPtrReg (CmmLocal _) = False +isPtrReg (CmmGlobal (VanillaReg _ VGcPtr)) = True -- if we print via pprAsPtrReg +isPtrReg (CmmGlobal (VanillaReg _ VNonGcPtr)) = False -- if we print via pprAsPtrReg +isPtrReg (CmmGlobal reg) = isFixedPtrGlobalReg reg + +-- True if this global reg has type StgPtr +isFixedPtrGlobalReg :: GlobalReg -> Bool +isFixedPtrGlobalReg Sp = True +isFixedPtrGlobalReg Hp = True +isFixedPtrGlobalReg HpLim = True +isFixedPtrGlobalReg SpLim = True +isFixedPtrGlobalReg _ = False + +-- True if in C this register doesn't have the type given by +-- (machRepCType (cmmRegType reg)), so it has to be cast. 
+isStrangeTypeReg :: CmmReg -> Bool
+isStrangeTypeReg (CmmLocal _)  = False
+isStrangeTypeReg (CmmGlobal g) = isStrangeTypeGlobal g
+
+isStrangeTypeGlobal :: GlobalReg -> Bool
+isStrangeTypeGlobal CCCS           = True
+isStrangeTypeGlobal CurrentTSO     = True
+isStrangeTypeGlobal CurrentNursery = True
+isStrangeTypeGlobal BaseReg        = True
+isStrangeTypeGlobal r              = isFixedPtrGlobalReg r
+
+strangeRegType :: CmmReg -> Maybe SDoc
+strangeRegType (CmmGlobal CCCS) = Just (text "struct CostCentreStack_ *")
+strangeRegType (CmmGlobal CurrentTSO) = Just (text "struct StgTSO_ *")
+strangeRegType (CmmGlobal CurrentNursery) = Just (text "struct bdescr_ *")
+strangeRegType (CmmGlobal BaseReg) = Just (text "struct StgRegTable_ *")
+strangeRegType _ = Nothing
+
+-- pprReg just prints the register name.
+--
+pprReg :: CmmReg -> SDoc
+pprReg r = case r of
+        CmmLocal  local  -> pprLocalReg local
+        CmmGlobal global -> pprGlobalReg global
+
+pprAsPtrReg :: CmmReg -> SDoc
+pprAsPtrReg (CmmGlobal (VanillaReg n gcp))
+  = WARN( gcp /= VGcPtr, ppr n ) char 'R' <> int n <> text ".p"
+pprAsPtrReg other_reg = pprReg other_reg
+
+pprGlobalReg :: GlobalReg -> SDoc
+pprGlobalReg gr = case gr of
+    VanillaReg n _ -> char 'R' <> int n <> text ".w"
+        -- pprGlobalReg prints a VanillaReg as a .w regardless
+        -- Example:     R1.w = R1.w & (-0x8UL);
+        --              JMP_(*R1.p);
+    FloatReg   n   -> char 'F' <> int n
+    DoubleReg  n   -> char 'D' <> int n
+    LongReg    n   -> char 'L' <> int n
+    Sp             -> text "Sp"
+    SpLim          -> text "SpLim"
+    Hp             -> text "Hp"
+    HpLim          -> text "HpLim"
+    CCCS           -> text "CCCS"
+    CurrentTSO     -> text "CurrentTSO"
+    CurrentNursery -> text "CurrentNursery"
+    HpAlloc        -> text "HpAlloc"
+    BaseReg        -> text "BaseReg"
+    EagerBlackholeInfo -> text "stg_EAGER_BLACKHOLE_info"
+    GCEnter1       -> text "stg_gc_enter_1"
+    GCFun          -> text "stg_gc_fun"
+    other          -> panic $ "pprGlobalReg: Unsupported register: " ++ show other
+
+pprLocalReg :: LocalReg -> SDoc
+pprLocalReg (LocalReg uniq _) = char '_' <> ppr uniq
+
+-- -----------------------------------------------------------------------------
+-- Foreign Calls
+
+pprCall :: SDoc -> CCallConv -> [Hinted CmmFormal] -> [Hinted CmmActual] -> SDoc
+pprCall ppr_fn cconv results args
+  | not (is_cishCC cconv)
+  = panic $ "pprCall: unknown calling convention"
+
+  | otherwise
+  =
+    ppr_assign results (ppr_fn <> parens (commafy (map pprArg args))) <> semi
+  where
+     ppr_assign []           rhs = rhs
+     ppr_assign [(one,hint)] rhs
+         = pprLocalReg one <> text " = "
+                 <> pprUnHint hint (localRegType one) <> rhs
+     ppr_assign _other _rhs = panic "pprCall: multiple results"
+
+     pprArg (expr, AddrHint)
+        = cCast (text "void *") expr
+        -- see comment by machRepHintCType below
+     pprArg (expr, SignedHint)
+        = sdocWithDynFlags $ \dflags ->
+          cCast (machRep_S_CType $ typeWidth $ cmmExprType dflags expr) expr
+     pprArg (expr, _other)
+        = pprExpr expr
+
+     pprUnHint AddrHint   rep = parens (machRepCType rep)
+     pprUnHint SignedHint rep = parens (machRepCType rep)
+     pprUnHint _          _   = empty
+
+-- Currently only the C-like calling conventions count as "cish" and are
+-- supported here, but this might change in the future...
+is_cishCC :: CCallConv -> Bool
+is_cishCC CCallConv          = True
+is_cishCC CApiConv           = True
+is_cishCC StdCallConv        = True
+is_cishCC PrimCallConv       = False
+is_cishCC JavaScriptCallConv = False
+
+-- ---------------------------------------------------------------------
+-- Find and print local and external declarations for a list of
+-- Cmm statements.
+--
+pprTempAndExternDecls :: [CmmBlock] -> (SDoc{-temps-}, SDoc{-externs-})
+pprTempAndExternDecls stmts
+  = (pprUFM (getUniqSet temps) (vcat .
map pprTempDecl), + vcat (map pprExternDecl (Map.keys lbls))) + where (temps, lbls) = runTE (mapM_ te_BB stmts) + +pprDataExterns :: [CmmStatic] -> SDoc +pprDataExterns statics + = vcat (map pprExternDecl (Map.keys lbls)) + where (_, lbls) = runTE (mapM_ te_Static statics) + +pprTempDecl :: LocalReg -> SDoc +pprTempDecl l@(LocalReg _ rep) + = hcat [ machRepCType rep, space, pprLocalReg l, semi ] + +pprExternDecl :: CLabel -> SDoc +pprExternDecl lbl + -- do not print anything for "known external" things + | not (needsCDecl lbl) = empty + | Just sz <- foreignLabelStdcallInfo lbl = stdcall_decl sz + | otherwise = + hcat [ visibility, label_type lbl , lparen, ppr lbl, text ");" + -- occasionally useful to see label type + -- , text "/* ", pprDebugCLabel lbl, text " */" + ] + where + label_type lbl | isBytesLabel lbl = text "B_" + | isForeignLabel lbl && isCFunctionLabel lbl + = text "FF_" + | isCFunctionLabel lbl = text "F_" + | isStaticClosureLabel lbl = text "C_" + -- generic .rodata labels + | isSomeRODataLabel lbl = text "RO_" + -- generic .data labels (common case) + | otherwise = text "RW_" + + visibility + | externallyVisibleCLabel lbl = char 'E' + | otherwise = char 'I' + + -- If the label we want to refer to is a stdcall function (on Windows) then + -- we must generate an appropriate prototype for it, so that the C compiler will + -- add the @n suffix to the label (#2276) + stdcall_decl sz = sdocWithDynFlags $ \dflags -> + text "extern __attribute__((stdcall)) void " <> ppr lbl + <> parens (commafy (replicate (sz `quot` wORD_SIZE dflags) (machRep_U_CType (wordWidth dflags)))) + <> semi + +type TEState = (UniqSet LocalReg, Map CLabel ()) +newtype TE a = TE { unTE :: TEState -> (a, TEState) } deriving (Functor) + +instance Applicative TE where + pure a = TE $ \s -> (a, s) + (<*>) = ap + +instance Monad TE where + TE m >>= k = TE $ \s -> case m s of (a, s') -> unTE (k a) s' + +te_lbl :: CLabel -> TE () +te_lbl lbl = TE $ \(temps,lbls) -> ((), (temps, Map.insert lbl () lbls)) + +te_temp :: LocalReg -> TE () +te_temp r = TE $ \(temps,lbls) -> ((), (addOneToUniqSet temps r, lbls)) + +runTE :: TE () -> TEState +runTE (TE m) = snd (m (emptyUniqSet, Map.empty)) + +te_Static :: CmmStatic -> TE () +te_Static (CmmStaticLit lit) = te_Lit lit +te_Static _ = return () + +te_BB :: CmmBlock -> TE () +te_BB block = mapM_ te_Stmt (blockToList mid) >> te_Stmt last + where (_, mid, last) = blockSplit block + +te_Lit :: CmmLit -> TE () +te_Lit (CmmLabel l) = te_lbl l +te_Lit (CmmLabelOff l _) = te_lbl l +te_Lit (CmmLabelDiffOff l1 _ _ _) = te_lbl l1 +te_Lit _ = return () + +te_Stmt :: CmmNode e x -> TE () +te_Stmt (CmmAssign r e) = te_Reg r >> te_Expr e +te_Stmt (CmmStore l r) = te_Expr l >> te_Expr r +te_Stmt (CmmUnsafeForeignCall target rs es) + = do te_Target target + mapM_ te_temp rs + mapM_ te_Expr es +te_Stmt (CmmCondBranch e _ _ _) = te_Expr e +te_Stmt (CmmSwitch e _) = te_Expr e +te_Stmt (CmmCall { cml_target = e }) = te_Expr e +te_Stmt _ = return () + +te_Target :: ForeignTarget -> TE () +te_Target (ForeignTarget e _) = te_Expr e +te_Target (PrimTarget{}) = return () + +te_Expr :: CmmExpr -> TE () +te_Expr (CmmLit lit) = te_Lit lit +te_Expr (CmmLoad e _) = te_Expr e +te_Expr (CmmReg r) = te_Reg r +te_Expr (CmmMachOp _ es) = mapM_ te_Expr es +te_Expr (CmmRegOff r _) = te_Reg r +te_Expr (CmmStackSlot _ _) = panic "te_Expr: CmmStackSlot not supported!" 
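-- As a small illustration (names invented): running
--     runTE (te_Stmt (CmmAssign (CmmLocal r) (CmmLit (CmmLabel lbl))))
-- leaves r in the UniqSet of temporaries and lbl in the label map, so
-- pprTempAndExternDecls will emit one temp declaration for r and one
-- extern declaration for lbl.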
+ +te_Reg :: CmmReg -> TE () +te_Reg (CmmLocal l) = te_temp l +te_Reg _ = return () + + +-- --------------------------------------------------------------------- +-- C types for MachReps + +cCast :: SDoc -> CmmExpr -> SDoc +cCast ty expr = parens ty <> pprExpr1 expr + +cLoad :: CmmExpr -> CmmType -> SDoc +cLoad expr rep + = sdocWithPlatform $ \platform -> + if bewareLoadStoreAlignment (platformArch platform) + then let decl = machRepCType rep <+> text "x" <> semi + struct = text "struct" <+> braces (decl) + packed_attr = text "__attribute__((packed))" + cast = parens (struct <+> packed_attr <> char '*') + in parens (cast <+> pprExpr1 expr) <> text "->x" + else char '*' <> parens (cCast (machRepPtrCType rep) expr) + where -- On these platforms, unaligned loads are known to cause problems + bewareLoadStoreAlignment ArchAlpha = True + bewareLoadStoreAlignment ArchMipseb = True + bewareLoadStoreAlignment ArchMipsel = True + bewareLoadStoreAlignment (ArchARM {}) = True + bewareLoadStoreAlignment ArchARM64 = True + bewareLoadStoreAlignment ArchSPARC = True + bewareLoadStoreAlignment ArchSPARC64 = True + -- Pessimistically assume that they will also cause problems + -- on unknown arches + bewareLoadStoreAlignment ArchUnknown = True + bewareLoadStoreAlignment _ = False + +isCmmWordType :: DynFlags -> CmmType -> Bool +-- True of GcPtrReg/NonGcReg of native word size +isCmmWordType dflags ty = not (isFloatType ty) + && typeWidth ty == wordWidth dflags + +-- This is for finding the types of foreign call arguments. For a pointer +-- argument, we always cast the argument to (void *), to avoid warnings from +-- the C compiler. +machRepHintCType :: CmmType -> ForeignHint -> SDoc +machRepHintCType _ AddrHint = text "void *" +machRepHintCType rep SignedHint = machRep_S_CType (typeWidth rep) +machRepHintCType rep _other = machRepCType rep + +machRepPtrCType :: CmmType -> SDoc +machRepPtrCType r + = sdocWithDynFlags $ \dflags -> + if isCmmWordType dflags r then text "P_" + else machRepCType r <> char '*' + +machRepCType :: CmmType -> SDoc +machRepCType ty | isFloatType ty = machRep_F_CType w + | otherwise = machRep_U_CType w + where + w = typeWidth ty + +machRep_F_CType :: Width -> SDoc +machRep_F_CType W32 = text "StgFloat" -- ToDo: correct? +machRep_F_CType W64 = text "StgDouble" +machRep_F_CType _ = panic "machRep_F_CType" + +machRep_U_CType :: Width -> SDoc +machRep_U_CType w + = sdocWithDynFlags $ \dflags -> + case w of + _ | w == wordWidth dflags -> text "W_" + W8 -> text "StgWord8" + W16 -> text "StgWord16" + W32 -> text "StgWord32" + W64 -> text "StgWord64" + _ -> panic "machRep_U_CType" + +machRep_S_CType :: Width -> SDoc +machRep_S_CType w + = sdocWithDynFlags $ \dflags -> + case w of + _ | w == wordWidth dflags -> text "I_" + W8 -> text "StgInt8" + W16 -> text "StgInt16" + W32 -> text "StgInt32" + W64 -> text "StgInt64" + _ -> panic "machRep_S_CType" + + +-- --------------------------------------------------------------------- +-- print strings as valid C strings + +pprStringInCStyle :: ByteString -> SDoc +pprStringInCStyle s = doubleQuotes (text (concatMap charToC (BS.unpack s))) + +-- --------------------------------------------------------------------------- +-- Initialising static objects with floating-point numbers. We can't +-- just emit the floating point number, because C will cast it to an int +-- by rounding it. We want the actual bit-representation of the float. 
+-- +-- Consider a concrete C example: +-- double d = 2.5e-10; +-- float f = 2.5e-10f; +-- +-- int * i2 = &d; printf ("i2: %08X %08X\n", i2[0], i2[1]); +-- long long * l = &d; printf (" l: %016llX\n", l[0]); +-- int * i = &f; printf (" i: %08X\n", i[0]); +-- Result on 64-bit LE (x86_64): +-- i2: E826D695 3DF12E0B +-- l: 3DF12E0BE826D695 +-- i: 2F89705F +-- Result on 32-bit BE (m68k): +-- i2: 3DF12E0B E826D695 +-- l: 3DF12E0BE826D695 +-- i: 2F89705F +-- +-- The trick here is to notice that binary representation does not +-- change much: only Word32 values get swapped on LE hosts / targets. + +-- This is a hack to turn the floating point numbers into ints that we +-- can safely initialise to static locations. + +castFloatToWord32Array :: STUArray s Int Float -> ST s (STUArray s Int Word32) +castFloatToWord32Array = U.castSTUArray + +castDoubleToWord64Array :: STUArray s Int Double -> ST s (STUArray s Int Word64) +castDoubleToWord64Array = U.castSTUArray + +floatToWord :: DynFlags -> Rational -> CmmLit +floatToWord dflags r + = runST (do + arr <- newArray_ ((0::Int),0) + writeArray arr 0 (fromRational r) + arr' <- castFloatToWord32Array arr + w32 <- readArray arr' 0 + return (CmmInt (toInteger w32 `shiftL` wo) (wordWidth dflags)) + ) + where wo | wordWidth dflags == W64 + , wORDS_BIGENDIAN dflags = 32 + | otherwise = 0 + +floatPairToWord :: DynFlags -> Rational -> Rational -> CmmLit +floatPairToWord dflags r1 r2 + = runST (do + arr <- newArray_ ((0::Int),1) + writeArray arr 0 (fromRational r1) + writeArray arr 1 (fromRational r2) + arr' <- castFloatToWord32Array arr + w32_1 <- readArray arr' 0 + w32_2 <- readArray arr' 1 + return (pprWord32Pair w32_1 w32_2) + ) + where pprWord32Pair w32_1 w32_2 + | wORDS_BIGENDIAN dflags = + CmmInt ((shiftL i1 32) .|. i2) W64 + | otherwise = + CmmInt ((shiftL i2 32) .|. i1) W64 + where i1 = toInteger w32_1 + i2 = toInteger w32_2 + +doubleToWords :: DynFlags -> Rational -> [CmmLit] +doubleToWords dflags r + = runST (do + arr <- newArray_ ((0::Int),1) + writeArray arr 0 (fromRational r) + arr' <- castDoubleToWord64Array arr + w64 <- readArray arr' 0 + return (pprWord64 w64) + ) + where targetWidth = wordWidth dflags + targetBE = wORDS_BIGENDIAN dflags + pprWord64 w64 + | targetWidth == W64 = + [ CmmInt (toInteger w64) targetWidth ] + | targetWidth == W32 = + [ CmmInt (toInteger targetW1) targetWidth + , CmmInt (toInteger targetW2) targetWidth + ] + | otherwise = panic "doubleToWords.pprWord64" + where (targetW1, targetW2) + | targetBE = (wHi, wLo) + | otherwise = (wLo, wHi) + wHi = w64 `shiftR` 32 + wLo = w64 .&. 0xFFFFffff + +-- --------------------------------------------------------------------------- +-- Utils + +wordShift :: DynFlags -> Int +wordShift dflags = widthInLog (wordWidth dflags) + +commafy :: [SDoc] -> SDoc +commafy xs = hsep $ punctuate comma xs + +-- Print in C hex format: 0x13fa +pprHexVal :: Integer -> Width -> SDoc +pprHexVal w rep + | w < 0 = parens (char '-' <> + text "0x" <> intToDoc (-w) <> repsuffix rep) + | otherwise = text "0x" <> intToDoc w <> repsuffix rep + where + -- type suffix for literals: + -- Integer literals are unsigned in Cmm/C. We explicitly cast to + -- signed values for doing signed operations, but at all other + -- times values are unsigned. This also helps eliminate occasional + -- warnings about integer overflow from gcc. 
+ + repsuffix W64 = sdocWithDynFlags $ \dflags -> + if cINT_SIZE dflags == 8 then char 'U' + else if cLONG_SIZE dflags == 8 then text "UL" + else if cLONG_LONG_SIZE dflags == 8 then text "ULL" + else panic "pprHexVal: Can't find a 64-bit type" + repsuffix _ = char 'U' + + intToDoc :: Integer -> SDoc + intToDoc i = case truncInt i of + 0 -> char '0' + v -> go v + + -- We need to truncate value as Cmm backend does not drop + -- redundant bits to ease handling of negative values. + -- Thus the following Cmm code on 64-bit arch, like amd64: + -- CInt v; + -- v = {something}; + -- if (v == %lobits32(-1)) { ... + -- leads to the following C code: + -- StgWord64 v = (StgWord32)({something}); + -- if (v == 0xFFFFffffFFFFffffU) { ... + -- Such code is incorrect as it promotes both operands to StgWord64 + -- and the whole condition is always false. + truncInt :: Integer -> Integer + truncInt i = + case rep of + W8 -> i `rem` (2^(8 :: Int)) + W16 -> i `rem` (2^(16 :: Int)) + W32 -> i `rem` (2^(32 :: Int)) + W64 -> i `rem` (2^(64 :: Int)) + _ -> panic ("pprHexVal/truncInt: C backend can't encode " + ++ show rep ++ " literals") + + go 0 = empty + go w' = go q <> dig + where + (q,r) = w' `quotRem` 16 + dig | r < 10 = char (chr (fromInteger r + ord '0')) + | otherwise = char (chr (fromInteger r - 10 + ord 'a')) diff --git a/compiler/GHC/Data/Bitmap.hs b/compiler/GHC/Data/Bitmap.hs new file mode 100644 index 0000000000..a8eba5e2e8 --- /dev/null +++ b/compiler/GHC/Data/Bitmap.hs @@ -0,0 +1,134 @@ +{-# LANGUAGE BangPatterns #-} + +-- +-- (c) The University of Glasgow 2003-2006 +-- + +-- Functions for constructing bitmaps, which are used in various +-- places in generated code (stack frame liveness masks, function +-- argument liveness masks, SRT bitmaps). + +module GHC.Data.Bitmap ( + Bitmap, mkBitmap, + intsToBitmap, intsToReverseBitmap, + mAX_SMALL_BITMAP_SIZE, + seqBitmap, + ) where + +import GhcPrelude + +import GHC.Runtime.Layout +import DynFlags +import Util + +import Data.Bits + +{-| +A bitmap represented by a sequence of 'StgWord's on the /target/ +architecture. These are used for bitmaps in info tables and other +generated code which need to be emitted as sequences of StgWords. +-} +type Bitmap = [StgWord] + +-- | Make a bitmap from a sequence of bits +mkBitmap :: DynFlags -> [Bool] -> Bitmap +mkBitmap _ [] = [] +mkBitmap dflags stuff = chunkToBitmap dflags chunk : mkBitmap dflags rest + where (chunk, rest) = splitAt (wORD_SIZE_IN_BITS dflags) stuff + +chunkToBitmap :: DynFlags -> [Bool] -> StgWord +chunkToBitmap dflags chunk = + foldl' (.|.) (toStgWord dflags 0) [ oneAt n | (True,n) <- zip chunk [0..] ] + where + oneAt :: Int -> StgWord + oneAt i = toStgWord dflags 1 `shiftL` i + +-- | Make a bitmap where the slots specified are the /ones/ in the bitmap. +-- eg. @[0,1,3], size 4 ==> 0xb@. +-- +-- The list of @Int@s /must/ be already sorted. +intsToBitmap :: DynFlags + -> Int -- ^ size in bits + -> [Int] -- ^ sorted indices of ones + -> Bitmap +intsToBitmap dflags size = go 0 + where + word_sz = wORD_SIZE_IN_BITS dflags + oneAt :: Int -> StgWord + oneAt i = toStgWord dflags 1 `shiftL` i + + -- It is important that we maintain strictness here. + -- See Note [Strictness when building Bitmaps]. + go :: Int -> [Int] -> Bitmap + go !pos slots + | size <= pos = [] + | otherwise = + (foldl' (.|.) 
(toStgWord dflags 0) (map (\i->oneAt (i - pos)) these)) :
+        go (pos + word_sz) rest
+      where
+        (these,rest) = span (< (pos + word_sz)) slots
+
+-- | Make a bitmap where the slots specified are the /zeros/ in the bitmap.
+-- eg. @[0,1,3], size 4 ==> 0x4@  (we leave any bits outside the size as zero,
+-- just to make the bitmap easier to read).
+--
+-- The list of @Int@s /must/ be already sorted and duplicate-free.
+intsToReverseBitmap :: DynFlags
+                    -> Int      -- ^ size in bits
+                    -> [Int]    -- ^ sorted indices of zeros free of duplicates
+                    -> Bitmap
+intsToReverseBitmap dflags size = go 0
+  where
+    word_sz = wORD_SIZE_IN_BITS dflags
+    oneAt :: Int -> StgWord
+    oneAt i = toStgWord dflags 1 `shiftL` i
+
+    -- It is important that we maintain strictness here.
+    -- See Note [Strictness when building Bitmaps].
+    go :: Int -> [Int] -> Bitmap
+    go !pos slots
+      | size <= pos = []
+      | otherwise =
+        (foldl' xor (toStgWord dflags init) (map (\i->oneAt (i - pos)) these)) :
+          go (pos + word_sz) rest
+      where
+        (these,rest) = span (< (pos + word_sz)) slots
+        remain = size - pos
+        init
+          | remain >= word_sz = -1
+          | otherwise         = (1 `shiftL` remain) - 1
+
+{-
+
+Note [Strictness when building Bitmaps]
+========================================
+
+One of the places where @Bitmap@ is used is in building Static Reference
+Tables (SRTs) (in @GHC.Cmm.Info.Build.procpointSRT@). In #7450 it was noticed
+that some test cases (particularly those whose C-- have large numbers of CAFs)
+produced large quantities of allocations from this function.
+
+The source traced back to 'intsToBitmap', which was lazily subtracting the word
+size from the elements of the tail of the @slots@ list and recursively invoking
+itself with the result. This resulted in large numbers of subtraction thunks
+being built up. Here we take care to avoid passing new thunks to the recursive
+call. Instead we pass the unmodified tail along with an explicit position
+accumulator, which gets subtracted from the elements in the fold when we
+compute the Word.
+
+-}
+
+{- |
+Magic number, must agree with @BITMAP_BITS_SHIFT@ in InfoTables.h.
+Some kinds of bitmap pack a size\/bitmap into a single word if
+possible, or fall back to an external pointer when the bitmap is too
+large. This value represents the largest size of bitmap that can be
+packed into a single word.
+-} +mAX_SMALL_BITMAP_SIZE :: DynFlags -> Int +mAX_SMALL_BITMAP_SIZE dflags + | wORD_SIZE dflags == 4 = 27 + | otherwise = 58 + +seqBitmap :: Bitmap -> a -> a +seqBitmap = seqList + diff --git a/compiler/GHC/Platform/Regs.hs b/compiler/GHC/Platform/Regs.hs index c304d4f5ad..51f7658db2 100644 --- a/compiler/GHC/Platform/Regs.hs +++ b/compiler/GHC/Platform/Regs.hs @@ -5,7 +5,7 @@ module GHC.Platform.Regs import GhcPrelude -import CmmExpr +import GHC.Cmm.Expr import GHC.Platform import Reg diff --git a/compiler/GHC/Runtime/Layout.hs b/compiler/GHC/Runtime/Layout.hs new file mode 100644 index 0000000000..8f245479c1 --- /dev/null +++ b/compiler/GHC/Runtime/Layout.hs @@ -0,0 +1,563 @@ +-- (c) The University of Glasgow 2006 +-- (c) The GRASP/AQUA Project, Glasgow University, 1992-1998 +-- +-- Storage manager representation of closures + +{-# LANGUAGE CPP,GeneralizedNewtypeDeriving #-} + +module GHC.Runtime.Layout ( + -- * Words and bytes + WordOff, ByteOff, + wordsToBytes, bytesToWordsRoundUp, + roundUpToWords, roundUpTo, + + StgWord, fromStgWord, toStgWord, + StgHalfWord, fromStgHalfWord, toStgHalfWord, + halfWordSize, halfWordSizeInBits, + + -- * Closure representation + SMRep(..), -- CmmInfo sees the rep; no one else does + IsStatic, + ClosureTypeInfo(..), ArgDescr(..), Liveness, + ConstrDescription, + + -- ** Construction + mkHeapRep, blackHoleRep, indStaticRep, mkStackRep, mkRTSRep, arrPtrsRep, + smallArrPtrsRep, arrWordsRep, + + -- ** Predicates + isStaticRep, isConRep, isThunkRep, isFunRep, isStaticNoCafCon, + isStackRep, + + -- ** Size-related things + heapClosureSizeW, + fixedHdrSizeW, arrWordsHdrSize, arrWordsHdrSizeW, arrPtrsHdrSize, + arrPtrsHdrSizeW, profHdrSize, thunkHdrSize, nonHdrSize, nonHdrSizeW, + smallArrPtrsHdrSize, smallArrPtrsHdrSizeW, hdrSize, hdrSizeW, + fixedHdrSize, + + -- ** RTS closure types + rtsClosureType, rET_SMALL, rET_BIG, + aRG_GEN, aRG_GEN_BIG, + + -- ** Arrays + card, cardRoundUp, cardTableSizeB, cardTableSizeW + ) where + +import GhcPrelude + +import BasicTypes( ConTagZ ) +import DynFlags +import Outputable +import GHC.Platform +import FastString + +import Data.Word +import Data.Bits +import Data.ByteString (ByteString) + +{- +************************************************************************ +* * + Words and bytes +* * +************************************************************************ +-} + +-- | Word offset, or word count +type WordOff = Int + +-- | Byte offset, or byte count +type ByteOff = Int + +-- | Round up the given byte count to the next byte count that's a +-- multiple of the machine's word size. +roundUpToWords :: DynFlags -> ByteOff -> ByteOff +roundUpToWords dflags n = roundUpTo n (wORD_SIZE dflags) + +-- | Round up @base@ to a multiple of @size@. +roundUpTo :: ByteOff -> ByteOff -> ByteOff +roundUpTo base size = (base + (size - 1)) .&. (complement (size - 1)) + +-- | Convert the given number of words to a number of bytes. +-- +-- This function morally has type @WordOff -> ByteOff@, but uses @Num +-- a@ to allow for overloading. +wordsToBytes :: Num a => DynFlags -> a -> a +wordsToBytes dflags n = fromIntegral (wORD_SIZE dflags) * n +{-# SPECIALIZE wordsToBytes :: DynFlags -> Int -> Int #-} +{-# SPECIALIZE wordsToBytes :: DynFlags -> Word -> Word #-} +{-# SPECIALIZE wordsToBytes :: DynFlags -> Integer -> Integer #-} + +-- | First round the given byte count up to a multiple of the +-- machine's word size and then convert the result to words. 
+bytesToWordsRoundUp :: DynFlags -> ByteOff -> WordOff
+bytesToWordsRoundUp dflags n = (n + word_size - 1) `quot` word_size
+ where word_size = wORD_SIZE dflags
+
+-- StgWord is a type representing a word on the target platform.
+-- A Word64 is large enough to hold a Word for either a 32-bit or
+-- 64-bit platform
+newtype StgWord = StgWord Word64
+    deriving (Eq, Bits)
+
+fromStgWord :: StgWord -> Integer
+fromStgWord (StgWord i) = toInteger i
+
+toStgWord :: DynFlags -> Integer -> StgWord
+toStgWord dflags i
+    = case platformWordSize (targetPlatform dflags) of
+      -- These conversions mean that things like toStgWord (-1)
+      -- do the right thing
+      PW4 -> StgWord (fromIntegral (fromInteger i :: Word32))
+      PW8 -> StgWord (fromInteger i)
+
+instance Outputable StgWord where
+    ppr (StgWord i) = integer (toInteger i)
+
+--
+
+-- A Word32 is large enough to hold half a Word for either a 32-bit or
+-- 64-bit platform
+newtype StgHalfWord = StgHalfWord Word32
+    deriving Eq
+
+fromStgHalfWord :: StgHalfWord -> Integer
+fromStgHalfWord (StgHalfWord w) = toInteger w
+
+toStgHalfWord :: DynFlags -> Integer -> StgHalfWord
+toStgHalfWord dflags i
+    = case platformWordSize (targetPlatform dflags) of
+      -- These conversions mean that things like toStgHalfWord (-1)
+      -- do the right thing
+      PW4 -> StgHalfWord (fromIntegral (fromInteger i :: Word16))
+      PW8 -> StgHalfWord (fromInteger i :: Word32)
+
+instance Outputable StgHalfWord where
+    ppr (StgHalfWord w) = integer (toInteger w)
+
+-- | Half word size in bytes
+halfWordSize :: DynFlags -> ByteOff
+halfWordSize dflags = platformWordSizeInBytes (targetPlatform dflags) `div` 2
+
+halfWordSizeInBits :: DynFlags -> Int
+halfWordSizeInBits dflags = platformWordSizeInBits (targetPlatform dflags) `div` 2
+
+{-
+************************************************************************
+*                                                                      *
+\subsubsection[SMRep-datatype]{@SMRep@---storage manager representation}
+*                                                                      *
+************************************************************************
+-}
+
+-- | A description of the layout of a closure.  Corresponds directly
+-- to the closure types in includes/rts/storage/ClosureTypes.h.
+data SMRep
+  = HeapRep              -- GC routines consult sizes in info tbl
+        IsStatic
+        !WordOff         --  # ptr words
+        !WordOff         --  # non-ptr words INCLUDING SLOP (see mkHeapRep below)
+        ClosureTypeInfo  -- type-specific info
+
+  | ArrayPtrsRep
+        !WordOff        -- # ptr words
+        !WordOff        -- # card table words
+
+  | SmallArrayPtrsRep
+        !WordOff        -- # ptr words
+
+  | ArrayWordsRep
+        !WordOff        -- # bytes expressed in words, rounded up
+
+  | StackRep            -- Stack frame (RET_SMALL or RET_BIG)
+        Liveness
+
+  | RTSRep              -- The RTS needs to declare info tables with specific
+        Int             -- type tags, so this form lets us override the default
+        SMRep           -- tag for an SMRep.
+
+-- | True <=> This is a static closure.  Affects how we garbage-collect it.
+-- Static closures have an extra static link field at the end.
+-- Constructors do not have a static variant; see Note [static constructors]
+type IsStatic = Bool
+
+-- From an SMRep you can get to the closure type defined in
+-- includes/rts/storage/ClosureTypes.h. Described by the function
+-- rtsClosureType below.
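-- An illustrative value (a sketch, not from this patch): a dynamically
-- allocated constructor with two pointer fields and one non-pointer
-- word would be described as
--     HeapRep False 2 1 (Constr con_tag con_descr)
-- and rtsClosureType picks the matching constructor closure type for it.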
+ +data ClosureTypeInfo + = Constr ConTagZ ConstrDescription + | Fun FunArity ArgDescr + | Thunk + | ThunkSelector SelectorOffset + | BlackHole + | IndStatic + +type ConstrDescription = ByteString -- result of dataConIdentity +type FunArity = Int +type SelectorOffset = Int + +------------------------- +-- We represent liveness bitmaps as a Bitmap (whose internal +-- representation really is a bitmap). These are pinned onto case return +-- vectors to indicate the state of the stack for the garbage collector. +-- +-- In the compiled program, liveness bitmaps that fit inside a single +-- word (StgWord) are stored as a single word, while larger bitmaps are +-- stored as a pointer to an array of words. + +type Liveness = [Bool] -- One Bool per word; True <=> non-ptr or dead + -- False <=> ptr + +------------------------- +-- An ArgDescr describes the argument pattern of a function + +data ArgDescr + = ArgSpec -- Fits one of the standard patterns + !Int -- RTS type identifier ARG_P, ARG_N, ... + + | ArgGen -- General case + Liveness -- Details about the arguments + + +----------------------------------------------------------------------------- +-- Construction + +mkHeapRep :: DynFlags -> IsStatic -> WordOff -> WordOff -> ClosureTypeInfo + -> SMRep +mkHeapRep dflags is_static ptr_wds nonptr_wds cl_type_info + = HeapRep is_static + ptr_wds + (nonptr_wds + slop_wds) + cl_type_info + where + slop_wds + | is_static = 0 + | otherwise = max 0 (minClosureSize dflags - (hdr_size + payload_size)) + + hdr_size = closureTypeHdrSize dflags cl_type_info + payload_size = ptr_wds + nonptr_wds + +mkRTSRep :: Int -> SMRep -> SMRep +mkRTSRep = RTSRep + +mkStackRep :: [Bool] -> SMRep +mkStackRep liveness = StackRep liveness + +blackHoleRep :: SMRep +blackHoleRep = HeapRep False 0 0 BlackHole + +indStaticRep :: SMRep +indStaticRep = HeapRep True 1 0 IndStatic + +arrPtrsRep :: DynFlags -> WordOff -> SMRep +arrPtrsRep dflags elems = ArrayPtrsRep elems (cardTableSizeW dflags elems) + +smallArrPtrsRep :: WordOff -> SMRep +smallArrPtrsRep elems = SmallArrayPtrsRep elems + +arrWordsRep :: DynFlags -> ByteOff -> SMRep +arrWordsRep dflags bytes = ArrayWordsRep (bytesToWordsRoundUp dflags bytes) + +----------------------------------------------------------------------------- +-- Predicates + +isStaticRep :: SMRep -> IsStatic +isStaticRep (HeapRep is_static _ _ _) = is_static +isStaticRep (RTSRep _ rep) = isStaticRep rep +isStaticRep _ = False + +isStackRep :: SMRep -> Bool +isStackRep StackRep{} = True +isStackRep (RTSRep _ rep) = isStackRep rep +isStackRep _ = False + +isConRep :: SMRep -> Bool +isConRep (HeapRep _ _ _ Constr{}) = True +isConRep _ = False + +isThunkRep :: SMRep -> Bool +isThunkRep (HeapRep _ _ _ Thunk) = True +isThunkRep (HeapRep _ _ _ ThunkSelector{}) = True +isThunkRep (HeapRep _ _ _ BlackHole) = True +isThunkRep (HeapRep _ _ _ IndStatic) = True +isThunkRep _ = False + +isFunRep :: SMRep -> Bool +isFunRep (HeapRep _ _ _ Fun{}) = True +isFunRep _ = False + +isStaticNoCafCon :: SMRep -> Bool +-- This should line up exactly with CONSTR_NOCAF below +-- See Note [Static NoCaf constructors] +isStaticNoCafCon (HeapRep _ 0 _ Constr{}) = True +isStaticNoCafCon _ = False + + +----------------------------------------------------------------------------- +-- Size-related things + +fixedHdrSize :: DynFlags -> ByteOff +fixedHdrSize dflags = wordsToBytes dflags (fixedHdrSizeW dflags) + +-- | Size of a closure header (StgHeader in includes/rts/storage/Closures.h) +fixedHdrSizeW :: DynFlags -> WordOff +fixedHdrSizeW 
dflags = sTD_HDR_SIZE dflags + profHdrSize dflags + +-- | Size of the profiling part of a closure header +-- (StgProfHeader in includes/rts/storage/Closures.h) +profHdrSize :: DynFlags -> WordOff +profHdrSize dflags + | gopt Opt_SccProfilingOn dflags = pROF_HDR_SIZE dflags + | otherwise = 0 + +-- | The garbage collector requires that every closure is at least as +-- big as this. +minClosureSize :: DynFlags -> WordOff +minClosureSize dflags = fixedHdrSizeW dflags + mIN_PAYLOAD_SIZE dflags + +arrWordsHdrSize :: DynFlags -> ByteOff +arrWordsHdrSize dflags + = fixedHdrSize dflags + sIZEOF_StgArrBytes_NoHdr dflags + +arrWordsHdrSizeW :: DynFlags -> WordOff +arrWordsHdrSizeW dflags = + fixedHdrSizeW dflags + + (sIZEOF_StgArrBytes_NoHdr dflags `quot` wORD_SIZE dflags) + +arrPtrsHdrSize :: DynFlags -> ByteOff +arrPtrsHdrSize dflags + = fixedHdrSize dflags + sIZEOF_StgMutArrPtrs_NoHdr dflags + +arrPtrsHdrSizeW :: DynFlags -> WordOff +arrPtrsHdrSizeW dflags = + fixedHdrSizeW dflags + + (sIZEOF_StgMutArrPtrs_NoHdr dflags `quot` wORD_SIZE dflags) + +smallArrPtrsHdrSize :: DynFlags -> ByteOff +smallArrPtrsHdrSize dflags + = fixedHdrSize dflags + sIZEOF_StgSmallMutArrPtrs_NoHdr dflags + +smallArrPtrsHdrSizeW :: DynFlags -> WordOff +smallArrPtrsHdrSizeW dflags = + fixedHdrSizeW dflags + + (sIZEOF_StgSmallMutArrPtrs_NoHdr dflags `quot` wORD_SIZE dflags) + +-- Thunks have an extra header word on SMP, so the update doesn't +-- splat the payload. +thunkHdrSize :: DynFlags -> WordOff +thunkHdrSize dflags = fixedHdrSizeW dflags + smp_hdr + where smp_hdr = sIZEOF_StgSMPThunkHeader dflags `quot` wORD_SIZE dflags + +hdrSize :: DynFlags -> SMRep -> ByteOff +hdrSize dflags rep = wordsToBytes dflags (hdrSizeW dflags rep) + +hdrSizeW :: DynFlags -> SMRep -> WordOff +hdrSizeW dflags (HeapRep _ _ _ ty) = closureTypeHdrSize dflags ty +hdrSizeW dflags (ArrayPtrsRep _ _) = arrPtrsHdrSizeW dflags +hdrSizeW dflags (SmallArrayPtrsRep _) = smallArrPtrsHdrSizeW dflags +hdrSizeW dflags (ArrayWordsRep _) = arrWordsHdrSizeW dflags +hdrSizeW _ _ = panic "SMRep.hdrSizeW" + +nonHdrSize :: DynFlags -> SMRep -> ByteOff +nonHdrSize dflags rep = wordsToBytes dflags (nonHdrSizeW rep) + +nonHdrSizeW :: SMRep -> WordOff +nonHdrSizeW (HeapRep _ p np _) = p + np +nonHdrSizeW (ArrayPtrsRep elems ct) = elems + ct +nonHdrSizeW (SmallArrayPtrsRep elems) = elems +nonHdrSizeW (ArrayWordsRep words) = words +nonHdrSizeW (StackRep bs) = length bs +nonHdrSizeW (RTSRep _ rep) = nonHdrSizeW rep + +-- | The total size of the closure, in words. +heapClosureSizeW :: DynFlags -> SMRep -> WordOff +heapClosureSizeW dflags (HeapRep _ p np ty) + = closureTypeHdrSize dflags ty + p + np +heapClosureSizeW dflags (ArrayPtrsRep elems ct) + = arrPtrsHdrSizeW dflags + elems + ct +heapClosureSizeW dflags (SmallArrayPtrsRep elems) + = smallArrPtrsHdrSizeW dflags + elems +heapClosureSizeW dflags (ArrayWordsRep words) + = arrWordsHdrSizeW dflags + words +heapClosureSizeW _ _ = panic "SMRep.heapClosureSize" + +closureTypeHdrSize :: DynFlags -> ClosureTypeInfo -> WordOff +closureTypeHdrSize dflags ty = case ty of + Thunk -> thunkHdrSize dflags + ThunkSelector{} -> thunkHdrSize dflags + BlackHole -> thunkHdrSize dflags + IndStatic -> thunkHdrSize dflags + _ -> fixedHdrSizeW dflags + -- All thunks use thunkHdrSize, even if they are non-updatable. + -- this is because we don't have separate closure types for + -- updatable vs. non-updatable thunks, so the GC can't tell the + -- difference. 
If we ever have significant numbers of non- + -- updatable thunks, it might be worth fixing this. + +-- --------------------------------------------------------------------------- +-- Arrays + +-- | The byte offset into the card table of the card for a given element +card :: DynFlags -> Int -> Int +card dflags i = i `shiftR` mUT_ARR_PTRS_CARD_BITS dflags + +-- | Convert a number of elements to a number of cards, rounding up +cardRoundUp :: DynFlags -> Int -> Int +cardRoundUp dflags i = + card dflags (i + ((1 `shiftL` mUT_ARR_PTRS_CARD_BITS dflags) - 1)) + +-- | The size of a card table, in bytes +cardTableSizeB :: DynFlags -> Int -> ByteOff +cardTableSizeB dflags elems = cardRoundUp dflags elems + +-- | The size of a card table, in words +cardTableSizeW :: DynFlags -> Int -> WordOff +cardTableSizeW dflags elems = + bytesToWordsRoundUp dflags (cardTableSizeB dflags elems) + +----------------------------------------------------------------------------- +-- deriving the RTS closure type from an SMRep + +#include "../includes/rts/storage/ClosureTypes.h" +#include "../includes/rts/storage/FunTypes.h" +-- Defines CONSTR, CONSTR_1_0 etc + +-- | Derives the RTS closure type from an 'SMRep' +rtsClosureType :: SMRep -> Int +rtsClosureType rep + = case rep of + RTSRep ty _ -> ty + + -- See Note [static constructors] + HeapRep _ 1 0 Constr{} -> CONSTR_1_0 + HeapRep _ 0 1 Constr{} -> CONSTR_0_1 + HeapRep _ 2 0 Constr{} -> CONSTR_2_0 + HeapRep _ 1 1 Constr{} -> CONSTR_1_1 + HeapRep _ 0 2 Constr{} -> CONSTR_0_2 + HeapRep _ 0 _ Constr{} -> CONSTR_NOCAF + -- See Note [Static NoCaf constructors] + HeapRep _ _ _ Constr{} -> CONSTR + + HeapRep False 1 0 Fun{} -> FUN_1_0 + HeapRep False 0 1 Fun{} -> FUN_0_1 + HeapRep False 2 0 Fun{} -> FUN_2_0 + HeapRep False 1 1 Fun{} -> FUN_1_1 + HeapRep False 0 2 Fun{} -> FUN_0_2 + HeapRep False _ _ Fun{} -> FUN + + HeapRep False 1 0 Thunk -> THUNK_1_0 + HeapRep False 0 1 Thunk -> THUNK_0_1 + HeapRep False 2 0 Thunk -> THUNK_2_0 + HeapRep False 1 1 Thunk -> THUNK_1_1 + HeapRep False 0 2 Thunk -> THUNK_0_2 + HeapRep False _ _ Thunk -> THUNK + + HeapRep False _ _ ThunkSelector{} -> THUNK_SELECTOR + + HeapRep True _ _ Fun{} -> FUN_STATIC + HeapRep True _ _ Thunk -> THUNK_STATIC + HeapRep False _ _ BlackHole -> BLACKHOLE + HeapRep False _ _ IndStatic -> IND_STATIC + + _ -> panic "rtsClosureType" + +-- We export these ones +rET_SMALL, rET_BIG, aRG_GEN, aRG_GEN_BIG :: Int +rET_SMALL = RET_SMALL +rET_BIG = RET_BIG +aRG_GEN = ARG_GEN +aRG_GEN_BIG = ARG_GEN_BIG + +{- +Note [static constructors] +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +We used to have a CONSTR_STATIC closure type, and each constructor had +two info tables: one with CONSTR (or CONSTR_1_0 etc.), and one with +CONSTR_STATIC. + +This distinction was removed, because when copying a data structure +into a compact region, we must copy static constructors into the +compact region too. If we didn't do this, we would need to track the +references from the compact region out to the static constructors, +because they might (indirectly) refer to CAFs. + +Since static constructors will be copied to the heap, if we wanted to +use different info tables for static and dynamic constructors, we +would have to switch the info pointer when copying the constructor +into the compact region, which means we would need an extra field of +the static info table to point to the dynamic one. 
+ +However, since the distinction between static and dynamic closure +types is never actually needed (other than for assertions), we can +just drop the distinction and use the same info table for both. + +The GC *does* need to distinguish between static and dynamic closures, +but it does this using the HEAP_ALLOCED() macro which checks whether +the address of the closure resides within the dynamic heap. +HEAP_ALLOCED() doesn't read the closure's info table. + +Note [Static NoCaf constructors] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +If we know that a top-level binding 'x' is not Caffy (ie no CAFs are +reachable from 'x'), then a statically allocated constructor (Just x) +is also not Caffy, and the garbage collector need not follow its +argument fields. Exploiting this would require two static info tables +for Just, for the two cases where the argument was Caffy or non-Caffy. + +Currently we don't do this; instead we treat nullary constructors +as non-Caffy, and the others as potentially Caffy. + + +************************************************************************ +* * + Pretty printing of SMRep and friends +* * +************************************************************************ +-} + +instance Outputable ClosureTypeInfo where + ppr = pprTypeInfo + +instance Outputable SMRep where + ppr (HeapRep static ps nps tyinfo) + = hang (header <+> lbrace) 2 (ppr tyinfo <+> rbrace) + where + header = text "HeapRep" + <+> if static then text "static" else empty + <+> pp_n "ptrs" ps <+> pp_n "nonptrs" nps + pp_n :: String -> Int -> SDoc + pp_n _ 0 = empty + pp_n s n = int n <+> text s + + ppr (ArrayPtrsRep size _) = text "ArrayPtrsRep" <+> ppr size + + ppr (SmallArrayPtrsRep size) = text "SmallArrayPtrsRep" <+> ppr size + + ppr (ArrayWordsRep words) = text "ArrayWordsRep" <+> ppr words + + ppr (StackRep bs) = text "StackRep" <+> ppr bs + + ppr (RTSRep ty rep) = text "tag:" <> ppr ty <+> ppr rep + +instance Outputable ArgDescr where + ppr (ArgSpec n) = text "ArgSpec" <+> ppr n + ppr (ArgGen ls) = text "ArgGen" <+> ppr ls + +pprTypeInfo :: ClosureTypeInfo -> SDoc +pprTypeInfo (Constr tag descr) + = text "Con" <+> + braces (sep [ text "tag:" <+> ppr tag + , text "descr:" <> text (show descr) ]) + +pprTypeInfo (Fun arity args) + = text "Fun" <+> + braces (sep [ text "arity:" <+> ppr arity + , ptext (sLit ("fun_type:")) <+> ppr args ]) + +pprTypeInfo (ThunkSelector offset) + = text "ThunkSel" <+> ppr offset + +pprTypeInfo Thunk = text "Thunk" +pprTypeInfo BlackHole = text "BlackHole" +pprTypeInfo IndStatic = text "IndStatic" diff --git a/compiler/GHC/Stg/Lift/Analysis.hs b/compiler/GHC/Stg/Lift/Analysis.hs index 02d439cef7..ccbad37210 100644 --- a/compiler/GHC/Stg/Lift/Analysis.hs +++ b/compiler/GHC/Stg/Lift/Analysis.hs @@ -26,7 +26,7 @@ import BasicTypes import Demand import DynFlags import Id -import SMRep ( WordOff ) +import GHC.Runtime.Layout ( WordOff ) import GHC.Stg.Syntax import qualified GHC.StgToCmm.ArgRep as StgToCmm.ArgRep import qualified GHC.StgToCmm.Closure as StgToCmm.Closure diff --git a/compiler/GHC/StgToCmm.hs b/compiler/GHC/StgToCmm.hs index 10a9dc2c6a..f489ce6456 100644 --- a/compiler/GHC/StgToCmm.hs +++ b/compiler/GHC/StgToCmm.hs @@ -26,9 +26,9 @@ import GHC.StgToCmm.Closure import GHC.StgToCmm.Hpc import GHC.StgToCmm.Ticky -import Cmm -import CmmUtils -import CLabel +import GHC.Cmm +import GHC.Cmm.Utils +import GHC.Cmm.CLabel import GHC.Stg.Syntax import DynFlags @@ -48,7 +48,7 @@ import BasicTypes import VarSet ( isEmptyDVarSet ) import OrdList -import MkGraph +import 
GHC.Cmm.Graph import Data.IORef import Control.Monad (when,void) diff --git a/compiler/GHC/StgToCmm/ArgRep.hs b/compiler/GHC/StgToCmm/ArgRep.hs index cc2fe8306a..347d908b44 100644 --- a/compiler/GHC/StgToCmm/ArgRep.hs +++ b/compiler/GHC/StgToCmm/ArgRep.hs @@ -19,7 +19,7 @@ import GhcPrelude import GHC.StgToCmm.Closure ( idPrimRep ) -import SMRep ( WordOff ) +import GHC.Runtime.Layout ( WordOff ) import Id ( Id ) import TyCon ( PrimRep(..), primElemRepSizeB ) import BasicTypes ( RepArity ) diff --git a/compiler/GHC/StgToCmm/Bind.hs b/compiler/GHC/StgToCmm/Bind.hs index b1cb34ace7..a78ab5cb41 100644 --- a/compiler/GHC/StgToCmm/Bind.hs +++ b/compiler/GHC/StgToCmm/Bind.hs @@ -28,14 +28,14 @@ import GHC.StgToCmm.Utils import GHC.StgToCmm.Closure import GHC.StgToCmm.Foreign (emitPrimCall) -import MkGraph +import GHC.Cmm.Graph import CoreSyn ( AltCon(..), tickishIsCode ) -import BlockId -import SMRep -import Cmm -import CmmInfo -import CmmUtils -import CLabel +import GHC.Cmm.BlockId +import GHC.Runtime.Layout +import GHC.Cmm +import GHC.Cmm.Info +import GHC.Cmm.Utils +import GHC.Cmm.CLabel import GHC.Stg.Syntax import CostCentre import Id @@ -105,7 +105,7 @@ cgTopRhsClosure dflags rec id ccs upd_flag args body = -- We don't generate the static closure here, because we might -- want to add references to static closures to it later. The - -- static closure is generated by CmmBuildInfoTables.updInfoSRTs, + -- static closure is generated by GHC.Cmm.Info.Build.updInfoSRTs, -- See Note [SRTs], specifically the [FUN] optimisation. ; let fv_details :: [(NonVoid Id, ByteOff)] @@ -622,7 +622,7 @@ emitBlackHoleCode node = do -- unconditionally disabled. -- krc 1/2007 -- Note the eager-blackholing check is here rather than in blackHoleOnEntry, - -- because emitBlackHoleCode is called from CmmParse. + -- because emitBlackHoleCode is called from GHC.Cmm.Parser. 
let eager_blackholing = not (gopt Opt_SccProfilingOn dflags) && gopt Opt_EagerBlackHoling dflags diff --git a/compiler/GHC/StgToCmm/CgUtils.hs b/compiler/GHC/StgToCmm/CgUtils.hs index f3dccd9745..58c46f8fa2 100644 --- a/compiler/GHC/StgToCmm/CgUtils.hs +++ b/compiler/GHC/StgToCmm/CgUtils.hs @@ -19,11 +19,11 @@ module GHC.StgToCmm.CgUtils ( import GhcPrelude import GHC.Platform.Regs -import Cmm -import Hoopl.Block -import Hoopl.Graph -import CmmUtils -import CLabel +import GHC.Cmm +import GHC.Cmm.Dataflow.Block +import GHC.Cmm.Dataflow.Graph +import GHC.Cmm.Utils +import GHC.Cmm.CLabel import DynFlags import Outputable diff --git a/compiler/GHC/StgToCmm/Closure.hs b/compiler/GHC/StgToCmm/Closure.hs index df8cb046c4..724ca6000a 100644 --- a/compiler/GHC/StgToCmm/Closure.hs +++ b/compiler/GHC/StgToCmm/Closure.hs @@ -67,13 +67,13 @@ module GHC.StgToCmm.Closure ( import GhcPrelude import GHC.Stg.Syntax -import SMRep -import Cmm -import PprCmmExpr() -- For Outputable instances +import GHC.Runtime.Layout +import GHC.Cmm +import GHC.Cmm.Ppr.Expr() -- For Outputable instances import CostCentre -import BlockId -import CLabel +import GHC.Cmm.BlockId +import GHC.Cmm.CLabel import Id import IdInfo import DataCon diff --git a/compiler/GHC/StgToCmm/DataCon.hs b/compiler/GHC/StgToCmm/DataCon.hs index 1e929663df..2bbeabace6 100644 --- a/compiler/GHC/StgToCmm/DataCon.hs +++ b/compiler/GHC/StgToCmm/DataCon.hs @@ -29,11 +29,11 @@ import GHC.StgToCmm.Layout import GHC.StgToCmm.Utils import GHC.StgToCmm.Closure -import CmmExpr -import CmmUtils -import CLabel -import MkGraph -import SMRep +import GHC.Cmm.Expr +import GHC.Cmm.Utils +import GHC.Cmm.CLabel +import GHC.Cmm.Graph +import GHC.Runtime.Layout import CostCentre import Module import DataCon diff --git a/compiler/GHC/StgToCmm/Env.hs b/compiler/GHC/StgToCmm/Env.hs index 45b09a3d26..b2c1371840 100644 --- a/compiler/GHC/StgToCmm/Env.hs +++ b/compiler/GHC/StgToCmm/Env.hs @@ -31,14 +31,14 @@ import GHC.StgToCmm.Monad import GHC.StgToCmm.Utils import GHC.StgToCmm.Closure -import CLabel +import GHC.Cmm.CLabel -import BlockId -import CmmExpr -import CmmUtils +import GHC.Cmm.BlockId +import GHC.Cmm.Expr +import GHC.Cmm.Utils import DynFlags import Id -import MkGraph +import GHC.Cmm.Graph import Name import Outputable import GHC.Stg.Syntax diff --git a/compiler/GHC/StgToCmm/Expr.hs b/compiler/GHC/StgToCmm/Expr.hs index 3836aa3d2a..0c2d9b8ae5 100644 --- a/compiler/GHC/StgToCmm/Expr.hs +++ b/compiler/GHC/StgToCmm/Expr.hs @@ -30,10 +30,10 @@ import GHC.StgToCmm.Closure import GHC.Stg.Syntax -import MkGraph -import BlockId -import Cmm hiding ( succ ) -import CmmInfo +import GHC.Cmm.Graph +import GHC.Cmm.BlockId +import GHC.Cmm hiding ( succ ) +import GHC.Cmm.Info import CoreSyn import DataCon import DynFlags ( mAX_PTR_TAG ) diff --git a/compiler/GHC/StgToCmm/ExtCode.hs b/compiler/GHC/StgToCmm/ExtCode.hs index 4a5225eec6..2679ce4992 100644 --- a/compiler/GHC/StgToCmm/ExtCode.hs +++ b/compiler/GHC/StgToCmm/ExtCode.hs @@ -42,11 +42,11 @@ import GhcPrelude import qualified GHC.StgToCmm.Monad as F import GHC.StgToCmm.Monad (FCode, newUnique) -import Cmm -import CLabel -import MkGraph +import GHC.Cmm +import GHC.Cmm.CLabel +import GHC.Cmm.Graph -import BlockId +import GHC.Cmm.BlockId import DynFlags import FastString import Module diff --git a/compiler/GHC/StgToCmm/Foreign.hs b/compiler/GHC/StgToCmm/Foreign.hs index 3ef0872c2e..62a948d13c 100644 --- a/compiler/GHC/StgToCmm/Foreign.hs +++ b/compiler/GHC/StgToCmm/Foreign.hs @@ -9,7 +9,7 @@ module GHC.StgToCmm.Foreign ( 
cgForeignCall, emitPrimCall, emitCCall, - emitForeignCall, -- For CmmParse + emitForeignCall, emitSaveThreadState, saveThreadState, emitLoadThreadState, @@ -28,14 +28,14 @@ import GHC.StgToCmm.Utils import GHC.StgToCmm.Closure import GHC.StgToCmm.Layout -import BlockId (newBlockId) -import Cmm -import CmmUtils -import MkGraph +import GHC.Cmm.BlockId (newBlockId) +import GHC.Cmm +import GHC.Cmm.Utils +import GHC.Cmm.Graph import Type import GHC.Types.RepType -import CLabel -import SMRep +import GHC.Cmm.CLabel +import GHC.Runtime.Layout import ForeignCall import DynFlags import Maybes @@ -202,7 +202,7 @@ emitPrimCall :: [CmmFormal] -> CallishMachOp -> [CmmActual] -> FCode () emitPrimCall res op args = void $ emitForeignCall PlayRisky res (PrimTarget op) args --- alternative entry point, used by CmmParse +-- alternative entry point, used by GHC.Cmm.Parser emitForeignCall :: Safety -> [CmmFormal] -- where to put the results @@ -257,9 +257,9 @@ load_target_into_temp other_target@(PrimTarget _) = -- Note [Register Parameter Passing]). -- -- However, we can't pattern-match on the expression here, because --- this is used in a loop by CmmParse, and testing the expression +-- this is used in a loop by GHC.Cmm.Parser, and testing the expression -- results in a black hole. So we always create a temporary, and rely --- on CmmSink to clean it up later. (Yuck, ToDo). The generated code +-- on GHC.Cmm.Sink to clean it up later. (Yuck, ToDo). The generated code -- ends up being the same, at least for the RTS .cmm code. -- maybe_assign_temp :: CmmExpr -> FCode CmmExpr diff --git a/compiler/GHC/StgToCmm/Heap.hs b/compiler/GHC/StgToCmm/Heap.hs index d36cad5788..492a4460f8 100644 --- a/compiler/GHC/StgToCmm/Heap.hs +++ b/compiler/GHC/StgToCmm/Heap.hs @@ -23,7 +23,7 @@ module GHC.StgToCmm.Heap ( import GhcPrelude hiding ((<*>)) import GHC.Stg.Syntax -import CLabel +import GHC.Cmm.CLabel import GHC.StgToCmm.Layout import GHC.StgToCmm.Utils import GHC.StgToCmm.Monad @@ -32,13 +32,13 @@ import GHC.StgToCmm.Ticky import GHC.StgToCmm.Closure import GHC.StgToCmm.Env -import MkGraph +import GHC.Cmm.Graph -import Hoopl.Label -import SMRep -import BlockId -import Cmm -import CmmUtils +import GHC.Cmm.Dataflow.Label +import GHC.Runtime.Layout +import GHC.Cmm.BlockId +import GHC.Cmm +import GHC.Cmm.Utils import CostCentre import IdInfo( CafInfo(..), mayHaveCafRefs ) import Id ( Id ) @@ -337,7 +337,7 @@ entryHeapCheck cl_info nodeSet arity args code Just (_, ArgGen _) -> False _otherwise -> True --- | lower-level version for CmmParse +-- | lower-level version for GHC.Cmm.Parser entryHeapCheck' :: Bool -- is a known function pattern -> CmmExpr -- expression for the closure pointer -> Int -- Arity -- not same as len args b/c of voids diff --git a/compiler/GHC/StgToCmm/Hpc.hs b/compiler/GHC/StgToCmm/Hpc.hs index e33d39245c..a3f4112206 100644 --- a/compiler/GHC/StgToCmm/Hpc.hs +++ b/compiler/GHC/StgToCmm/Hpc.hs @@ -12,11 +12,11 @@ import GhcPrelude import GHC.StgToCmm.Monad -import MkGraph -import CmmExpr -import CLabel +import GHC.Cmm.Graph +import GHC.Cmm.Expr +import GHC.Cmm.CLabel import Module -import CmmUtils +import GHC.Cmm.Utils import GHC.StgToCmm.Utils import HscTypes import DynFlags diff --git a/compiler/GHC/StgToCmm/Layout.hs b/compiler/GHC/StgToCmm/Layout.hs index 6d7825eb93..e78221de3a 100644 --- a/compiler/GHC/StgToCmm/Layout.hs +++ b/compiler/GHC/StgToCmm/Layout.hs @@ -41,13 +41,13 @@ import GHC.StgToCmm.Ticky import GHC.StgToCmm.Monad import GHC.StgToCmm.Utils -import MkGraph -import SMRep -import BlockId 
-import Cmm -import CmmUtils -import CmmInfo -import CLabel +import GHC.Cmm.Graph +import GHC.Runtime.Layout +import GHC.Cmm.BlockId +import GHC.Cmm +import GHC.Cmm.Utils +import GHC.Cmm.Info +import GHC.Cmm.CLabel import GHC.Stg.Syntax import Id import TyCon ( PrimRep(..), primRepSizeB ) diff --git a/compiler/GHC/StgToCmm/Monad.hs b/compiler/GHC/StgToCmm/Monad.hs index 716cbdab78..4f7d2e1220 100644 --- a/compiler/GHC/StgToCmm/Monad.hs +++ b/compiler/GHC/StgToCmm/Monad.hs @@ -61,14 +61,14 @@ module GHC.StgToCmm.Monad ( import GhcPrelude hiding( sequence, succ ) -import Cmm +import GHC.Cmm import GHC.StgToCmm.Closure import DynFlags -import Hoopl.Collections -import MkGraph -import BlockId -import CLabel -import SMRep +import GHC.Cmm.Dataflow.Collections +import GHC.Cmm.Graph as CmmGraph +import GHC.Cmm.BlockId +import GHC.Cmm.CLabel +import GHC.Runtime.Layout import Module import Id import VarEnv @@ -369,7 +369,7 @@ addCodeBlocksFrom :: CgState -> CgState -> CgState -- Add code blocks from the latter to the former -- (The cgs_stmts will often be empty, but not always; see codeOnly) s1 `addCodeBlocksFrom` s2 - = s1 { cgs_stmts = cgs_stmts s1 MkGraph.<*> cgs_stmts s2, + = s1 { cgs_stmts = cgs_stmts s1 CmmGraph.<*> cgs_stmts s2, cgs_tops = cgs_tops s1 `appOL` cgs_tops s2 } @@ -715,7 +715,7 @@ emitStore l r = emitCgStmt (CgStmt (CmmStore l r)) emit :: CmmAGraph -> FCode () emit ag = do { state <- getState - ; setState $ state { cgs_stmts = cgs_stmts state MkGraph.<*> ag } } + ; setState $ state { cgs_stmts = cgs_stmts state CmmGraph.<*> ag } } emitDecl :: CmmDecl -> FCode () emitDecl decl @@ -743,7 +743,7 @@ emitProcWithStackFrame conv mb_info lbl stk_args args (graph, tscope) True -- do layout = do { dflags <- getDynFlags ; let (offset, live, entry) = mkCallEntry dflags conv args stk_args - graph' = entry MkGraph.<*> graph + graph' = entry CmmGraph.<*> graph ; emitProc mb_info lbl live (graph', tscope) offset True } emitProcWithStackFrame _ _ _ _ _ _ _ = panic "emitProcWithStackFrame" diff --git a/compiler/GHC/StgToCmm/Prim.hs b/compiler/GHC/StgToCmm/Prim.hs index e469e15a5d..06264099df 100644 --- a/compiler/GHC/StgToCmm/Prim.hs +++ b/compiler/GHC/StgToCmm/Prim.hs @@ -36,17 +36,17 @@ import GHC.StgToCmm.Prof ( costCentreFrom ) import DynFlags import GHC.Platform import BasicTypes -import BlockId -import MkGraph +import GHC.Cmm.BlockId +import GHC.Cmm.Graph import GHC.Stg.Syntax -import Cmm +import GHC.Cmm import Module ( rtsUnitId ) import Type ( Type, tyConAppTyCon ) import TyCon -import CLabel -import CmmUtils +import GHC.Cmm.CLabel +import GHC.Cmm.Utils import PrimOp -import SMRep +import GHC.Runtime.Layout import FastString import Outputable import Util @@ -1525,7 +1525,7 @@ emitPrimOp dflags = \case -- `quot` and `rem` with constant divisor can be implemented with fast bit-ops -- (shift, .&.). -- - -- Currently we only support optimization (performed in CmmOpt) when the + -- Currently we only support optimization (performed in GHC.Cmm.Opt) when the -- constant is a power of 2. #9041 tracks the implementation of the general -- optimization. 
-- diff --git a/compiler/GHC/StgToCmm/Prof.hs b/compiler/GHC/StgToCmm/Prof.hs index 4743b79622..cf5ce5acfb 100644 --- a/compiler/GHC/StgToCmm/Prof.hs +++ b/compiler/GHC/StgToCmm/Prof.hs @@ -28,12 +28,12 @@ import GhcPrelude import GHC.StgToCmm.Closure import GHC.StgToCmm.Utils import GHC.StgToCmm.Monad -import SMRep +import GHC.Runtime.Layout -import MkGraph -import Cmm -import CmmUtils -import CLabel +import GHC.Cmm.Graph +import GHC.Cmm +import GHC.Cmm.Utils +import GHC.Cmm.CLabel import CostCentre import DynFlags diff --git a/compiler/GHC/StgToCmm/Ticky.hs b/compiler/GHC/StgToCmm/Ticky.hs index 9eeb134cc9..6e2e2d3a6b 100644 --- a/compiler/GHC/StgToCmm/Ticky.hs +++ b/compiler/GHC/StgToCmm/Ticky.hs @@ -23,9 +23,9 @@ Some of the relevant source files: * some codeGen/ modules import this one - * this module imports cmm/CLabel.hs to manage labels + * this module imports GHC.Cmm.CLabel to manage labels - * cmm/CmmParse.y expands some macros using generators defined in + * GHC.Cmm.Parser expands some macros using generators defined in this module * includes/stg/Ticky.h declares all of the global counters @@ -112,11 +112,11 @@ import GHC.StgToCmm.Utils import GHC.StgToCmm.Monad import GHC.Stg.Syntax -import CmmExpr -import MkGraph -import CmmUtils -import CLabel -import SMRep +import GHC.Cmm.Expr +import GHC.Cmm.Graph +import GHC.Cmm.Utils +import GHC.Cmm.CLabel +import GHC.Runtime.Layout import Module import Name @@ -517,7 +517,7 @@ tickyAllocHeap genuine hp -------------------------------------------------------------------------------- --- these three are only called from CmmParse.y (ie ultimately from the RTS) +-- these three are only called from GHC.Cmm.Parser (ie ultimately from the RTS) -- the units are bytes diff --git a/compiler/GHC/StgToCmm/Utils.hs b/compiler/GHC/StgToCmm/Utils.hs index 34fb93468c..7a784ea85c 100644 --- a/compiler/GHC/StgToCmm/Utils.hs +++ b/compiler/GHC/StgToCmm/Utils.hs @@ -52,20 +52,20 @@ import GhcPrelude import GHC.StgToCmm.Monad import GHC.StgToCmm.Closure -import Cmm -import BlockId -import MkGraph +import GHC.Cmm +import GHC.Cmm.BlockId +import GHC.Cmm.Graph as CmmGraph import GHC.Platform.Regs -import CLabel -import CmmUtils -import CmmSwitch +import GHC.Cmm.CLabel +import GHC.Cmm.Utils +import GHC.Cmm.Switch import GHC.StgToCmm.CgUtils import ForeignCall import IdInfo import Type import TyCon -import SMRep +import GHC.Runtime.Layout import Module import Literal import Digraph @@ -458,8 +458,8 @@ mk_discrete_switch _ _tag_expr [(_tag,lbl)] Nothing _ -- In that situation we can be sure the (:) case -- can't happen, so no need to test --- SOMETHING MORE COMPLICATED: defer to CmmImplementSwitchPlans --- See Note [Cmm Switches, the general plan] in CmmSwitch +-- SOMETHING MORE COMPLICATED: defer to GHC.Cmm.Switch.Implement +-- See Note [Cmm Switches, the general plan] in GHC.Cmm.Switch mk_discrete_switch signed tag_expr branches mb_deflt range = mkSwitch tag_expr $ mkSwitchTargets signed range mb_deflt (M.fromList branches) @@ -568,7 +568,7 @@ label_code :: BlockId -> CmmAGraphScoped -> FCode BlockId -- and returns L label_code join_lbl (code,tsc) = do lbl <- newBlockId - emitOutOfLine lbl (code MkGraph.<*> mkBranch join_lbl, tsc) + emitOutOfLine lbl (code CmmGraph.<*> mkBranch join_lbl, tsc) return lbl -------------- |
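A closing editorial aside on the new GHC.Runtime.Layout module above: its card-table arithmetic (card, cardRoundUp, cardTableSizeW) is easy to sanity-check in isolation. Below is a standalone sketch with the DynFlags plumbing dropped; the value 7 for mUT_ARR_PTRS_CARD_BITS (one card per 2^7 = 128 elements) and the 64-bit word size are assumptions of this sketch, not read from the patch:

import Data.Bits (shiftL, shiftR)

cardBits, wordSize :: Int
cardBits = 7   -- assumed value of mUT_ARR_PTRS_CARD_BITS
wordSize = 8   -- assumed 64-bit target

-- The card index covering a given element index.
card :: Int -> Int
card i = i `shiftR` cardBits

-- Number of cards needed for the given element count, rounding up.
cardRoundUp :: Int -> Int
cardRoundUp i = card (i + ((1 `shiftL` cardBits) - 1))

-- Card table size in words: one byte per card, rounded up to whole words.
cardTableSizeW :: Int -> Int
cardTableSizeW elems = (cardRoundUp elems + wordSize - 1) `quot` wordSize

main :: IO ()
main = print (map cardTableSizeW [1, 1024, 2048])  -- prints [1,1,2]

The last figure shows the growth behaviour: up to 1024 elements fit in 8 cards (one word of card table), while 2048 elements need 16 cards and hence two words.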