diff options
author | dias@eecs.harvard.edu <unknown> | 2008-08-14 12:40:27 +0000 |
---|---|---|
committer | dias@eecs.harvard.edu <unknown> | 2008-08-14 12:40:27 +0000 |
commit | 176fa33f17dd78355cc572e006d2ab26898e2c69 (patch) | |
tree | 54f951a515eac57626f8f15d57f7bc75f1096a7a /compiler/cmm | |
parent | e06951a75a1f519e8f015880c363a8dedc08ff9c (diff) | |
download | haskell-176fa33f17dd78355cc572e006d2ab26898e2c69.tar.gz |
Merging in the new codegen branch
This merge does not turn on the new codegen (which only compiles
a select few programs at this point),
but it does introduce some changes to the old code generator.
The high bits:
1. The Rep Swamp patch is finally here.
The highlight is that the representation of types at the
machine level has changed.
Consequently, this patch contains updates across several back ends.
2. The new Stg -> Cmm path is here, although it appears to have a
fair number of bugs lurking.
3. Many improvements along the CmmCPSZ path, including:
o stack layout
o some code for infotables, half of which is right and half wrong
o proc-point splitting
Diffstat (limited to 'compiler/cmm')
37 files changed, 3248 insertions, 2368 deletions
diff --git a/compiler/cmm/BlockId.hs b/compiler/cmm/BlockId.hs index fb9b7cab8f..2e4d452e75 100644 --- a/compiler/cmm/BlockId.hs +++ b/compiler/cmm/BlockId.hs @@ -1,9 +1,13 @@ module BlockId ( BlockId(..), mkBlockId -- ToDo: BlockId should be abstract, but it isn't yet - , BlockEnv, emptyBlockEnv, lookupBlockEnv, extendBlockEnv, mkBlockEnv + , BlockEnv, emptyBlockEnv, lookupBlockEnv, extendBlockEnv, mkBlockEnv, mapBlockEnv , BlockSet, emptyBlockSet, elemBlockSet, extendBlockSet, sizeBlockSet, mkBlockSet + , foldBlockEnv, blockLbl, infoTblLbl ) where +import CLabel +import IdInfo +import Name import Outputable import UniqFM import Unique @@ -36,6 +40,11 @@ instance Show BlockId where instance Outputable BlockId where ppr = ppr . getUnique +blockLbl :: BlockId -> CLabel +blockLbl id = mkEntryLabel (mkFCallName (getUnique id) "block") NoCafRefs + +infoTblLbl :: BlockId -> CLabel +infoTblLbl id = mkInfoTableLabel (mkFCallName (getUnique id) "block") NoCafRefs type BlockEnv a = UniqFM {- BlockId -} a emptyBlockEnv :: BlockEnv a @@ -46,6 +55,10 @@ lookupBlockEnv :: BlockEnv a -> BlockId -> Maybe a lookupBlockEnv = lookupUFM extendBlockEnv :: BlockEnv a -> BlockId -> a -> BlockEnv a extendBlockEnv = addToUFM +mapBlockEnv :: (a -> b) -> BlockEnv a -> BlockEnv b +mapBlockEnv = mapUFM +foldBlockEnv :: (BlockId -> a -> b -> b) -> b -> BlockEnv a -> b +foldBlockEnv f = foldUFM_Directly (\u x y -> f (mkBlockId u) x y) type BlockSet = UniqSet BlockId emptyBlockSet :: BlockSet diff --git a/compiler/cmm/CLabel.hs b/compiler/cmm/CLabel.hs index 1c338243ab..ffa93fb356 100644 --- a/compiler/cmm/CLabel.hs +++ b/compiler/cmm/CLabel.hs @@ -51,6 +51,7 @@ module CLabel ( mkModuleInitLabel, mkPlainModuleInitLabel, + mkModuleInitTableLabel, mkSplitMarkerLabel, mkDirty_MUT_VAR_Label, @@ -67,6 +68,7 @@ module CLabel ( mkRtsSlowTickyCtrLabel, moduleRegdLabel, + moduleRegTableLabel, mkSelectorInfoLabel, mkSelectorEntryLabel, @@ -77,6 +79,7 @@ module CLabel ( mkRtsRetLabel, mkRtsCodeLabel, mkRtsDataLabel, + mkRtsGcPtrLabel, mkRtsInfoLabelFS, mkRtsEntryLabelFS, @@ -103,16 +106,18 @@ module CLabel ( mkHpcTicksLabel, mkHpcModuleNameLabel, + hasCAF, infoLblToEntryLbl, entryLblToInfoLbl, needsCDecl, isAsmTemp, maybeAsmTemp, externallyVisibleCLabel, isMathFun, - CLabelType(..), labelType, labelDynamic, + isCFunctionLabel, isGcPtrLabel, labelDynamic, pprCLabel ) where #include "HsVersions.h" +import IdInfo import StaticFlags import Packages import DataCon @@ -155,6 +160,7 @@ CLabel is an abstract type that supports the following operations: data CLabel = IdLabel -- A family of labels related to the Name -- definition of a particular Id or Con + CafInfo IdLabelInfo | CaseLabel -- A family of labels related to a particular @@ -177,7 +183,10 @@ data CLabel -- because we don't always recompile modules which depend on a module -- whose version has changed. - | PlainModuleInitLabel -- without the vesrion & way info + | PlainModuleInitLabel -- without the version & way info + Module + + | ModuleInitTableLabel -- table of imported modules to init Module | ModuleRegdLabel @@ -262,7 +271,8 @@ data RtsLabelInfo | RtsEntry LitString -- misc rts entry points | RtsRetInfo LitString -- misc rts ret info tables | RtsRet LitString -- misc rts return points - | RtsData LitString -- misc rts data bits, eg CHARLIKE_closure + | RtsData LitString -- misc rts data bits + | RtsGcPtr LitString -- GcPtrs eg CHARLIKE_closure | RtsCode LitString -- misc rts code | RtsInfoFS FastString -- misc rts info tables @@ -292,29 +302,29 @@ data DynamicLinkerLabelInfo -- Constructing CLabels -- These are always local: -mkSRTLabel name = IdLabel name SRT -mkSlowEntryLabel name = IdLabel name Slow -mkRednCountsLabel name = IdLabel name RednCounts +mkSRTLabel name c = IdLabel name c SRT +mkSlowEntryLabel name c = IdLabel name c Slow +mkRednCountsLabel name c = IdLabel name c RednCounts -- These have local & (possibly) external variants: -mkLocalClosureLabel name = IdLabel name Closure -mkLocalInfoTableLabel name = IdLabel name InfoTable -mkLocalEntryLabel name = IdLabel name Entry -mkLocalClosureTableLabel name = IdLabel name ClosureTable - -mkClosureLabel name = IdLabel name Closure -mkInfoTableLabel name = IdLabel name InfoTable -mkEntryLabel name = IdLabel name Entry -mkClosureTableLabel name = IdLabel name ClosureTable -mkLocalConInfoTableLabel con = IdLabel con ConInfoTable -mkLocalConEntryLabel con = IdLabel con ConEntry -mkLocalStaticInfoTableLabel con = IdLabel con StaticInfoTable -mkLocalStaticConEntryLabel con = IdLabel con StaticConEntry -mkConInfoTableLabel name = IdLabel name ConInfoTable -mkStaticInfoTableLabel name = IdLabel name StaticInfoTable - -mkConEntryLabel name = IdLabel name ConEntry -mkStaticConEntryLabel name = IdLabel name StaticConEntry +mkLocalClosureLabel name c = IdLabel name c Closure +mkLocalInfoTableLabel name c = IdLabel name c InfoTable +mkLocalEntryLabel name c = IdLabel name c Entry +mkLocalClosureTableLabel name c = IdLabel name c ClosureTable + +mkClosureLabel name c = IdLabel name c Closure +mkInfoTableLabel name c = IdLabel name c InfoTable +mkEntryLabel name c = IdLabel name c Entry +mkClosureTableLabel name c = IdLabel name c ClosureTable +mkLocalConInfoTableLabel c con = IdLabel con c ConInfoTable +mkLocalConEntryLabel c con = IdLabel con c ConEntry +mkLocalStaticInfoTableLabel c con = IdLabel con c StaticInfoTable +mkLocalStaticConEntryLabel c con = IdLabel con c StaticConEntry +mkConInfoTableLabel name c = IdLabel name c ConInfoTable +mkStaticInfoTableLabel name c = IdLabel name c StaticInfoTable + +mkConEntryLabel name c = IdLabel name c ConEntry +mkStaticConEntryLabel name c = IdLabel name c StaticConEntry mkLargeSRTLabel uniq = LargeSRTLabel uniq mkBitmapLabel uniq = LargeBitmapLabel uniq @@ -334,6 +344,9 @@ mkModuleInitLabel mod way = ModuleInitLabel mod way mkPlainModuleInitLabel :: Module -> CLabel mkPlainModuleInitLabel mod = PlainModuleInitLabel mod +mkModuleInitTableLabel :: Module -> CLabel +mkModuleInitTableLabel mod = ModuleInitTableLabel mod + -- Some fixed runtime system labels mkSplitMarkerLabel = RtsLabel (RtsCode (sLit "__stg_split_marker")) @@ -350,6 +363,7 @@ mkCAFBlackHoleInfoTableLabel = RtsLabel (RtsInfo (sLit "stg_CAF_BLACKHOLE")) mkRtsPrimOpLabel primop = RtsLabel (RtsPrimOp primop) moduleRegdLabel = ModuleRegdLabel +moduleRegTableLabel = ModuleInitTableLabel mkSelectorInfoLabel upd off = RtsLabel (RtsSelectorInfoTable upd off) mkSelectorEntryLabel upd off = RtsLabel (RtsSelectorEntry upd off) @@ -383,6 +397,7 @@ mkRtsRetInfoLabel str = RtsLabel (RtsRetInfo str) mkRtsRetLabel str = RtsLabel (RtsRet str) mkRtsCodeLabel str = RtsLabel (RtsCode str) mkRtsDataLabel str = RtsLabel (RtsData str) +mkRtsGcPtrLabel str = RtsLabel (RtsGcPtr str) mkRtsInfoLabelFS str = RtsLabel (RtsInfoFS str) mkRtsEntryLabelFS str = RtsLabel (RtsEntryFS str) @@ -422,9 +437,9 @@ mkDeadStripPreventer lbl = DeadStripPreventer lbl -- Converting between info labels and entry/ret labels. infoLblToEntryLbl :: CLabel -> CLabel -infoLblToEntryLbl (IdLabel n InfoTable) = IdLabel n Entry -infoLblToEntryLbl (IdLabel n ConInfoTable) = IdLabel n ConEntry -infoLblToEntryLbl (IdLabel n StaticInfoTable) = IdLabel n StaticConEntry +infoLblToEntryLbl (IdLabel n c InfoTable) = IdLabel n c Entry +infoLblToEntryLbl (IdLabel n c ConInfoTable) = IdLabel n c ConEntry +infoLblToEntryLbl (IdLabel n c StaticInfoTable) = IdLabel n c StaticConEntry infoLblToEntryLbl (CaseLabel n CaseReturnInfo) = CaseLabel n CaseReturnPt infoLblToEntryLbl (RtsLabel (RtsInfo s)) = RtsLabel (RtsEntry s) infoLblToEntryLbl (RtsLabel (RtsRetInfo s)) = RtsLabel (RtsRet s) @@ -433,9 +448,9 @@ infoLblToEntryLbl (RtsLabel (RtsRetInfoFS s)) = RtsLabel (RtsRetFS s) infoLblToEntryLbl _ = panic "CLabel.infoLblToEntryLbl" entryLblToInfoLbl :: CLabel -> CLabel -entryLblToInfoLbl (IdLabel n Entry) = IdLabel n InfoTable -entryLblToInfoLbl (IdLabel n ConEntry) = IdLabel n ConInfoTable -entryLblToInfoLbl (IdLabel n StaticConEntry) = IdLabel n StaticInfoTable +entryLblToInfoLbl (IdLabel n c Entry) = IdLabel n c InfoTable +entryLblToInfoLbl (IdLabel n c ConEntry) = IdLabel n c ConInfoTable +entryLblToInfoLbl (IdLabel n c StaticConEntry) = IdLabel n c StaticInfoTable entryLblToInfoLbl (CaseLabel n CaseReturnPt) = CaseLabel n CaseReturnInfo entryLblToInfoLbl (RtsLabel (RtsEntry s)) = RtsLabel (RtsInfo s) entryLblToInfoLbl (RtsLabel (RtsRet s)) = RtsLabel (RtsRetInfo s) @@ -444,6 +459,12 @@ entryLblToInfoLbl (RtsLabel (RtsRetFS s)) = RtsLabel (RtsRetInfoFS s) entryLblToInfoLbl l = pprPanic "CLabel.entryLblToInfoLbl" (pprCLabel l) -- ----------------------------------------------------------------------------- +-- Does a CLabel refer to a CAF? +hasCAF :: CLabel -> Bool +hasCAF (IdLabel _ MayHaveCafRefs Closure) = True +hasCAF _ = False + +-- ----------------------------------------------------------------------------- -- Does a CLabel need declaring before use or not? -- -- See wiki:Commentary/Compiler/Backends/PprC#Prototypes @@ -452,13 +473,14 @@ needsCDecl :: CLabel -> Bool -- False <=> it's pre-declared; don't bother -- don't bother declaring SRT & Bitmap labels, we always make sure -- they are defined before use. -needsCDecl (IdLabel _ SRT) = False +needsCDecl (IdLabel _ _ SRT) = False needsCDecl (LargeSRTLabel _) = False needsCDecl (LargeBitmapLabel _) = False -needsCDecl (IdLabel _ _) = True +needsCDecl (IdLabel _ _ _) = True needsCDecl (CaseLabel _ _) = True needsCDecl (ModuleInitLabel _ _) = True needsCDecl (PlainModuleInitLabel _) = True +needsCDecl (ModuleInitTableLabel _) = True needsCDecl ModuleRegdLabel = False needsCDecl (StringLitLabel _) = False @@ -520,12 +542,11 @@ externallyVisibleCLabel (StringLitLabel _) = False externallyVisibleCLabel (AsmTempLabel _) = False externallyVisibleCLabel (ModuleInitLabel _ _) = True externallyVisibleCLabel (PlainModuleInitLabel _)= True +externallyVisibleCLabel (ModuleInitTableLabel _)= False externallyVisibleCLabel ModuleRegdLabel = False externallyVisibleCLabel (RtsLabel _) = True externallyVisibleCLabel (ForeignLabel _ _ _) = True -externallyVisibleCLabel (IdLabel name SRT) = False - -- SRTs don't need to be external -externallyVisibleCLabel (IdLabel name _) = isExternalName name +externallyVisibleCLabel (IdLabel name _ _) = isExternalName name externallyVisibleCLabel (CC_Label _) = True externallyVisibleCLabel (CCS_Label _) = True externallyVisibleCLabel (DynamicLinkerLabel _ _) = False @@ -540,13 +561,25 @@ externallyVisibleCLabel (LargeSRTLabel _) = False -- For generating correct types in label declarations: data CLabelType - = CodeLabel - | DataLabel + = CodeLabel -- Address of some executable instructions + | DataLabel -- Address of data, not a GC ptr + | GcPtrLabel -- Address of a (presumably static) GC object + +isCFunctionLabel :: CLabel -> Bool +isCFunctionLabel lbl = case labelType lbl of + CodeLabel -> True + _other -> False + +isGcPtrLabel :: CLabel -> Bool +isGcPtrLabel lbl = case labelType lbl of + GcPtrLabel -> True + _other -> False labelType :: CLabel -> CLabelType labelType (RtsLabel (RtsSelectorInfoTable _ _)) = DataLabel labelType (RtsLabel (RtsApInfoTable _ _)) = DataLabel labelType (RtsLabel (RtsData _)) = DataLabel +labelType (RtsLabel (RtsGcPtr _)) = GcPtrLabel labelType (RtsLabel (RtsCode _)) = CodeLabel labelType (RtsLabel (RtsInfo _)) = DataLabel labelType (RtsLabel (RtsEntry _)) = CodeLabel @@ -563,20 +596,19 @@ labelType (CaseLabel _ CaseReturnInfo) = DataLabel labelType (CaseLabel _ _) = CodeLabel labelType (ModuleInitLabel _ _) = CodeLabel labelType (PlainModuleInitLabel _) = CodeLabel +labelType (ModuleInitTableLabel _) = DataLabel labelType (LargeSRTLabel _) = DataLabel labelType (LargeBitmapLabel _) = DataLabel - -labelType (IdLabel _ info) = idInfoLabelType info -labelType _ = DataLabel +labelType (IdLabel _ _ info) = idInfoLabelType info +labelType _ = DataLabel idInfoLabelType info = case info of InfoTable -> DataLabel - Closure -> DataLabel + Closure -> GcPtrLabel ConInfoTable -> DataLabel StaticInfoTable -> DataLabel ClosureTable -> DataLabel --- krc: aie! a ticky counter label is data RednCounts -> DataLabel _ -> CodeLabel @@ -593,7 +625,7 @@ labelDynamic :: PackageId -> CLabel -> Bool labelDynamic this_pkg lbl = case lbl of RtsLabel _ -> not opt_Static && (this_pkg /= rtsPackageId) -- i.e., is the RTS in a DLL or not? - IdLabel n k -> isDllName this_pkg n + IdLabel n _ k -> isDllName this_pkg n #if mingw32_TARGET_OS ForeignLabel _ _ d -> d #else @@ -603,6 +635,7 @@ labelDynamic this_pkg lbl = #endif ModuleInitLabel m _ -> not opt_Static && this_pkg /= (modulePackageId m) PlainModuleInitLabel m -> not opt_Static && this_pkg /= (modulePackageId m) + ModuleInitTableLabel m -> not opt_Static && this_pkg /= (modulePackageId m) -- Note that DynamicLinkerLabels do NOT require dynamic linking themselves. _ -> False @@ -720,6 +753,7 @@ pprCLbl (LargeBitmapLabel u) = text "b" <> pprUnique u <> pp_cSEP <> ptext (sLi pprCLbl (RtsLabel (RtsCode str)) = ptext str pprCLbl (RtsLabel (RtsData str)) = ptext str +pprCLbl (RtsLabel (RtsGcPtr str)) = ptext str pprCLbl (RtsLabel (RtsCodeFS str)) = ftext str pprCLbl (RtsLabel (RtsDataFS str)) = ftext str @@ -789,7 +823,7 @@ pprCLbl ModuleRegdLabel pprCLbl (ForeignLabel str _ _) = ftext str -pprCLbl (IdLabel name flavor) = ppr name <> ppIdFlavor flavor +pprCLbl (IdLabel name _ flavor) = ppr name <> ppIdFlavor flavor pprCLbl (CC_Label cc) = ppr cc pprCLbl (CCS_Label ccs) = ppr ccs @@ -799,6 +833,8 @@ pprCLbl (ModuleInitLabel mod way) <> char '_' <> text way pprCLbl (PlainModuleInitLabel mod) = ptext (sLit "__stginit_") <> ppr mod +pprCLbl (ModuleInitTableLabel mod) + = ptext (sLit "__stginittable_") <> ppr mod pprCLbl (HpcTicksLabel mod) = ptext (sLit "_hpc_tickboxes_") <> ppr mod <> ptext (sLit "_hpc") diff --git a/compiler/cmm/Cmm.hs b/compiler/cmm/Cmm.hs index 9dcaf8447e..5e52a5786c 100644 --- a/compiler/cmm/Cmm.hs +++ b/compiler/cmm/Cmm.hs @@ -7,21 +7,21 @@ ----------------------------------------------------------------------------- module Cmm ( - GenCmm(..), Cmm, RawCmm, - GenCmmTop(..), CmmTop, RawCmmTop, - ListGraph(..), + GenCmm(..), Cmm, RawCmm, + GenCmmTop(..), CmmTop, RawCmmTop, + ListGraph(..), cmmMapGraph, cmmTopMapGraph, cmmMapGraphM, cmmTopMapGraphM, - CmmInfo(..), UpdateFrame(..), + CmmInfo(..), UpdateFrame(..), CmmInfoTable(..), ClosureTypeInfo(..), ProfilingInfo(..), ClosureTypeTag, GenBasicBlock(..), CmmBasicBlock, blockId, blockStmts, mapBlockStmts, CmmReturnInfo(..), - CmmStmt(..), CmmActual, CmmActuals, CmmFormal, CmmFormals, CmmKind, - CmmFormalsWithoutKinds, CmmFormalWithoutKind, - CmmKinded(..), + CmmStmt(..), CmmActual, CmmActuals, CmmFormal, CmmFormals, + HintedCmmFormal, HintedCmmFormals, HintedCmmActual, HintedCmmActuals, CmmSafety(..), - CmmCallTarget(..), - CmmStatic(..), Section(..), + CmmCallTarget(..), CallishMachOp(..), pprCallishMachOp, + ForeignHint(..), CmmHinted(..), + CmmStatic(..), Section(..), module CmmExpr, ) where @@ -29,10 +29,10 @@ module Cmm ( import BlockId import CmmExpr -import MachOp import CLabel import ForeignCall import SMRep + import ClosureInfo import Outputable import FastString @@ -46,7 +46,7 @@ import Data.Word -- with assembly-language labels. ----------------------------------------------------------------------------- --- Cmm, CmmTop, CmmBasicBlock +-- Cmm, CmmTop, CmmBasicBlock ----------------------------------------------------------------------------- -- A file is a list of top-level chunks. These may be arbitrarily @@ -59,7 +59,7 @@ import Data.Word -- -- We expect there to be two main instances of this type: -- (a) C--, i.e. populated with various C-- constructs --- (Cmm and RawCmm below) +-- (Cmm and RawCmm below) -- (b) Native code, populated with data/instructions -- -- A second family of instances based on ZipCfg is work in progress. @@ -72,7 +72,7 @@ data GenCmmTop d h g = CmmProc -- A procedure h -- Extra header such as the info table CLabel -- Used to generate both info & entry labels - CmmFormalsWithoutKinds -- Argument locals live on entry (C-- procedure params) + CmmFormals -- Argument locals live on entry (C-- procedure params) -- XXX Odd that there are no kinds, but there you are ---NR g -- Control-flow graph for the procedure's code @@ -164,11 +164,11 @@ data CmmInfoTable data ClosureTypeInfo = ConstrInfo ClosureLayout ConstrTag ConstrDescription - | FunInfo ClosureLayout C_SRT FunType FunArity ArgDescr SlowEntry - | ThunkInfo ClosureLayout C_SRT + | FunInfo ClosureLayout C_SRT FunArity ArgDescr SlowEntry + | ThunkInfo ClosureLayout C_SRT | ThunkSelectorInfo SelectorOffset C_SRT | ContInfo - [Maybe LocalReg] -- Forced stack parameters + [Maybe LocalReg] -- stack layout C_SRT data CmmReturnInfo = CmmMayReturn @@ -180,7 +180,6 @@ type ClosureTypeTag = StgHalfWord type ClosureLayout = (StgHalfWord, StgHalfWord) -- ptrs, nptrs type ConstrTag = StgHalfWord type ConstrDescription = CmmLit -type FunType = StgHalfWord type FunArity = StgHalfWord type SlowEntry = CmmLit -- We would like this to be a CLabel but @@ -201,19 +200,19 @@ data UpdateFrame = -- control to a new function. ----------------------------------------------------------------------------- -data CmmStmt +data CmmStmt -- Old-style = CmmNop | CmmComment FastString | CmmAssign CmmReg CmmExpr -- Assign to register | CmmStore CmmExpr CmmExpr -- Assign to memory location. Size is - -- given by cmmExprRep of the rhs. + -- given by cmmExprType of the rhs. | CmmCall -- A call (forign, native or primitive), with CmmCallTarget - CmmFormals -- zero or more results - CmmActuals -- zero or more arguments + HintedCmmFormals -- zero or more results + HintedCmmActuals -- zero or more arguments CmmSafety -- whether to build a continuation CmmReturnInfo @@ -228,27 +227,27 @@ data CmmStmt -- Undefined outside range, and when there's a Nothing | CmmJump CmmExpr -- Jump to another C-- function, - CmmActuals -- with these parameters. + HintedCmmActuals -- with these parameters. (parameters never used) | CmmReturn -- Return from a native C-- function, - CmmActuals -- with these return values. + HintedCmmActuals -- with these return values. (parameters never used) -type CmmKind = MachHint -data CmmKinded a = CmmKinded { kindlessCmm :: a, cmmKind :: CmmKind } - deriving (Eq) -type CmmActual = CmmKinded CmmExpr -type CmmFormal = CmmKinded LocalReg +type CmmActual = CmmExpr +type CmmFormal = LocalReg type CmmActuals = [CmmActual] type CmmFormals = [CmmFormal] -type CmmFormalWithoutKind = LocalReg -type CmmFormalsWithoutKinds = [CmmFormalWithoutKind] + +data CmmHinted a = CmmHinted { hintlessCmm :: a, cmmHint :: ForeignHint } + deriving( Eq ) + +type HintedCmmActuals = [HintedCmmActual] +type HintedCmmFormals = [HintedCmmFormal] +type HintedCmmFormal = CmmHinted CmmFormal +type HintedCmmActual = CmmHinted CmmActual data CmmSafety = CmmUnsafe | CmmSafe C_SRT -- | enable us to fold used registers over 'CmmActuals' and 'CmmFormals' -instance UserOfLocalRegs a => UserOfLocalRegs (CmmKinded a) where - foldRegsUsed f set (CmmKinded a _) = foldRegsUsed f set a - instance UserOfLocalRegs CmmStmt where foldRegsUsed f set s = stmt s set where stmt (CmmNop) = id @@ -267,13 +266,18 @@ instance UserOfLocalRegs CmmCallTarget where foldRegsUsed f set (CmmCallee e _) = foldRegsUsed f set e foldRegsUsed _ set (CmmPrim {}) = set -instance DefinerOfLocalRegs a => DefinerOfLocalRegs (CmmKinded a) where - foldRegsDefd f z (CmmKinded x _) = foldRegsDefd f z x +instance UserOfSlots CmmCallTarget where + foldSlotsUsed f set (CmmCallee e _) = foldSlotsUsed f set e + foldSlotsUsed _ set (CmmPrim {}) = set ---just look like a tuple, since it was a tuple before --- ... is that a good idea? --Isaac Dupree -instance (Outputable a) => Outputable (CmmKinded a) where - ppr (CmmKinded a k) = ppr (a, k) +instance UserOfLocalRegs a => UserOfLocalRegs (CmmHinted a) where + foldRegsUsed f set a = foldRegsUsed f set (hintlessCmm a) + +instance UserOfSlots a => UserOfSlots (CmmHinted a) where + foldSlotsUsed f set a = foldSlotsUsed f set (hintlessCmm a) + +instance DefinerOfLocalRegs a => DefinerOfLocalRegs (CmmHinted a) where + foldRegsDefd f set a = foldRegsDefd f set (hintlessCmm a) {- Discussion @@ -332,6 +336,51 @@ data CmmCallTarget -- code by the backend. deriving Eq + +data ForeignHint + = NoHint | AddrHint | SignedHint + deriving( Eq ) + -- Used to give extra per-argument or per-result + -- information needed by foreign calling conventions + + +-- CallishMachOps tend to be implemented by foreign calls in some backends, +-- so we separate them out. In Cmm, these can only occur in a +-- statement position, in contrast to an ordinary MachOp which can occur +-- anywhere in an expression. +data CallishMachOp + = MO_F64_Pwr + | MO_F64_Sin + | MO_F64_Cos + | MO_F64_Tan + | MO_F64_Sinh + | MO_F64_Cosh + | MO_F64_Tanh + | MO_F64_Asin + | MO_F64_Acos + | MO_F64_Atan + | MO_F64_Log + | MO_F64_Exp + | MO_F64_Sqrt + | MO_F32_Pwr + | MO_F32_Sin + | MO_F32_Cos + | MO_F32_Tan + | MO_F32_Sinh + | MO_F32_Cosh + | MO_F32_Tanh + | MO_F32_Asin + | MO_F32_Acos + | MO_F32_Atan + | MO_F32_Log + | MO_F32_Exp + | MO_F32_Sqrt + | MO_WriteBarrier + deriving (Eq, Show) + +pprCallishMachOp :: CallishMachOp -> SDoc +pprCallishMachOp mo = text (show mo) + ----------------------------------------------------------------------------- -- Static Data ----------------------------------------------------------------------------- diff --git a/compiler/cmm/CmmBrokenBlock.hs b/compiler/cmm/CmmBrokenBlock.hs index aa8dbf8ac6..ffb7f025af 100644 --- a/compiler/cmm/CmmBrokenBlock.hs +++ b/compiler/cmm/CmmBrokenBlock.hs @@ -15,7 +15,7 @@ module CmmBrokenBlock ( adaptBlockToFormat, selectContinuations, ContFormat, - makeContinuationEntries, + makeContinuationEntries ) where #include "HsVersions.h" @@ -24,7 +24,6 @@ import BlockId import Cmm import CmmUtils import CLabel -import MachOp (MachHint(..)) import CgUtils (callerSaveVolatileRegs) import ClosureInfo @@ -69,14 +68,14 @@ data BrokenBlock -- | How a block could be entered -- See Note [An example of CPS conversion] data BlockEntryInfo - = FunctionEntry CmmInfo CLabel CmmFormalsWithoutKinds + = FunctionEntry CmmInfo CLabel CmmFormals -- ^ Block is the beginning of a function, parameters are: -- 1. Function header info -- 2. The function name -- 3. Aguments to function -- Only the formal parameters are live - | ContinuationEntry CmmFormalsWithoutKinds C_SRT Bool + | ContinuationEntry CmmFormals C_SRT Bool -- ^ Return point of a function call, parameters are: -- 1. return values (argument to continuation) -- 2. SRT for the continuation's info table @@ -124,7 +123,7 @@ f2(x, y) { // ProcPointEntry -} -data ContFormat = ContFormat CmmFormals C_SRT Bool +data ContFormat = ContFormat HintedCmmFormals C_SRT Bool -- ^ Arguments -- 1. return values (argument to continuation) -- 2. SRT for the continuation's info table @@ -138,15 +137,15 @@ data FinalStmt = FinalBranch BlockId -- ^ Same as 'CmmBranch'. Target must be a ControlEntry - | FinalReturn CmmActuals + | FinalReturn HintedCmmActuals -- ^ Same as 'CmmReturn'. Parameter is the return values. - | FinalJump CmmExpr CmmActuals + | FinalJump CmmExpr HintedCmmActuals -- ^ Same as 'CmmJump'. Parameters: -- 1. The function to call, -- 2. Arguments of the call - | FinalCall BlockId CmmCallTarget CmmFormals CmmActuals + | FinalCall BlockId CmmCallTarget HintedCmmFormals HintedCmmActuals C_SRT CmmReturnInfo Bool -- ^ Same as 'CmmCallee' followed by 'CmmGoto'. Parameters: -- 1. Target of the 'CmmGoto' (must be a 'ContinuationEntry') @@ -195,7 +194,7 @@ breakProc :: -- to create names of the new blocks with -> CmmInfo -- ^ Info table for the procedure -> CLabel -- ^ Name of the procedure - -> CmmFormalsWithoutKinds -- ^ Parameters of the procedure + -> CmmFormals -- ^ Parameters of the procedure -> [CmmBasicBlock] -- ^ Blocks of the procecure -- (First block is the entry block) -> [BrokenBlock] @@ -353,7 +352,7 @@ makeContinuationEntries formats case lookup ident formats of Nothing -> block Just (ContFormat formals srt is_gc) -> - BrokenBlock ident (ContinuationEntry (map kindlessCmm formals) srt is_gc) + BrokenBlock ident (ContinuationEntry (map hintlessCmm formals) srt is_gc) stmts targets exit adaptBlockToFormat :: [(BlockId, ContFormat)] @@ -383,20 +382,19 @@ adaptBlockToFormat formats unique target formals actuals srt ret is_gc adaptor_block = mk_adaptor_block adaptor_ident - (ContinuationEntry (map kindlessCmm formals) srt is_gc) - next format_formals + (ContinuationEntry (map hintlessCmm formals) srt is_gc) next adaptor_ident = BlockId unique - mk_adaptor_block :: BlockId -> BlockEntryInfo -> BlockId -> CmmFormals -> BrokenBlock - mk_adaptor_block ident entry next formals = + mk_adaptor_block :: BlockId -> BlockEntryInfo -> BlockId -> BrokenBlock + mk_adaptor_block ident entry next = BrokenBlock ident entry [] [next] exit where exit = FinalJump (CmmLit (CmmLabel (mkReturnPtLabel (getUnique next)))) (map formal_to_actual format_formals) - formal_to_actual (CmmKinded reg hint) - = (CmmKinded (CmmReg (CmmLocal reg)) hint) + formal_to_actual (CmmHinted reg hint) + = (CmmHinted (CmmReg (CmmLocal reg)) hint) -- TODO: Check if NoHint is right. We're -- jumping to a C-- function not a foreign one -- so it might always be right. diff --git a/compiler/cmm/CmmCPS.hs b/compiler/cmm/CmmCPS.hs index 025c12735e..f00a93c750 100644 --- a/compiler/cmm/CmmCPS.hs +++ b/compiler/cmm/CmmCPS.hs @@ -26,7 +26,6 @@ import CmmCPSGen import CmmUtils import ClosureInfo -import MachOp import CLabel import SMRep import Constants @@ -118,7 +117,7 @@ cpsProc uniqSupply (CmmProc info ident params (ListGraph blocks)) = cps_procs block_uniques = uniques proc_uniques = map (map (map uniqsFromSupply . listSplitUniqSupply) . listSplitUniqSupply) $ listSplitUniqSupply uniqSupply2 - stack_use = CmmLocal (LocalReg stack_use_unique (cmmRegRep spReg) GCKindPtr) + stack_use = CmmLocal (LocalReg stack_use_unique (cmmRegType spReg)) stack_check_block_id = BlockId stack_check_block_unique stack_check_block = make_stack_check stack_check_block_id info stack_use (blockId $ head blocks) @@ -171,7 +170,7 @@ cpsProc uniqSupply (CmmProc info ident params (ListGraph blocks)) = cps_procs -- This is an association list instead of a UniqFM because -- CLabel's don't have a 'Uniqueable' instance. formats :: [(CLabel, -- key - (CmmFormalsWithoutKinds, -- arguments + (CmmFormals, -- arguments Maybe CLabel, -- label in top slot [Maybe LocalReg]))] -- slots formats = selectContinuationFormat live continuations @@ -200,7 +199,7 @@ make_stack_check stack_check_block_id info stack_use next_block_id = -- then great, well check the stack. CmmInfo (Just gc_block) _ _ -> [CmmCondBranch - (CmmMachOp (MO_U_Lt $ cmmRegRep spReg) + (CmmMachOp (MO_U_Lt (typeWidth (cmmRegType spReg))) [CmmReg stack_use, CmmReg spLimReg]) gc_block] -- If we aren't given a stack check handler, @@ -277,7 +276,7 @@ gatherBlocksIntoContinuation live proc_points blocks start = selectContinuationFormat :: BlockEnv CmmLive -> [Continuation (Either C_SRT CmmInfo)] - -> [(CLabel, (CmmFormalsWithoutKinds, Maybe CLabel, [Maybe LocalReg]))] + -> [(CLabel, (CmmFormals, Maybe CLabel, [Maybe LocalReg]))] selectContinuationFormat live continuations = map (\c -> (continuationLabel c, selectContinuationFormat' c)) continuations where @@ -301,7 +300,7 @@ selectContinuationFormat live continuations = unknown_block = panic "unknown BlockId in selectContinuationFormat" -processFormats :: [(CLabel, (CmmFormalsWithoutKinds, Maybe CLabel, [Maybe LocalReg]))] +processFormats :: [(CLabel, (CmmFormals, Maybe CLabel, [Maybe LocalReg]))] -> Maybe UpdateFrame -> [Continuation (Either C_SRT CmmInfo)] -> (WordOff, WordOff, [(CLabel, ContinuationFormat)]) @@ -330,7 +329,7 @@ processFormats formats update_frame continuations = update_size [] = 0 update_size (expr:exprs) = width + update_size exprs where - width = machRepByteWidth (cmmExprRep expr) `quot` wORD_SIZE + width = (widthInBytes $ typeWidth $ cmmExprType expr) `quot` wORD_SIZE -- TODO: it would be better if we had a machRepWordWidth -- TODO: get rid of "+ 1" etc. @@ -340,7 +339,7 @@ processFormats formats update_frame continuations = stack_size (Nothing:formats) = 1 + stack_size formats -- one dead word stack_size (Just reg:formats) = width + stack_size formats where - width = machRepByteWidth (localRegRep reg) `quot` wORD_SIZE + width = (widthInBytes $ typeWidth $ localRegType reg) `quot` wORD_SIZE -- TODO: it would be better if we had a machRepWordWidth continuationMaxStack :: [(CLabel, ContinuationFormat)] @@ -360,14 +359,14 @@ continuationMaxStack formats (Continuation _ label _ False blocks) = map stmt_arg_size (brokenBlockStmts block)) final_arg_size (FinalReturn args) = - argumentsSize (cmmExprRep . kindlessCmm) args + argumentsSize (cmmExprType . hintlessCmm) args final_arg_size (FinalJump _ args) = - argumentsSize (cmmExprRep . kindlessCmm) args + argumentsSize (cmmExprType . hintlessCmm) args final_arg_size (FinalCall next _ _ args _ _ True) = 0 final_arg_size (FinalCall next _ _ args _ _ False) = -- We have to account for the stack used when we build a frame -- for the *next* continuation from *this* continuation - argumentsSize (cmmExprRep . kindlessCmm) args + + argumentsSize (cmmExprType . hintlessCmm) args + continuation_frame_size next_format where next_format = maybe unknown_format id $ lookup next' formats @@ -376,7 +375,7 @@ continuationMaxStack formats (Continuation _ label _ False blocks) = final_arg_size _ = 0 stmt_arg_size (CmmJump _ args) = - argumentsSize (cmmExprRep . kindlessCmm) args + argumentsSize (cmmExprType . hintlessCmm) args stmt_arg_size (CmmCall _ _ _ (CmmSafe _) _) = panic "Safe call in processFormats" stmt_arg_size (CmmReturn _) = diff --git a/compiler/cmm/CmmCPSGen.hs b/compiler/cmm/CmmCPSGen.hs index dd1887f53a..c1e71436f0 100644 --- a/compiler/cmm/CmmCPSGen.hs +++ b/compiler/cmm/CmmCPSGen.hs @@ -17,7 +17,6 @@ import BlockId import Cmm import CLabel import CmmBrokenBlock -- Data types only -import MachOp import CmmUtils import CmmCallConv @@ -57,7 +56,7 @@ data Continuation info = info -- Left <=> Continuation created by the CPS -- Right <=> Function or Proc point CLabel -- Used to generate both info & entry labels - CmmFormalsWithoutKinds -- Argument locals live on entry (C-- procedure params) + CmmFormals -- Argument locals live on entry (C-- procedure params) Bool -- True <=> GC block so ignore stack size [BrokenBlock] -- Code, may be empty. The first block is -- the entry point. The order is otherwise initially @@ -70,7 +69,7 @@ data Continuation info = data ContinuationFormat = ContinuationFormat { - continuation_formals :: CmmFormalsWithoutKinds, + continuation_formals :: CmmFormals, continuation_label :: Maybe CLabel, -- The label occupying the top slot continuation_frame_size :: WordOff, -- Total frame size in words (not including arguments) continuation_stack :: [Maybe LocalReg] -- local reg offsets from stack top @@ -95,7 +94,7 @@ continuationToProc (max_stack, update_frame_size, formats) stack_use uniques curr_format = maybe unknown_block id $ lookup label formats unknown_block = panic "unknown BlockId in continuationToProc" curr_stack = continuation_frame_size curr_format - arg_stack = argumentsSize localRegRep formals + arg_stack = argumentsSize localRegType formals param_stmts :: [CmmStmt] param_stmts = function_entry curr_format @@ -145,8 +144,8 @@ continuationToProc (max_stack, update_frame_size, formats) stack_use uniques [BasicBlock new_next $ pack_continuation curr_format cont_format ++ tail_call (curr_stack - cont_stack) - (CmmLit $ CmmLabel $ toCLabel next) - arguments]) + (CmmLit $ CmmLabel $ toCLabel next) + arguments]) -- branches to blocks in the current function don't have to jump | otherwise @@ -194,7 +193,7 @@ continuationToProc (max_stack, update_frame_size, formats) stack_use uniques -- A return is a tail call to the stack top FinalReturn arguments -> tail_call curr_stack - (entryCode (CmmLoad (CmmReg spReg) wordRep)) + (entryCode (CmmLoad (CmmReg spReg) bWord)) arguments -- A tail call @@ -228,22 +227,22 @@ continuationToProc (max_stack, update_frame_size, formats) stack_use uniques foreignCall call_uniques (CmmPrim target) results arguments -formal_to_actual reg = CmmKinded (CmmReg (CmmLocal reg)) NoHint +formal_to_actual reg = CmmHinted (CmmReg (CmmLocal reg)) NoHint -foreignCall :: [Unique] -> CmmCallTarget -> CmmFormals -> CmmActuals -> [CmmStmt] +foreignCall :: [Unique] -> CmmCallTarget -> HintedCmmFormals -> HintedCmmActuals -> [CmmStmt] foreignCall uniques call results arguments = arg_stmts ++ saveThreadState ++ caller_save ++ [CmmCall (CmmCallee suspendThread CCallConv) - [ CmmKinded id PtrHint ] - [ CmmKinded (CmmReg (CmmGlobal BaseReg)) PtrHint ] + [ CmmHinted id AddrHint ] + [ CmmHinted (CmmReg (CmmGlobal BaseReg)) AddrHint ] CmmUnsafe CmmMayReturn, CmmCall call results new_args CmmUnsafe CmmMayReturn, CmmCall (CmmCallee resumeThread CCallConv) - [ CmmKinded new_base PtrHint ] - [ CmmKinded (CmmReg (CmmLocal id)) PtrHint ] + [ CmmHinted new_base AddrHint ] + [ CmmHinted (CmmReg (CmmLocal id)) AddrHint ] CmmUnsafe CmmMayReturn, -- Assign the result to BaseReg: we @@ -251,14 +250,14 @@ foreignCall uniques call results arguments = CmmAssign (CmmGlobal BaseReg) (CmmReg (CmmLocal new_base))] ++ caller_load ++ loadThreadState tso_unique ++ - [CmmJump (CmmReg spReg) (map (formal_to_actual . kindlessCmm) results)] + [CmmJump (CmmReg spReg) (map (formal_to_actual . hintlessCmm) results)] where (_, arg_stmts, new_args) = loadArgsIntoTemps argument_uniques arguments (caller_save, caller_load) = callerSaveVolatileRegs (Just [{-only system regs-}]) - new_base = LocalReg base_unique (cmmRegRep (CmmGlobal BaseReg)) GCKindNonPtr - id = LocalReg id_unique wordRep GCKindNonPtr + new_base = LocalReg base_unique (cmmRegType (CmmGlobal BaseReg)) + id = LocalReg id_unique bWord tso_unique : base_unique : id_unique : argument_uniques = uniques -- ----------------------------------------------------------------------------- @@ -288,7 +287,7 @@ loadThreadState tso_unique = CmmAssign (CmmLocal tso) stgCurrentTSO, -- Sp = tso->sp; CmmAssign sp (CmmLoad (cmmOffset (CmmReg (CmmLocal tso)) tso_SP) - wordRep), + bWord), -- SpLim = tso->stack + RESERVED_STACK_WORDS; CmmAssign spLim (cmmOffsetW (cmmOffset (CmmReg (CmmLocal tso)) tso_STACK) rESERVED_STACK_WORDS) @@ -297,24 +296,24 @@ loadThreadState tso_unique = -- and load the current cost centre stack from the TSO when profiling: if opt_SccProfilingOn then [CmmStore curCCSAddr - (CmmLoad (cmmOffset (CmmReg (CmmLocal tso)) tso_CCCS) wordRep)] + (CmmLoad (cmmOffset (CmmReg (CmmLocal tso)) tso_CCCS) bWord)] else [] - where tso = LocalReg tso_unique wordRep GCKindNonPtr -- TODO FIXME NOW + where tso = LocalReg tso_unique bWord -- TODO FIXME NOW openNursery = [ -- Hp = CurrentNursery->free - 1; - CmmAssign hp (cmmOffsetW (CmmLoad nursery_bdescr_free wordRep) (-1)), + CmmAssign hp (cmmOffsetW (CmmLoad nursery_bdescr_free bWord) (-1)), -- HpLim = CurrentNursery->start + -- CurrentNursery->blocks*BLOCK_SIZE_W - 1; CmmAssign hpLim (cmmOffsetExpr - (CmmLoad nursery_bdescr_start wordRep) + (CmmLoad nursery_bdescr_start bWord) (cmmOffset (CmmMachOp mo_wordMul [ - CmmMachOp (MO_S_Conv I32 wordRep) - [CmmLoad nursery_bdescr_blocks I32], + CmmMachOp (MO_SS_Conv W32 wordWidth) + [CmmLoad nursery_bdescr_blocks b32], CmmLit (mkIntCLit bLOCK_SIZE) ]) (-1) @@ -358,17 +357,17 @@ currentNursery = CmmGlobal CurrentNursery -- for packing/unpacking continuations -- and entering/exiting functions -tail_call :: WordOff -> CmmExpr -> CmmActuals -> [CmmStmt] +tail_call :: WordOff -> CmmExpr -> HintedCmmActuals -> [CmmStmt] tail_call spRel target arguments = store_arguments ++ adjust_sp_reg spRel ++ jump where store_arguments = [stack_put spRel expr offset - | ((CmmKinded expr _), StackParam offset) <- argument_formats] ++ + | ((CmmHinted expr _), StackParam offset) <- argument_formats] ++ [global_put expr global - | ((CmmKinded expr _), RegisterParam global) <- argument_formats] + | ((CmmHinted expr _), RegisterParam global) <- argument_formats] jump = [CmmJump target arguments] - argument_formats = assignArguments (cmmExprRep . kindlessCmm) arguments + argument_formats = assignArguments (cmmExprType . hintlessCmm) arguments adjust_sp_reg spRel = if spRel == 0 @@ -386,8 +385,8 @@ gc_stack_check gc_block max_frame_size = check_stack_limit where check_stack_limit = [ CmmCondBranch - (CmmMachOp (MO_U_Lt $ cmmRegRep spReg) - [CmmRegOff spReg (-max_frame_size*wORD_SIZE), + (CmmMachOp (MO_U_Lt (typeWidth (cmmRegType spReg))) + [CmmRegOff spReg (-max_frame_size*wORD_SIZE), CmmReg spLimReg]) gc_block] @@ -437,7 +436,7 @@ pack_frame curr_frame_size next_frame_size next_frame_header frame_args = mkOffsets size (Nothing:exprs) = mkOffsets (size+1) exprs mkOffsets size (Just expr:exprs) = (expr, size):mkOffsets (size + width) exprs where - width = machRepByteWidth (cmmExprRep expr) `quot` wORD_SIZE + width = (widthInBytes $ typeWidth $ cmmExprType expr) `quot` wORD_SIZE -- TODO: it would be better if we had a machRepWordWidth spRel = curr_frame_size - next_frame_size @@ -461,7 +460,7 @@ function_entry (ContinuationFormat formals _ _ live_regs) [global_get reg global | (reg, RegisterParam global) <- argument_formats] - argument_formats = assignArguments (localRegRep) formals + argument_formats = assignArguments (localRegType) formals -- TODO: eliminate copy/paste with pack_continuation curr_offsets = mkOffsets label_size live_regs @@ -472,7 +471,7 @@ function_entry (ContinuationFormat formals _ _ live_regs) mkOffsets size (Nothing:regs) = mkOffsets (size+1) regs mkOffsets size (Just reg:regs) = (reg, size):mkOffsets (size + width) regs where - width = machRepByteWidth (localRegRep reg) `quot` wORD_SIZE + width = (widthInBytes $ typeWidth $ localRegType reg) `quot` wORD_SIZE -- TODO: it would be better if we had a machRepWordWidth ----------------------------------------------------------------------------- @@ -499,7 +498,7 @@ stack_get :: WordOff stack_get spRel reg offset = CmmAssign (CmmLocal reg) (CmmLoad (CmmRegOff spReg (wORD_SIZE*(spRel + offset))) - (localRegRep reg)) + (localRegType reg)) global_put :: CmmExpr -> GlobalReg -> CmmStmt global_put expr global = CmmAssign (CmmGlobal global) expr global_get :: LocalReg -> GlobalReg -> CmmStmt diff --git a/compiler/cmm/CmmCPSZ.hs b/compiler/cmm/CmmCPSZ.hs index b6b77f0f10..d8c9560b49 100644 --- a/compiler/cmm/CmmCPSZ.hs +++ b/compiler/cmm/CmmCPSZ.hs @@ -5,25 +5,19 @@ module CmmCPSZ ( protoCmmCPSZ ) where -import BlockId import Cmm import CmmCommonBlockElimZ -import CmmContFlowOpt import CmmProcPointZ import CmmSpillReload -import CmmTx import DFMonad import PprCmmZ() -import ZipCfg hiding (zip, unzip) import ZipCfgCmmRep import DynFlags import ErrUtils -import FiniteMap import HscTypes import Monad import Outputable -import UniqSupply ----------------------------------------------------------------------------- -- |Top level driver for the CPS pass @@ -38,7 +32,7 @@ protoCmmCPSZ hsc_env (Cmm tops) | otherwise = do let dflags = hsc_dflags hsc_env showPass dflags "CPSZ" - tops <- mapM (cpsTop hsc_env) tops + tops <- liftM concat $ mapM (cpsTop hsc_env) tops dumpIfSet_dyn dflags Opt_D_dump_cps_cmm "Post CPS Cmm" (ppr (Cmm tops)) return $ Cmm tops @@ -49,44 +43,48 @@ mutable reference cells in an 'HscEnv' and are global to one compiler session. -} -cpsTop :: HscEnv -> CmmTopZ -> IO CmmTopZ -cpsTop _ p@(CmmData {}) = return p +cpsTop :: HscEnv -> CmmTopZ -> IO [CmmTopZ] +cpsTop _ p@(CmmData {}) = return [p] cpsTop hsc_env (CmmProc h l args g) = do dump Opt_D_dump_cmmz "Pre Proc Points Added" g let callPPs = callProcPoints g - g <- return $ map_nodes id NotSpillOrReload id g - -- Change types of middle nodes to allow spill/reload - g <- dual_rewrite Opt_D_dump_cmmz "spills and reloads" + g <- dual_rewrite Opt_D_dump_cmmz "spills and reloads" (dualLivenessWithInsertion callPPs) g - (varSlots, g) <- trim g >>= return . elimSpillAndReload emptyFM - procPoints <- run $ minimalProcPointSet callPPs (runTx cmmCfgOptsZ g) + dump Opt_D_dump_cmmz "Pre common block elimination" g + g <- return $ elimCommonBlocks g + dump Opt_D_dump_cmmz "Post common block elimination" g + procPoints <- run $ minimalProcPointSet callPPs g + print $ "call procPoints: " ++ (showSDoc $ ppr procPoints) g <- run $ addProcPointProtocols callPPs procPoints g dump Opt_D_dump_cmmz "Post Proc Points Added" g - g <- return $ map_nodes id NotSpillOrReload id g - -- Change types of middle nodes to allow spill/reload g <- dual_rewrite Opt_D_dump_cmmz "spills and reloads" (dualLivenessWithInsertion procPoints) g -- Insert spills at defns; reloads at return points g <- run $ insertLateReloads' g -- Duplicate reloads just before uses dump Opt_D_dump_cmmz "Post late reloads" g - g <- trim g >>= dual_rewrite Opt_D_dump_cmmz "Dead Assignment Elimination" - (removeDeadAssignmentsAndReloads procPoints) + g <- dual_rewrite Opt_D_dump_cmmz "Dead Assignment Elimination" + (removeDeadAssignmentsAndReloads procPoints) g -- Remove redundant reloads (and any other redundant asst) - (_, g) <- trim g >>= return . elimSpillAndReload varSlots - gs <- run $ splitAtProcPoints args l procPoints g - gs `seq` dump Opt_D_dump_cmmz "Pre common block elimination" g - g <- return $ elimCommonBlocks g - dump Opt_D_dump_cmmz "Post common block elimination" g - return $ CmmProc h l args (runTx cmmCfgOptsZ g) + slotEnv <- run $ liveSlotAnal g + print $ "live slot analysis results: " ++ (showSDoc $ ppr slotEnv) + cafEnv <- run $ cafAnal g + print $ "live CAF analysis results: " ++ (showSDoc $ ppr cafEnv) + slotIGraph <- return $ igraph areaBuilder slotEnv g + print $ "slot IGraph: " ++ (showSDoc $ ppr slotIGraph) + print $ "graph before procPointMap: " ++ (showSDoc $ ppr g) + procPointMap <- run $ procPointAnalysis procPoints g + let areaMap = layout procPoints slotEnv g + g <- run $ manifestSP procPoints procPointMap areaMap g + procPointMap <- run $ procPointAnalysis procPoints g + gs <- run $ splitAtProcPoints l callPPs procPoints procPointMap slotEnv areaMap + (CmmProc h l args g) + return gs + --return $ [CmmProc h l args (runTx cmmCfgOptsZ g)] where dflags = hsc_dflags hsc_env dump f txt g = dumpIfSet_dyn dflags f txt (ppr g) run = runFuelIO (hsc_OptFuel hsc_env) dual_rewrite flag txt pass g = do dump flag ("Pre " ++ txt) g - g <- run $ pass (graphOfLGraph g) >>= lGraphOfGraph + g <- run $ pass g dump flag ("Post " ++ txt) $ g - return $ graphOfLGraph g - trim (Graph (ZLast (LastOther (LastBranch id))) blocks) = return $ LGraph id blocks - trim (Graph tail blocks) = - do entry <- liftM BlockId $ run $ getUniqueM - return $ LGraph entry (insertBlock (Block entry tail) blocks) + return g diff --git a/compiler/cmm/CmmCallConv.hs b/compiler/cmm/CmmCallConv.hs index d24d77a41d..5476eb8fa2 100644 --- a/compiler/cmm/CmmCallConv.hs +++ b/compiler/cmm/CmmCallConv.hs @@ -9,29 +9,31 @@ module CmmCallConv ( ParamLocation(..), ArgumentFormat, assignArguments, + assignArgumentsPos, argumentsSize, ) where #include "HsVersions.h" import Cmm -import MachOp import SMRep import Constants import StaticFlags (opt_Unregisterised) +import Outputable import Panic -- Calculate the 'GlobalReg' or stack locations for function call -- parameters as used by the Cmm calling convention. -data ParamLocation +data ParamLocation a = RegisterParam GlobalReg - | StackParam WordOff + | StackParam a -type ArgumentFormat a = [(a, ParamLocation)] +type ArgumentFormat a b = [(a, ParamLocation b)] -assignArguments :: (a -> MachRep) -> [a] -> ArgumentFormat a +-- Stack parameters are returned as word offsets. +assignArguments :: (a -> CmmType) -> [a] -> ArgumentFormat a WordOff assignArguments f reps = assignments where (sizes, assignments) = unzip $ assignArguments' reps (negate (sum sizes)) availRegs @@ -40,20 +42,38 @@ assignArguments f reps = assignments (size,(r,assignment)):assignArguments' rs new_offset remaining where (assignment, new_offset, size, remaining) = - assign_reg (f r) offset availRegs + assign_reg False assign_slot_up (f r) offset availRegs + +-- | JD: For the new stack story, I want arguments passed on the stack to manifest as +-- positive offsets in a CallArea, not negative offsets from the stack pointer. +-- Also, I want byte offsets, not word offsets. +-- The first argument tells us whether we are assigning positions for call arguments +-- or return results. The distinction matters because we reserve different +-- global registers in each case. +assignArgumentsPos :: Bool -> (a -> CmmType) -> [a] -> ArgumentFormat a ByteOff +assignArgumentsPos isCall arg_ty reps = map cvt assignments + where + (sizes, assignments) = unzip $ assignArguments' reps 0 availRegs + assignArguments' [] _ _ = [] + assignArguments' (r:rs) offset avails = + (size,(r,assignment)):assignArguments' rs new_offset remaining + where + (assignment, new_offset, size, remaining) = + assign_reg isCall assign_slot_down (arg_ty r) offset avails + cvt (l, RegisterParam r) = (l, RegisterParam r) + cvt (l, StackParam off) = (l, StackParam $ off * wORD_SIZE) -argumentsSize :: (a -> MachRep) -> [a] -> WordOff +argumentsSize :: (a -> CmmType) -> [a] -> WordOff argumentsSize f reps = maximum (0 : map arg_top args) where args = assignArguments f reps - arg_top (a, StackParam offset) = -offset arg_top (_, RegisterParam _) = 0 ----------------------------------------------------------------------------- -- Local information about the registers available -type AvailRegs = ( [GlobalReg] -- available vanilla regs. +type AvailRegs = ( [VGcPtr -> GlobalReg] -- available vanilla regs. , [GlobalReg] -- floats , [GlobalReg] -- doubles , [GlobalReg] -- longs (int64 and word64) @@ -81,20 +101,49 @@ availRegs = (regList VanillaReg useVanillaRegs, where regList f max = map f [1 .. max] +-- Round the size of a local register up to the nearest word. slot_size :: LocalReg -> Int -slot_size reg = - ((machRepByteWidth (localRegRep reg) - 1) `div` wORD_SIZE) + 1 - -slot_size' :: MachRep -> Int -slot_size' reg = ((machRepByteWidth reg - 1) `div` wORD_SIZE) + 1 - -assign_reg :: MachRep -> WordOff -> AvailRegs -> (ParamLocation, WordOff, WordOff, AvailRegs) -assign_reg I8 off (v:vs, fs, ds, ls) = (RegisterParam $ v, off, 0, (vs, fs, ds, ls)) -assign_reg I16 off (v:vs, fs, ds, ls) = (RegisterParam $ v, off, 0, (vs, fs, ds, ls)) -assign_reg I32 off (v:vs, fs, ds, ls) = (RegisterParam $ v, off, 0, (vs, fs, ds, ls)) -assign_reg I64 off (vs, fs, ds, l:ls) = (RegisterParam $ l, off, 0, (vs, fs, ds, ls)) -assign_reg I128 off _ = panic "I128 is not a supported register type" -assign_reg F32 off (vs, f:fs, ds, ls) = (RegisterParam $ f, off, 0, (vs, fs, ds, ls)) -assign_reg F64 off (vs, fs, d:ds, ls) = (RegisterParam $ d, off, 0, (vs, fs, ds, ls)) -assign_reg F80 off _ = panic "F80 is not a supported register type" -assign_reg reg off _ = (StackParam $ off, off + size, size, ([], [], [], [])) where size = slot_size' reg +slot_size reg = slot_size' (typeWidth (localRegType reg)) + +slot_size' :: Width -> Int +slot_size' reg = ((widthInBytes reg - 1) `div` wORD_SIZE) + 1 + +type Assignment = (ParamLocation WordOff, WordOff, WordOff, AvailRegs) +type SlotAssigner = Width -> Int -> AvailRegs -> Assignment + +assign_reg :: Bool -> SlotAssigner -> CmmType -> WordOff -> AvailRegs -> Assignment +assign_reg isCall slot ty off avails + | isFloatType ty = assign_float_reg slot width off avails + | otherwise = assign_bits_reg isCall slot width off gcp avails + where + width = typeWidth ty + gcp | isGcPtrType ty = VGcPtr + | otherwise = VNonGcPtr + +-- Assigning a slot on a stack that grows up: +-- JD: I don't know why this convention stops using all the registers +-- after running out of one class of registers. +assign_slot_up :: SlotAssigner +assign_slot_up width off regs = + (StackParam $ off, off + size, size, ([], [], [], [])) where size = slot_size' width + +-- Assigning a slot on a stack that grows down: +assign_slot_down :: SlotAssigner +assign_slot_down width off regs = + (StackParam $ off + size, off + size, size, ([], [], [], [])) + where size = slot_size' width + +-- On calls, `node` is used to hold the closure that is entered, so we can't +-- pass arguments in that register. +assign_bits_reg _ _ W128 _ _ _ = panic "I128 is not a supported register type" +assign_bits_reg isCall assign_slot w off gcp regs@(v:vs, fs, ds, ls) = + if isCall && v gcp == node then + assign_bits_reg isCall assign_slot w off gcp (vs, fs, ds, ls) + else if widthInBits w <= widthInBits wordWidth then + (RegisterParam (v gcp), off, 0, (vs, fs, ds, ls)) + else assign_slot w off regs + +assign_float_reg _ W32 off (vs, f:fs, ds, ls) = (RegisterParam $ f, off, 0, (vs, fs, ds, ls)) +assign_float_reg _ W64 off (vs, fs, d:ds, ls) = (RegisterParam $ d, off, 0, (vs, fs, ds, ls)) +assign_float_reg _ W80 off _ = panic "F80 is not a supported register type" +assign_float_reg assign_slot width off r = assign_slot width off r diff --git a/compiler/cmm/CmmCommonBlockElimZ.hs b/compiler/cmm/CmmCommonBlockElimZ.hs index 97ec31d4bb..2cef222054 100644 --- a/compiler/cmm/CmmCommonBlockElimZ.hs +++ b/compiler/cmm/CmmCommonBlockElimZ.hs @@ -5,7 +5,6 @@ where import BlockId -import Cmm hiding (blockId) import CmmExpr import Prelude hiding (iterate, zip, unzip) import ZipCfg @@ -70,7 +69,7 @@ upd_graph g subst = map_nodes id middle last g where middle m = m last (LastBranch bid) = LastBranch $ sub bid last (LastCondBranch p t f) = cond p (sub t) (sub f) - last (LastCall t bid) = LastCall t $ liftM sub bid + last (LastCall t bid s) = LastCall t (liftM sub bid) s last (LastSwitch e bs) = LastSwitch e $ map (liftM sub) bs last l = l cond p t f = if t == f then LastBranch t else LastCondBranch p t f @@ -80,17 +79,15 @@ upd_graph g subst = map_nodes id middle last g -- The hashing is a bit arbitrary (the numbers are completely arbitrary), -- but it should be fast and good enough. hash_block :: CmmBlock -> Int -hash_block (Block _ t) = hash_tail t 0 +hash_block (Block _ _ t) = hash_tail t 0 where hash_mid (MidComment (FastString u _ _ _ _)) = u hash_mid (MidAssign r e) = hash_reg r + hash_e e hash_mid (MidStore e e') = hash_e e + hash_e e' - hash_mid (MidUnsafeCall t _ as) = hash_tgt t + hash_as as + hash_mid (MidUnsafeCall t _ as) = hash_tgt t + hash_lst hash_e as hash_mid (MidAddToContext e es) = hash_e e + hash_lst hash_e es - hash_mid (CopyIn _ fs _) = hash_fs fs - hash_mid (CopyOut _ as) = hash_as as hash_reg (CmmLocal l) = hash_local l hash_reg (CmmGlobal _) = 19 - hash_local (LocalReg _ _ _) = 117 + hash_local (LocalReg _ _) = 117 hash_e (CmmLit l) = hash_lit l hash_e (CmmLoad e _) = 67 + hash_e e hash_e (CmmReg r) = hash_reg r @@ -102,17 +99,14 @@ hash_block (Block _ t) = hash_tail t 0 hash_lit (CmmLabel _) = 119 -- ugh hash_lit (CmmLabelOff _ i) = 199 + i hash_lit (CmmLabelDiffOff _ _ i) = 299 + i - hash_tgt (CmmCallee e _) = hash_e e - hash_tgt (CmmPrim _) = 31 -- lots of these - hash_as = hash_lst $ hash_kinded hash_e - hash_fs = hash_lst $ hash_kinded hash_local - hash_kinded f (CmmKinded x _) = f x - hash_lst f = foldl (\z x -> f x + z) 0 + hash_tgt (ForeignTarget e _) = hash_e e + hash_tgt (PrimTarget _) = 31 -- lots of these + hash_lst f = foldl (\z x -> f x + z) (0::Int) hash_last (LastBranch _) = 23 -- would be great to hash these properly hash_last (LastCondBranch p _ _) = hash_e p - hash_last LastReturn = 17 -- better ideas? - hash_last (LastJump e) = hash_e e - hash_last (LastCall e _) = hash_e e + hash_last (LastReturn _) = 17 -- better ideas? + hash_last (LastJump e _) = hash_e e + hash_last (LastCall e _ _) = hash_e e hash_last (LastSwitch e _) = hash_e e hash_tail (ZLast LastExit) v = 29 + v * 2 hash_tail (ZLast (LastOther l)) v = hash_last l + (v * 2) @@ -130,7 +124,8 @@ lookupBid subst bid = case lookupFM subst bid of -- Equality on the body of a block, modulo a function mapping block IDs to block IDs. eqBlockBodyWith :: (BlockId -> BlockId -> Bool) -> CmmBlock -> CmmBlock -> Bool -eqBlockBodyWith eqBid (Block _ t) (Block _ t') = eqTailWith eqBid t t' +eqBlockBodyWith eqBid (Block _ Nothing t) (Block _ Nothing t') = eqTailWith eqBid t t' +eqBlockBodyWith _ _ _ = False type CmmTail = ZTail Middle Last eqTailWith :: (BlockId -> BlockId -> Bool) -> CmmTail -> CmmTail -> Bool @@ -143,10 +138,11 @@ eqLastWith :: (BlockId -> BlockId -> Bool) -> Last -> Last -> Bool eqLastWith eqBid (LastBranch bid) (LastBranch bid') = eqBid bid bid' eqLastWith eqBid c@(LastCondBranch _ _ _) c'@(LastCondBranch _ _ _) = eqBid (cml_true c) (cml_true c') && eqBid (cml_false c) (cml_false c') -eqLastWith _ LastReturn LastReturn = True -eqLastWith _ (LastJump e) (LastJump e') = e == e' -eqLastWith eqBid c@(LastCall _ _) c'@(LastCall _ _) = - cml_target c == cml_target c' && eqMaybeWith eqBid (cml_cont c) (cml_cont c') +eqLastWith _ (LastReturn s) (LastReturn s') = s == s' +eqLastWith _ (LastJump e s) (LastJump e' s') = e == e' && s == s' +eqLastWith eqBid c@(LastCall _ _ s) c'@(LastCall _ _ s') = + cml_target c == cml_target c' && eqMaybeWith eqBid (cml_cont c) (cml_cont c') && + s == s' eqLastWith eqBid (LastSwitch e bs) (LastSwitch e' bs') = e == e' && eqLstWith (eqMaybeWith eqBid) bs bs' eqLastWith _ _ _ = False diff --git a/compiler/cmm/CmmContFlowOpt.hs b/compiler/cmm/CmmContFlowOpt.hs index 6909250efb..320b1e7871 100644 --- a/compiler/cmm/CmmContFlowOpt.hs +++ b/compiler/cmm/CmmContFlowOpt.hs @@ -2,7 +2,7 @@ module CmmContFlowOpt ( runCmmOpts, cmmCfgOpts, cmmCfgOptsZ , branchChainElimZ, removeUnreachableBlocksZ, predMap - , replaceLabelsZ + , replaceLabelsZ, runCmmContFlowOptsZs ) where @@ -10,27 +10,28 @@ import BlockId import Cmm import CmmTx import qualified ZipCfg as G +import ZipCfg import ZipCfgCmmRep import Maybes import Monad +import Outputable import Panic import Prelude hiding (unzip, zip) import Util import UniqFM ------------------------------------ -mapProcs :: Tx (GenCmmTop d h s) -> Tx (GenCmm d h s) -mapProcs f (Cmm tops) = fmap Cmm (mapTx f tops) - +runCmmContFlowOptsZs :: [CmmZ] -> [CmmZ] +runCmmContFlowOptsZs prog + = [ runTx (runCmmOpts cmmCfgOptsZ) cmm_top + | cmm_top <- prog ] ------------------------------------- cmmCfgOpts :: Tx (ListGraph CmmStmt) cmmCfgOptsZ :: Tx CmmGraph cmmCfgOpts = branchChainElim -- boring, but will get more exciting later -cmmCfgOptsZ = - branchChainElimZ `seqTx` blockConcatZ `seqTx` removeUnreachableBlocksZ +cmmCfgOptsZ = branchChainElimZ `seqTx` blockConcatZ `seqTx` removeUnreachableBlocksZ -- Here branchChainElim can ultimately be replaced -- with a more exciting combination of optimisations @@ -41,10 +42,15 @@ optGraph :: Tx g -> Tx (GenCmmTop d h g) optGraph _ top@(CmmData {}) = noTx top optGraph opt (CmmProc info lbl formals g) = fmap (CmmProc info lbl formals) (opt g) +------------------------------------ +mapProcs :: Tx (GenCmmTop d h s) -> Tx (GenCmm d h s) +mapProcs f (Cmm tops) = fmap Cmm (mapTx f tops) + ---------------------------------------------------------------- branchChainElim :: Tx (ListGraph CmmStmt) --- Remove any basic block of the form L: goto L', --- and replace L with L' everywhere else +-- If L is not captured in an instruction, we can remove any +-- basic block of the form L: goto L', and replace L with L' everywhere else. +-- How does L get captured? In a CallArea. branchChainElim (ListGraph blocks) | null lone_branch_blocks -- No blocks to remove = noTx (ListGraph blocks) @@ -74,73 +80,100 @@ replaceLabels env (BasicBlock id stmts) branchChainElimZ :: Tx CmmGraph -- Remove any basic block of the form L: goto L', -- and replace L with L' everywhere else -branchChainElimZ g@(G.LGraph eid _) +branchChainElimZ g@(G.LGraph eid args _) | null lone_branch_blocks -- No blocks to remove = noTx g | otherwise - = aTx $ replaceLabelsZ env $ G.of_block_list eid (self_branches ++ others) + = aTx $ replaceLabelsZ env $ G.of_block_list eid args (self_branches ++ others) where (lone_branch_blocks, others) = partitionWith isLoneBranchZ (G.to_block_list g) - env = mkClosureBlockEnv lone_branch_blocks + env = mkClosureBlockEnvZ lone_branch_blocks self_branches = let loop_to (id, _) = if lookup id == id then - Just (G.Block id (G.ZLast (G.mkBranchNode id))) + Just (G.Block id Nothing (G.ZLast (G.mkBranchNode id))) else Nothing in mapMaybe loop_to lone_branch_blocks lookup id = lookupBlockEnv env id `orElse` id isLoneBranchZ :: CmmBlock -> Either (BlockId, BlockId) CmmBlock -isLoneBranchZ (G.Block id (G.ZLast (G.LastOther (LastBranch target)))) +isLoneBranchZ (G.Block id Nothing (G.ZLast (G.LastOther (LastBranch target)))) | id /= target = Left (id,target) isLoneBranchZ other = Right other -- An infinite loop is not a link in a branch chain! replaceLabelsZ :: BlockEnv BlockId -> CmmGraph -> CmmGraph -replaceLabelsZ env = replace_eid . G.map_nodes id id last +replaceLabelsZ env = replace_eid . G.map_nodes id middle last where - replace_eid (G.LGraph eid blocks) = G.LGraph (lookup eid) blocks - last (LastBranch id) = LastBranch (lookup id) - last (LastCondBranch e ti fi) = LastCondBranch e (lookup ti) (lookup fi) - last (LastSwitch e tbl) = LastSwitch e (map (fmap lookup) tbl) - last (LastCall tgt (Just id)) = LastCall tgt (Just $ lookup id) - last exit_jump_return = exit_jump_return - lookup id = lookupBlockEnv env id `orElse` id + replace_eid (G.LGraph eid off blocks) = G.LGraph (lookup eid) off blocks + middle m@(MidComment _) = m + middle (MidAssign r e) = MidAssign r (exp e) + middle (MidStore addr e) = MidStore (exp addr) (exp e) + middle (MidUnsafeCall tgt fs as) = MidUnsafeCall (midcall tgt) fs (map exp as) + middle (MidAddToContext e es) = MidAddToContext (exp e) (map exp es) + last (LastBranch id) = LastBranch (lookup id) + last (LastCondBranch e ti fi) = LastCondBranch (exp e) (lookup ti) (lookup fi) + last (LastSwitch e tbl) = LastSwitch (exp e) (map (fmap lookup) tbl) + last (LastCall tgt mb_id s) = LastCall (exp tgt) (fmap lookup mb_id) s + last (LastJump e s) = LastJump (exp e) s + last (LastReturn s) = LastReturn s + midcall (ForeignTarget e c) = ForeignTarget (exp e) c + midcall m@(PrimTarget _) = m + exp e@(CmmLit _) = e + exp (CmmLoad addr ty) = CmmLoad (exp addr) ty + exp e@(CmmReg _) = e + exp (CmmMachOp op es) = CmmMachOp op $ map exp es + exp e@(CmmRegOff _ _) = e + exp (CmmStackSlot (CallArea (Young id)) i) = + CmmStackSlot (CallArea (Young (lookup id))) i + exp e@(CmmStackSlot _ _) = e + lookup id = fmap lookup (lookupBlockEnv env id) `orElse` id ---------------------------------------------------------------- -- Build a map from a block to its set of predecessors. Very useful. predMap :: G.LastNode l => G.LGraph m l -> BlockEnv BlockSet predMap g = G.fold_blocks add_preds emptyBlockEnv g -- find the back edges where add_preds b env = foldl (add b) env (G.succs b) - add (G.Block bid _) env b' = + add (G.Block bid _ _) env b' = extendBlockEnv env b' $ extendBlockSet (lookupBlockEnv env b' `orElse` emptyBlockSet) bid ---------------------------------------------------------------- -blockConcatZ :: Tx CmmGraph -- If a block B branches to a label L, and L has no other predecessors, -- then we can splice the block starting with L onto the end of B. -- Because this optmization can be inhibited by unreachable blocks, --- we bundle it with a pass that drops unreachable blocks. +-- we first take a pass to drops unreachable blocks. -- Order matters, so we work bottom up (reverse postorder DFS). +-- +-- To ensure correctness, we have to make sure that the BlockId of the block +-- we are about to eliminate is not named in another instruction +-- (except an adjacent stack pointer adjustment, which we expect and also eliminate). +-- For +-- -- Note: This optimization does _not_ subsume branch chain elimination. -blockConcatZ = removeUnreachableBlocksZ `seqTx` blockConcatZ' +blockConcatZ :: Tx CmmGraph +blockConcatZ = removeUnreachableBlocksZ `seqTx` blockConcatZ' blockConcatZ' :: Tx CmmGraph -blockConcatZ' g@(G.LGraph eid blocks) = tx $ G.LGraph eid blocks' - where (changed, blocks') = foldr maybe_concat (False, blocks) $ G.postorder_dfs g - maybe_concat b@(G.Block bid _) (changed, blocks') = - let unchanged = (changed, extendBlockEnv blocks' bid b) +blockConcatZ' g@(G.LGraph eid off blocks) = + tx $ pprTrace "concatMap" (ppr concatMap) $ replaceLabelsZ concatMap $ G.LGraph eid off blocks' + where (changed, blocks', concatMap) = + foldr maybe_concat (False, blocks, emptyBlockEnv) $ G.postorder_dfs g + maybe_concat b@(G.Block bid _ _) (changed, blocks', concatMap) = + let unchanged = (changed, extendBlockEnv blocks' bid b, concatMap) in case G.goto_end $ G.unzip b of (h, G.LastOther (LastBranch b')) -> if num_preds b' == 1 then - (True, extendBlockEnv blocks' bid $ splice blocks' h b') + (True, extendBlockEnv blocks' bid $ splice blocks' h b', + extendBlockEnv concatMap b' bid) else unchanged _ -> unchanged num_preds bid = liftM sizeBlockSet (lookupBlockEnv backEdges bid) `orElse` 0 backEdges = predMap g splice blocks' h bid' = case lookupBlockEnv blocks' bid' of - Just (G.Block _ t) -> G.zip $ G.ZBlock h t + Just (G.Block _ Nothing t) -> G.zip $ G.ZBlock h t + Just (G.Block _ (Just _) _) -> + panic "trying to concatenate but successor block has incoming args" Nothing -> panic "unknown successor block" tx = if changed then aTx else noTx ---------------------------------------------------------------- @@ -151,9 +184,16 @@ mkClosureBlockEnv blocks = mkBlockEnv $ map follow blocks endChain orig id = case lookupBlockEnv singleEnv id of Just id' | id /= orig -> endChain orig id' _ -> id +mkClosureBlockEnvZ :: [(BlockId, BlockId)] -> BlockEnv BlockId +mkClosureBlockEnvZ blocks = mkBlockEnv $ map follow blocks + where singleEnv = mkBlockEnv blocks + follow (id, next) = (id, endChain id next) + endChain orig id = case lookupBlockEnv singleEnv id of + Just id' | id /= orig -> endChain orig id' + _ -> id ---------------------------------------------------------------- removeUnreachableBlocksZ :: Tx CmmGraph -removeUnreachableBlocksZ g@(G.LGraph id blocks) = - if length blocks' < sizeUFM blocks then aTx $ G.of_block_list id blocks' +removeUnreachableBlocksZ g@(G.LGraph id off blocks) = + if length blocks' < sizeUFM blocks then aTx $ G.of_block_list id off blocks' else noTx g where blocks' = G.postorder_dfs g diff --git a/compiler/cmm/CmmCvt.hs b/compiler/cmm/CmmCvt.hs index 0bfa396b9b..0f0ccd2d1b 100644 --- a/compiler/cmm/CmmCvt.hs +++ b/compiler/cmm/CmmCvt.hs @@ -5,14 +5,14 @@ module CmmCvt where import BlockId +import ClosureInfo (C_SRT(..)) import Cmm import CmmExpr -import MkZipCfg import MkZipCfgCmm hiding (CmmGraph) import ZipCfgCmmRep -- imported for reverse conversion import CmmZipUtil +import ForeignCall import PprCmm() -import PprCmmZ() import qualified ZipCfg as G import FastString @@ -31,25 +31,26 @@ cmmToZgraph (Cmm tops) = liftM Cmm $ mapM mapTop tops where mapTop (CmmProc h l args g) = toZgraph (showSDoc $ ppr l) args g >>= return . CmmProc h l args mapTop (CmmData s ds) = return $ CmmData s ds -cmmOfZgraph = cmmMapGraph ofZgraph +cmmOfZgraph = cmmMapGraph ofZgraph - -toZgraph :: String -> CmmFormalsWithoutKinds -> ListGraph CmmStmt -> UniqSM CmmGraph -toZgraph _ _ (ListGraph []) = lgraphOfAGraph emptyAGraph +toZgraph :: String -> CmmFormals -> ListGraph CmmStmt -> UniqSM CmmGraph +toZgraph _ _ (ListGraph []) = lgraphOfAGraph 0 emptyAGraph toZgraph fun_name args g@(ListGraph (BasicBlock id ss : other_blocks)) = - labelAGraph id $ mkMiddles (mkEntry area undefined args) <*> - mkStmts ss <*> foldr addBlock emptyAGraph other_blocks - where addBlock (BasicBlock id ss) g = mkLabel id <*> mkStmts ss <*> g + let (offset, entry) = mkEntry id Native args in + labelAGraph id offset $ + entry <*> mkStmts ss <*> foldr addBlock emptyAGraph other_blocks + where addBlock (BasicBlock id ss) g = mkLabel id Nothing <*> mkStmts ss <*> g mkStmts (CmmNop : ss) = mkNop <*> mkStmts ss mkStmts (CmmComment s : ss) = mkComment s <*> mkStmts ss mkStmts (CmmAssign l r : ss) = mkAssign l r <*> mkStmts ss mkStmts (CmmStore l r : ss) = mkStore l r <*> mkStmts ss mkStmts (CmmCall (CmmCallee f conv) res args (CmmSafe srt) CmmMayReturn : ss) = - mkCall f conv res args srt <*> mkStmts ss + mkCall f conv (map hintlessCmm res) (map hintlessCmm args) srt <*> mkStmts ss mkStmts (CmmCall (CmmPrim {}) _ _ (CmmSafe _) _ : _) = panic "safe call to a primitive CmmPrim CallishMachOp" mkStmts (CmmCall f res args CmmUnsafe CmmMayReturn : ss) = - mkUnsafeCall f res args <*> mkStmts ss + mkUnsafeCall (convert_target f res args) + (strip_hints res) (strip_hints args) <*> mkStmts ss mkStmts (CmmCondBranch e l : fbranch) = mkCmmIfThenElse e (mkBranch l) (mkStmts fbranch) mkStmts (last : []) = mkLast last @@ -57,32 +58,41 @@ toZgraph fun_name args g@(ListGraph (BasicBlock id ss : other_blocks)) = mkStmts (_ : _ : _) = bad "last node not at end" bad msg = pprPanic (msg ++ " in function " ++ fun_name) (ppr g) mkLast (CmmCall (CmmCallee f conv) [] args _ CmmNeverReturns) = - mkFinalCall f conv args + mkFinalCall f conv $ map hintlessCmm args mkLast (CmmCall (CmmPrim {}) _ _ _ CmmNeverReturns) = panic "Call to CmmPrim never returns?!" mkLast (CmmSwitch scrutinee table) = mkSwitch scrutinee table - mkLast (CmmJump tgt args) = mkJump area tgt args - mkLast (CmmReturn ress) = mkReturn area ress + -- SURELY, THESE HINTLESS ARGS ARE WRONG AND WILL BE FIXED WHEN CALLING + -- CONVENTIONS ARE HONORED? + mkLast (CmmJump tgt args) = mkJump tgt $ map hintlessCmm args + mkLast (CmmReturn ress) = mkReturn $ map hintlessCmm ress mkLast (CmmBranch tgt) = mkBranch tgt mkLast (CmmCall _f (_:_) _args _ CmmNeverReturns) = panic "Call never returns but has results?!" mkLast _ = panic "fell off end of block" - -- The entry, jump, and return areas should be the same. - -- This code is horrible, but there's no point trying to fix it until we've figured - -- out our interface for calling conventions. - -- All return statements are required to use return areas of equal size. - -- This isn't necessarily required to write correct programs, but it's sane. - area = case foldr retBlock (retStmts ss Nothing) other_blocks of - Just (as, _) -> mkCallArea id as $ Just args - Nothing -> mkCallArea id [] $ Just args - retBlock (BasicBlock _ ss) z = retStmts ss z - retStmts [CmmReturn ress] z@(Just (_, n)) = - if size ress == n then z - else panic "return statements in C-- procs must return the same results" - retStmts [CmmReturn ress] Nothing = Just (ress, size ress) - retStmts (_ : rst) z = retStmts rst z - retStmts [] z = z - size args = areaSize $ mkCallArea id args Nothing + +strip_hints :: [CmmHinted a] -> [a] +strip_hints = map hintlessCmm + +convert_target :: CmmCallTarget -> HintedCmmFormals -> HintedCmmActuals -> MidCallTarget +convert_target (CmmCallee e cc) ress args = ForeignTarget e (ForeignConvention cc (map cmmHint args) (map cmmHint ress)) +convert_target (CmmPrim op) _ress _args = PrimTarget op + +add_hints :: Convention -> ValueDirection -> [a] -> [CmmHinted a] +add_hints conv vd args = zipWith CmmHinted args (get_hints conv vd) + +get_hints :: Convention -> ValueDirection -> [ForeignHint] +get_hints (Foreign (ForeignConvention _ hints _)) Arguments = hints +get_hints (Foreign (ForeignConvention _ _ hints)) Results = hints +get_hints _other_conv _vd = repeat NoHint + +get_conv :: MidCallTarget -> Convention +get_conv (PrimTarget _) = Native +get_conv (ForeignTarget _ fc) = Foreign fc + +cmm_target :: MidCallTarget -> CmmCallTarget +cmm_target (PrimTarget op) = CmmPrim op +cmm_target (ForeignTarget e (ForeignConvention cc _ _)) = CmmCallee e cc ofZgraph :: CmmGraph -> ListGraph CmmStmt ofZgraph g = ListGraph $ swallow blocks @@ -92,89 +102,67 @@ ofZgraph g = ListGraph $ swallow blocks extend_block _id stmts = stmts _extend_entry stmts = scomment showblocks : scomment cscomm : stmts showblocks = "LGraph has " ++ show (length blocks) ++ " blocks:" ++ - concat (map (\(G.Block id _) -> " " ++ show id) blocks) + concat (map (\(G.Block id _ _) -> " " ++ show id) blocks) cscomm = "Call successors are" ++ (concat $ map (\id -> " " ++ show id) $ uniqSetToList call_succs) swallow [] = [] - swallow (G.Block id t : rest) = tail id [] Nothing t rest - tail id prev' out (G.ZTail (CopyOut conv actuals) t) rest = - case out of - Nothing -> tail id prev' (Just (conv, actuals)) t rest - Just _ -> panic "multiple CopyOut nodes in one basic block" - tail id prev' out (G.ZTail m t) rest = tail id (mid m : prev') out t rest - tail id prev' out (G.ZLast G.LastExit) rest = exit id prev' out rest - tail id prev' out (G.ZLast (G.LastOther l)) rest = last id prev' out l rest + swallow (G.Block id _ t : rest) = tail id [] t rest + tail id prev' (G.ZTail m t) rest = tail id (mid m : prev') t rest + tail id prev' (G.ZLast G.LastExit) rest = exit id prev' rest + tail id prev' (G.ZLast (G.LastOther l)) rest = last id prev' l rest mid (MidComment s) = CmmComment s mid (MidAssign l r) = CmmAssign l r mid (MidStore l r) = CmmStore l r - mid (MidUnsafeCall f ress args) = CmmCall f ress args CmmUnsafe CmmMayReturn + mid (MidUnsafeCall target ress args) + = CmmCall (cmm_target target) + (add_hints conv Results ress) + (add_hints conv Arguments args) + CmmUnsafe CmmMayReturn + where + conv = get_conv target mid m@(MidAddToContext {}) = pcomment (ppr m) - mid m@(CopyOut {}) = pcomment (ppr m) - mid m@(CopyIn {}) = pcomment (ppr m <+> text "(proc point)") pcomment p = scomment $ showSDoc p block' id prev' | id == G.lg_entry g = BasicBlock id $ extend_entry (reverse prev') | otherwise = BasicBlock id $ extend_block id (reverse prev') - last id prev' out l n = + last id prev' l n = let endblock stmt = block' id (stmt : prev') : swallow n in case l of LastBranch tgt -> case n of - G.Block id' t : bs - | tgt == id', unique_pred id' - -> tail id prev' out t bs -- optimize out redundant labels - _ -> if isNothing out then endblock (CmmBranch tgt) - else pprPanic "can't convert LGraph with pending CopyOut" - (text "target" <+> ppr tgt <+> ppr g) + -- THIS IS NOW WRONG -- LABELS CAN SHOW UP ELSEWHERE IN THE GRAPH + --G.Block id' _ t : bs + -- | tgt == id', unique_pred id' + -- -> tail id prev' t bs -- optimize out redundant labels + _ -> endblock (CmmBranch tgt) LastCondBranch expr tid fid -> - if isJust out then pprPanic "CopyOut before conditional branch" (ppr g) - else case n of - G.Block id' t : bs + G.Block id' _ t : bs | id' == fid, unique_pred id' -> - tail id (CmmCondBranch expr tid : prev') Nothing t bs + tail id (CmmCondBranch expr tid : prev') t bs | id' == tid, unique_pred id', Just e' <- maybeInvertCmmExpr expr -> - tail id (CmmCondBranch e' fid : prev') Nothing t bs + tail id (CmmCondBranch e' fid : prev') t bs _ -> let instrs' = CmmBranch fid : CmmCondBranch expr tid : prev' in block' id instrs' : swallow n - LastJump expr -> endblock $ with_out out $ CmmJump expr - LastReturn -> endblock $ with_out out $ CmmReturn + LastJump expr _ -> endblock $ CmmJump expr [] + LastReturn _ -> endblock $ CmmReturn [] LastSwitch arg ids -> endblock $ CmmSwitch arg $ ids - LastCall e cont - | Just (conv, args) <- out - -> let tgt = CmmCallee e (conv_to_cconv conv) in - case cont of - Nothing -> - endblock $ CmmCall tgt [] args CmmUnsafe CmmNeverReturns - Just k - | G.Block id' (G.ZTail (CopyIn _ ress srt) t) : bs <- n, - id' == k, unique_pred k - -> let call = CmmCall tgt ress args (CmmSafe srt) CmmMayReturn - in tail id (call : prev') Nothing t bs - | G.Block id' t : bs <- n, id' == k, unique_pred k - -> let (ress, srt) = findCopyIn t - call = CmmCall tgt ress args (CmmSafe srt) CmmMayReturn - delayed = scomment "delayed CopyIn follows prev. call" - in tail id (delayed : call : prev') Nothing t bs - | otherwise -> panic "unrepairable call" - | otherwise -> panic "call with no CopyOut" - with_out (Just (_conv, actuals)) f = f actuals - with_out Nothing f = pprPanic "unrepairable data flow to" (ppr $ f []) - findCopyIn (G.ZTail (CopyIn _ ress srt) _) = (ress, srt) - findCopyIn (G.ZTail _ t) = findCopyIn t - findCopyIn (G.ZLast _) = panic "missing CopyIn after call" - exit id prev' out n = -- highly irregular (assertion violation?) + LastCall e cont _ -> + let tgt = CmmCallee e CCallConv in + case cont of + Nothing -> + endblock $ CmmCall tgt [] [] CmmUnsafe CmmNeverReturns + Just _ -> + endblock $ CmmCall tgt [] [] (CmmSafe NoC_SRT) CmmMayReturn + exit id prev' n = -- highly irregular (assertion violation?) let endblock stmt = block' id (stmt : prev') : swallow n in case n of [] -> endblock (scomment "procedure falls off end") - G.Block id' t : bs -> + G.Block id' _ t : bs -> if unique_pred id' then - tail id (scomment "went thru exit" : prev') out t bs + tail id (scomment "went thru exit" : prev') t bs else endblock (CmmBranch id') - conv_to_cconv (ConventionStandard c _) = c - conv_to_cconv (ConventionPrivate {}) = - panic "tried to convert private calling convention back to Cmm" preds = zipPreds g single_preds = let add b single = @@ -189,7 +177,7 @@ ofZgraph g = ListGraph $ swallow blocks call_succs = let add b succs = case G.last (G.unzip b) of - G.LastOther (LastCall _ (Just id)) -> extendBlockSet succs id + G.LastOther (LastCall _ (Just id) _) -> extendBlockSet succs id _ -> succs in G.fold_blocks add emptyBlockSet g _is_call_succ id = elemBlockSet id call_succs diff --git a/compiler/cmm/CmmExpr.hs b/compiler/cmm/CmmExpr.hs index 69a4952ed6..5893843a20 100644 --- a/compiler/cmm/CmmExpr.hs +++ b/compiler/cmm/CmmExpr.hs @@ -1,22 +1,56 @@ module CmmExpr - ( CmmExpr(..), cmmExprRep, maybeInvertCmmExpr - , CmmReg(..), cmmRegRep - , CmmLit(..), cmmLitRep - , LocalReg(..), localRegRep, localRegGCFollow, GCKind(..) - , GlobalReg(..), globalRegRep, spReg, hpReg, spLimReg, nodeReg, node + ( CmmType -- Abstract + , b8, b16, b32, b64, f32, f64, bWord, bHalfWord, gcWord + , cInt, cLong + , cmmBits, cmmFloat + , typeWidth, cmmEqType, cmmEqType_ignoring_ptrhood + , isFloatType, isGcPtrType, isWord32, isWord64, isFloat64, isFloat32 + + , Width(..) + , widthInBits, widthInBytes, widthInLog + , wordWidth, halfWordWidth, cIntWidth, cLongWidth + + , CmmExpr(..), cmmExprType, cmmExprWidth, maybeInvertCmmExpr + , CmmReg(..), cmmRegType + , CmmLit(..), cmmLitType + , LocalReg(..), localRegType + , GlobalReg(..), globalRegType, spReg, hpReg, spLimReg, nodeReg, node + , VGcPtr(..), vgcFlag -- Temporary! , DefinerOfLocalRegs, UserOfLocalRegs, foldRegsDefd, foldRegsUsed, filterRegsUsed + , DefinerOfSlots, UserOfSlots, foldSlotsDefd, foldSlotsUsed , RegSet, emptyRegSet, elemRegSet, extendRegSet, deleteFromRegSet, mkRegSet , plusRegSet, minusRegSet, timesRegSet - , Area(..), StackSlotMap, getSlot, mkCallArea, outgoingSlot, areaId, areaSize - ) where + , Area(..), AreaId(..), SubArea, StackSlotMap, getSlot + + -- MachOp + , MachOp(..) + , pprMachOp, isCommutableMachOp, isAssociativeMachOp + , isComparisonMachOp, machOpResultType + , machOpArgReps, maybeInvertComparison + + -- MachOp builders + , mo_wordAdd, mo_wordSub, mo_wordEq, mo_wordNe,mo_wordMul, mo_wordSQuot + , mo_wordSRem, mo_wordSNeg, mo_wordUQuot, mo_wordURem + , mo_wordSGe, mo_wordSLe, mo_wordSGt, mo_wordSLt, mo_wordUGe + , mo_wordULe, mo_wordUGt, mo_wordULt + , mo_wordAnd, mo_wordOr, mo_wordXor, mo_wordNot, mo_wordShl, mo_wordSShr, mo_wordUShr + , mo_u_8To32, mo_s_8To32, mo_u_16To32, mo_s_16To32 + , mo_u_8ToWord, mo_s_8ToWord, mo_u_16ToWord, mo_s_16ToWord, mo_u_32ToWord, mo_s_32ToWord + , mo_32To8, mo_32To16, mo_WordTo8, mo_WordTo16, mo_WordTo32 + ) +where + +#include "HsVersions.h" import BlockId import CLabel +import Constants +import FastString import FiniteMap -import MachOp import Maybes import Monad +import Outputable import Panic import Unique import UniqSet @@ -28,16 +62,24 @@ import UniqSet data CmmExpr = CmmLit CmmLit -- Literal - | CmmLoad CmmExpr MachRep -- Read memory location + | CmmLoad CmmExpr CmmType -- Read memory location | CmmReg CmmReg -- Contents of register | CmmMachOp MachOp [CmmExpr] -- Machine operation (+, -, *, etc.) + | CmmStackSlot Area Int -- addressing expression of a stack slot | CmmRegOff CmmReg Int -- CmmRegOff reg i -- ** is shorthand only, meaning ** -- CmmMachOp (MO_S_Add rep (CmmReg reg) (CmmLit (CmmInt i rep))) - -- where rep = cmmRegRep reg - | CmmStackSlot Area Int - deriving Eq + -- where rep = cmmRegType reg + +instance Eq CmmExpr where -- Equality ignores the types + CmmLit l1 == CmmLit l2 = l1==l2 + CmmLoad e1 _ == CmmLoad e2 _ = e1==e2 + CmmReg r1 == CmmReg r2 = r1==r2 + CmmRegOff r1 i1 == CmmRegOff r2 i2 = r1==r2 && i1==i2 + CmmMachOp op1 es1 == CmmMachOp op2 es2 = op1==op2 && es1==es2 + CmmStackSlot a1 i1 == CmmStackSlot a2 i2 = a1==a2 && i1==i2 + _e1 == _e2 = False data CmmReg = CmmLocal LocalReg @@ -48,17 +90,24 @@ data CmmReg -- or the stack space where function arguments and results are passed. data Area = RegSlot LocalReg - | CallArea BlockId Int Int + | CallArea AreaId deriving (Eq, Ord) +data AreaId + = Old -- entry parameters, jumps, and returns share one call area at old end of stack + | Young BlockId + deriving (Eq, Ord) + +type SubArea = (Area, Int, Int) -- area, offset, width + data CmmLit - = CmmInt Integer MachRep + = CmmInt Integer Width -- Interpretation: the 2's complement representation of the value -- is truncated to the specified size. This is easier than trying -- to keep the value within range, because we don't know whether - -- it will be used as a signed or unsigned value (the MachRep doesn't + -- it will be used as a signed or unsigned value (the CmmType doesn't -- distinguish between signed & unsigned). - | CmmFloat Rational MachRep + | CmmFloat Rational Width | CmmLabel CLabel -- Address of label | CmmLabelOff CLabel Int -- Address of label + byte offset @@ -72,14 +121,27 @@ data CmmLit | CmmLabelDiffOff CLabel CLabel Int -- label1 - label2 + offset deriving Eq -instance Eq LocalReg where - (LocalReg u1 _ _) == (LocalReg u2 _ _) = u1 == u2 +cmmExprType :: CmmExpr -> CmmType +cmmExprType (CmmLit lit) = cmmLitType lit +cmmExprType (CmmLoad _ rep) = rep +cmmExprType (CmmReg reg) = cmmRegType reg +cmmExprType (CmmMachOp op args) = machOpResultType op (map cmmExprType args) +cmmExprType (CmmRegOff reg _) = cmmRegType reg +cmmExprType (CmmStackSlot _ _) = bWord -- an address -instance Ord LocalReg where - compare (LocalReg u1 _ _) (LocalReg u2 _ _) = compare u1 u2 +cmmLitType :: CmmLit -> CmmType +cmmLitType (CmmInt _ width) = cmmBits width +cmmLitType (CmmFloat _ width) = cmmFloat width +cmmLitType (CmmLabel lbl) = cmmLabelType lbl +cmmLitType (CmmLabelOff lbl _) = cmmLabelType lbl +cmmLitType (CmmLabelDiffOff {}) = bWord -instance Uniquable LocalReg where - getUnique (LocalReg uniq _ _) = uniq +cmmLabelType :: CLabel -> CmmType +cmmLabelType lbl | isGcPtrLabel lbl = gcWord + | otherwise = bWord + +cmmExprWidth :: CmmExpr -> Width +cmmExprWidth e = typeWidth (cmmExprType e) -------- --- Negation for conditional branches @@ -93,17 +155,33 @@ maybeInvertCmmExpr _ = Nothing -- Local registers ----------------------------------------------------------------------------- --- | Whether a 'LocalReg' is a GC followable pointer -data GCKind = GCKindPtr | GCKindNonPtr deriving (Eq) - data LocalReg - = LocalReg !Unique MachRep GCKind + = LocalReg !Unique CmmType -- ^ Parameters: -- 1. Identifier -- 2. Type - -- 3. Should the GC follow as a pointer --- Sets of local registers +instance Eq LocalReg where + (LocalReg u1 _) == (LocalReg u2 _) = u1 == u2 + +instance Ord LocalReg where + compare (LocalReg u1 _) (LocalReg u2 _) = compare u1 u2 + +instance Uniquable LocalReg where + getUnique (LocalReg uniq _) = uniq + +cmmRegType :: CmmReg -> CmmType +cmmRegType (CmmLocal reg) = localRegType reg +cmmRegType (CmmGlobal reg) = globalRegType reg + +localRegType :: LocalReg -> CmmType +localRegType (LocalReg _ rep) = rep + +----------------------------------------------------------------------------- +-- Register-use information for expressions and other types +----------------------------------------------------------------------------- + +-- | Sets of local registers type RegSet = UniqSet LocalReg emptyRegSet :: RegSet elemRegSet :: LocalReg -> RegSet -> Bool @@ -121,45 +199,6 @@ minusRegSet = minusUniqSet plusRegSet = unionUniqSets timesRegSet = intersectUniqSets ------------------------------------------------------------------------------ --- Stack slots ------------------------------------------------------------------------------ - -mkVarSlot :: LocalReg -> CmmExpr -mkVarSlot r = CmmStackSlot (RegSlot r) 0 - --- Usually, we either want to lookup a variable's spill slot in an environment --- or else allocate it and add it to the environment. --- For a variable, we just need a single area of the appropriate size. -type StackSlotMap = FiniteMap LocalReg CmmExpr -getSlot :: StackSlotMap -> LocalReg -> (StackSlotMap, CmmExpr) -getSlot map r = case lookupFM map r of - Just s -> (map, s) - Nothing -> (addToFM map r s, s) where s = mkVarSlot r - --- Eventually, we'll want something proper that takes arguments and formals --- and gives you back the calling convention code, as well as the stack area. -mkCallArea :: BlockId -> [a] -> Maybe [b] -> Area -mkCallArea id as fs = CallArea id (length as) (liftM length fs `orElse` 0) - --- Return the last slot in the outgoing parameter area. -outgoingSlot :: Area -> CmmExpr -outgoingSlot a@(RegSlot _) = CmmStackSlot a 0 -outgoingSlot a@(CallArea _ outN _) = CmmStackSlot a outN - -areaId :: Area -> BlockId -areaId (RegSlot _) = panic "Register stack slots don't have IDs!" -areaId (CallArea id _ _) = id - -areaSize :: Area -> Int -areaSize (RegSlot _) = 1 -areaSize (CallArea _ outN inN) = max outN inN - - ------------------------------------------------------------------------------ --- Register-use information for expressions and other types ------------------------------------------------------------------------------ - class UserOfLocalRegs a where foldRegsUsed :: (b -> LocalReg -> b) -> b -> a -> b @@ -205,46 +244,69 @@ instance DefinerOfLocalRegs a => DefinerOfLocalRegs [a] where foldRegsDefd _ set [] = set foldRegsDefd f set (x:xs) = foldRegsDefd f (foldRegsDefd f set x) xs + ----------------------------------------------------------------------------- --- MachRep +-- Stack slots ----------------------------------------------------------------------------- +mkVarSlot :: LocalReg -> CmmExpr +mkVarSlot r = CmmStackSlot (RegSlot r) 0 +-- Usually, we either want to lookup a variable's spill slot in an environment +-- or else allocate it and add it to the environment. +-- For a variable, we just need a single area of the appropriate size. +type StackSlotMap = FiniteMap LocalReg CmmExpr +getSlot :: StackSlotMap -> LocalReg -> (StackSlotMap, CmmExpr) +getSlot map r = case lookupFM map r of + Just s -> (map, s) + Nothing -> (addToFM map r s, s) where s = mkVarSlot r -cmmExprRep :: CmmExpr -> MachRep -cmmExprRep (CmmLit lit) = cmmLitRep lit -cmmExprRep (CmmLoad _ rep) = rep -cmmExprRep (CmmReg reg) = cmmRegRep reg -cmmExprRep (CmmMachOp op _) = resultRepOfMachOp op -cmmExprRep (CmmRegOff reg _) = cmmRegRep reg -cmmExprRep (CmmStackSlot _ _) = wordRep +----------------------------------------------------------------------------- +-- Stack slot use information for expressions and other types [_$_] +----------------------------------------------------------------------------- -cmmRegRep :: CmmReg -> MachRep -cmmRegRep (CmmLocal reg) = localRegRep reg -cmmRegRep (CmmGlobal reg) = globalRegRep reg -localRegRep :: LocalReg -> MachRep -localRegRep (LocalReg _ rep _) = rep +-- Fold over the area, the offset into the area, and the width of the subarea. +class UserOfSlots a where + foldSlotsUsed :: (b -> SubArea -> b) -> b -> a -> b +class DefinerOfSlots a where + foldSlotsDefd :: (b -> SubArea -> b) -> b -> a -> b -localRegGCFollow :: LocalReg -> GCKind -localRegGCFollow (LocalReg _ _ p) = p +instance UserOfSlots CmmExpr where + foldSlotsUsed f z e = expr z e + where expr z (CmmLit _) = z + expr z (CmmLoad (CmmStackSlot a i) ty) = f z (a, i, widthInBytes $ typeWidth ty) + expr z (CmmLoad addr _) = foldSlotsUsed f z addr + expr z (CmmReg _) = z + expr z (CmmMachOp _ exprs) = foldSlotsUsed f z exprs + expr z (CmmRegOff _ _) = z + expr z (CmmStackSlot _ _) = z + +instance UserOfSlots a => UserOfSlots [a] where + foldSlotsUsed _ set [] = set + foldSlotsUsed f set (x:xs) = foldSlotsUsed f (foldSlotsUsed f set x) xs + + +----------------------------------------------------------------------------- +-- Global STG registers +----------------------------------------------------------------------------- -cmmLitRep :: CmmLit -> MachRep -cmmLitRep (CmmInt _ rep) = rep -cmmLitRep (CmmFloat _ rep) = rep -cmmLitRep (CmmLabel _) = wordRep -cmmLitRep (CmmLabelOff _ _) = wordRep -cmmLitRep (CmmLabelDiffOff _ _ _) = wordRep +data VGcPtr = VGcPtr | VNonGcPtr deriving( Eq, Show ) + -- TEMPORARY!!! ----------------------------------------------------------------------------- -- Global STG registers ----------------------------------------------------------------------------- +vgcFlag :: CmmType -> VGcPtr +vgcFlag ty | isGcPtrType ty = VGcPtr + | otherwise = VNonGcPtr data GlobalReg -- Argument and return registers = VanillaReg -- pointers, unboxed ints and chars {-# UNPACK #-} !Int -- its number + VGcPtr | FloatReg -- single-precision floating-point registers {-# UNPACK #-} !Int -- its number @@ -282,7 +344,71 @@ data GlobalReg -- from platform to platform (see module PositionIndependentCode). | PicBaseReg - deriving( Eq, Ord, Show ) + deriving( Show ) + +instance Eq GlobalReg where + VanillaReg i _ == VanillaReg j _ = i==j -- Ignore type when seeking clashes + FloatReg i == FloatReg j = i==j + DoubleReg i == DoubleReg j = i==j + LongReg i == LongReg j = i==j + Sp == Sp = True + SpLim == SpLim = True + Hp == Hp = True + HpLim == HpLim = True + CurrentTSO == CurrentTSO = True + CurrentNursery == CurrentNursery = True + HpAlloc == HpAlloc = True + GCEnter1 == GCEnter1 = True + GCFun == GCFun = True + BaseReg == BaseReg = True + PicBaseReg == PicBaseReg = True + _r1 == _r2 = False + +instance Ord GlobalReg where + compare (VanillaReg i _) (VanillaReg j _) = compare i j + -- Ignore type when seeking clashes + compare (FloatReg i) (FloatReg j) = compare i j + compare (DoubleReg i) (DoubleReg j) = compare i j + compare (LongReg i) (LongReg j) = compare i j + compare Sp Sp = EQ + compare SpLim SpLim = EQ + compare Hp Hp = EQ + compare HpLim HpLim = EQ + compare CurrentTSO CurrentTSO = EQ + compare CurrentNursery CurrentNursery = EQ + compare HpAlloc HpAlloc = EQ + compare GCEnter1 GCEnter1 = EQ + compare GCFun GCFun = EQ + compare BaseReg BaseReg = EQ + compare PicBaseReg PicBaseReg = EQ + compare (VanillaReg _ _) _ = LT + compare _ (VanillaReg _ _) = GT + compare (FloatReg _) _ = LT + compare _ (FloatReg _) = GT + compare (DoubleReg _) _ = LT + compare _ (DoubleReg _) = GT + compare (LongReg _) _ = LT + compare _ (LongReg _) = GT + compare Sp _ = LT + compare _ Sp = GT + compare SpLim _ = LT + compare _ SpLim = GT + compare Hp _ = LT + compare _ Hp = GT + compare HpLim _ = LT + compare _ HpLim = GT + compare CurrentTSO _ = LT + compare _ CurrentTSO = GT + compare CurrentNursery _ = LT + compare _ CurrentNursery = GT + compare HpAlloc _ = LT + compare _ HpAlloc = GT + compare GCEnter1 _ = LT + compare _ GCEnter1 = GT + compare GCFun _ = LT + compare _ GCFun = GT + compare BaseReg _ = LT + compare _ BaseReg = GT -- convenient aliases spReg, hpReg, spLimReg, nodeReg :: CmmReg @@ -292,11 +418,682 @@ spLimReg = CmmGlobal SpLim nodeReg = CmmGlobal node node :: GlobalReg -node = VanillaReg 1 - -globalRegRep :: GlobalReg -> MachRep -globalRegRep (VanillaReg _) = wordRep -globalRegRep (FloatReg _) = F32 -globalRegRep (DoubleReg _) = F64 -globalRegRep (LongReg _) = I64 -globalRegRep _ = wordRep +node = VanillaReg 1 VGcPtr + +globalRegType :: GlobalReg -> CmmType +globalRegType (VanillaReg _ VGcPtr) = gcWord +globalRegType (VanillaReg _ VNonGcPtr) = bWord +globalRegType (FloatReg _) = cmmFloat W32 +globalRegType (DoubleReg _) = cmmFloat W64 +globalRegType (LongReg _) = cmmBits W64 +globalRegType Hp = gcWord -- The initialiser for all + -- dynamically allocated closures +globalRegType _ = bWord + + +----------------------------------------------------------------------------- +-- CmmType +----------------------------------------------------------------------------- + + -- NOTE: CmmType is an abstract type, not exported from this + -- module so you can easily change its representation + -- + -- However Width is exported in a concrete way, + -- and is used extensively in pattern-matching + +data CmmType -- The important one! + = CmmType CmmCat Width + +data CmmCat -- "Category" (not exported) + = GcPtrCat -- GC pointer + | BitsCat -- Non-pointer + | FloatCat -- Float + deriving( Eq ) + -- See Note [Signed vs unsigned] at the end + +instance Outputable CmmType where + ppr (CmmType cat wid) = ppr cat <> ppr (widthInBits wid) + +instance Outputable CmmCat where + ppr FloatCat = ptext $ sLit("F") + ppr _ = ptext $ sLit("I") +-- Temp Jan 08 +-- ppr FloatCat = ptext $ sLit("float") +-- ppr BitsCat = ptext $ sLit("bits") +-- ppr GcPtrCat = ptext $ sLit("gcptr") + +-- Why is CmmType stratified? For native code generation, +-- most of the time you just want to know what sort of register +-- to put the thing in, and for this you need to know how +-- many bits thing has and whether it goes in a floating-point +-- register. By contrast, the distinction between GcPtr and +-- GcNonPtr is of interest to only a few parts of the code generator. + +-------- Equality on CmmType -------------- +-- CmmType is *not* an instance of Eq; sometimes we care about the +-- Gc/NonGc distinction, and sometimes we don't +-- So we use an explicit function to force you to think about it +cmmEqType :: CmmType -> CmmType -> Bool -- Exact equality +cmmEqType (CmmType c1 w1) (CmmType c2 w2) = c1==c2 && w1==w2 + +cmmEqType_ignoring_ptrhood :: CmmType -> CmmType -> Bool + -- This equality is temporary; used in CmmLint + -- but the RTS files are not yet well-typed wrt pointers +cmmEqType_ignoring_ptrhood (CmmType c1 w1) (CmmType c2 w2) + = c1 `weak_eq` c2 && w1==w2 + where + FloatCat `weak_eq` FloatCat = True + FloatCat `weak_eq` _other = False + _other `weak_eq` FloatCat = False + _word1 `weak_eq` _word2 = True -- Ignores GcPtr + +--- Simple operations on CmmType ----- +typeWidth :: CmmType -> Width +typeWidth (CmmType _ w) = w + +cmmBits, cmmFloat :: Width -> CmmType +cmmBits = CmmType BitsCat +cmmFloat = CmmType FloatCat + +-------- Common CmmTypes ------------ +-- Floats and words of specific widths +b8, b16, b32, b64, f32, f64 :: CmmType +b8 = cmmBits W8 +b16 = cmmBits W16 +b32 = cmmBits W32 +b64 = cmmBits W64 +f32 = cmmFloat W32 +f64 = cmmFloat W64 + +-- CmmTypes of native word widths +bWord, bHalfWord, gcWord :: CmmType +bWord = cmmBits wordWidth +bHalfWord = cmmBits halfWordWidth +gcWord = CmmType GcPtrCat wordWidth + +cInt, cLong :: CmmType +cInt = cmmBits cIntWidth +cLong = cmmBits cLongWidth + + +------------ Predicates ---------------- +isFloatType, isGcPtrType :: CmmType -> Bool +isFloatType (CmmType FloatCat _) = True +isFloatType _other = False + +isGcPtrType (CmmType GcPtrCat _) = True +isGcPtrType _other = False + +isWord32, isWord64, isFloat32, isFloat64 :: CmmType -> Bool +-- isWord64 is true of 64-bit non-floats (both gc-ptrs and otherwise) +-- isFloat32 and 64 are obvious + +isWord64 (CmmType BitsCat W64) = True +isWord64 (CmmType GcPtrCat W64) = True +isWord64 _other = False + +isWord32 (CmmType BitsCat W32) = True +isWord32 (CmmType GcPtrCat W32) = True +isWord32 _other = False + +isFloat32 (CmmType FloatCat W32) = True +isFloat32 _other = False + +isFloat64 (CmmType FloatCat W64) = True +isFloat64 _other = False + +----------------------------------------------------------------------------- +-- Width +----------------------------------------------------------------------------- + +data Width = W8 | W16 | W32 | W64 + | W80 -- Extended double-precision float, + -- used in x86 native codegen only. + -- (we use Ord, so it'd better be in this order) + | W128 + deriving (Eq, Ord, Show) + +instance Outputable Width where + ppr rep = ptext (mrStr rep) + +mrStr :: Width -> LitString +mrStr W8 = sLit("W8") +mrStr W16 = sLit("W16") +mrStr W32 = sLit("W32") +mrStr W64 = sLit("W64") +mrStr W128 = sLit("W128") +mrStr W80 = sLit("W80") + + +-------- Common Widths ------------ +wordWidth, halfWordWidth :: Width +wordWidth | wORD_SIZE == 4 = W32 + | wORD_SIZE == 8 = W64 + | otherwise = panic "MachOp.wordRep: Unknown word size" + +halfWordWidth | wORD_SIZE == 4 = W16 + | wORD_SIZE == 8 = W32 + | otherwise = panic "MachOp.halfWordRep: Unknown word size" + +-- cIntRep is the Width for a C-language 'int' +cIntWidth, cLongWidth :: Width +#if SIZEOF_INT == 4 +cIntWidth = W32 +#elif SIZEOF_INT == 8 +cIntWidth = W64 +#endif + +#if SIZEOF_LONG == 4 +cLongWidth = W32 +#elif SIZEOF_LONG == 8 +cLongWidth = W64 +#endif + +widthInBits :: Width -> Int +widthInBits W8 = 8 +widthInBits W16 = 16 +widthInBits W32 = 32 +widthInBits W64 = 64 +widthInBits W128 = 128 +widthInBits W80 = 80 + +widthInBytes :: Width -> Int +widthInBytes W8 = 1 +widthInBytes W16 = 2 +widthInBytes W32 = 4 +widthInBytes W64 = 8 +widthInBytes W128 = 16 +widthInBytes W80 = 10 + +-- log_2 of the width in bytes, useful for generating shifts. +widthInLog :: Width -> Int +widthInLog W8 = 0 +widthInLog W16 = 1 +widthInLog W32 = 2 +widthInLog W64 = 3 +widthInLog W128 = 4 +widthInLog W80 = panic "widthInLog: F80" + + +----------------------------------------------------------------------------- +-- MachOp +----------------------------------------------------------------------------- + +{- +Implementation notes: + +It might suffice to keep just a width, without distinguishing between +floating and integer types. However, keeping the distinction will +help the native code generator to assign registers more easily. +-} + + +{- | +Machine-level primops; ones which we can reasonably delegate to the +native code generators to handle. Basically contains C's primops +and no others. + +Nomenclature: all ops indicate width and signedness, where +appropriate. Widths: 8\/16\/32\/64 means the given size, obviously. +Nat means the operation works on STG word sized objects. +Signedness: S means signed, U means unsigned. For operations where +signedness is irrelevant or makes no difference (for example +integer add), the signedness component is omitted. + +An exception: NatP is a ptr-typed native word. From the point of +view of the native code generators this distinction is irrelevant, +but the C code generator sometimes needs this info to emit the +right casts. +-} + +data MachOp + -- Integer operations (insensitive to signed/unsigned) + = MO_Add Width + | MO_Sub Width + | MO_Eq Width + | MO_Ne Width + | MO_Mul Width -- low word of multiply + + -- Signed multiply/divide + | MO_S_MulMayOflo Width -- nonzero if signed multiply overflows + | MO_S_Quot Width -- signed / (same semantics as IntQuotOp) + | MO_S_Rem Width -- signed % (same semantics as IntRemOp) + | MO_S_Neg Width -- unary - + + -- Unsigned multiply/divide + | MO_U_MulMayOflo Width -- nonzero if unsigned multiply overflows + | MO_U_Quot Width -- unsigned / (same semantics as WordQuotOp) + | MO_U_Rem Width -- unsigned % (same semantics as WordRemOp) + + -- Signed comparisons + | MO_S_Ge Width + | MO_S_Le Width + | MO_S_Gt Width + | MO_S_Lt Width + + -- Unsigned comparisons + | MO_U_Ge Width + | MO_U_Le Width + | MO_U_Gt Width + | MO_U_Lt Width + + -- Floating point arithmetic + | MO_F_Add Width + | MO_F_Sub Width + | MO_F_Neg Width -- unary - + | MO_F_Mul Width + | MO_F_Quot Width + + -- Floating point comparison + | MO_F_Eq Width + | MO_F_Ne Width + | MO_F_Ge Width + | MO_F_Le Width + | MO_F_Gt Width + | MO_F_Lt Width + + -- Bitwise operations. Not all of these may be supported + -- at all sizes, and only integral Widths are valid. + | MO_And Width + | MO_Or Width + | MO_Xor Width + | MO_Not Width + | MO_Shl Width + | MO_U_Shr Width -- unsigned shift right + | MO_S_Shr Width -- signed shift right + + -- Conversions. Some of these will be NOPs. + -- Floating-point conversions use the signed variant. + | MO_SF_Conv Width Width -- Signed int -> Float + | MO_FS_Conv Width Width -- Float -> Signed int + | MO_SS_Conv Width Width -- Signed int -> Signed int + | MO_UU_Conv Width Width -- unsigned int -> unsigned int + | MO_FF_Conv Width Width -- Float -> Float + deriving (Eq, Show) + +pprMachOp :: MachOp -> SDoc +pprMachOp mo = text (show mo) + + + +-- ----------------------------------------------------------------------------- +-- Some common MachReps + +-- A 'wordRep' is a machine word on the target architecture +-- Specifically, it is the size of an Int#, Word#, Addr# +-- and the unit of allocation on the stack and the heap +-- Any pointer is also guaranteed to be a wordRep. + +mo_wordAdd, mo_wordSub, mo_wordEq, mo_wordNe,mo_wordMul, mo_wordSQuot + , mo_wordSRem, mo_wordSNeg, mo_wordUQuot, mo_wordURem + , mo_wordSGe, mo_wordSLe, mo_wordSGt, mo_wordSLt, mo_wordUGe + , mo_wordULe, mo_wordUGt, mo_wordULt + , mo_wordAnd, mo_wordOr, mo_wordXor, mo_wordNot, mo_wordShl, mo_wordSShr, mo_wordUShr + , mo_u_8To32, mo_s_8To32, mo_u_16To32, mo_s_16To32 + , mo_u_8ToWord, mo_s_8ToWord, mo_u_16ToWord, mo_s_16ToWord, mo_u_32ToWord, mo_s_32ToWord + , mo_32To8, mo_32To16, mo_WordTo8, mo_WordTo16, mo_WordTo32 + :: MachOp + +mo_wordAdd = MO_Add wordWidth +mo_wordSub = MO_Sub wordWidth +mo_wordEq = MO_Eq wordWidth +mo_wordNe = MO_Ne wordWidth +mo_wordMul = MO_Mul wordWidth +mo_wordSQuot = MO_S_Quot wordWidth +mo_wordSRem = MO_S_Rem wordWidth +mo_wordSNeg = MO_S_Neg wordWidth +mo_wordUQuot = MO_U_Quot wordWidth +mo_wordURem = MO_U_Rem wordWidth + +mo_wordSGe = MO_S_Ge wordWidth +mo_wordSLe = MO_S_Le wordWidth +mo_wordSGt = MO_S_Gt wordWidth +mo_wordSLt = MO_S_Lt wordWidth + +mo_wordUGe = MO_U_Ge wordWidth +mo_wordULe = MO_U_Le wordWidth +mo_wordUGt = MO_U_Gt wordWidth +mo_wordULt = MO_U_Lt wordWidth + +mo_wordAnd = MO_And wordWidth +mo_wordOr = MO_Or wordWidth +mo_wordXor = MO_Xor wordWidth +mo_wordNot = MO_Not wordWidth +mo_wordShl = MO_Shl wordWidth +mo_wordSShr = MO_S_Shr wordWidth +mo_wordUShr = MO_U_Shr wordWidth + +mo_u_8To32 = MO_UU_Conv W8 W32 +mo_s_8To32 = MO_SS_Conv W8 W32 +mo_u_16To32 = MO_UU_Conv W16 W32 +mo_s_16To32 = MO_SS_Conv W16 W32 + +mo_u_8ToWord = MO_UU_Conv W8 wordWidth +mo_s_8ToWord = MO_SS_Conv W8 wordWidth +mo_u_16ToWord = MO_UU_Conv W16 wordWidth +mo_s_16ToWord = MO_SS_Conv W16 wordWidth +mo_s_32ToWord = MO_SS_Conv W32 wordWidth +mo_u_32ToWord = MO_UU_Conv W32 wordWidth + +mo_WordTo8 = MO_UU_Conv wordWidth W8 +mo_WordTo16 = MO_UU_Conv wordWidth W16 +mo_WordTo32 = MO_UU_Conv wordWidth W32 + +mo_32To8 = MO_UU_Conv W32 W8 +mo_32To16 = MO_UU_Conv W32 W16 + + +-- ---------------------------------------------------------------------------- +-- isCommutableMachOp + +{- | +Returns 'True' if the MachOp has commutable arguments. This is used +in the platform-independent Cmm optimisations. + +If in doubt, return 'False'. This generates worse code on the +native routes, but is otherwise harmless. +-} +isCommutableMachOp :: MachOp -> Bool +isCommutableMachOp mop = + case mop of + MO_Add _ -> True + MO_Eq _ -> True + MO_Ne _ -> True + MO_Mul _ -> True + MO_S_MulMayOflo _ -> True + MO_U_MulMayOflo _ -> True + MO_And _ -> True + MO_Or _ -> True + MO_Xor _ -> True + _other -> False + +-- ---------------------------------------------------------------------------- +-- isAssociativeMachOp + +{- | +Returns 'True' if the MachOp is associative (i.e. @(x+y)+z == x+(y+z)@) +This is used in the platform-independent Cmm optimisations. + +If in doubt, return 'False'. This generates worse code on the +native routes, but is otherwise harmless. +-} +isAssociativeMachOp :: MachOp -> Bool +isAssociativeMachOp mop = + case mop of + MO_Add {} -> True -- NB: does not include + MO_Mul {} -> True -- floatint point! + MO_And {} -> True + MO_Or {} -> True + MO_Xor {} -> True + _other -> False + +-- ---------------------------------------------------------------------------- +-- isComparisonMachOp + +{- | +Returns 'True' if the MachOp is a comparison. + +If in doubt, return False. This generates worse code on the +native routes, but is otherwise harmless. +-} +isComparisonMachOp :: MachOp -> Bool +isComparisonMachOp mop = + case mop of + MO_Eq _ -> True + MO_Ne _ -> True + MO_S_Ge _ -> True + MO_S_Le _ -> True + MO_S_Gt _ -> True + MO_S_Lt _ -> True + MO_U_Ge _ -> True + MO_U_Le _ -> True + MO_U_Gt _ -> True + MO_U_Lt _ -> True + MO_F_Eq {} -> True + MO_F_Ne {} -> True + MO_F_Ge {} -> True + MO_F_Le {} -> True + MO_F_Gt {} -> True + MO_F_Lt {} -> True + _other -> False + +-- ----------------------------------------------------------------------------- +-- Inverting conditions + +-- Sometimes it's useful to be able to invert the sense of a +-- condition. Not all conditional tests are invertible: in +-- particular, floating point conditionals cannot be inverted, because +-- there exist floating-point values which return False for both senses +-- of a condition (eg. !(NaN > NaN) && !(NaN /<= NaN)). + +maybeInvertComparison :: MachOp -> Maybe MachOp +maybeInvertComparison op + = case op of -- None of these Just cases include floating point + MO_Eq r -> Just (MO_Ne r) + MO_Ne r -> Just (MO_Eq r) + MO_U_Lt r -> Just (MO_U_Ge r) + MO_U_Gt r -> Just (MO_U_Le r) + MO_U_Le r -> Just (MO_U_Gt r) + MO_U_Ge r -> Just (MO_U_Lt r) + MO_S_Lt r -> Just (MO_S_Ge r) + MO_S_Gt r -> Just (MO_S_Le r) + MO_S_Le r -> Just (MO_S_Gt r) + MO_S_Ge r -> Just (MO_S_Lt r) + MO_F_Eq r -> Just (MO_F_Ne r) + MO_F_Ne r -> Just (MO_F_Eq r) + MO_F_Ge r -> Just (MO_F_Le r) + MO_F_Le r -> Just (MO_F_Ge r) + MO_F_Gt r -> Just (MO_F_Lt r) + MO_F_Lt r -> Just (MO_F_Gt r) + _other -> Nothing + +-- ---------------------------------------------------------------------------- +-- machOpResultType + +{- | +Returns the MachRep of the result of a MachOp. +-} +machOpResultType :: MachOp -> [CmmType] -> CmmType +machOpResultType mop tys = + case mop of + MO_Add {} -> ty1 -- Preserve GC-ptr-hood + MO_Sub {} -> ty1 -- of first arg + MO_Mul r -> cmmBits r + MO_S_MulMayOflo r -> cmmBits r + MO_S_Quot r -> cmmBits r + MO_S_Rem r -> cmmBits r + MO_S_Neg r -> cmmBits r + MO_U_MulMayOflo r -> cmmBits r + MO_U_Quot r -> cmmBits r + MO_U_Rem r -> cmmBits r + + MO_Eq {} -> comparisonResultRep + MO_Ne {} -> comparisonResultRep + MO_S_Ge {} -> comparisonResultRep + MO_S_Le {} -> comparisonResultRep + MO_S_Gt {} -> comparisonResultRep + MO_S_Lt {} -> comparisonResultRep + + MO_U_Ge {} -> comparisonResultRep + MO_U_Le {} -> comparisonResultRep + MO_U_Gt {} -> comparisonResultRep + MO_U_Lt {} -> comparisonResultRep + + MO_F_Add r -> cmmFloat r + MO_F_Sub r -> cmmFloat r + MO_F_Mul r -> cmmFloat r + MO_F_Quot r -> cmmFloat r + MO_F_Neg r -> cmmFloat r + MO_F_Eq {} -> comparisonResultRep + MO_F_Ne {} -> comparisonResultRep + MO_F_Ge {} -> comparisonResultRep + MO_F_Le {} -> comparisonResultRep + MO_F_Gt {} -> comparisonResultRep + MO_F_Lt {} -> comparisonResultRep + + MO_And {} -> ty1 -- Used for pointer masking + MO_Or {} -> ty1 + MO_Xor {} -> ty1 + MO_Not r -> cmmBits r + MO_Shl r -> cmmBits r + MO_U_Shr r -> cmmBits r + MO_S_Shr r -> cmmBits r + + MO_SS_Conv _ to -> cmmBits to + MO_UU_Conv _ to -> cmmBits to + MO_FS_Conv _ to -> cmmBits to + MO_SF_Conv _ to -> cmmFloat to + MO_FF_Conv _ to -> cmmFloat to + where + (ty1:_) = tys + +comparisonResultRep :: CmmType +comparisonResultRep = bWord -- is it? + + +-- ----------------------------------------------------------------------------- +-- machOpArgReps + +-- | This function is used for debugging only: we can check whether an +-- application of a MachOp is "type-correct" by checking that the MachReps of +-- its arguments are the same as the MachOp expects. This is used when +-- linting a CmmExpr. + +machOpArgReps :: MachOp -> [Width] +machOpArgReps op = + case op of + MO_Add r -> [r,r] + MO_Sub r -> [r,r] + MO_Eq r -> [r,r] + MO_Ne r -> [r,r] + MO_Mul r -> [r,r] + MO_S_MulMayOflo r -> [r,r] + MO_S_Quot r -> [r,r] + MO_S_Rem r -> [r,r] + MO_S_Neg r -> [r] + MO_U_MulMayOflo r -> [r,r] + MO_U_Quot r -> [r,r] + MO_U_Rem r -> [r,r] + + MO_S_Ge r -> [r,r] + MO_S_Le r -> [r,r] + MO_S_Gt r -> [r,r] + MO_S_Lt r -> [r,r] + + MO_U_Ge r -> [r,r] + MO_U_Le r -> [r,r] + MO_U_Gt r -> [r,r] + MO_U_Lt r -> [r,r] + + MO_F_Add r -> [r,r] + MO_F_Sub r -> [r,r] + MO_F_Mul r -> [r,r] + MO_F_Quot r -> [r,r] + MO_F_Neg r -> [r] + MO_F_Eq r -> [r,r] + MO_F_Ne r -> [r,r] + MO_F_Ge r -> [r,r] + MO_F_Le r -> [r,r] + MO_F_Gt r -> [r,r] + MO_F_Lt r -> [r,r] + + MO_And r -> [r,r] + MO_Or r -> [r,r] + MO_Xor r -> [r,r] + MO_Not r -> [r] + MO_Shl r -> [r,wordWidth] + MO_U_Shr r -> [r,wordWidth] + MO_S_Shr r -> [r,wordWidth] + + MO_SS_Conv from _ -> [from] + MO_UU_Conv from _ -> [from] + MO_SF_Conv from _ -> [from] + MO_FS_Conv from _ -> [from] + MO_FF_Conv from _ -> [from] + + +------------------------------------------------------------------------- +{- Note [Signed vs unsigned] + ~~~~~~~~~~~~~~~~~~~~~~~~~ +Should a CmmType include a signed vs. unsigned distinction? + +This is very much like a "hint" in C-- terminology: it isn't necessary +in order to generate correct code, but it might be useful in that the +compiler can generate better code if it has access to higher-level +hints about data. This is important at call boundaries, because the +definition of a function is not visible at all of its call sites, so +the compiler cannot infer the hints. + +Here in Cmm, we're taking a slightly different approach. We include +the int vs. float hint in the MachRep, because (a) the majority of +platforms have a strong distinction between float and int registers, +and (b) we don't want to do any heavyweight hint-inference in the +native code backend in order to get good code. We're treating the +hint more like a type: our Cmm is always completely consistent with +respect to hints. All coercions between float and int are explicit. + +What about the signed vs. unsigned hint? This information might be +useful if we want to keep sub-word-sized values in word-size +registers, which we must do if we only have word-sized registers. + +On such a system, there are two straightforward conventions for +representing sub-word-sized values: + +(a) Leave the upper bits undefined. Comparison operations must + sign- or zero-extend both operands before comparing them, + depending on whether the comparison is signed or unsigned. + +(b) Always keep the values sign- or zero-extended as appropriate. + Arithmetic operations must narrow the result to the appropriate + size. + +A clever compiler might not use either (a) or (b) exclusively, instead +it would attempt to minimize the coercions by analysis: the same kind +of analysis that propagates hints around. In Cmm we don't want to +have to do this, so we plump for having richer types and keeping the +type information consistent. + +If signed/unsigned hints are missing from MachRep, then the only +choice we have is (a), because we don't know whether the result of an +operation should be sign- or zero-extended. + +Many architectures have extending load operations, which work well +with (b). To make use of them with (a), you need to know whether the +value is going to be sign- or zero-extended by an enclosing comparison +(for example), which involves knowing above the context. This is +doable but more complex. + +Further complicating the issue is foreign calls: a foreign calling +convention can specify that signed 8-bit quantities are passed as +sign-extended 32 bit quantities, for example (this is the case on the +PowerPC). So we *do* need sign information on foreign call arguments. + +Pros for adding signed vs. unsigned to MachRep: + + - It would let us use convention (b) above, and get easier + code generation for extending loads. + + - Less information required on foreign calls. + + - MachOp type would be simpler + +Cons: + + - More complexity + + - What is the MachRep for a VanillaReg? Currently it is + always wordRep, but now we have to decide whether it is + signed or unsigned. The same VanillaReg can thus have + different MachReps in different parts of the program. + + - Extra coercions cluttering up expressions. + +Currently for GHC, the foreign call point is moot, because we do our +own promotion of sub-word-sized values to word-sized values. The Int8 +type is represnted by an Int# which is kept sign-extended at all times +(this is slightly naughty, because we're making assumptions about the +C calling convention rather early on in the compiler). However, given +this, the cons outweigh the pros. + +-} + diff --git a/compiler/cmm/CmmInfo.hs b/compiler/cmm/CmmInfo.hs index 314a9ad77e..eb226da03e 100644 --- a/compiler/cmm/CmmInfo.hs +++ b/compiler/cmm/CmmInfo.hs @@ -16,7 +16,6 @@ import Cmm import CmmUtils import CLabel -import MachOp import Bitmap import ClosureInfo @@ -26,6 +25,7 @@ import CgUtils import SMRep import Constants +import Outputable import StaticFlags import Unique import UniqSupply @@ -83,14 +83,15 @@ mkInfoTable uniq (CmmProc (CmmInfo _ _ info) entry_label arguments blocks) = CmmInfoTable (ProfilingInfo ty_prof cl_prof) type_tag type_info -> let info_label = entryLblToInfoLbl entry_label - ty_prof' = makeRelativeRefTo info_label ty_prof - cl_prof' = makeRelativeRefTo info_label cl_prof + ty_prof' = makeRelativeRefTo info_label ty_prof + cl_prof' = makeRelativeRefTo info_label cl_prof in case type_info of -- A function entry point. - FunInfo (ptrs, nptrs) srt fun_type fun_arity pap_bitmap slow_entry -> + FunInfo (ptrs, nptrs) srt fun_arity pap_bitmap slow_entry -> mkInfoTableAndCode info_label std_info fun_extra_bits entry_label arguments blocks where + fun_type = argDescrType pap_bitmap fun_extra_bits = [packHalfWordsCLit fun_type fun_arity] ++ case pap_bitmap of @@ -112,7 +113,6 @@ mkInfoTable uniq (CmmProc (CmmInfo _ _ info) entry_label arguments blocks) = std_info = mkStdInfoTable ty_prof' cl_prof' type_tag con_tag layout con_name = makeRelativeRefTo info_label descr layout = packHalfWordsCLit ptrs nptrs - -- A thunk. ThunkInfo (ptrs, nptrs) srt -> mkInfoTableAndCode info_label std_info srt_label entry_label @@ -150,7 +150,7 @@ mkInfoTableAndCode :: CLabel -> [CmmLit] -> [CmmLit] -> CLabel - -> CmmFormalsWithoutKinds + -> CmmFormals -> ListGraph CmmStmt -> [RawCmmTop] mkInfoTableAndCode info_lbl std_info extra_bits entry_lbl args blocks @@ -210,22 +210,19 @@ mkLiveness uniq live = -- does not fit in one word then (CmmLabel big_liveness, [data_lits], rET_BIG) -- fits in one word - else (mkWordCLit small_liveness, [], rET_SMALL) + else (mkWordCLit small_liveness, [], rET_SMALL) where mkBits [] = [] mkBits (reg:regs) = take sizeW bits ++ mkBits regs where sizeW = case reg of Nothing -> 1 - Just r -> (machRepByteWidth (localRegRep r) + wORD_SIZE - 1) + Just r -> (widthInBytes (typeWidth (localRegType r)) + wORD_SIZE - 1) `quot` wORD_SIZE -- number of words, rounded up bits = repeat $ is_non_ptr reg -- True <=> Non Ptr - is_non_ptr Nothing = True - is_non_ptr (Just reg) = - case localRegGCFollow reg of - GCKindNonPtr -> True - GCKindPtr -> False + is_non_ptr Nothing = True + is_non_ptr (Just reg) = not $ isGcPtrType (localRegType reg) bits :: [Bool] bits = mkBits live diff --git a/compiler/cmm/CmmLex.x b/compiler/cmm/CmmLex.x index 0adb610bb3..da5e4df3d8 100644 --- a/compiler/cmm/CmmLex.x +++ b/compiler/cmm/CmmLex.x @@ -95,7 +95,8 @@ $white_no_nl+ ; "&&" { kw CmmT_BoolAnd } "||" { kw CmmT_BoolOr } - R@decimal { global_regN VanillaReg } + P@decimal { global_regN (\n -> VanillaReg n VGcPtr) } + R@decimal { global_regN (\n -> VanillaReg n VNonGcPtr) } F@decimal { global_regN FloatReg } D@decimal { global_regN DoubleReg } L@decimal { global_regN LongReg } @@ -159,6 +160,7 @@ data CmmToken | CmmT_bits64 | CmmT_float32 | CmmT_float64 + | CmmT_gcptr | CmmT_GlobalReg GlobalReg | CmmT_Name FastString | CmmT_String String @@ -236,7 +238,15 @@ reservedWordsFM = listToUFM $ ( "bits32", CmmT_bits32 ), ( "bits64", CmmT_bits64 ), ( "float32", CmmT_float32 ), - ( "float64", CmmT_float64 ) + ( "float64", CmmT_float64 ), +-- New forms + ( "b8", CmmT_bits8 ), + ( "b16", CmmT_bits16 ), + ( "b32", CmmT_bits32 ), + ( "b64", CmmT_bits64 ), + ( "f32", CmmT_float32 ), + ( "f64", CmmT_float64 ), + ( "gcptr", CmmT_gcptr ) ] tok_decimal span buf len diff --git a/compiler/cmm/CmmLint.hs b/compiler/cmm/CmmLint.hs index 293c20367f..7c8f2b3ce4 100644 --- a/compiler/cmm/CmmLint.hs +++ b/compiler/cmm/CmmLint.hs @@ -19,7 +19,6 @@ module CmmLint ( import BlockId import Cmm import CLabel -import MachOp import Maybe import Outputable import PprCmm @@ -32,17 +31,22 @@ import Control.Monad -- ----------------------------------------------------------------------------- -- Exported entry points: -cmmLint :: GenCmm d h (ListGraph CmmStmt) -> Maybe SDoc -cmmLint (Cmm tops) = runCmmLint $ mapM_ lintCmmTop tops +cmmLint :: (Outputable d, Outputable h) + => GenCmm d h (ListGraph CmmStmt) -> Maybe SDoc +cmmLint (Cmm tops) = runCmmLint (mapM_ lintCmmTop) tops -cmmLintTop :: GenCmmTop d h (ListGraph CmmStmt) -> Maybe SDoc -cmmLintTop top = runCmmLint $ lintCmmTop top +cmmLintTop :: (Outputable d, Outputable h) + => GenCmmTop d h (ListGraph CmmStmt) -> Maybe SDoc +cmmLintTop top = runCmmLint lintCmmTop top -runCmmLint :: CmmLint a -> Maybe SDoc -runCmmLint l = - case unCL l of - Left err -> Just (ptext (sLit "Cmm lint error:") $$ nest 2 err) - Right _ -> Nothing +runCmmLint :: Outputable a => (a -> CmmLint b) -> a -> Maybe SDoc +runCmmLint l p = + case unCL (l p) of + Left err -> Just (vcat [ptext $ sLit ("Cmm lint error:"), + nest 2 err, + ptext $ sLit ("Program was:"), + nest 2 (ppr p)]) + Right _ -> Nothing lintCmmTop :: (GenCmmTop h i (ListGraph CmmStmt)) -> CmmLint () lintCmmTop (CmmProc _ lbl _ (ListGraph blocks)) @@ -64,40 +68,33 @@ lintCmmBlock labels (BasicBlock id stmts) -- Checks whether a CmmExpr is "type-correct", and check for obvious-looking -- byte/word mismatches. -lintCmmExpr :: CmmExpr -> CmmLint MachRep +lintCmmExpr :: CmmExpr -> CmmLint CmmType lintCmmExpr (CmmLoad expr rep) = do lintCmmExpr expr - when (machRepByteWidth rep >= wORD_SIZE) $ + when (widthInBytes (typeWidth rep) >= wORD_SIZE) $ cmmCheckWordAddress expr return rep lintCmmExpr expr@(CmmMachOp op args) = do - mapM_ lintCmmExpr args - if map cmmExprRep args == machOpArgReps op - then cmmCheckMachOp op args - else cmmLintMachOpErr expr (map cmmExprRep args) (machOpArgReps op) + tys <- mapM lintCmmExpr args + if map (typeWidth . cmmExprType) args == machOpArgReps op + then cmmCheckMachOp op args tys + else cmmLintMachOpErr expr (map cmmExprType args) (machOpArgReps op) lintCmmExpr (CmmRegOff reg offset) - = lintCmmExpr (CmmMachOp (MO_Add rep) + = lintCmmExpr (CmmMachOp (MO_Add rep) [CmmReg reg, CmmLit (CmmInt (fromIntegral offset) rep)]) - where rep = cmmRegRep reg -lintCmmExpr lit@(CmmLit (CmmInt _ rep)) - | isFloatingRep rep - = cmmLintErr (text "integer literal with floating MachRep: " <> ppr lit) + where rep = typeWidth (cmmRegType reg) lintCmmExpr expr = - return (cmmExprRep expr) + return (cmmExprType expr) -- Check for some common byte/word mismatches (eg. Sp + 1) -cmmCheckMachOp :: MachOp -> [CmmExpr] -> CmmLint MachRep -cmmCheckMachOp op args@[CmmReg reg, CmmLit (CmmInt i _)] +cmmCheckMachOp :: MachOp -> [CmmExpr] -> [CmmType] -> CmmLint CmmType +cmmCheckMachOp op args@[CmmReg reg, CmmLit (CmmInt i _)] _ | isWordOffsetReg reg && isOffsetOp op && i `rem` fromIntegral wORD_SIZE /= 0 = cmmLintDubiousWordOffset (CmmMachOp op args) -cmmCheckMachOp op [lit@(CmmLit (CmmInt { })), reg@(CmmReg _)] - = cmmCheckMachOp op [reg, lit] -cmmCheckMachOp op@(MO_U_Conv from to) args - | isFloatingRep from || isFloatingRep to - = cmmLintErr (text "unsigned conversion from/to floating rep: " - <> ppr (CmmMachOp op args)) -cmmCheckMachOp op _args - = return (resultRepOfMachOp op) +cmmCheckMachOp op [lit@(CmmLit (CmmInt { })), reg@(CmmReg _)] tys + = cmmCheckMachOp op [reg, lit] tys +cmmCheckMachOp op _ tys + = return (machOpResultType op tys) isWordOffsetReg :: CmmReg -> Bool isWordOffsetReg (CmmGlobal Sp) = True @@ -134,24 +131,26 @@ lintCmmStmt labels = lint lint (CmmComment {}) = return () lint stmt@(CmmAssign reg expr) = do erep <- lintCmmExpr expr - if (erep == cmmRegRep reg) + let reg_ty = cmmRegType reg + if (erep `cmmEqType_ignoring_ptrhood` reg_ty) then return () - else cmmLintAssignErr stmt + else cmmLintAssignErr stmt erep reg_ty lint (CmmStore l r) = do lintCmmExpr l lintCmmExpr r return () lint (CmmCall target _res args _ _) = - lintTarget target >> mapM_ (lintCmmExpr . kindlessCmm) args + lintTarget target >> mapM_ (lintCmmExpr . hintlessCmm) args lint (CmmCondBranch e id) = checkTarget id >> lintCmmExpr e >> checkCond e lint (CmmSwitch e branches) = do mapM_ checkTarget $ catMaybes branches erep <- lintCmmExpr e - if (erep == wordRep) + if (erep `cmmEqType_ignoring_ptrhood` bWord) then return () - else cmmLintErr (text "switch scrutinee is not a word: " <> ppr e) - lint (CmmJump e args) = lintCmmExpr e >> mapM_ (lintCmmExpr . kindlessCmm) args - lint (CmmReturn ress) = mapM_ (lintCmmExpr . kindlessCmm) ress + else cmmLintErr (text "switch scrutinee is not a word: " <> ppr e <> + text " :: " <> ppr erep) + lint (CmmJump e args) = lintCmmExpr e >> mapM_ (lintCmmExpr . hintlessCmm) args + lint (CmmReturn ress) = mapM_ (lintCmmExpr . hintlessCmm) ress lint (CmmBranch id) = checkTarget id checkTarget id = if elemBlockSet id labels then return () else cmmLintErr (text "Branch to nonexistent id" <+> ppr id) @@ -188,16 +187,21 @@ addLintInfo info thing = CmmLint $ Left err -> Left (hang info 2 err) Right a -> Right a -cmmLintMachOpErr :: CmmExpr -> [MachRep] -> [MachRep] -> CmmLint a +cmmLintMachOpErr :: CmmExpr -> [CmmType] -> [Width] -> CmmLint a cmmLintMachOpErr expr argsRep opExpectsRep = cmmLintErr (text "in MachOp application: " $$ nest 2 (pprExpr expr) $$ (text "op is expecting: " <+> ppr opExpectsRep) $$ (text "arguments provide: " <+> ppr argsRep)) -cmmLintAssignErr :: CmmStmt -> CmmLint a -cmmLintAssignErr stmt = cmmLintErr (text "in assignment: " $$ - nest 2 (pprStmt stmt)) +cmmLintAssignErr :: CmmStmt -> CmmType -> CmmType -> CmmLint a +cmmLintAssignErr stmt e_ty r_ty + = cmmLintErr (text "in assignment: " $$ + nest 2 (vcat [pprStmt stmt, + text "Reg ty:" <+> ppr r_ty, + text "Rhs ty:" <+> ppr e_ty])) + + cmmLintDubiousWordOffset :: CmmExpr -> CmmLint a cmmLintDubiousWordOffset expr diff --git a/compiler/cmm/CmmLive.hs b/compiler/cmm/CmmLive.hs index 078fcd3603..93372fc461 100644 --- a/compiler/cmm/CmmLive.hs +++ b/compiler/cmm/CmmLive.hs @@ -164,8 +164,8 @@ addKilled new_killed live = live `minusUniqSet` new_killed -------------------------------- -- Liveness of a CmmStmt -------------------------------- -cmmFormalsToLiveLocals :: CmmFormals -> [LocalReg] -cmmFormalsToLiveLocals formals = map kindlessCmm formals +cmmFormalsToLiveLocals :: HintedCmmFormals -> [LocalReg] +cmmFormalsToLiveLocals formals = map hintlessCmm formals cmmStmtLive :: BlockEntryLiveness -> CmmStmt -> CmmLivenessTransformer cmmStmtLive _ (CmmNop) = id @@ -180,7 +180,7 @@ cmmStmtLive _ (CmmStore expr1 expr2) = cmmExprLive expr2 . cmmExprLive expr1 cmmStmtLive _ (CmmCall target results arguments _ _) = target_liveness . - foldr ((.) . cmmExprLive) id (map kindlessCmm arguments) . + foldr ((.) . cmmExprLive) id (map hintlessCmm arguments) . addKilled (mkUniqSet $ cmmFormalsToLiveLocals results) where target_liveness = case target of @@ -198,9 +198,9 @@ cmmStmtLive other_live (CmmSwitch expr targets) = id (mapCatMaybes id targets)) cmmStmtLive _ (CmmJump expr params) = - const (cmmExprLive expr $ foldr ((.) . cmmExprLive) id (map kindlessCmm params) $ emptyUniqSet) + const (cmmExprLive expr $ foldr ((.) . cmmExprLive) id (map hintlessCmm params) $ emptyUniqSet) cmmStmtLive _ (CmmReturn params) = - const (foldr ((.) . cmmExprLive) id (map kindlessCmm params) $ emptyUniqSet) + const (foldr ((.) . cmmExprLive) id (map hintlessCmm params) $ emptyUniqSet) -------------------------------- -- Liveness of a CmmExpr diff --git a/compiler/cmm/CmmLiveZ.hs b/compiler/cmm/CmmLiveZ.hs index 4dc0874eb7..b239ae3711 100644 --- a/compiler/cmm/CmmLiveZ.hs +++ b/compiler/cmm/CmmLiveZ.hs @@ -63,15 +63,13 @@ middleLiveness m = middle m middle (MidStore addr rval) = gen addr . gen rval middle (MidUnsafeCall tgt ress args) = gen tgt . gen args . kill ress middle (MidAddToContext ra args) = gen ra . gen args - middle (CopyIn _ formals _) = kill formals - middle (CopyOut _ actuals) = gen actuals lastLiveness :: Last -> (BlockId -> CmmLive) -> CmmLive lastLiveness l env = last l - where last (LastReturn) = emptyUniqSet - last (LastJump e) = gen e $ emptyUniqSet - last (LastBranch id) = env id - last (LastCall tgt (Just k)) = gen tgt $ env k - last (LastCall tgt Nothing) = gen tgt $ emptyUniqSet - last (LastCondBranch e t f) = gen e $ unionUniqSets (env t) (env f) + where last (LastReturn _) = emptyUniqSet + last (LastJump e _) = gen e $ emptyUniqSet + last (LastBranch id) = env id + last (LastCall tgt (Just k) _) = gen tgt $ env k + last (LastCall tgt Nothing _) = gen tgt $ emptyUniqSet + last (LastCondBranch e t f) = gen e $ unionUniqSets (env t) (env f) last (LastSwitch e tbl) = gen e $ unionManyUniqSets $ map env (catMaybes tbl) diff --git a/compiler/cmm/CmmOpt.hs b/compiler/cmm/CmmOpt.hs index 9873e29cfd..e459a75c42 100644 --- a/compiler/cmm/CmmOpt.hs +++ b/compiler/cmm/CmmOpt.hs @@ -25,7 +25,6 @@ import Cmm import CmmExpr import CmmUtils import CLabel -import MachOp import StaticFlags import UniqFM @@ -100,7 +99,7 @@ cmmMiniInline blocks = map do_inline blocks cmmMiniInlineStmts :: UniqFM Int -> [CmmStmt] -> [CmmStmt] cmmMiniInlineStmts uses [] = [] -cmmMiniInlineStmts uses (stmt@(CmmAssign (CmmLocal (LocalReg u _ _)) expr) : stmts) +cmmMiniInlineStmts uses (stmt@(CmmAssign (CmmLocal (LocalReg u _)) expr) : stmts) -- not used at all: just discard this assignment | Nothing <- lookupUFM uses u = cmmMiniInlineStmts uses stmts @@ -121,7 +120,7 @@ cmmMiniInlineStmts uses (stmt:stmts) -- Try to inline a temporary assignment. We can skip over assignments to -- other tempoararies, because we know that expressions aren't side-effecting -- and temporaries are single-assignment. -lookForInline u expr (stmt@(CmmAssign (CmmLocal (LocalReg u' _ _)) rhs) : rest) +lookForInline u expr (stmt@(CmmAssign (CmmLocal (LocalReg u' _)) rhs) : rest) | u /= u' = case lookupUFM (countUses rhs) u of Just 1 -> Just (inlineStmt u expr stmt : rest) @@ -155,19 +154,21 @@ inlineStmt u a (CmmCall target regs es srt ret) = CmmCall (infn target) regs es' srt ret where infn (CmmCallee fn cconv) = CmmCallee fn cconv infn (CmmPrim p) = CmmPrim p - es' = [ (CmmKinded (inlineExpr u a e) hint) | (CmmKinded e hint) <- es ] + es' = [ (CmmHinted (inlineExpr u a e) hint) | (CmmHinted e hint) <- es ] inlineStmt u a (CmmCondBranch e d) = CmmCondBranch (inlineExpr u a e) d inlineStmt u a (CmmSwitch e d) = CmmSwitch (inlineExpr u a e) d inlineStmt u a (CmmJump e d) = CmmJump (inlineExpr u a e) d inlineStmt u a other_stmt = other_stmt inlineExpr :: Unique -> CmmExpr -> CmmExpr -> CmmExpr -inlineExpr u a e@(CmmReg (CmmLocal (LocalReg u' _ _))) +inlineExpr u a e@(CmmReg (CmmLocal (LocalReg u' _))) | u == u' = a | otherwise = e -inlineExpr u a e@(CmmRegOff (CmmLocal (LocalReg u' rep _)) off) - | u == u' = CmmMachOp (MO_Add rep) [a, CmmLit (CmmInt (fromIntegral off) rep)] +inlineExpr u a e@(CmmRegOff (CmmLocal (LocalReg u' rep)) off) + | u == u' = CmmMachOp (MO_Add width) [a, CmmLit (CmmInt (fromIntegral off) width)] | otherwise = e + where + width = typeWidth rep inlineExpr u a (CmmLoad e rep) = CmmLoad (inlineExpr u a e) rep inlineExpr u a (CmmMachOp op es) = CmmMachOp op (map (inlineExpr u a) es) inlineExpr u a other_expr = other_expr @@ -192,17 +193,16 @@ cmmMachOpFold op arg@[CmmLit (CmmInt x rep)] -- "from" type, in order to truncate to the correct size. -- The final narrow/widen to the destination type -- is implicit in the CmmLit. - MO_S_Conv from to - | isFloatingRep to -> CmmLit (CmmFloat (fromInteger x) to) - | otherwise -> CmmLit (CmmInt (narrowS from x) to) - MO_U_Conv from to -> CmmLit (CmmInt (narrowU from x) to) + MO_SF_Conv from to -> CmmLit (CmmFloat (fromInteger x) to) + MO_SS_Conv from to -> CmmLit (CmmInt (narrowS from x) to) + MO_UU_Conv from to -> CmmLit (CmmInt (narrowU from x) to) _ -> panic "cmmMachOpFold: unknown unary op" -- Eliminate conversion NOPs -cmmMachOpFold (MO_S_Conv rep1 rep2) [x] | rep1 == rep2 = x -cmmMachOpFold (MO_U_Conv rep1 rep2) [x] | rep1 == rep2 = x +cmmMachOpFold (MO_SS_Conv rep1 rep2) [x] | rep1 == rep2 = x +cmmMachOpFold (MO_UU_Conv rep1 rep2) [x] | rep1 == rep2 = x -- Eliminate nested conversions where possible cmmMachOpFold conv_outer args@[CmmMachOp conv_inner [x]] @@ -221,20 +221,18 @@ cmmMachOpFold conv_outer args@[CmmMachOp conv_inner [x]] cmmMachOpFold (intconv signed1 rep1 rep3) [x] -- Nested narrowings: collapse | rep1 > rep2 && rep2 > rep3 -> - cmmMachOpFold (MO_U_Conv rep1 rep3) [x] + cmmMachOpFold (MO_UU_Conv rep1 rep3) [x] | otherwise -> CmmMachOp conv_outer args where - isIntConversion (MO_U_Conv rep1 rep2) - | not (isFloatingRep rep1) && not (isFloatingRep rep2) + isIntConversion (MO_UU_Conv rep1 rep2) = Just (rep1,rep2,False) - isIntConversion (MO_S_Conv rep1 rep2) - | not (isFloatingRep rep1) && not (isFloatingRep rep2) + isIntConversion (MO_SS_Conv rep1 rep2) = Just (rep1,rep2,True) isIntConversion _ = Nothing - intconv True = MO_S_Conv - intconv False = MO_U_Conv + intconv True = MO_SS_Conv + intconv False = MO_UU_Conv -- ToDo: a narrow of a load can be collapsed into a narrow load, right? -- but what if the architecture only supports word-sized loads, should @@ -244,18 +242,18 @@ cmmMachOpFold mop args@[CmmLit (CmmInt x xrep), CmmLit (CmmInt y _)] = case mop of -- for comparisons: don't forget to narrow the arguments before -- comparing, since they might be out of range. - MO_Eq r -> CmmLit (CmmInt (if x_u == y_u then 1 else 0) wordRep) - MO_Ne r -> CmmLit (CmmInt (if x_u /= y_u then 1 else 0) wordRep) + MO_Eq r -> CmmLit (CmmInt (if x_u == y_u then 1 else 0) wordWidth) + MO_Ne r -> CmmLit (CmmInt (if x_u /= y_u then 1 else 0) wordWidth) - MO_U_Gt r -> CmmLit (CmmInt (if x_u > y_u then 1 else 0) wordRep) - MO_U_Ge r -> CmmLit (CmmInt (if x_u >= y_u then 1 else 0) wordRep) - MO_U_Lt r -> CmmLit (CmmInt (if x_u < y_u then 1 else 0) wordRep) - MO_U_Le r -> CmmLit (CmmInt (if x_u <= y_u then 1 else 0) wordRep) + MO_U_Gt r -> CmmLit (CmmInt (if x_u > y_u then 1 else 0) wordWidth) + MO_U_Ge r -> CmmLit (CmmInt (if x_u >= y_u then 1 else 0) wordWidth) + MO_U_Lt r -> CmmLit (CmmInt (if x_u < y_u then 1 else 0) wordWidth) + MO_U_Le r -> CmmLit (CmmInt (if x_u <= y_u then 1 else 0) wordWidth) - MO_S_Gt r -> CmmLit (CmmInt (if x_s > y_s then 1 else 0) wordRep) - MO_S_Ge r -> CmmLit (CmmInt (if x_s >= y_s then 1 else 0) wordRep) - MO_S_Lt r -> CmmLit (CmmInt (if x_s < y_s then 1 else 0) wordRep) - MO_S_Le r -> CmmLit (CmmInt (if x_s <= y_s then 1 else 0) wordRep) + MO_S_Gt r -> CmmLit (CmmInt (if x_s > y_s then 1 else 0) wordWidth) + MO_S_Ge r -> CmmLit (CmmInt (if x_s >= y_s then 1 else 0) wordWidth) + MO_S_Lt r -> CmmLit (CmmInt (if x_s < y_s then 1 else 0) wordWidth) + MO_S_Le r -> CmmLit (CmmInt (if x_s <= y_s then 1 else 0) wordWidth) MO_Add r -> CmmLit (CmmInt (x + y) r) MO_Sub r -> CmmLit (CmmInt (x - y) r) @@ -350,12 +348,13 @@ cmmMachOpFold cmp [CmmMachOp conv [x], CmmLit (CmmInt i _)] -- then we can do the comparison at the smaller size = cmmMachOpFold narrow_cmp [x, CmmLit (CmmInt i rep)] where - maybe_conversion (MO_U_Conv from to) + maybe_conversion (MO_UU_Conv from to) | to > from = Just (from, False, narrowU) - maybe_conversion (MO_S_Conv from to) - | to > from, not (isFloatingRep from) + maybe_conversion (MO_SS_Conv from to) + | to > from = Just (from, True, narrowS) + -- don't attempt to apply this optimisation when the source -- is a float; see #1916 maybe_conversion _ = Nothing @@ -397,10 +396,10 @@ cmmMachOpFold mop args@[x, y@(CmmLit (CmmInt 0 _))] MO_Eq r | Just x' <- maybeInvertCmmExpr x -> x' MO_U_Gt r | isComparisonExpr x -> x MO_S_Gt r | isComparisonExpr x -> x - MO_U_Lt r | isComparisonExpr x -> CmmLit (CmmInt 0 wordRep) - MO_S_Lt r | isComparisonExpr x -> CmmLit (CmmInt 0 wordRep) - MO_U_Ge r | isComparisonExpr x -> CmmLit (CmmInt 1 wordRep) - MO_S_Ge r | isComparisonExpr x -> CmmLit (CmmInt 1 wordRep) + MO_U_Lt r | isComparisonExpr x -> CmmLit (CmmInt 0 wordWidth) + MO_S_Lt r | isComparisonExpr x -> CmmLit (CmmInt 0 wordWidth) + MO_U_Ge r | isComparisonExpr x -> CmmLit (CmmInt 1 wordWidth) + MO_S_Ge r | isComparisonExpr x -> CmmLit (CmmInt 1 wordWidth) MO_U_Le r | Just x' <- maybeInvertCmmExpr x -> x' MO_S_Le r | Just x' <- maybeInvertCmmExpr x -> x' other -> CmmMachOp mop args @@ -416,10 +415,10 @@ cmmMachOpFold mop args@[x, y@(CmmLit (CmmInt 1 rep))] MO_Eq r | isComparisonExpr x -> x MO_U_Lt r | Just x' <- maybeInvertCmmExpr x -> x' MO_S_Lt r | Just x' <- maybeInvertCmmExpr x -> x' - MO_U_Gt r | isComparisonExpr x -> CmmLit (CmmInt 0 wordRep) - MO_S_Gt r | isComparisonExpr x -> CmmLit (CmmInt 0 wordRep) - MO_U_Le r | isComparisonExpr x -> CmmLit (CmmInt 1 wordRep) - MO_S_Le r | isComparisonExpr x -> CmmLit (CmmInt 1 wordRep) + MO_U_Gt r | isComparisonExpr x -> CmmLit (CmmInt 0 wordWidth) + MO_S_Gt r | isComparisonExpr x -> CmmLit (CmmInt 0 wordWidth) + MO_U_Le r | isComparisonExpr x -> CmmLit (CmmInt 1 wordWidth) + MO_S_Le r | isComparisonExpr x -> CmmLit (CmmInt 1 wordWidth) MO_U_Ge r | isComparisonExpr x -> x MO_S_Ge r | isComparisonExpr x -> x other -> CmmMachOp mop args @@ -451,7 +450,7 @@ cmmMachOpFold mop args@[x, y@(CmmLit (CmmInt n _))] -- x1 = x >> word_size-1 (unsigned) -- return = (x + x1) >>= log2(divisor) let - bits = fromIntegral (machRepBitWidth rep) - 1 + bits = fromIntegral (widthInBits rep) - 1 shr = if p == 1 then MO_U_Shr rep else MO_S_Shr rep x1 = CmmMachOp shr [x, CmmLit (CmmInt bits rep)] x2 = if p == 1 then x1 else @@ -503,18 +502,18 @@ exactLog2 x_ -- ----------------------------------------------------------------------------- -- widening / narrowing -narrowU :: MachRep -> Integer -> Integer -narrowU I8 x = fromIntegral (fromIntegral x :: Word8) -narrowU I16 x = fromIntegral (fromIntegral x :: Word16) -narrowU I32 x = fromIntegral (fromIntegral x :: Word32) -narrowU I64 x = fromIntegral (fromIntegral x :: Word64) +narrowU :: Width -> Integer -> Integer +narrowU W8 x = fromIntegral (fromIntegral x :: Word8) +narrowU W16 x = fromIntegral (fromIntegral x :: Word16) +narrowU W32 x = fromIntegral (fromIntegral x :: Word32) +narrowU W64 x = fromIntegral (fromIntegral x :: Word64) narrowU _ _ = panic "narrowTo" -narrowS :: MachRep -> Integer -> Integer -narrowS I8 x = fromIntegral (fromIntegral x :: Int8) -narrowS I16 x = fromIntegral (fromIntegral x :: Int16) -narrowS I32 x = fromIntegral (fromIntegral x :: Int32) -narrowS I64 x = fromIntegral (fromIntegral x :: Int64) +narrowS :: Width -> Integer -> Integer +narrowS W8 x = fromIntegral (fromIntegral x :: Int8) +narrowS W16 x = fromIntegral (fromIntegral x :: Int16) +narrowS W32 x = fromIntegral (fromIntegral x :: Int32) +narrowS W64 x = fromIntegral (fromIntegral x :: Int64) narrowS _ _ = panic "narrowTo" -- ----------------------------------------------------------------------------- diff --git a/compiler/cmm/CmmParse.y b/compiler/cmm/CmmParse.y index 9d83e2f1a8..9382994ae1 100644 --- a/compiler/cmm/CmmParse.y +++ b/compiler/cmm/CmmParse.y @@ -36,7 +36,6 @@ import PprCmm import CmmUtils import CmmLex import CLabel -import MachOp import SMRep import Lexer @@ -127,6 +126,7 @@ import System.Exit 'bits64' { L _ (CmmT_bits64) } 'float32' { L _ (CmmT_float32) } 'float64' { L _ (CmmT_float64) } + 'gcptr' { L _ (CmmT_gcptr) } GLOBALREG { L _ (CmmT_GlobalReg $$) } NAME { L _ (CmmT_Name $$) } @@ -191,12 +191,12 @@ static :: { ExtFCode [CmmStatic] } | type expr ';' { do e <- $2; return [CmmStaticLit (getLit e)] } | type ';' { return [CmmUninitialised - (machRepByteWidth $1)] } + (widthInBytes (typeWidth $1))] } | 'bits8' '[' ']' STRING ';' { return [mkString $4] } | 'bits8' '[' INT ']' ';' { return [CmmUninitialised (fromIntegral $3)] } | typenot8 '[' INT ']' ';' { return [CmmUninitialised - (machRepByteWidth $1 * + (widthInBytes (typeWidth $1) * fromIntegral $3)] } | 'align' INT ';' { return [CmmAlign (fromIntegral $2)] } | 'CLOSURE' '(' NAME lits ')' @@ -214,7 +214,7 @@ lits :: { [ExtFCode CmmExpr] } cmmproc :: { ExtCode } -- TODO: add real SRT/info tables to parsed Cmm - : info maybe_formals_without_kinds maybe_gc_block maybe_frame '{' body '}' + : info maybe_formals_without_hints maybe_gc_block maybe_frame '{' body '}' { do ((entry_ret_label, info, live, formals, gc_block, frame), stmts) <- getCgStmtsEC' $ loopDecls $ do { (entry_ret_label, info, live) <- $1; @@ -226,12 +226,12 @@ cmmproc :: { ExtCode } blks <- code (cgStmtsToBlocks stmts) code (emitInfoTableAndCode entry_ret_label (CmmInfo gc_block frame info) formals blks) } - | info maybe_formals_without_kinds ';' + | info maybe_formals_without_hints ';' { do (entry_ret_label, info, live) <- $1; formals <- sequence $2; code (emitInfoTableAndCode entry_ret_label (CmmInfo Nothing Nothing info) formals []) } - | NAME maybe_formals_without_kinds maybe_gc_block maybe_frame '{' body '}' + | NAME maybe_formals_without_hints maybe_gc_block maybe_frame '{' body '}' { do ((formals, gc_block, frame), stmts) <- getCgStmtsEC' $ loopDecls $ do { formals <- sequence $2; @@ -256,8 +256,9 @@ info :: { ExtFCode (CLabel, CmmInfoTable, [Maybe LocalReg]) } { do prof <- profilingInfo $11 $13 return (mkRtsEntryLabelFS $3, CmmInfoTable prof (fromIntegral $9) - (FunInfo (fromIntegral $5, fromIntegral $7) NoC_SRT (fromIntegral $15) 0 - (ArgSpec 0) + (FunInfo (fromIntegral $5, fromIntegral $7) NoC_SRT + 0 -- Arity zero + (ArgSpec (fromIntegral $15)) zeroCLit), []) } -- we leave most of the fields zero here. This is only used @@ -269,8 +270,8 @@ info :: { ExtFCode (CLabel, CmmInfoTable, [Maybe LocalReg]) } { do prof <- profilingInfo $11 $13 return (mkRtsEntryLabelFS $3, CmmInfoTable prof (fromIntegral $9) - (FunInfo (fromIntegral $5, fromIntegral $7) NoC_SRT (fromIntegral $15) (fromIntegral $17) - (ArgSpec 0) + (FunInfo (fromIntegral $5, fromIntegral $7) NoC_SRT (fromIntegral $17) + (ArgSpec (fromIntegral $15)) zeroCLit), []) } -- we leave most of the fields zero here. This is only used @@ -303,7 +304,7 @@ info :: { ExtFCode (CLabel, CmmInfoTable, [Maybe LocalReg]) } (ContInfo [] NoC_SRT), []) } - | 'INFO_TABLE_RET' '(' NAME ',' INT ',' formals_without_kinds0 ')' + | 'INFO_TABLE_RET' '(' NAME ',' INT ',' formals_without_hints0 ')' -- closure type, live regs { do live <- sequence (map (liftM Just) $7) return (mkRtsRetLabelFS $3, @@ -317,10 +318,7 @@ body :: { ExtCode } | stmt body { do $1; $2 } decl :: { ExtCode } - : type names ';' { mapM_ (newLocal defaultKind $1) $2 } - | STRING type names ';' {% do k <- parseGCKind $1; - return $ mapM_ (newLocal k $2) $3 } - + : type names ';' { mapM_ (newLocal $1) $2 } | 'import' names ';' { mapM_ newImport $2 } | 'export' names ';' { return () } -- ignore exports @@ -345,9 +343,9 @@ stmt :: { ExtCode } -- we tweak the syntax to avoid the conflict. The later -- option is taken here because the other way would require -- multiple levels of expanding and get unwieldy. - | maybe_results 'foreign' STRING expr '(' cmm_kind_exprs0 ')' safety vols opt_never_returns ';' + | maybe_results 'foreign' STRING expr '(' cmm_hint_exprs0 ')' safety vols opt_never_returns ';' {% foreignCall $3 $1 $4 $6 $9 $8 $10 } - | maybe_results 'prim' '%' NAME '(' cmm_kind_exprs0 ')' safety vols ';' + | maybe_results 'prim' '%' NAME '(' cmm_hint_exprs0 ')' safety vols ';' {% primCall $1 $4 $6 $9 $8 } -- stmt-level macros, stealing syntax from ordinary C-- function calls. -- Perhaps we ought to use the %%-form? @@ -446,8 +444,8 @@ expr :: { ExtFCode CmmExpr } | expr0 { $1 } expr0 :: { ExtFCode CmmExpr } - : INT maybe_ty { return (CmmLit (CmmInt $1 $2)) } - | FLOAT maybe_ty { return (CmmLit (CmmFloat $1 $2)) } + : INT maybe_ty { return (CmmLit (CmmInt $1 (typeWidth $2))) } + | FLOAT maybe_ty { return (CmmLit (CmmFloat $1 (typeWidth $2))) } | STRING { do s <- code (mkStringCLit $1); return (CmmLit s) } | reg { $1 } @@ -457,27 +455,27 @@ expr0 :: { ExtFCode CmmExpr } -- leaving out the type of a literal gives you the native word size in C-- -maybe_ty :: { MachRep } - : {- empty -} { wordRep } +maybe_ty :: { CmmType } + : {- empty -} { bWord } | '::' type { $2 } -maybe_actuals :: { [ExtFCode CmmActual] } +maybe_actuals :: { [ExtFCode HintedCmmActual] } : {- empty -} { [] } - | '(' cmm_kind_exprs0 ')' { $2 } + | '(' cmm_hint_exprs0 ')' { $2 } -cmm_kind_exprs0 :: { [ExtFCode CmmActual] } +cmm_hint_exprs0 :: { [ExtFCode HintedCmmActual] } : {- empty -} { [] } - | cmm_kind_exprs { $1 } + | cmm_hint_exprs { $1 } -cmm_kind_exprs :: { [ExtFCode CmmActual] } - : cmm_kind_expr { [$1] } - | cmm_kind_expr ',' cmm_kind_exprs { $1 : $3 } +cmm_hint_exprs :: { [ExtFCode HintedCmmActual] } + : cmm_hint_expr { [$1] } + | cmm_hint_expr ',' cmm_hint_exprs { $1 : $3 } -cmm_kind_expr :: { ExtFCode CmmActual } - : expr { do e <- $1; return (CmmKinded e (inferCmmKind e)) } - | expr STRING {% do h <- parseCmmKind $2; +cmm_hint_expr :: { ExtFCode HintedCmmActual } + : expr { do e <- $1; return (CmmHinted e (inferCmmHint e)) } + | expr STRING {% do h <- parseCmmHint $2; return $ do - e <- $1; return (CmmKinded e h) } + e <- $1; return (CmmHinted e h) } exprs0 :: { [ExtFCode CmmExpr] } : {- empty -} { [] } @@ -491,20 +489,20 @@ reg :: { ExtFCode CmmExpr } : NAME { lookupName $1 } | GLOBALREG { return (CmmReg (CmmGlobal $1)) } -maybe_results :: { [ExtFCode CmmFormal] } +maybe_results :: { [ExtFCode HintedCmmFormal] } : {- empty -} { [] } | '(' cmm_formals ')' '=' { $2 } -cmm_formals :: { [ExtFCode CmmFormal] } +cmm_formals :: { [ExtFCode HintedCmmFormal] } : cmm_formal { [$1] } | cmm_formal ',' { [$1] } | cmm_formal ',' cmm_formals { $1 : $3 } -cmm_formal :: { ExtFCode CmmFormal } - : local_lreg { do e <- $1; return (CmmKinded e (inferCmmKind (CmmReg (CmmLocal e)))) } - | STRING local_lreg {% do h <- parseCmmKind $1; +cmm_formal :: { ExtFCode HintedCmmFormal } + : local_lreg { do e <- $1; return (CmmHinted e (inferCmmHint (CmmReg (CmmLocal e)))) } + | STRING local_lreg {% do h <- parseCmmHint $1; return $ do - e <- $2; return (CmmKinded e h) } + e <- $2; return (CmmHinted e h) } local_lreg :: { ExtFCode LocalReg } : NAME { do e <- lookupName $1; @@ -521,23 +519,21 @@ lreg :: { ExtFCode CmmReg } other -> pprPanic "CmmParse:" (ftext $1 <> text " not a register") } | GLOBALREG { return (CmmGlobal $1) } -maybe_formals_without_kinds :: { [ExtFCode LocalReg] } +maybe_formals_without_hints :: { [ExtFCode LocalReg] } : {- empty -} { [] } - | '(' formals_without_kinds0 ')' { $2 } + | '(' formals_without_hints0 ')' { $2 } -formals_without_kinds0 :: { [ExtFCode LocalReg] } +formals_without_hints0 :: { [ExtFCode LocalReg] } : {- empty -} { [] } - | formals_without_kinds { $1 } + | formals_without_hints { $1 } -formals_without_kinds :: { [ExtFCode LocalReg] } - : formal_without_kind ',' { [$1] } - | formal_without_kind { [$1] } - | formal_without_kind ',' formals_without_kinds { $1 : $3 } +formals_without_hints :: { [ExtFCode LocalReg] } + : formal_without_hint ',' { [$1] } + | formal_without_hint { [$1] } + | formal_without_hint ',' formals_without_hints { $1 : $3 } -formal_without_kind :: { ExtFCode LocalReg } - : type NAME { newLocal defaultKind $1 $2 } - | STRING type NAME {% do k <- parseGCKind $1; - return $ newLocal k $2 $3 } +formal_without_hint :: { ExtFCode LocalReg } + : type NAME { newLocal $1 $2 } maybe_frame :: { ExtFCode (Maybe UpdateFrame) } : {- empty -} { return Nothing } @@ -550,16 +546,17 @@ maybe_gc_block :: { ExtFCode (Maybe BlockId) } | 'goto' NAME { do l <- lookupLabel $2; return (Just l) } -type :: { MachRep } - : 'bits8' { I8 } +type :: { CmmType } + : 'bits8' { b8 } | typenot8 { $1 } -typenot8 :: { MachRep } - : 'bits16' { I16 } - | 'bits32' { I32 } - | 'bits64' { I64 } - | 'float32' { F32 } - | 'float64' { F64 } +typenot8 :: { CmmType } + : 'bits16' { b16 } + | 'bits32' { b32 } + | 'bits64' { b64 } + | 'float32' { f32 } + | 'float64' { f64 } + | 'gcptr' { gcWord } { section :: String -> Section section "text" = Text @@ -576,17 +573,17 @@ mkString s = CmmString (map (fromIntegral.ord) s) -- argument. We assume that this is correct: for MachOps that don't have -- symmetrical args (e.g. shift ops), the first arg determines the type of -- the op. -mkMachOp :: (MachRep -> MachOp) -> [ExtFCode CmmExpr] -> ExtFCode CmmExpr +mkMachOp :: (Width -> MachOp) -> [ExtFCode CmmExpr] -> ExtFCode CmmExpr mkMachOp fn args = do arg_exprs <- sequence args - return (CmmMachOp (fn (cmmExprRep (head arg_exprs))) arg_exprs) + return (CmmMachOp (fn (typeWidth (cmmExprType (head arg_exprs)))) arg_exprs) getLit :: CmmExpr -> CmmLit getLit (CmmLit l) = l getLit (CmmMachOp (MO_S_Neg _) [CmmLit (CmmInt i r)]) = CmmInt (negate i) r getLit _ = panic "invalid literal" -- TODO messy failure -nameToMachOp :: FastString -> P (MachRep -> MachOp) +nameToMachOp :: FastString -> P (Width -> MachOp) nameToMachOp name = case lookupUFM machOps name of Nothing -> fail ("unknown primitive " ++ unpackFS name) @@ -656,24 +653,27 @@ machOps = listToUFM $ ( "shrl", MO_U_Shr ), ( "shra", MO_S_Shr ), - ( "lobits8", flip MO_U_Conv I8 ), - ( "lobits16", flip MO_U_Conv I16 ), - ( "lobits32", flip MO_U_Conv I32 ), - ( "lobits64", flip MO_U_Conv I64 ), - ( "sx16", flip MO_S_Conv I16 ), - ( "sx32", flip MO_S_Conv I32 ), - ( "sx64", flip MO_S_Conv I64 ), - ( "zx16", flip MO_U_Conv I16 ), - ( "zx32", flip MO_U_Conv I32 ), - ( "zx64", flip MO_U_Conv I64 ), - ( "f2f32", flip MO_S_Conv F32 ), -- TODO; rounding mode - ( "f2f64", flip MO_S_Conv F64 ), -- TODO; rounding mode - ( "f2i8", flip MO_S_Conv I8 ), - ( "f2i16", flip MO_S_Conv I16 ), - ( "f2i32", flip MO_S_Conv I32 ), - ( "f2i64", flip MO_S_Conv I64 ), - ( "i2f32", flip MO_S_Conv F32 ), - ( "i2f64", flip MO_S_Conv F64 ) + ( "lobits8", flip MO_UU_Conv W8 ), + ( "lobits16", flip MO_UU_Conv W16 ), + ( "lobits32", flip MO_UU_Conv W32 ), + ( "lobits64", flip MO_UU_Conv W64 ), + + ( "zx16", flip MO_UU_Conv W16 ), + ( "zx32", flip MO_UU_Conv W32 ), + ( "zx64", flip MO_UU_Conv W64 ), + + ( "sx16", flip MO_SS_Conv W16 ), + ( "sx32", flip MO_SS_Conv W32 ), + ( "sx64", flip MO_SS_Conv W64 ), + + ( "f2f32", flip MO_FF_Conv W32 ), -- TODO; rounding mode + ( "f2f64", flip MO_FF_Conv W64 ), -- TODO; rounding mode + ( "f2i8", flip MO_FS_Conv W8 ), + ( "f2i16", flip MO_FS_Conv W16 ), + ( "f2i32", flip MO_FS_Conv W32 ), + ( "f2i64", flip MO_FS_Conv W64 ), + ( "i2f32", flip MO_SF_Conv W32 ), + ( "i2f64", flip MO_SF_Conv W64 ) ] callishMachOps = listToUFM $ @@ -687,32 +687,25 @@ parseSafety "safe" = return (CmmSafe NoC_SRT) parseSafety "unsafe" = return CmmUnsafe parseSafety str = fail ("unrecognised safety: " ++ str) -parseCmmKind :: String -> P CmmKind -parseCmmKind "ptr" = return PtrHint -parseCmmKind "signed" = return SignedHint -parseCmmKind "float" = return FloatHint -parseCmmKind str = fail ("unrecognised hint: " ++ str) - -parseGCKind :: String -> P GCKind -parseGCKind "ptr" = return GCKindPtr -parseGCKind str = fail ("unrecognized kin: " ++ str) - -defaultKind :: GCKind -defaultKind = GCKindNonPtr +parseCmmHint :: String -> P ForeignHint +parseCmmHint "ptr" = return AddrHint +parseCmmHint "signed" = return SignedHint +parseCmmHint str = fail ("unrecognised hint: " ++ str) -- labels are always pointers, so we might as well infer the hint -inferCmmKind :: CmmExpr -> CmmKind -inferCmmKind (CmmLit (CmmLabel _)) = PtrHint -inferCmmKind (CmmReg (CmmGlobal g)) | isPtrGlobalReg g = PtrHint -inferCmmKind _ = NoHint - -isPtrGlobalReg Sp = True -isPtrGlobalReg SpLim = True -isPtrGlobalReg Hp = True -isPtrGlobalReg HpLim = True -isPtrGlobalReg CurrentTSO = True -isPtrGlobalReg CurrentNursery = True -isPtrGlobalReg _ = False +inferCmmHint :: CmmExpr -> ForeignHint +inferCmmHint (CmmLit (CmmLabel _)) = AddrHint +inferCmmHint (CmmReg (CmmGlobal g)) | isPtrGlobalReg g = AddrHint +inferCmmHint _ = NoHint + +isPtrGlobalReg Sp = True +isPtrGlobalReg SpLim = True +isPtrGlobalReg Hp = True +isPtrGlobalReg HpLim = True +isPtrGlobalReg CurrentTSO = True +isPtrGlobalReg CurrentNursery = True +isPtrGlobalReg (VanillaReg _ VGcPtr) = True +isPtrGlobalReg _ = False happyError :: P a happyError = srcParseFail @@ -819,10 +812,10 @@ addVarDecl var expr = EC $ \e s -> return ((var, Var expr):s, ()) addLabel :: FastString -> BlockId -> ExtCode addLabel name block_id = EC $ \e s -> return ((name, Label block_id):s, ()) -newLocal :: GCKind -> MachRep -> FastString -> ExtFCode LocalReg -newLocal kind ty name = do +newLocal :: CmmType -> FastString -> ExtFCode LocalReg +newLocal ty name = do u <- code newUnique - let reg = LocalReg u ty kind + let reg = LocalReg u ty addVarDecl name (CmmReg (CmmLocal reg)) return reg @@ -895,9 +888,9 @@ staticClosure cl_label info payload foreignCall :: String - -> [ExtFCode CmmFormal] + -> [ExtFCode HintedCmmFormal] -> ExtFCode CmmExpr - -> [ExtFCode CmmActual] + -> [ExtFCode HintedCmmActual] -> Maybe [GlobalReg] -> CmmSafety -> CmmReturnInfo @@ -927,22 +920,22 @@ foreignCall conv_string results_code expr_code args_code vols safety ret (CmmCallee expr' convention) args vols NoC_SRT ret) where unused = panic "not used by emitForeignCall'" -adjCallTarget :: CCallConv -> CmmExpr -> [CmmKinded CmmExpr] -> CmmExpr +adjCallTarget :: CCallConv -> CmmExpr -> [CmmHinted CmmExpr] -> CmmExpr #ifdef mingw32_TARGET_OS -- On Windows, we have to add the '@N' suffix to the label when making -- a call with the stdcall calling convention. adjCallTarget StdCallConv (CmmLit (CmmLabel lbl)) args = CmmLit (CmmLabel (addLabelSize lbl (sum (map size args)))) - where size (CmmKinded e _) = max wORD_SIZE (machRepByteWidth (cmmExprRep e)) + where size (CmmHinted e _) = max wORD_SIZE (widthInBytes (typeWidth (cmmExprType e))) -- c.f. CgForeignCall.emitForeignCall #endif adjCallTarget _ expr _ = expr primCall - :: [ExtFCode CmmFormal] + :: [ExtFCode HintedCmmFormal] -> FastString - -> [ExtFCode CmmActual] + -> [ExtFCode HintedCmmActual] -> Maybe [GlobalReg] -> CmmSafety -> P ExtCode @@ -961,7 +954,7 @@ primCall results_code name args_code vols safety (CmmPrim p) args vols NoC_SRT CmmMayReturn) where unused = panic "not used by emitForeignCall'" -doStore :: MachRep -> ExtFCode CmmExpr -> ExtFCode CmmExpr -> ExtCode +doStore :: CmmType -> ExtFCode CmmExpr -> ExtFCode CmmExpr -> ExtCode doStore rep addr_code val_code = do addr <- addr_code val <- val_code @@ -970,9 +963,11 @@ doStore rep addr_code val_code -- mismatch to be flagged by cmm-lint. If we don't do this, then -- the store will happen at the wrong type, and the error will not -- be noticed. + let val_width = typeWidth (cmmExprType val) + rep_width = typeWidth rep let coerce_val - | cmmExprRep val /= rep = CmmMachOp (MO_U_Conv rep rep) [val] - | otherwise = val + | val_width /= rep_width = CmmMachOp (MO_UU_Conv val_width rep_width) [val] + | otherwise = val stmtEC (CmmStore addr coerce_val) -- Return an unboxed tuple. @@ -982,7 +977,7 @@ emitRetUT args = do (sp, stmts) <- pushUnboxedTuple 0 args emitStmts stmts when (sp /= 0) $ stmtC (CmmAssign spReg (cmmRegOffW spReg (-sp))) - stmtC (CmmJump (entryCode (CmmLoad (cmmRegOffW spReg sp) wordRep)) []) + stmtC (CmmJump (entryCode (CmmLoad (cmmRegOffW spReg sp) bWord)) []) -- TODO (when using CPS): emitStmt (CmmReturn (map snd args)) -- ----------------------------------------------------------------------------- @@ -1088,9 +1083,9 @@ doSwitch mb_range scrut arms deflt initEnv :: Env initEnv = listToUFM [ ( fsLit "SIZEOF_StgHeader", - Var (CmmLit (CmmInt (fromIntegral (fixedHdrSize * wORD_SIZE)) wordRep) )), + Var (CmmLit (CmmInt (fromIntegral (fixedHdrSize * wORD_SIZE)) wordWidth) )), ( fsLit "SIZEOF_StgInfoTable", - Var (CmmLit (CmmInt (fromIntegral stdInfoTableSizeB) wordRep) )) + Var (CmmLit (CmmInt (fromIntegral stdInfoTableSizeB) wordWidth) )) ] parseCmmFile :: DynFlags -> FilePath -> IO (Messages, Maybe Cmm) diff --git a/compiler/cmm/CmmProcPointZ.hs b/compiler/cmm/CmmProcPointZ.hs index 82d3e26452..cedb9ef726 100644 --- a/compiler/cmm/CmmProcPointZ.hs +++ b/compiler/cmm/CmmProcPointZ.hs @@ -1,16 +1,19 @@ module CmmProcPointZ ( callProcPoints, minimalProcPointSet - , addProcPointProtocols - , splitAtProcPoints + , addProcPointProtocols, splitAtProcPoints, procPointAnalysis + , liveSlotAnal, cafAnal, layout, manifestSP, igraph, areaBuilder ) where +import Constants +import qualified Prelude as P import Prelude hiding (zip, unzip, last) +import Util (sortLe) import BlockId +import Bitmap import CLabel ---import ClosureInfo import Cmm hiding (blockId) import CmmExpr import CmmContFlowOpt @@ -18,13 +21,17 @@ import CmmLiveZ import CmmTx import DFMonad import FiniteMap -import MachOp (MachHint(NoHint)) +import IdInfo +import List (sortBy) import Maybes -import MkZipCfgCmm hiding (CmmBlock, CmmGraph) +import MkZipCfgCmm hiding (CmmBlock, CmmGraph, CmmTopZ) import Monad import Name import Outputable import Panic +import SMRep (rET_SMALL) +import StgCmmClosure +import StgCmmUtils import UniqFM import UniqSet import UniqSupply @@ -66,7 +73,7 @@ be the start of a new procedure to which the continuations can jump: You might think then that a criterion to make a node a proc point is that it is directly reached by two distinct proc points. (Note -[Direct reachability].) But this criterion is a bit two simple; for +[Direct reachability].) But this criterion is a bit too simple; for example, 'return x' is also reached by two proc points, yet there is no point in pulling it out of k_join. A good criterion would be to say that a node should be made a proc point if it is reached by a set @@ -123,7 +130,7 @@ forward = ForwardTransfers first middle last exit where first ProcPoint id = ReachedBy $ unitUniqSet id first x _ = x middle x _ = x - last _ (LastCall _ (Just id)) = LastOutFacts [(id, ProcPoint)] + last _ (LastCall _ (Just id) _) = LastOutFacts [(id, ProcPoint)] last x l = LastOutFacts $ map (\id -> (id, x)) (succs l) exit x = x @@ -136,32 +143,31 @@ minimalProcPointSet :: ProcPointSet -> CmmGraph -> FuelMonad ProcPointSet callProcPoints g = fold_blocks add entryPoint g where entryPoint = unitUniqSet (lg_entry g) add b set = case last $ unzip b of - LastOther (LastCall _ (Just k)) -> extendBlockSet set k + LastOther (LastCall _ (Just k) _) -> extendBlockSet set k _ -> set minimalProcPointSet callProcPoints g = extendPPSet g (postorder_dfs g) callProcPoints type PPFix = FuelMonad (ForwardFixedPoint Middle Last Status ()) -procPointAnalysis :: ProcPointSet -> CmmGraph -> FuelMonad PPFix +procPointAnalysis :: ProcPointSet -> CmmGraph -> FuelMonad (BlockEnv Status) procPointAnalysis procPoints g = let addPP env id = extendBlockEnv env id ProcPoint initProcPoints = foldl addPP emptyBlockEnv (uniqSetToList procPoints) - in runDFM lattice $ -- init with old facts and solve - return $ (zdfSolveFrom initProcPoints "proc-point reachability" lattice + in liftM zdfFpFacts $ + (zdfSolveFrom initProcPoints "proc-point reachability" lattice forward (fact_bot lattice) $ graphOfLGraph g :: PPFix) extendPPSet :: CmmGraph -> [CmmBlock] -> ProcPointSet -> FuelMonad ProcPointSet extendPPSet g blocks procPoints = - do res <- procPointAnalysis procPoints g - env <- liftM zdfFpFacts res + do env <- procPointAnalysis procPoints g let add block pps = let id = blockId block in case lookupBlockEnv env id of Just ProcPoint -> extendBlockSet pps id _ -> pps procPoints' = fold_blocks add emptyBlockSet g newPoint = listToMaybe (mapMaybe ppSuccessor blocks) - ppSuccessor b@(Block id _) = + ppSuccessor b@(Block id _ _) = let nreached id = case lookupBlockEnv env id `orElse` panic "no ppt" of ProcPoint -> 1 ReachedBy ps -> sizeUniqSet ps @@ -178,8 +184,6 @@ extendPPSet g blocks procPoints = Nothing -> return procPoints' - - ------------------------------------------------------------------------ -- Computing Proc-Point Protocols -- ------------------------------------------------------------------------ @@ -243,12 +247,13 @@ addProcPointProtocols callPPs procPoints g = do liveness <- cmmLivenessZ g (protos, g') <- return $ optimize_calls liveness g blocks'' <- add_CopyOuts protos procPoints g' - return $ LGraph (lg_entry g) blocks'' + return $ LGraph (lg_entry g) (lg_argoffset g) blocks'' where optimize_calls liveness g = -- see Note [Separate Adams optimization] let (protos, blocks') = fold_blocks maybe_add_call (init_protocols, emptyBlockEnv) g protos' = add_unassigned liveness procPoints protos - g' = LGraph (lg_entry g) $ add_CopyIns callPPs protos' blocks' + g' = LGraph (lg_entry g) (lg_argoffset g) $ + add_CopyIns callPPs protos' blocks' in (protos', runTx removeUnreachableBlocksZ g') maybe_add_call :: CmmBlock -> (BlockEnv Protocol, BlockEnv CmmBlock) -> (BlockEnv Protocol, BlockEnv CmmBlock) @@ -257,11 +262,10 @@ addProcPointProtocols callPPs procPoints g = -- redirect the call (cf 'newblock') and set the protocol if necessary maybe_add_call block (protos, blocks) = case goto_end $ unzip block of - (h, LastOther (LastCall tgt (Just k))) + (h, LastOther (LastCall tgt (Just k) s)) | Just proto <- lookupBlockEnv protos k, - Just pee <- jumpsToProcPoint k - -> let newblock = - zipht h (tailOfLast (LastCall tgt (Just pee))) + Just pee <- branchesToProcPoint k + -> let newblock = zipht h (tailOfLast (LastCall tgt (Just pee) s)) changed_blocks = insertBlock newblock blocks unchanged_blocks = insertBlock block blocks in case lookupBlockEnv protos pee of @@ -271,21 +275,20 @@ addProcPointProtocols callPPs procPoints g = else (protos, unchanged_blocks) _ -> (protos, insertBlock block blocks) - jumpsToProcPoint :: BlockId -> Maybe BlockId - -- ^ Tells whether the named block is just a jump to a proc point - jumpsToProcPoint id = - let (Block _ t) = lookupBlockEnv (lg_blocks g) id `orElse` - panic "jump out of graph" + branchesToProcPoint :: BlockId -> Maybe BlockId + -- ^ Tells whether the named block is just a branch to a proc point + branchesToProcPoint id = + let (Block _ _ t) = lookupBlockEnv (lg_blocks g) id `orElse` + panic "branch out of graph" in case t of - ZTail (CopyIn {}) (ZLast (LastOther (LastBranch pee))) + ZLast (LastOther (LastBranch pee)) | elemBlockSet pee procPoints -> Just pee _ -> Nothing init_protocols = fold_blocks maybe_add_proto emptyBlockEnv g maybe_add_proto :: CmmBlock -> BlockEnv Protocol -> BlockEnv Protocol - maybe_add_proto (Block id (ZTail (CopyIn c fs _srt) _)) env = - extendBlockEnv env id (Protocol c fs $ toArea id fs) + --maybe_add_proto (Block id (ZTail (CopyIn c _ fs _srt) _)) env = + -- extendBlockEnv env id (Protocol c fs $ toArea id fs) maybe_add_proto _ env = env - toArea id fs = mkCallArea id fs $ Just fs -- | For now, following a suggestion by Ben Lippmeier, we pass all -- live variables as arguments, hoping that a clever register @@ -297,17 +300,17 @@ add_unassigned = pass_live_vars_as_args pass_live_vars_as_args :: BlockEnv CmmLive -> ProcPointSet -> BlockEnv Protocol -> BlockEnv Protocol -pass_live_vars_as_args liveness procPoints protos = protos' +pass_live_vars_as_args _liveness procPoints protos = protos' where protos' = foldUniqSet addLiveVars protos procPoints addLiveVars :: BlockId -> BlockEnv Protocol -> BlockEnv Protocol addLiveVars id protos = case lookupBlockEnv protos id of Just _ -> protos - Nothing -> let live = lookupBlockEnv liveness id `orElse` - panic ("no liveness at block " ++ show id) - formals = map (\x -> CmmKinded x NoHint) $ uniqSetToList live - prot = Protocol ConventionPrivate formals $ - mkCallArea id formals $ Just formals + Nothing -> let live = emptyBlockEnv + --lookupBlockEnv _liveness id `orElse` + --panic ("no liveness at block " ++ show id) + formals = uniqSetToList live + prot = Protocol Private formals $ CallArea $ Young id in extendBlockEnv protos id prot @@ -315,131 +318,597 @@ pass_live_vars_as_args liveness procPoints protos = protos' -- instruction. (Proc-points that arise from calls already have their copy-in instructions.) add_CopyIns :: ProcPointSet -> BlockEnv Protocol -> BlockEnv CmmBlock -> BlockEnv CmmBlock -add_CopyIns callPPs protos = mapUFM maybe_insert_CopyIns +add_CopyIns callPPs protos blocks = mapUFM maybe_insert_CopyIns blocks where maybe_insert_CopyIns :: CmmBlock -> CmmBlock - maybe_insert_CopyIns b@(Block id t) | not $ elementOfUniqSet id callPPs = - case lookupBlockEnv protos id of - Nothing -> b - Just (Protocol c fs area) -> - case t of - --ZTail (CopyIn c' fs' _) _ -> - -- if c == c' && fs == fs' then b - -- else panic ("mismatched protocols for block " ++ show id) - _ -> Block id -- (ZTail (CopyIn c fs NoC_SRT) t) - $ foldr ZTail t (copyIn c area fs) + maybe_insert_CopyIns b@(Block id off t) | not $ elementOfUniqSet id callPPs = + case (off, lookupBlockEnv protos id) of + (Just _, _) -> panic "shouldn't copy arguments twice into a block" + (_, Just (Protocol c fs area)) -> Block id (Just off) $ foldr ZTail t copies + where (off, copies) = copyIn c False area fs + (_, Nothing) -> b maybe_insert_CopyIns b = b -- | Add a CopyOut node before each procpoint. --- If the predecessor is a call, then the CopyOut should already exist (in the callee). +-- If the predecessor is a call, then the copy outs should already be done by the callee. +-- Note: If we need to add copy-out instructions, they may require stack space, +-- so we accumulate a map from the successors to the necessary stack space, +-- then update the successors after we have finished inserting the copy-outs. add_CopyOuts :: BlockEnv Protocol -> ProcPointSet -> CmmGraph -> FuelMonad (BlockEnv CmmBlock) -add_CopyOuts protos procPoints g = fold_blocks maybe_insert_CopyOut (return emptyBlockEnv) g - where maybe_insert_CopyOut :: CmmBlock -> FuelMonad (BlockEnv CmmBlock) -> - FuelMonad (BlockEnv CmmBlock) - maybe_insert_CopyOut b@(Block bid _) blocks | bid == lg_entry g = skip b blocks - maybe_insert_CopyOut b blocks = +add_CopyOuts protos procPoints g = fold_blocks mb_copy_out (return emptyBlockEnv) g + where mb_copy_out :: CmmBlock -> FuelMonad (BlockEnv CmmBlock) -> + FuelMonad (BlockEnv CmmBlock) + mb_copy_out b@(Block bid _ _) z | bid == lg_entry g = skip b z + mb_copy_out b z = case last $ unzip b of - LastOther (LastCall _ _) -> skip b blocks -- copy out done by callee - _ -> maybe_insert_CopyOut' b blocks - maybe_insert_CopyOut' b blocks = fold_succs trySucc b init >>= finish - where init = blocks >>= (\bmap -> return (b, bmap)) + LastOther (LastCall _ _ _) -> skip b z -- copy out done by callee + _ -> mb_copy_out' b z + mb_copy_out' b z = fold_succs trySucc b init >>= finish + where init = z >>= (\bmap -> return (b, bmap)) trySucc succId z = if elemBlockSet succId procPoints then case lookupBlockEnv protos succId of Nothing -> z Just (Protocol c fs area) -> - insert z succId $ copyOut c area $ map fetch fs - -- CopyOut c $ map fetch fs + let (_, copies) = copyOut c Jump area $ map (CmmReg . CmmLocal) fs + in insert z succId copies else z - fetch k = k {kindlessCmm = CmmReg $ CmmLocal $ kindlessCmm k} insert z succId m = do (b, bmap) <- z (b, bs) <- insertBetween b m succId - return $ (b, foldl (flip insertBlock) bmap bs) - finish (b@(Block bid _), bmap) = return $ extendBlockEnv bmap bid b - skip b@(Block bid _) bs = bs >>= (\bmap -> return $ extendBlockEnv bmap bid b) - - - + pprTrace "insert for succ" (ppr succId <> ppr m) $ + return $ (b, foldl (flip insertBlock) bmap bs) + finish (b@(Block bid _ _), bmap) = + return $ (extendBlockEnv bmap bid b) + skip b@(Block bid _ _) bs = + bs >>= (\bmap -> return (extendBlockEnv bmap bid b)) + +-- At this point, we have found a set of procpoints, each of which should be +-- the entry point of a procedure. +-- Now, we create the procedure for each proc point, +-- which requires that we: +-- 1. build a map from proc points to the blocks reachable from the proc point +-- 2. turn each branch to a proc point into a jump +-- 3. turn calls and returns into jumps +-- 4. build info tables for the procedures -- and update the info table for +-- the SRTs in the entry procedure as well. -- Input invariant: A block should only be reachable from a single ProcPoint. --- If you want to duplicate blocks, do it before this gets called. -splitAtProcPoints :: CmmFormalsWithoutKinds -> CLabel -> ProcPointSet -> - CmmGraph -> FuelMonad [CmmGraph] -splitAtProcPoints formals entry_label procPoints g@(LGraph entry _) = - do let layout = layout_stack formals g - pprTrace "stack layout" (ppr layout) $ return () - res <- procPointAnalysis procPoints g - procMap <- liftM zdfFpFacts res - let addBlock b@(Block bid _) graphEnv = - case lookupBlockEnv procMap bid of - Just ProcPoint -> add graphEnv bid bid b - Just (ReachedBy set) -> - case uniqSetToList set of - [] -> graphEnv - [id] -> add graphEnv id bid b - _ -> panic "Each block should be reachable from only one ProcPoint" - Nothing -> panic "block not reached by a proc point?" +splitAtProcPoints :: CLabel -> ProcPointSet-> ProcPointSet -> BlockEnv Status -> + BlockEnv SubAreaSet -> AreaMap -> CmmTopZ -> FuelMonad [CmmTopZ] +splitAtProcPoints entry_label callPPs procPoints procMap slotEnv areaMap + (CmmProc top_info top_l top_args g@(LGraph entry e_off blocks)) = + do -- Build a map from procpoints to the blocks they reach + let addBlock b@(Block bid _ _) graphEnv = + case lookupBlockEnv procMap bid of + Just ProcPoint -> add graphEnv bid bid b + Just (ReachedBy set) -> + case uniqSetToList set of + [] -> graphEnv + [id] -> add graphEnv id bid b + _ -> panic "Each block should be reachable from only one ProcPoint" + Nothing -> pprPanic "block not reached by a proc point?" (ppr bid) add graphEnv procId bid b = extendBlockEnv graphEnv procId graph' where graph = lookupBlockEnv graphEnv procId `orElse` emptyBlockEnv graph' = extendBlockEnv graph bid b graphEnv <- return $ fold_blocks addBlock emptyBlockEnv g -- Build a map from proc point BlockId to labels for their new procedures - let add_label map pp = clabel pp >>= (\l -> return $ (pp, l) : map) - clabel procPoint = if procPoint == entry then return entry_label - else getUniqueM >>= return . to_label - to_label u = mkEntryLabel (mkFCallName u "procpoint") - procLabels <- foldM add_label [] (uniqSetToList procPoints) + let add_label map pp = return $ addToFM map pp lbl + where lbl = if pp == entry then entry_label else blockLbl pp + procLabels <- foldM add_label emptyFM (uniqSetToList procPoints) + -- Convert call and return instructions to jumps. + let last (LastCall e _ n) = LastJump e n + last l = l + graphEnv <- return $ mapUFM (mapUFM (map_one_block id id last)) graphEnv -- In each new graph, add blocks jumping off to the new procedures, -- and replace branches to procpoints with branches to the jump-off blocks let add_jump_block (env, bs) (pp, l) = do bid <- liftM mkBlockId getUniqueM - let b = Block bid (ZLast (LastOther (LastJump $ CmmLit $ CmmLabel l))) + let b = Block bid Nothing (ZLast (LastOther jump)) + argSpace = case lookupBlockEnv blocks pp of + Just (Block _ (Just s) _) -> s + Just (Block _ Nothing _) -> panic "no args at procpoint" + _ -> panic "can't find procpoint block" + jump = LastJump (CmmLit (CmmLabel l)) argSpace return $ (extendBlockEnv env pp bid, b : bs) add_jumps newGraphEnv (guniq, blockEnv) = - do (jumpEnv, jumpBlocks) <- foldM add_jump_block (emptyBlockEnv, []) procLabels + do (jumpEnv, jumpBlocks) <- foldM add_jump_block (emptyBlockEnv, []) + $ fmToList procLabels let ppId = mkBlockId guniq - LGraph _ blockEnv' = replaceLabelsZ jumpEnv $ LGraph ppId blockEnv - blockEnv'' = foldl (flip insertBlock) blockEnv' jumpBlocks + (b_off, b) = + case lookupBlockEnv blockEnv ppId of + Just (Block id (Just b_off) t) -> (b_off, Block id Nothing t) + Just b@(Block _ Nothing _) -> (0, b) + Nothing -> panic "couldn't find entry block while splitting" + off = if ppId == entry then e_off else b_off + LGraph _ _ blockEnv' = pprTrace "jumpEnv" (ppr jumpEnv) $ + replaceLabelsZ jumpEnv $ LGraph ppId off blockEnv + blockEnv'' = foldl (flip insertBlock) (extendBlockEnv blockEnv' ppId b) + jumpBlocks return $ extendBlockEnv newGraphEnv ppId $ - runTx cmmCfgOptsZ $ LGraph ppId blockEnv'' - _ <- return $ replaceLabelsZ + runTx cmmCfgOptsZ $ LGraph ppId off blockEnv'' + upd_info_tbl srt' (CmmInfoTable p t typeinfo) = CmmInfoTable p t typeinfo' + where typeinfo' = case typeinfo of + t@(ConstrInfo _ _ _) -> t + (FunInfo c _ a d e) -> FunInfo c srt' a d e + (ThunkInfo c _) -> ThunkInfo c srt' + (ThunkSelectorInfo s _) -> ThunkSelectorInfo s srt' + (ContInfo vars _) -> ContInfo vars srt' + upd_info_tbl _ CmmNonInfoTable = CmmNonInfoTable + to_proc cafMap (ppUniq, g) | elementOfUniqSet bid callPPs = + if bid == entry then + CmmProc (CmmInfo gc upd_fr (upd_info_tbl srt' info_tbl)) top_l top_args g + else + pprTrace "adding infotable for" (ppr bid) $ + CmmProc (CmmInfo Nothing Nothing $ infoTbl) lbl [] g + where bid = mkBlockId ppUniq + lbl = expectJust "pp label" $ lookupFM procLabels bid + infoTbl = CmmInfoTable (ProfilingInfo zero zero) rET_SMALL + (ContInfo stack_vars srt') + stack_vars = pprTrace "slotEnv" (ppr slotEnv) $ + live_vars slotEnv areaMap bid + zero = CmmInt 0 wordWidth + srt' = expectJust "procpoint.infoTbl" $ lookupBlockEnv cafMap bid + CmmInfo gc upd_fr info_tbl = top_info + to_proc _ (ppUniq, g) = + pprTrace "not adding infotable for" (ppr bid) $ + CmmProc (CmmInfo Nothing Nothing CmmNonInfoTable) lbl [] g + where bid = mkBlockId ppUniq + lbl = expectJust "pp label" $ lookupFM procLabels bid graphEnv <- foldM add_jumps emptyBlockEnv $ ufmToList graphEnv + cafEnv <- cafAnal g + (cafTable, blockCafs) <- buildCafs cafEnv + procs <- return $ map (to_proc blockCafs) $ ufmToList graphEnv return $ pprTrace "procLabels" (ppr procLabels) $ - pprTrace "splitting graphs" (ppr graphEnv) $ [g] + pprTrace "splitting graphs" (ppr graphEnv) $ cafTable ++ procs +splitAtProcPoints _ _ _ _ _ _ t@(CmmData _ _) = return [t] ------------------------------------------------------------------------ --- Stack Layout (completely bogus for now) -- +-- Stack Layout -- ------------------------------------------------------------------------ --- At some point, we'll do stack layout properly. --- But for now, we can move forward on generating code by just producing --- a brain dead layout, giving a separate slot to every variable, --- and (incorrectly) assuming that all parameters are passed on the stack. - --- For now, variables are placed at explicit offsets from a virtual --- frame pointer. --- We may want to use abstract stack slots at some point. -data Placement = VFPMinus Int - -instance Outputable Placement where - ppr (VFPMinus k) = text "VFP - " <> int k - --- Build a map from registers to stack locations. --- Return that map along with the offset to the end of the block --- containing local registers. -layout_stack ::CmmFormalsWithoutKinds -> CmmGraph -> - (Int, FiniteMap LocalReg Placement, FiniteMap LocalReg Placement) -layout_stack formals g = (ix', incomingMap, localMap) - where (ix, incomingMap) = foldl (flip place) (1, emptyFM) formals -- IGNORES CC'S - -- 1 leaves space for the return infotable - (ix', localMap) = foldUniqSet place (ix, emptyFM) regs - place r (ix, map) = (ix', addToFM map r $ VFPMinus ix') where ix' = ix + 1 - regs = fold_blocks (fold_fwd_block (\_ y -> y) add addL) emptyRegSet g - add x y = foldRegsDefd extendRegSet y x - addL (LastOther l) z = add l z - addL LastExit z = z +-- | Before we lay out the stack, we need to know something about the +-- liveness of the stack slots. In particular, to decide whether we can +-- reuse a stack location to hold multiple stack slots, we need to know +-- when each of the stack slots is used. +-- Although tempted to use something simpler, we really need a full interference +-- graph. Consider the following case: +-- case <...> of +-- 1 -> <spill x>; // y is dead out +-- 2 -> <spill y>; // x is dead out +-- 3 -> <spill x and y> +-- If we consider the arms in order and we use just the deadness information given by a +-- dataflow analysis, we might decide to allocate the stack slots for x and y +-- to the same stack location, which will lead to incorrect code in the third arm. +-- We won't make this mistake with an interference graph. + +-- First, the liveness analysis. +-- We represent a slot with an area, an offset into the area, and a width. +-- Tracking the live slots is a bit tricky because there may be loads and stores +-- into only a part of a stack slot (e.g. loading the low word of a 2-word long), +-- e.g. Slot A 0 8 overlaps with Slot A 4 4. +-- +-- The definition of a slot set is intended to reduce the number of overlap +-- checks we have to make. There's no reason to check for overlap between +-- slots in different areas, so we segregate the map by Area's. +-- We expect few slots in each Area, so we collect them in an unordered list. +-- To keep these lists short, any contiguous live slots are coalesced into +-- a single slot, on insertion. + +type SubAreaSet = FiniteMap Area [SubArea] +fold_subareas :: (SubArea -> z -> z) -> SubAreaSet -> z -> z +fold_subareas f m z = foldFM (\_ s z -> foldr (\a z -> f a z) z s) z m + +liveGen :: SubArea -> [SubArea] -> (Bool, [SubArea]) +liveGen s set = liveGen' s set [] + where liveGen' s [] z = (True, s : z) + liveGen' s@(a, hi, w) (s'@(a', hi', w') : rst) z = + if a /= a' || hi < lo' || lo > hi' then -- no overlap + liveGen' s rst (s' : z) + else if s' `contains` s then -- old contains new + (False, set) + else -- overlap: coalesce the slots + let new_hi = max hi hi' + new_lo = min lo lo' + in liveGen' (a, new_hi, new_hi - new_lo) rst z + where lo = hi - w -- remember: areas grow down + lo' = hi' - w' + contains (a, hi, w) (a', hi', w') = + a == a' && hi >= hi' && hi - w <= hi' - w' + +liveKill :: SubArea -> [SubArea] -> [SubArea] +liveKill (a, hi, w) set = pprTrace "killing slots in area" (ppr a) $ liveKill' set [] + where liveKill' [] z = z + liveKill' (s'@(a', hi', w') : rst) z = + if a /= a' || hi < lo' || lo > hi' then -- no overlap + liveKill' rst (s' : z) + else -- overlap: split the old slot + let z' = if hi' > hi then (a, hi', hi' - hi) : z else z + z'' = if lo > lo' then (a, lo, lo - lo') : z' else z' + in liveKill' rst z'' + where lo = hi - w -- remember: areas grow down + lo' = hi' - w' + +slotLattice :: DataflowLattice SubAreaSet +slotLattice = DataflowLattice "live slots" emptyFM add True + where add new old = case foldFM addArea (False, old) new of + (True, x) -> aTx x + (False, x) -> noTx x + addArea a newSlots z = foldr (addSlot a) z newSlots + addSlot a slot (changed, map) = + let (c, live) = liveGen slot $ lookupWithDefaultFM map [] a + in (c || changed, addToFM map a live) + +liveInSlots :: (DefinerOfSlots s, UserOfSlots s) => SubAreaSet -> s -> SubAreaSet +liveInSlots live x = foldSlotsUsed add (foldSlotsDefd remove live x) x + where add live (a, i, w) = liftToArea a (snd . liveGen (a, i, w)) live + remove live (a, i, w) = liftToArea a (liveKill (a, i, w)) live + liftToArea a f map = addToFM map a $ f (lookupWithDefaultFM map [] a) + +-- Unlike the liveness transfer functions @gen@ and @kill@, this function collects +-- _any_ slot that is named. +--addNamedSlots :: (DefinerOfSlots s, UserOfSlots s) => SubAreaSet -> s -> SubAreaSet +--addNamedSlots live x = foldSlotsUsed add (foldSlotsDefd add live x) x +-- where add live (a, i, w) = liftToArea a (snd . liveGen (a, i, w)) live +-- liftToArea a f map = addToFM map a $ f (lookupWithDefaultFM map [] a) + +-- Note: the stack slots that hold variables returned on the stack are not +-- considered live in to the block -- we treat the first node as a definition site. +-- BEWARE: I'm being a little careless here in failing to check for the +-- entry Id (which would use the CallArea Old). +liveTransfers :: BackwardTransfers Middle Last SubAreaSet +liveTransfers = BackwardTransfers first liveInSlots liveLastIn + where first live id = delFromFM live (CallArea (Young id)) + +liveLastIn :: (BlockId -> SubAreaSet) -> Last -> SubAreaSet +liveLastIn env l = liveInSlots (liveLastOut env l) l + +-- Don't forget to keep the outgoing parameters in the CallArea live. +liveLastOut :: (BlockId -> SubAreaSet) -> Last -> SubAreaSet +liveLastOut env l = + case l of + LastReturn n -> add_area (CallArea Old) n out + LastJump _ n -> add_area (CallArea Old) n out + LastCall _ Nothing n -> add_area (CallArea Old) n out + LastCall _ (Just k) n -> add_area (CallArea (Young k)) n out + _ -> out + where out = joinOuts slotLattice env l +add_area :: Area -> Int -> SubAreaSet -> SubAreaSet +add_area a n live = + addToFM live a $ snd $ liveGen (a, n, n) $ lookupWithDefaultFM live [] a + +type SlotFix a = FuelMonad (BackwardFixedPoint Middle Last SubAreaSet a) +liveSlotAnal :: LGraph Middle Last -> FuelMonad (BlockEnv SubAreaSet) +liveSlotAnal g = liftM zdfFpFacts (res :: SlotFix ()) + where res = zdfSolveFromL emptyBlockEnv "live slot analysis" slotLattice + liveTransfers (fact_bot slotLattice) g + +-- The liveness analysis must be precise: otherwise, we won't know if a definition +-- should really kill a live-out stack slot. +-- But the interference graph does not have to be precise -- it might decide that +-- any live areas interfere. To maintain both a precise analysis and an imprecise +-- interference graph, we need to convert the live-out stack slots to graph nodes +-- at each and every instruction; rather than reconstruct a new list of nodes +-- every time, I provide a function to fold over the nodes, which should be a +-- reasonably efficient approach for the implementations we envision. +-- Of course, it will probably be much easier to program if we just return a list... +type Set x = FiniteMap x () +type AreaMap = FiniteMap Area Int +data IGraphBuilder n = + Builder { foldNodes :: forall z. SubArea -> (n -> z -> z) -> z -> z + , _wordsOccupied :: AreaMap -> AreaMap -> n -> [Int] + } + +areaBuilder :: IGraphBuilder Area +areaBuilder = Builder fold words + where fold (a, _, _) f z = f a z + words areaSize areaMap a = + case lookupFM areaMap a of + Just addr -> [addr .. addr + (lookupFM areaSize a `orElse` + pprPanic "wordsOccupied: unknown area" (ppr a))] + Nothing -> [] + +--slotBuilder :: IGraphBuilder (Area, Int) +--slotBuilder = undefined + +-- Now, we can build the interference graph. +-- The usual story: a definition interferes with all live outs and all other +-- definitions. +type IGraph x = FiniteMap x (Set x) +type IGPair x = (IGraph x, IGraphBuilder x) +igraph :: (Ord x) => IGraphBuilder x -> BlockEnv SubAreaSet -> LGraph Middle Last -> IGraph x +igraph builder env g = foldr interfere emptyFM (postorder_dfs g) + where foldN = foldNodes builder + interfere block igraph = + let (h, l) = goto_end (unzip block) + --heads :: ZHead Middle -> (IGraph x, SubAreaSet) -> IGraph x + heads (ZFirst _ _) (igraph, _) = igraph + heads (ZHead h m) (igraph, liveOut) = + heads h (addEdges igraph m liveOut, liveInSlots liveOut m) + -- add edges between a def and the other defs and liveouts + addEdges igraph i out = fst $ foldSlotsDefd addDef (igraph, out) i + addDef (igraph, out) def@(a, _, _) = + (foldN def (addDefN out) igraph, + addToFM out a (snd $ liveGen def (lookupWithDefaultFM out [] a))) + addDefN out n igraph = + let addEdgeNO o igraph = foldN o addEdgeNN igraph + addEdgeNN n' igraph = addEdgeNN' n n' $ addEdgeNN' n' n igraph + addEdgeNN' n n' igraph = addToFM igraph n (addToFM set n' ()) + where set = lookupWithDefaultFM igraph emptyFM n + in foldFM (\ _ os igraph -> foldr addEdgeNO igraph os) igraph out + env' bid = lookupBlockEnv env bid `orElse` panic "unknown blockId in igraph" + in heads h $ case l of LastExit -> (igraph, emptyFM) + LastOther l -> (addEdges igraph l $ liveLastOut env' l, + liveLastIn env' l) + +-- Before allocating stack slots, we need to collect one more piece of information: +-- what's the highest offset (in bytes) used in each Area? +-- We'll need to allocate that much space for each Area. +getAreaSize :: LGraph Middle Last -> AreaMap +getAreaSize g@(LGraph _ off _) = + fold_blocks (fold_fwd_block first add add) (unitFM (CallArea Old) off) g + where first _ z = z + add x z = foldSlotsUsed addSlot (foldSlotsDefd addSlot z x) x + addSlot z (a, off, _) = addToFM z a $ max off $ lookupWithDefaultFM z 0 a + + +-- Find the Stack slots occupied by the subarea's conflicts +conflictSlots :: Ord x => IGPair x -> AreaMap -> AreaMap -> SubArea -> Set Int +conflictSlots (ig, Builder foldNodes wordsOccupied) areaSize areaMap subarea = + foldNodes subarea foldNode emptyFM + where foldNode n set = foldFM conflict set $ lookupWithDefaultFM ig emptyFM n + conflict n' () set = liveInSlots areaMap n' set + -- Add stack slots occupied by igraph node n + liveInSlots areaMap n set = foldr setAdd set (wordsOccupied areaSize areaMap n) + setAdd w s = addToFM s w () + +-- Find any open space on the stack, starting from the offset. +freeSlotFrom :: Ord x => IGPair x -> AreaMap -> Int -> AreaMap -> Area -> Int +freeSlotFrom ig areaSize offset areaMap area = + let size = lookupFM areaSize area `orElse` 0 + conflicts = conflictSlots ig areaSize areaMap (area, size, size) + -- Find a space big enough to hold the area + findSpace curr 0 = curr + findSpace curr cnt = -- target slot, considerand, # left to check + if elemFM curr conflicts then + findSpace (curr + size) size + else findSpace (curr - 1) (cnt - 1) + in findSpace (offset + size) size + +-- Find an open space on the stack, and assign it to the area. +allocSlotFrom :: Ord x => IGPair x -> AreaMap -> Int -> AreaMap -> Area -> AreaMap +allocSlotFrom ig areaSize from areaMap area = + if elemFM area areaMap then areaMap + else addToFM areaMap area $ freeSlotFrom ig areaSize from areaMap area + +-- | Greedy stack layout. +-- Compute liveness, build the interference graph, and allocate slots for the areas. +-- We visit each basic block in a (generally) forward order. +-- At each instruction that names a register subarea r, we immediately allocate +-- any available slot on the stack by the following procedure: +-- 1. Find the nodes N' that conflict with r +-- 2. Find the stack slots used for N' +-- 3. Choose a contiguous stack space s not in N' (s must be large enough to hold r) +-- For a CallArea, we allocate the stack space only when we reach a function +-- call that returns to the CallArea's blockId. +-- We use a similar procedure, with one exception: the stack space +-- must be allocated below the youngest stack slot that is live out. + +-- Note: The stack pointer only has to be younger than the youngest live stack slot +-- at proc points. Otherwise, the stack pointer can point anywhere. +layout :: ProcPointSet -> BlockEnv SubAreaSet -> LGraph Middle Last -> AreaMap +layout procPoints env g@(LGraph _ entrySp _) = + let builder = areaBuilder + ig = (igraph builder env g, builder) + env' bid = lookupBlockEnv env bid `orElse` panic "unknown blockId in igraph" + areaSize = getAreaSize g + -- Find the slots that are live-in to the block + live_in (ZTail m l) = liveInSlots (live_in l) m + live_in (ZLast (LastOther l)) = liveLastIn env' l + live_in (ZLast LastExit) = emptyFM + -- Find the youngest live stack slot + youngest_live areaMap live = fold_subareas young_slot live 0 + where young_slot (a, o, _) z = case lookupFM areaMap a of + Just top -> max z $ top + o + Nothing -> z + -- Allocate space for spill slots and call areas + allocVarSlot = allocSlotFrom ig areaSize 0 + allocCallSlot areaMap (Block id _ t) | elemBlockSet id procPoints = + allocSlotFrom ig areaSize (youngest_live areaMap $ live_in t) + areaMap (CallArea (Young id)) + allocCallSlot areaMap _ = areaMap + alloc i areaMap = foldSlotsDefd alloc' (foldSlotsUsed alloc' areaMap i) i + where alloc' areaMap (a@(RegSlot _), _, _) = allocVarSlot areaMap a + alloc' areaMap _ = areaMap + layoutAreas areaMap b@(Block _ _ t) = layout areaMap t + where layout areaMap (ZTail m t) = layout (alloc m areaMap) t + layout areaMap (ZLast _) = allocCallSlot areaMap b + areaMap = foldl layoutAreas (addToFM emptyFM (CallArea Old) 0) $ postorder_dfs g + in pprTrace "ProcPoints" (ppr procPoints) $ + pprTrace "Area SizeMap" (ppr areaSize) $ + pprTrace "Entry SP" (ppr entrySp) $ + pprTrace "Area Map" (ppr areaMap) $ areaMap + +-- After determining the stack layout, we can: +-- 1. Replace references to stack Areas with addresses relative to the stack +-- pointer. +-- 2. Insert adjustments to the stack pointer to ensure that it is at a +-- conventional location at each proc point. +-- Because we don't take interrupts on the execution stack, we only need the +-- stack pointer to be younger than the live values on the stack at proc points. +-- 3. At some point, we should check for stack overflow, but not just yet. +manifestSP :: ProcPointSet -> BlockEnv Status -> AreaMap -> + LGraph Middle Last -> FuelMonad (LGraph Middle Last) +manifestSP procPoints procMap areaMap g@(LGraph entry args blocks) = + liftM (LGraph entry args) blocks' + where blocks' = foldl replB (return emptyBlockEnv) (postorder_dfs g) + slot a = pprTrace "slot" (ppr a) $ lookupFM areaMap a `orElse` panic "unallocated Area" + slot' id = pprTrace "slot'" (ppr id)$ slot $ CallArea (Young id) + sp_on_entry id | id == entry = slot (CallArea Old) + args + sp_on_entry id | elemBlockSet id procPoints = + case lookupBlockEnv blocks id of + Just (Block _ (Just o) _) -> slot' id + o + Just (Block _ Nothing _) -> slot' id + Nothing -> panic "procpoint dropped from block env" + sp_on_entry id = + case lookupBlockEnv procMap id of + Just (ReachedBy pp) -> case uniqSetToList pp of + [id] -> sp_on_entry id + _ -> panic "block not reached by single proc point" + Just ProcPoint -> panic "procpoint not in procpoint set" + Nothing -> panic "block not found in procmap" + -- On entry to procpoints, the stack pointer is conventional; + -- otherwise, we check the SP set by predecessors. + replB :: FuelMonad (BlockEnv CmmBlock) -> CmmBlock -> FuelMonad (BlockEnv CmmBlock) + replB blocks (Block id o t) = + do bs <- replTail (Block id o) spIn t + pprTrace "spIn" (ppr id <+> ppr spIn)$ liftM (flip (foldr insertBlock) bs) blocks + where spIn = sp_on_entry id + replTail :: (ZTail Middle Last -> CmmBlock) -> Int -> (ZTail Middle Last) -> + FuelMonad ([CmmBlock]) + replTail h spOff (ZTail m t) = replTail (h . ZTail (middle spOff m)) spOff t + replTail h spOff (ZLast (LastOther l)) = fixSp h spOff l + replTail h _ l@(ZLast LastExit) = return [h l] + middle spOff m = mapExpDeepMiddle (replSlot spOff) m + last spOff l = mapExpDeepLast (replSlot spOff) l + replSlot spOff (CmmStackSlot a i) = CmmRegOff (CmmGlobal Sp) (spOff - (slot a + i)) + replSlot _ e = e + -- The block must establish the SP expected at each successsor. + fixSp :: (ZTail Middle Last -> CmmBlock) -> Int -> Last -> FuelMonad ([CmmBlock]) + fixSp h spOff l@(LastReturn n) = updSp h spOff (slot (CallArea Old) + n) l + fixSp h spOff l@(LastJump _ n) = updSp h spOff (slot (CallArea Old) + n) l + fixSp h spOff l@(LastCall _ (Just k) n) = updSp h spOff (slot' k + n) l + fixSp h spOff l@(LastCall _ Nothing n) = updSp h spOff (slot (CallArea Old) + n) l + fixSp h spOff l@(LastBranch k) | elemBlockSet k procPoints = + pprTrace "updSp" (ppr k <> ppr spOff <> ppr (sp_on_entry k)) $ updSp h spOff (sp_on_entry k) l + fixSp h spOff l = liftM (uncurry (:)) $ fold_succs succ l $ return (b, []) + where b = h (ZLast (LastOther (last spOff l))) + succ succId z = + let succSp = sp_on_entry succId in + if elemBlockSet succId procPoints && succSp /= spOff then + do (b, bs) <- z + (b', bs') <- insertBetween b [setSpMid spOff succSp] succId + return (b', bs ++ bs') + else z + updSp h old new l = return [h $ setSp old new $ ZLast $ LastOther (last new l)] + setSpMid sp sp' = MidAssign (CmmGlobal Sp) e + where e = CmmMachOp (MO_Add wordWidth) [CmmReg (CmmGlobal Sp), off] + off = CmmLit $ CmmInt (toInteger $ sp - sp') wordWidth + setSp sp sp' t = if sp == sp' then t else ZTail (setSpMid sp sp') t +---------------------------------------------------------------- +-- Building InfoTables + +type CAFSet = FiniteMap CLabel () + +-- First, an analysis to find live CAFs. +cafLattice :: DataflowLattice CAFSet +cafLattice = DataflowLattice "live cafs" emptyFM add True + where add new old = if sizeFM new' > sizeFM old then aTx new' else noTx new' + where new' = new `plusFM` old + +cafTransfers :: BackwardTransfers Middle Last CAFSet +cafTransfers = BackwardTransfers first middle last + where first live _ = live + middle live m = pprTrace "cafmiddle" (ppr m) $ foldExpDeepMiddle addCaf m live + last env l = foldExpDeepLast addCaf l (joinOuts cafLattice env l) + addCaf e set = case e of + CmmLit (CmmLabel c) -> add c set + CmmLit (CmmLabelOff c _) -> add c set + CmmLit (CmmLabelDiffOff c1 c2 _) -> add c1 $ add c2 set + _ -> set + add c s = pprTrace "CAF analysis saw label" (ppr c) $ + if hasCAF c then (pprTrace "has caf" (ppr c) $ addToFM s c ()) else (pprTrace "no cafs" (ppr c) $ s) + +type CafFix a = FuelMonad (BackwardFixedPoint Middle Last CAFSet a) +cafAnal :: LGraph Middle Last -> FuelMonad (BlockEnv CAFSet) +cafAnal g = liftM zdfFpFacts (res :: CafFix ()) + where res = zdfSolveFromL emptyBlockEnv "live CAF analysis" cafLattice + cafTransfers (fact_bot cafLattice) g + +-- Once we have found the CAFs, we need to do two things: +-- 1. Build a table of all the CAFs used in the procedure. +-- 2. Compute the C_SRT describing the subset of CAFs live at each procpoint. +buildCafs :: (BlockEnv CAFSet) -> FuelMonad ([CmmTopZ], BlockEnv C_SRT) +buildCafs blockCafs = + -- This is surely the wrong way to get names, as in BlockId + do top_lbl <- getUniqueM >>= \ u -> return $ mkSRTLabel (mkFCallName u "srt") MayHaveCafRefs + let allCafs = foldBlockEnv (\_ x y -> plusFM x y) emptyFM blockCafs + caf_entry (ix, map, tbl') caf = (ix + 1, addToFM map caf ix, entry : tbl') + where entry = CmmStaticLit $ CmmLabel caf + (_::Int, cafMap, tbl') = foldl caf_entry (0, emptyFM, []) $ keysFM allCafs + top_tbl = CmmData RelocatableReadOnlyData $ CmmDataLabel top_lbl : reverse tbl' + sub_srt id cafs z = + do (tbls, blocks) <- z + (top, srt) <- procpointSRT top_lbl cafMap cafs + let blocks' = extendBlockEnv blocks id srt + case top of Just t -> return (t:tbls, blocks') + Nothing -> return (tbls, blocks') + (sub_tbls, blockSRTs) <- foldBlockEnv sub_srt (return ([], emptyBlockEnv)) blockCafs + return (top_tbl : sub_tbls, blockSRTs) + +-- Construct an SRT bitmap. +-- Adapted from simpleStg/SRT.lhs, which expects Id's. +procpointSRT :: CLabel -> FiniteMap CLabel Int -> FiniteMap CLabel () -> + FuelMonad (Maybe CmmTopZ, C_SRT) +procpointSRT top_srt top_table entries + | isEmptyFM entries = pprTrace "nil SRT" (ppr top_srt) $ return (Nothing, NoC_SRT) + | otherwise = pprTrace "non-nil SRT" (ppr top_srt) $ bitmap `seq` to_SRT top_srt offset len bitmap + where + ints = map (expectJust "constructSRT" . lookupFM top_table) (keysFM entries) + sorted_ints = sortLe (<=) ints + offset = head sorted_ints + bitmap_entries = map (subtract offset) sorted_ints + len = P.last bitmap_entries + 1 + bitmap = intsToBitmap len bitmap_entries + +-- Adapted from codeGen/StgCmmUtils, which converts from SRT to C_SRT. +to_SRT :: CLabel -> Int -> Int -> Bitmap -> FuelMonad (Maybe CmmTopZ, C_SRT) +to_SRT top_srt off len bmp + | len > widthInBits wordWidth `div` 2 || bmp == [fromIntegral srt_escape] + = do id <- getUniqueM + let srt_desc_lbl = mkLargeSRTLabel id + tbl = CmmData RelocatableReadOnlyData $ + CmmDataLabel srt_desc_lbl : map CmmStaticLit + ( cmmLabelOffW top_srt off + : mkWordCLit (fromIntegral len) + : map mkWordCLit bmp) + return (Just tbl, C_SRT srt_desc_lbl 0 srt_escape) + | otherwise + = return (Nothing, C_SRT top_srt off (fromIntegral (head bmp))) + -- The fromIntegral converts to StgHalfWord + +-- Given a block ID, we return a representation of the layout of the stack. +-- If the element is `Nothing`, then it represents an empty or dead +-- word on the stack. +-- If the element is `Just` a register, then it represents a live spill slot +-- for the register; note that a register may occupy multiple words. +-- The head of the list represents the young end of the stack where the infotable +-- pointer for the block `Bid` is stored. +-- The infotable pointer itself is not included in the list. +live_vars :: BlockEnv SubAreaSet -> AreaMap -> BlockId -> [Maybe LocalReg] +live_vars slotEnv areaMap bid = slotsToList youngByte liveSlots + where slotsToList 0 [] = [] + slotsToList 0 ((_, r, _) : _) = pprPanic "slot left off live_vars" (ppr r) + slotsToList n _ | n < 0 = panic "stack slots not allocated on word boundaries?" + slotsToList n ((n', r, w) : rst) = + if n == n' then Just r : slotsToList (n - w) rst + else Nothing : slotsToList (n - wORD_SIZE) rst + slotsToList n [] = Nothing : slotsToList (n - wORD_SIZE) [] + liveSlots = sortBy (\ (_,off,_) (_,off',_) -> compare off' off) + (foldFM (\_ -> flip $ foldr add_slot) [] slots) + add_slot (a@(RegSlot r@(LocalReg _ ty)), off, w) rst = + if off == w && widthInBytes (typeWidth ty) == w then + (expectJust "add_slot" (lookupFM areaMap a), r, w) : rst + else panic "live_vars: only part of a variable live at a proc point" + add_slot (CallArea Old, off, w) rst = + if off == wORD_SIZE && w == wORD_SIZE then + rst -- the return infotable should be live + else pprPanic "CallAreas must not be live across function calls" (ppr bid) + add_slot (CallArea (Young _), _, _) _ = + pprPanic "CallAreas must not be live across function calls" (ppr bid) + slots = expectJust "live_vars slots" $ lookupBlockEnv slotEnv bid + youngByte = expectJust "live_vars bid_pos" $ lookupFM areaMap (CallArea (Young bid)) ---------------------------------------------------------------- diff --git a/compiler/cmm/CmmSpillReload.hs b/compiler/cmm/CmmSpillReload.hs index 3cc102f1ca..67cf8d31df 100644 --- a/compiler/cmm/CmmSpillReload.hs +++ b/compiler/cmm/CmmSpillReload.hs @@ -1,11 +1,9 @@ module CmmSpillReload - ( ExtendWithSpills(..) - , DualLive(..) + ( DualLive(..) , dualLiveLattice, dualLiveTransfers, dualLiveness --, insertSpillsAndReloads --- XXX todo check live-in at entry against formals , dualLivenessWithInsertion - , elimSpillAndReload , availRegsLattice , cmmAvailableReloads @@ -41,17 +39,10 @@ import Prelude hiding (zip) -- establish the invariant that at a call (or at any proc point with -- an established protocol) all live variables not expected in -- registers are sitting on the stack. We use a backward analysis to --- insert spills and reloads. It should some day be followed by a +-- insert spills and reloads. It should be followed by a -- forward transformation to sink reloads as deeply as possible, so as -- to reduce register pressure. -data ExtendWithSpills m - = NotSpillOrReload m - | Spill RegSet - | Reload RegSet - -type M = ExtendWithSpills Middle - -- A variable can be expected to be live in a register, live on the -- stack, or both. This analysis ensures that spills and reloads are -- inserted as needed to make sure that every live variable needed @@ -70,8 +61,8 @@ dualUnionList ls = DualLive ss rs where ss = unionManyUniqSets $ map on_stack ls rs = unionManyUniqSets $ map in_regs ls -_changeStack, changeRegs :: (RegSet -> RegSet) -> DualLive -> DualLive -_changeStack f live = live { on_stack = f (on_stack live) } +changeStack, changeRegs :: (RegSet -> RegSet) -> DualLive -> DualLive +changeStack f live = live { on_stack = f (on_stack live) } changeRegs f live = live { in_regs = f (in_regs live) } @@ -85,24 +76,23 @@ dualLiveLattice = return $ DualLive stack regs add1 = fact_add_to liveLattice -type LiveReloadFix a = FuelMonad (BackwardFixedPoint M Last DualLive a) +type LiveReloadFix a = FuelMonad (BackwardFixedPoint Middle Last DualLive a) -dualLivenessWithInsertion :: BlockSet -> (Graph M Last) -> FuelMonad (Graph M Last) +dualLivenessWithInsertion :: BlockSet -> (LGraph Middle Last) -> FuelMonad (LGraph Middle Last) dualLivenessWithInsertion procPoints g = - liftM zdfFpContents $ (res :: LiveReloadFix (Graph M Last)) - where res = zdfRewriteFrom RewriteDeep emptyBlockEnv "dual liveness with insertion" - dualLiveLattice (dualLiveTransfers procPoints) - (insertSpillAndReloadRewrites procPoints) empty g + liftM zdfFpContents $ (res :: LiveReloadFix (LGraph Middle Last)) + where res = zdfBRewriteFromL RewriteDeep emptyBlockEnv "dual liveness with insertion" + dualLiveLattice (dualLiveTransfers procPoints) + (insertSpillAndReloadRewrites procPoints) empty g empty = fact_bot dualLiveLattice --- = a_ft_b_unlimited dualLiveness insertSpillsAndReloads -dualLiveness :: BlockSet -> Graph M Last -> FuelMonad (BlockEnv DualLive) +dualLiveness :: BlockSet -> LGraph Middle Last -> FuelMonad (BlockEnv DualLive) dualLiveness procPoints g = liftM zdfFpFacts $ (res :: LiveReloadFix ()) - where res = zdfSolveFrom emptyBlockEnv "dual liveness" dualLiveLattice - (dualLiveTransfers procPoints) empty g + where res = zdfSolveFromL emptyBlockEnv "dual liveness" dualLiveLattice + (dualLiveTransfers procPoints) empty g empty = fact_bot dualLiveLattice -dualLiveTransfers :: BlockSet -> BackwardTransfers M Last DualLive +dualLiveTransfers :: BlockSet -> BackwardTransfers Middle Last DualLive dualLiveTransfers procPoints = BackwardTransfers first middle last where last = lastDualLiveness middle = middleDualLiveness @@ -112,29 +102,25 @@ dualLiveTransfers procPoints = BackwardTransfers first middle last , in_regs = emptyRegSet } else live - -middleDualLiveness :: DualLive -> M -> DualLive -middleDualLiveness live (Spill regs) = live' - -- live-in on-stack requirements are satisfied; - -- live-out in-regs obligations are created - where live' = DualLive { on_stack = on_stack live `minusRegSet` regs - , in_regs = in_regs live `plusRegSet` regs } - -middleDualLiveness live (Reload regs) = live' - -- live-in in-regs requirements are satisfied; - -- live-out on-stack obligations are created - where live' = DualLive { on_stack = on_stack live `plusRegSet` regs - , in_regs = in_regs live `minusRegSet` regs } - -middleDualLiveness live (NotSpillOrReload m) = changeRegs (middleLiveness m) live +middleDualLiveness :: DualLive -> Middle -> DualLive +middleDualLiveness live m = + changeStack updSlots $ changeRegs (middleLiveness m) live + where updSlots live = foldSlotsUsed reload (foldSlotsDefd spill live m) m + spill live s@(RegSlot r, _, _) = check s $ deleteFromRegSet live r + spill live _ = live + reload live s@(RegSlot r, _, _) = check s $ extendRegSet live r + reload live _ = live + check (RegSlot (LocalReg _ ty), o, w) x + | o == w && w == widthInBytes (typeWidth ty) = x + check _ _ = panic "middleDualLiveness unsupported: slices" lastDualLiveness :: (BlockId -> DualLive) -> Last -> DualLive lastDualLiveness env l = last l - where last (LastReturn) = empty - last (LastJump e) = changeRegs (gen e) empty - last (LastBranch id) = env id - last (LastCall tgt Nothing) = changeRegs (gen tgt) empty - last (LastCall tgt (Just k)) = + where last (LastReturn _) = empty + last (LastJump e _) = changeRegs (gen e) empty + last (LastBranch id) = env id + last (LastCall tgt Nothing _) = changeRegs (gen tgt) empty + last (LastCall tgt (Just k) _) = -- nothing can be live in registers at this point let live = env k in if isEmptyUniqSet (in_regs live) then @@ -142,77 +128,52 @@ lastDualLiveness env l = last l else pprTrace "Offending party:" (ppr k <+> ppr live) $ panic "live values in registers at call continuation" - last (LastCondBranch e t f) = changeRegs (gen e) $ dualUnion (env t) (env f) - last (LastSwitch e tbl) = changeRegs (gen e) $ dualUnionList $ + last (LastCondBranch e t f) = changeRegs (gen e) $ dualUnion (env t) (env f) + last (LastSwitch e tbl) = changeRegs (gen e) $ dualUnionList $ map env (catMaybes tbl) empty = fact_bot dualLiveLattice -gen, kill :: UserOfLocalRegs a => a -> RegSet -> RegSet -gen a live = foldRegsUsed extendRegSet live a -kill a live = foldRegsUsed delOneFromUniqSet live a +gen :: UserOfLocalRegs a => a -> RegSet -> RegSet +gen a live = foldRegsUsed extendRegSet live a -insertSpillAndReloadRewrites :: BlockSet -> BackwardRewrites M Last DualLive +insertSpillAndReloadRewrites :: BlockSet -> BackwardRewrites Middle Last DualLive insertSpillAndReloadRewrites procPoints = BackwardRewrites first middle last exit where middle = middleInsertSpillsAndReloads last = \_ _ -> Nothing exit = Nothing first live id = if elemBlockSet id procPoints && not (isEmptyUniqSet reloads) then - Just $ mkMiddles $ [Reload reloads] + Just $ mkMiddles $ map reload $ uniqSetToList reloads else Nothing - where reloads = in_regs live + where reloads = in_regs live -middleInsertSpillsAndReloads :: DualLive -> M -> Maybe (AGraph M Last) -middleInsertSpillsAndReloads _ (Spill _) = Nothing -middleInsertSpillsAndReloads _ (Reload _) = Nothing -middleInsertSpillsAndReloads live m@(NotSpillOrReload nsr) = middle nsr - where middle (MidAssign (CmmLocal reg) _) = +middleInsertSpillsAndReloads :: DualLive -> Middle -> Maybe (AGraph Middle Last) +middleInsertSpillsAndReloads live m = middle m + where middle (MidAssign (CmmLocal reg) (CmmLoad (CmmStackSlot (RegSlot reg') _) _)) + | reg == reg' = Nothing + middle (MidAssign (CmmLocal reg) _) = if reg `elemRegSet` on_stack live then -- must spill - my_trace "Spilling" (f4sep [text "spill" <+> ppr reg, - text "after", ppr m]) $ - Just $ mkMiddles [m, Spill $ mkRegSet [reg]] - else - Nothing - middle (CopyIn _ formals _) = - -- only 'formals' can be in regs at this point - let regs' = kill formals (in_regs live) -- live in regs; must reload - is_stack_var r = elemRegSet r (on_stack live) - needs_spilling = filterRegsUsed is_stack_var formals - -- a formal that is expected on the stack; must spill - in if isEmptyUniqSet regs' && isEmptyUniqSet needs_spilling then - Nothing - else - let code = if isEmptyUniqSet regs' then [] - else Reload regs' : [] - code' = if isEmptyUniqSet needs_spilling then code - else Spill needs_spilling : code - in - my_trace "At CopyIn" (f4sep [text "Triggered by ", ppr live, - ppr (Reload regs' :: M), - ppr (Spill needs_spilling :: M), - text "after", ppr m]) $ - Just $ mkMiddles (m : code') + my_trace "Spilling" (f4sep [text "spill" <+> ppr reg, + text "after", ppr m]) $ + Just $ mkMiddles $ [m, spill reg] + else Nothing middle _ = Nothing --- | For conversion back to vanilla C-- - -elimSpillAndReload :: StackSlotMap -> LGraph M l -> (StackSlotMap, LGraph Middle l) -elimSpillAndReload slots g = toGraph $ fold_blocks block ((slots, [])) g - where toGraph (slots, l) = (slots, of_block_list (lg_entry g) l) - block (Block id t) (slots, blocks) = - lift (\ t' -> Block id t' : blocks) $ tail t slots - tail (ZLast l) slots = (slots, ZLast l) - tail (ZTail m t) slots = middle m $ tail t slots - middle (NotSpillOrReload m) (slots, t) = (slots, ZTail m t) - middle (Spill regs) z = foldUniqSet spill z regs - middle (Reload regs) z = foldUniqSet reload z regs - move f r (slots, t) = - lift (\ slot -> ZTail (f slot (CmmLocal r)) t) $ getSlot slots r - spill = move (\ slot reg -> MidStore slot (CmmReg reg)) - reload = move (\ slot reg -> MidAssign reg slot) - lift f (slots, x) = (slots, f x) +-- Generating spill and reload code +regSlot :: LocalReg -> CmmExpr +regSlot r = CmmStackSlot (RegSlot r) (widthInBytes $ typeWidth $ localRegType r) + +spill, reload :: LocalReg -> Middle +spill r = MidStore (regSlot r) (CmmReg $ CmmLocal r) +reload r = MidAssign (CmmLocal r) (CmmLoad (regSlot r) $ localRegType r) +spillHead :: ZHead Middle -> RegSet -> ZHead Middle +reloadTail :: RegSet -> ZTail Middle Last -> ZTail Middle Last +spillHead h regset = foldl spl h $ uniqSetToList regset + where spl h r = ZHead h $ spill r +reloadTail regset t = foldl rel t $ uniqSetToList regset + where rel t r = ZTail (reload r) t ---------------------------------------------------------------- --- sinking reloads @@ -249,9 +210,9 @@ smallerAvail (UniverseMinus _) (AvailRegs _) = False smallerAvail (AvailRegs s) (AvailRegs s') = sizeUniqSet s < sizeUniqSet s' smallerAvail (UniverseMinus s) (UniverseMinus s') = sizeUniqSet s > sizeUniqSet s' -extendAvail :: AvailRegs -> LocalReg -> AvailRegs -extendAvail (UniverseMinus s) r = UniverseMinus (deleteFromRegSet s r) -extendAvail (AvailRegs s) r = AvailRegs (extendRegSet s r) +--extendAvail :: AvailRegs -> LocalReg -> AvailRegs +--extendAvail (UniverseMinus s) r = UniverseMinus (deleteFromRegSet s r) +--extendAvail (AvailRegs s) r = AvailRegs (extendRegSet s r) deleteFromAvail :: AvailRegs -> LocalReg -> AvailRegs deleteFromAvail (UniverseMinus s) r = UniverseMinus (extendRegSet s r) @@ -262,15 +223,15 @@ elemAvail (UniverseMinus s) r = not $ elemRegSet r s elemAvail (AvailRegs s) r = elemRegSet r s type CmmAvail = BlockEnv AvailRegs -type AvailFix = FuelMonad (ForwardFixedPoint M Last AvailRegs ()) +type AvailFix = FuelMonad (ForwardFixedPoint Middle Last AvailRegs ()) -cmmAvailableReloads :: Graph M Last -> FuelMonad CmmAvail +cmmAvailableReloads :: LGraph Middle Last -> FuelMonad CmmAvail cmmAvailableReloads g = liftM zdfFpFacts $ (res :: AvailFix) - where res = zdfSolveFrom emptyBlockEnv "available reloads" availRegsLattice - avail_reloads_transfer empty g + where res = zdfSolveFromL emptyBlockEnv "available reloads" availRegsLattice + avail_reloads_transfer empty g empty = (fact_bot availRegsLattice) -avail_reloads_transfer :: ForwardTransfers M Last AvailRegs +avail_reloads_transfer :: ForwardTransfers Middle Last AvailRegs avail_reloads_transfer = ForwardTransfers first middle last id where first avail _ = avail middle = flip middleAvail @@ -278,36 +239,33 @@ avail_reloads_transfer = ForwardTransfers first middle last id -- | The transfer equations use the traditional 'gen' and 'kill' -- notations, which should be familiar from the dragon book. -agen, akill :: UserOfLocalRegs a => a -> AvailRegs -> AvailRegs -agen a live = foldRegsUsed extendAvail live a +--agen, +akill :: UserOfLocalRegs a => a -> AvailRegs -> AvailRegs +--agen a live = foldRegsUsed extendAvail live a akill a live = foldRegsUsed deleteFromAvail live a -- Note: you can't sink the reload past a use. -middleAvail :: M -> AvailRegs -> AvailRegs -middleAvail (Spill _) = id -middleAvail (Reload regs) = agen regs -middleAvail (NotSpillOrReload m) = middle m +middleAvail :: Middle -> AvailRegs -> AvailRegs +middleAvail m = middle m where middle m live = middle' m $ foldRegsUsed deleteFromAvail live m middle' (MidComment {}) = id middle' (MidAssign lhs _expr) = akill lhs middle' (MidStore {}) = id middle' (MidUnsafeCall _tgt ress _args) = akill ress middle' (MidAddToContext {}) = id - middle' (CopyIn _ formals _) = akill formals - middle' (CopyOut {}) = id lastAvail :: AvailRegs -> Last -> LastOutFacts AvailRegs -lastAvail _ (LastCall _ (Just k)) = LastOutFacts [(k, AvailRegs emptyRegSet)] +lastAvail _ (LastCall _ (Just k) _) = LastOutFacts [(k, AvailRegs emptyRegSet)] lastAvail avail l = LastOutFacts $ map (\id -> (id, avail)) $ succs l -insertLateReloads :: Graph M Last -> FuelMonad (Graph M Last) +insertLateReloads :: LGraph Middle Last -> FuelMonad (LGraph Middle Last) insertLateReloads g = do env <- cmmAvailableReloads g - g <- lGraphOfGraph g - liftM graphOfLGraph $ mapM_blocks (insertM env) g + mapM_blocks (insertM env) g where insertM env b = fuelConsumingPass "late reloads" (insert b) where avail id = lookupBlockEnv env id `orElse` AvailRegs emptyRegSet - insert (Block id tail) fuel = propagate (ZFirst id) (avail id) tail fuel + insert (Block id off tail) fuel = + propagate (ZFirst id off) (avail id) tail fuel propagate h avail (ZTail m t) fuel = let (h', fuel') = maybe_add_reload h avail m fuel in propagate (ZHead h' m) (middleAvail m avail) t fuel' @@ -318,31 +276,31 @@ insertLateReloads g = let used = filterRegsUsed (elemAvail avail) node in if not (canRewriteWithFuel fuel) || isEmptyUniqSet used then (h,fuel) - else (ZHead h (Reload used), oneLessFuel fuel) + else (spillHead h used, oneLessFuel fuel) -type LateReloadFix = FuelMonad (ForwardFixedPoint M Last AvailRegs (Graph M Last)) +type LateReloadFix = FuelMonad (ForwardFixedPoint Middle Last AvailRegs (LGraph Middle Last)) -insertLateReloads' :: (Graph M Last) -> FuelMonad (Graph M Last) +insertLateReloads' :: (LGraph Middle Last) -> FuelMonad (LGraph Middle Last) insertLateReloads' g = liftM zdfFpContents $ (res :: LateReloadFix) - where res = zdfRewriteFrom RewriteShallow emptyBlockEnv "insert late reloads" - availRegsLattice avail_reloads_transfer rewrites bot g + where res = zdfFRewriteFromL RewriteShallow emptyBlockEnv "insert late reloads" + availRegsLattice avail_reloads_transfer rewrites bot g bot = fact_bot availRegsLattice rewrites = ForwardRewrites first middle last exit first _ _ = Nothing - middle :: AvailRegs -> M -> Maybe (AGraph M Last) - last :: AvailRegs -> Last -> Maybe (AGraph M Last) + middle :: AvailRegs -> Middle -> Maybe (AGraph Middle Last) + last :: AvailRegs -> Last -> Maybe (AGraph Middle Last) middle avail m = maybe_reload_before avail m (ZTail m (ZLast LastExit)) last avail l = maybe_reload_before avail l (ZLast (LastOther l)) exit _ = Nothing maybe_reload_before avail node tail = let used = filterRegsUsed (elemAvail avail) node in if isEmptyUniqSet used then Nothing - else Just $ mkZTail $ ZTail (Reload used) tail + else Just $ mkZTail $ reloadTail used tail -removeDeadAssignmentsAndReloads :: BlockSet -> (Graph M Last) -> FuelMonad (Graph M Last) +removeDeadAssignmentsAndReloads :: BlockSet -> (LGraph Middle Last) -> FuelMonad (LGraph Middle Last) removeDeadAssignmentsAndReloads procPoints g = - liftM zdfFpContents $ (res :: LiveReloadFix (Graph M Last)) - where res = zdfRewriteFrom RewriteDeep emptyBlockEnv "dead-assignment & -reload elim" + liftM zdfFpContents $ (res :: LiveReloadFix (LGraph Middle Last)) + where res = zdfBRewriteFromL RewriteDeep emptyBlockEnv "dead-assignment & -reload elim" dualLiveLattice (dualLiveTransfers procPoints) rewrites (fact_bot dualLiveLattice) g rewrites = BackwardRewrites first middle last exit @@ -351,16 +309,8 @@ removeDeadAssignmentsAndReloads procPoints g = middle = middleRemoveDeads first _ _ = Nothing -middleRemoveDeads :: DualLive -> M -> Maybe (AGraph M Last) -middleRemoveDeads _ (Spill _) = Nothing -middleRemoveDeads live (Reload s) = - if sizeUniqSet worth_reloading < sizeUniqSet s then - Just $ if isEmptyUniqSet worth_reloading then emptyAGraph - else mkMiddles [Reload worth_reloading] - else - Nothing - where worth_reloading = intersectUniqSets s (in_regs live) -middleRemoveDeads live (NotSpillOrReload m) = middle m +middleRemoveDeads :: DualLive -> Middle -> Maybe (AGraph Middle Last) +middleRemoveDeads live m = middle m where middle (MidAssign (CmmLocal reg') _) | not (reg' `elemRegSet` in_regs live) = Just emptyAGraph middle _ = Nothing @@ -368,23 +318,8 @@ middleRemoveDeads live (NotSpillOrReload m) = middle m --------------------- --- register usage - -instance UserOfLocalRegs m => UserOfLocalRegs (ExtendWithSpills m) where - foldRegsUsed f z (Spill regs) = foldRegsUsed f z regs - foldRegsUsed _f z (Reload _) = z - foldRegsUsed f z (NotSpillOrReload m) = foldRegsUsed f z m - ---------------------- -- prettyprinting -instance Outputable m => Outputable (ExtendWithSpills m) where - ppr (Spill regs) = ppr_regs "Spill" regs - ppr (Reload regs) = ppr_regs "Reload" regs - ppr (NotSpillOrReload m) = ppr m - -instance Outputable m => DebugNodes (ExtendWithSpills m) Last - ppr_regs :: String -> RegSet -> SDoc ppr_regs s regs = text s <+> commafy (map ppr $ uniqSetToList regs) where commafy xs = hsep $ punctuate comma xs diff --git a/compiler/cmm/CmmUtils.hs b/compiler/cmm/CmmUtils.hs index 1922ee05c4..841f65b7fa 100644 --- a/compiler/cmm/CmmUtils.hs +++ b/compiler/cmm/CmmUtils.hs @@ -17,6 +17,9 @@ module CmmUtils( CmmStmts, noStmts, oneStmt, mkStmts, plusStmts, stmtList, isNopStmt, + primRepCmmType, primRepForeignHint, + typeCmmType, typeForeignHint, + isTrivialCmmExpr, hasNoGlobalRegs, cmmRegOff, cmmLabelOff, cmmOffset, cmmOffsetLit, cmmIndex, @@ -26,20 +29,57 @@ module CmmUtils( mkLblExpr, - loadArgsIntoTemps, maybeAssignTemp, + maybeAssignTemp, loadArgsIntoTemps ) where #include "HsVersions.h" +import TyCon ( PrimRep(..) ) +import Type ( Type, typePrimRep ) + import CLabel import Cmm -import MachOp import OrdList import Outputable import Unique --------------------------------------------------- -- +-- CmmTypes +-- +--------------------------------------------------- + +primRepCmmType :: PrimRep -> CmmType +primRepCmmType VoidRep = panic "primRepCmmType:VoidRep" +primRepCmmType PtrRep = gcWord +primRepCmmType IntRep = bWord +primRepCmmType WordRep = bWord +primRepCmmType Int64Rep = b64 +primRepCmmType Word64Rep = b64 +primRepCmmType AddrRep = bWord +primRepCmmType FloatRep = f32 +primRepCmmType DoubleRep = f64 + +typeCmmType :: Type -> CmmType +typeCmmType ty = primRepCmmType (typePrimRep ty) + +primRepForeignHint :: PrimRep -> ForeignHint +primRepForeignHint VoidRep = panic "primRepForeignHint:VoidRep" +primRepForeignHint PtrRep = AddrHint +primRepForeignHint IntRep = SignedHint +primRepForeignHint WordRep = NoHint +primRepForeignHint Int64Rep = SignedHint +primRepForeignHint Word64Rep = NoHint +primRepForeignHint AddrRep = AddrHint -- NB! AddrHint, but NonPtrArg +primRepForeignHint FloatRep = NoHint +primRepForeignHint DoubleRep = NoHint + +typeForeignHint :: Type -> ForeignHint +typeForeignHint = primRepForeignHint . typePrimRep + + +--------------------------------------------------- +-- -- CmmStmts -- --------------------------------------------------- @@ -115,12 +155,11 @@ hasNoGlobalRegs _ = False --------------------------------------------------- cmmOffsetExpr :: CmmExpr -> CmmExpr -> CmmExpr --- assumes base and offset have the same MachRep +-- assumes base and offset have the same CmmType cmmOffsetExpr e (CmmLit (CmmInt n _)) = cmmOffset e (fromInteger n) -cmmOffsetExpr e byte_off = CmmMachOp (MO_Add (cmmExprRep e)) [e, byte_off] +cmmOffsetExpr e byte_off = CmmMachOp (MO_Add (cmmExprWidth e)) [e, byte_off] -- NB. Do *not* inspect the value of the offset in these smart constructors!!! --- -- because the offset is sometimes involved in a loop in the code generator -- (we don't know the real Hp offset until we've generated code for the entire -- basic block, for example). So we cannot eliminate zero offsets at this @@ -136,9 +175,9 @@ cmmOffset (CmmMachOp (MO_Add rep) [expr, CmmLit (CmmInt byte_off1 _rep)]) byte_o = CmmMachOp (MO_Add rep) [expr, CmmLit (CmmInt (byte_off1 + toInteger byte_off2) rep)] cmmOffset expr byte_off - = CmmMachOp (MO_Add rep) [expr, CmmLit (CmmInt (toInteger byte_off) rep)] + = CmmMachOp (MO_Add width) [expr, CmmLit (CmmInt (toInteger byte_off) width)] where - rep = cmmExprRep expr + width = cmmExprWidth expr -- Smart constructor for CmmRegOff. Same caveats as cmmOffset above. cmmRegOff :: CmmReg -> Int -> CmmExpr @@ -156,21 +195,27 @@ cmmLabelOff lbl 0 = CmmLabel lbl cmmLabelOff lbl byte_off = CmmLabelOff lbl byte_off -- | Useful for creating an index into an array, with a staticaly known offset. -cmmIndex :: MachRep -> CmmExpr -> Int -> CmmExpr -cmmIndex rep base idx = cmmOffset base (idx * machRepByteWidth rep) +-- The type is the element type; used for making the multiplier +cmmIndex :: Width -- Width w + -> CmmExpr -- Address of vector of items of width w + -> Int -- Which element of the vector (0 based) + -> CmmExpr -- Address of i'th element +cmmIndex width base idx = cmmOffset base (idx * widthInBytes width) -- | Useful for creating an index into an array, with an unknown offset. -cmmIndexExpr :: MachRep -> CmmExpr -> CmmExpr -> CmmExpr -cmmIndexExpr rep base (CmmLit (CmmInt n _)) = cmmIndex rep base (fromInteger n) -cmmIndexExpr rep base idx = +cmmIndexExpr :: Width -- Width w + -> CmmExpr -- Address of vector of items of width w + -> CmmExpr -- Which element of the vector (0 based) + -> CmmExpr -- Address of i'th element +cmmIndexExpr width base (CmmLit (CmmInt n _)) = cmmIndex width base (fromInteger n) +cmmIndexExpr width base idx = cmmOffsetExpr base byte_off where - idx_rep = cmmExprRep idx - byte_off = CmmMachOp (MO_Shl idx_rep) [ - idx, CmmLit (mkIntCLit (machRepLogWidth rep))] + idx_w = cmmExprWidth idx + byte_off = CmmMachOp (MO_Shl idx_w) [idx, CmmLit (mkIntCLit (widthInLog width))] -cmmLoadIndex :: MachRep -> CmmExpr -> Int -> CmmExpr -cmmLoadIndex rep expr ix = CmmLoad (cmmIndex rep expr ix) rep +cmmLoadIndex :: CmmType -> CmmExpr -> Int -> CmmExpr +cmmLoadIndex ty expr ix = CmmLoad (cmmIndex (typeWidth ty) expr ix) ty --------------------------------------------------- -- @@ -179,10 +224,10 @@ cmmLoadIndex rep expr ix = CmmLoad (cmmIndex rep expr ix) rep --------------------------------------------------- mkIntCLit :: Int -> CmmLit -mkIntCLit i = CmmInt (toInteger i) wordRep +mkIntCLit i = CmmInt (toInteger i) wordWidth zeroCLit :: CmmLit -zeroCLit = CmmInt 0 wordRep +zeroCLit = CmmInt 0 wordWidth mkLblExpr :: CLabel -> CmmExpr mkLblExpr lbl = CmmLit (CmmLabel lbl) @@ -194,20 +239,21 @@ mkLblExpr lbl = CmmLit (CmmLabel lbl) --------------------------------------------------- loadArgsIntoTemps :: [Unique] - -> CmmActuals - -> ([Unique], [CmmStmt], CmmActuals) + -> HintedCmmActuals + -> ([Unique], [CmmStmt], HintedCmmActuals) loadArgsIntoTemps uniques [] = (uniques, [], []) -loadArgsIntoTemps uniques ((CmmKinded e hint):args) = +loadArgsIntoTemps uniques ((CmmHinted e hint):args) = (uniques'', new_stmts ++ remaining_stmts, - (CmmKinded new_e hint) : remaining_e) + (CmmHinted new_e hint) : remaining_e) where (uniques', new_stmts, new_e) = maybeAssignTemp uniques e (uniques'', remaining_stmts, remaining_e) = loadArgsIntoTemps uniques' args + maybeAssignTemp :: [Unique] -> CmmExpr -> ([Unique], [CmmStmt], CmmExpr) maybeAssignTemp uniques e | hasNoGlobalRegs e = (uniques, [], e) | otherwise = (tail uniques, [CmmAssign local e], CmmReg local) - where local = CmmLocal (LocalReg (head uniques) (cmmExprRep e) GCKindNonPtr) + where local = CmmLocal (LocalReg (head uniques) (cmmExprType e)) diff --git a/compiler/cmm/CmmZipUtil.hs b/compiler/cmm/CmmZipUtil.hs index 6a343f836f..9f0993dc49 100644 --- a/compiler/cmm/CmmZipUtil.hs +++ b/compiler/cmm/CmmZipUtil.hs @@ -14,7 +14,7 @@ import UniqSet -- | Compute the predecessors of each /reachable/ block zipPreds :: LastNode l => LGraph m l -> BlockEnv BlockSet zipPreds g = foldl add emptyBlockEnv (postorder_dfs g) - where add env block@(Block id _) = + where add env block@(Block id _ _) = foldl (\env sid -> let preds = lookupBlockEnv env sid `orElse` emptyBlockSet in extendBlockEnv env sid (extendBlockSet preds id)) diff --git a/compiler/cmm/DFMonad.hs b/compiler/cmm/DFMonad.hs index 209403e540..cce112bff5 100644 --- a/compiler/cmm/DFMonad.hs +++ b/compiler/cmm/DFMonad.hs @@ -7,7 +7,6 @@ module DFMonad , DFM, runDFM, liftToDFM , markGraphRewritten, graphWasRewritten - , freshBlockId , module OptimizationFuel ) where @@ -194,9 +193,6 @@ graphWasRewritten :: DFM f ChangeFlag graphWasRewritten = DFM' f where f _ s = return (df_rewritten s, s) -freshBlockId :: String -> DFM f BlockId -freshBlockId _s = getUniqueM >>= return . BlockId - instance Monad m => Monad (DFM' m f) where DFM' f >>= k = DFM' (\l s -> do (a, s') <- f l s let DFM' f' = k a in f' l s') diff --git a/compiler/cmm/MachOp.hs b/compiler/cmm/MachOp.hs deleted file mode 100644 index 422ed5eac0..0000000000 --- a/compiler/cmm/MachOp.hs +++ /dev/null @@ -1,661 +0,0 @@ -{-# OPTIONS -w #-} --- The above warning supression flag is a temporary kludge. --- While working on this module you are encouraged to remove it and fix --- any warnings in the module. See --- http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings --- for details - ------------------------------------------------------------------------------ --- --- (c) The University of Glasgow 2002-2006 --- --- Low-level machine operations, used in the Cmm datatype. --- ------------------------------------------------------------------------------ - -module MachOp ( - MachRep(..), - machRepBitWidth, - machRepByteWidth, - machRepLogWidth, - isFloatingRep, - - MachHint(..), - - MachOp(..), - pprMachOp, - isCommutableMachOp, - isAssociativeMachOp, - isComparisonMachOp, - resultRepOfMachOp, - machOpArgReps, - maybeInvertComparison, - - CallishMachOp(..), - pprCallishMachOp, - - wordRep, - halfWordRep, - cIntRep, cLongRep, - - mo_wordAdd, - mo_wordSub, - mo_wordEq, - mo_wordNe, - mo_wordMul, - mo_wordSQuot, - mo_wordSRem, - mo_wordSNeg, - mo_wordUQuot, - mo_wordURem, - - mo_wordSGe, - mo_wordSLe, - mo_wordSGt, - mo_wordSLt, - - mo_wordUGe, - mo_wordULe, - mo_wordUGt, - mo_wordULt, - - mo_wordAnd, - mo_wordOr, - mo_wordXor, - mo_wordNot, - mo_wordShl, - mo_wordSShr, - mo_wordUShr, - - mo_u_8To32, - mo_s_8To32, - mo_u_16To32, - mo_s_16To32, - - mo_u_8ToWord, - mo_s_8ToWord, - mo_u_16ToWord, - mo_s_16ToWord, - mo_u_32ToWord, - mo_s_32ToWord, - - mo_32To8, - mo_32To16, - mo_WordTo8, - mo_WordTo16, - mo_WordTo32, - ) where - -#include "HsVersions.h" - -import Constants -import Outputable -import FastString - --- ----------------------------------------------------------------------------- --- MachRep - -{- | -A MachRep is the "representation" of a value in Cmm. It is used for -resource allocation: eg. which kind of register a value should be -stored in. - -The primary requirement is that there exists a function - - cmmExprRep :: CmmExpr -> MachRep - -This means that: - - - a register has an implicit MachRep - - a literal has an implicit MachRep - - an operation (MachOp) has an implicit result MachRep - -It also means that we can check that the arguments to a MachOp have -the correct MachRep, i.e. we can do a kind of lint-style type checking -on Cmm. --} - -data MachRep - = I8 - | I16 - | I32 - | I64 - | I128 - | F32 - | F64 - | F80 -- extended double-precision, used in x86 native codegen only. - deriving (Eq, Ord, Show) - -mrStr I8 = sLit "I8" -mrStr I16 = sLit "I16" -mrStr I32 = sLit "I32" -mrStr I64 = sLit "I64" -mrStr I128 = sLit "I128" -mrStr F32 = sLit "F32" -mrStr F64 = sLit "F64" -mrStr F80 = sLit "F80" - -instance Outputable MachRep where - ppr rep = ptext (mrStr rep) - -{- -Implementation notes: - -It might suffice to keep just a width, without distinguishing between -floating and integer types. However, keeping the distinction will -help the native code generator to assign registers more easily. --} - -{- -Should a MachRep include a signed vs. unsigned distinction? - -This is very much like a "hint" in C-- terminology: it isn't necessary -in order to generate correct code, but it might be useful in that the -compiler can generate better code if it has access to higher-level -hints about data. This is important at call boundaries, because the -definition of a function is not visible at all of its call sites, so -the compiler cannot infer the hints. - -Here in Cmm, we're taking a slightly different approach. We include -the int vs. float hint in the MachRep, because (a) the majority of -platforms have a strong distinction between float and int registers, -and (b) we don't want to do any heavyweight hint-inference in the -native code backend in order to get good code. We're treating the -hint more like a type: our Cmm is always completely consistent with -respect to hints. All coercions between float and int are explicit. - -What about the signed vs. unsigned hint? This information might be -useful if we want to keep sub-word-sized values in word-size -registers, which we must do if we only have word-sized registers. - -On such a system, there are two straightforward conventions for -representing sub-word-sized values: - -(a) Leave the upper bits undefined. Comparison operations must - sign- or zero-extend both operands before comparing them, - depending on whether the comparison is signed or unsigned. - -(b) Always keep the values sign- or zero-extended as appropriate. - Arithmetic operations must narrow the result to the appropriate - size. - -A clever compiler might not use either (a) or (b) exclusively, instead -it would attempt to minimize the coercions by analysis: the same kind -of analysis that propagates hints around. In Cmm we don't want to -have to do this, so we plump for having richer types and keeping the -type information consistent. - -If signed/unsigned hints are missing from MachRep, then the only -choice we have is (a), because we don't know whether the result of an -operation should be sign- or zero-extended. - -Many architectures have extending load operations, which work well -with (b). To make use of them with (a), you need to know whether the -value is going to be sign- or zero-extended by an enclosing comparison -(for example), which involves knowing above the context. This is -doable but more complex. - -Further complicating the issue is foreign calls: a foreign calling -convention can specify that signed 8-bit quantities are passed as -sign-extended 32 bit quantities, for example (this is the case on the -PowerPC). So we *do* need sign information on foreign call arguments. - -Pros for adding signed vs. unsigned to MachRep: - - - It would let us use convention (b) above, and get easier - code generation for extending loads. - - - Less information required on foreign calls. - - - MachOp type would be simpler - -Cons: - - - More complexity - - - What is the MachRep for a VanillaReg? Currently it is - always wordRep, but now we have to decide whether it is - signed or unsigned. The same VanillaReg can thus have - different MachReps in different parts of the program. - - - Extra coercions cluttering up expressions. - -Currently for GHC, the foreign call point is moot, because we do our -own promotion of sub-word-sized values to word-sized values. The Int8 -type is represnted by an Int# which is kept sign-extended at all times -(this is slightly naughty, because we're making assumptions about the -C calling convention rather early on in the compiler). However, given -this, the cons outweigh the pros. - --} - - -machRepBitWidth :: MachRep -> Int -machRepBitWidth I8 = 8 -machRepBitWidth I16 = 16 -machRepBitWidth I32 = 32 -machRepBitWidth I64 = 64 -machRepBitWidth I128 = 128 -machRepBitWidth F32 = 32 -machRepBitWidth F64 = 64 -machRepBitWidth F80 = 80 - -machRepByteWidth :: MachRep -> Int -machRepByteWidth I8 = 1 -machRepByteWidth I16 = 2 -machRepByteWidth I32 = 4 -machRepByteWidth I64 = 8 -machRepByteWidth I128 = 16 -machRepByteWidth F32 = 4 -machRepByteWidth F64 = 8 -machRepByteWidth F80 = 10 - --- log_2 of the width in bytes, useful for generating shifts. -machRepLogWidth :: MachRep -> Int -machRepLogWidth I8 = 0 -machRepLogWidth I16 = 1 -machRepLogWidth I32 = 2 -machRepLogWidth I64 = 3 -machRepLogWidth I128 = 4 -machRepLogWidth F32 = 2 -machRepLogWidth F64 = 3 -machRepLogWidth F80 = panic "machRepLogWidth: F80" - -isFloatingRep :: MachRep -> Bool -isFloatingRep F32 = True -isFloatingRep F64 = True -isFloatingRep F80 = True -isFloatingRep _ = False - --- ----------------------------------------------------------------------------- --- Hints - -{- -A hint gives a little more information about a data value. Hints are -used on the arguments to a foreign call, where the code generator needs -to know some extra information on top of the MachRep of each argument in -order to generate a correct call. --} - -data MachHint - = NoHint - | PtrHint - | SignedHint - | FloatHint - deriving Eq - -mhStr NoHint = sLit "NoHint" -mhStr PtrHint = sLit "PtrHint" -mhStr SignedHint = sLit "SignedHint" -mhStr FloatHint = sLit "FloatHint" - -instance Outputable MachHint where - ppr hint = ptext (mhStr hint) - --- ----------------------------------------------------------------------------- --- MachOp - -{- | -Machine-level primops; ones which we can reasonably delegate to the -native code generators to handle. Basically contains C's primops -and no others. - -Nomenclature: all ops indicate width and signedness, where -appropriate. Widths: 8\/16\/32\/64 means the given size, obviously. -Nat means the operation works on STG word sized objects. -Signedness: S means signed, U means unsigned. For operations where -signedness is irrelevant or makes no difference (for example -integer add), the signedness component is omitted. - -An exception: NatP is a ptr-typed native word. From the point of -view of the native code generators this distinction is irrelevant, -but the C code generator sometimes needs this info to emit the -right casts. --} - -data MachOp - - -- Integer operations - = MO_Add MachRep - | MO_Sub MachRep - | MO_Eq MachRep - | MO_Ne MachRep - | MO_Mul MachRep -- low word of multiply - | MO_S_MulMayOflo MachRep -- nonzero if signed multiply overflows - | MO_S_Quot MachRep -- signed / (same semantics as IntQuotOp) - | MO_S_Rem MachRep -- signed % (same semantics as IntRemOp) - | MO_S_Neg MachRep -- unary - - | MO_U_MulMayOflo MachRep -- nonzero if unsigned multiply overflows - | MO_U_Quot MachRep -- unsigned / (same semantics as WordQuotOp) - | MO_U_Rem MachRep -- unsigned % (same semantics as WordRemOp) - - -- Signed comparisons (floating-point comparisons also use these) - | MO_S_Ge MachRep - | MO_S_Le MachRep - | MO_S_Gt MachRep - | MO_S_Lt MachRep - - -- Unsigned comparisons - | MO_U_Ge MachRep - | MO_U_Le MachRep - | MO_U_Gt MachRep - | MO_U_Lt MachRep - - -- Bitwise operations. Not all of these may be supported at all sizes, - -- and only integral MachReps are valid. - | MO_And MachRep - | MO_Or MachRep - | MO_Xor MachRep - | MO_Not MachRep - | MO_Shl MachRep - | MO_U_Shr MachRep -- unsigned shift right - | MO_S_Shr MachRep -- signed shift right - - -- Conversions. Some of these will be NOPs. - -- Floating-point conversions use the signed variant. - | MO_S_Conv MachRep{-from-} MachRep{-to-} -- signed conversion - | MO_U_Conv MachRep{-from-} MachRep{-to-} -- unsigned conversion - - deriving (Eq, Show) - -pprMachOp :: MachOp -> SDoc -pprMachOp mo = text (show mo) - - --- These MachOps tend to be implemented by foreign calls in some backends, --- so we separate them out. In Cmm, these can only occur in a --- statement position, in contrast to an ordinary MachOp which can occur --- anywhere in an expression. -data CallishMachOp - = MO_F64_Pwr - | MO_F64_Sin - | MO_F64_Cos - | MO_F64_Tan - | MO_F64_Sinh - | MO_F64_Cosh - | MO_F64_Tanh - | MO_F64_Asin - | MO_F64_Acos - | MO_F64_Atan - | MO_F64_Log - | MO_F64_Exp - | MO_F64_Sqrt - | MO_F32_Pwr - | MO_F32_Sin - | MO_F32_Cos - | MO_F32_Tan - | MO_F32_Sinh - | MO_F32_Cosh - | MO_F32_Tanh - | MO_F32_Asin - | MO_F32_Acos - | MO_F32_Atan - | MO_F32_Log - | MO_F32_Exp - | MO_F32_Sqrt - | MO_WriteBarrier - deriving (Eq, Show) - -pprCallishMachOp :: CallishMachOp -> SDoc -pprCallishMachOp mo = text (show mo) - --- ----------------------------------------------------------------------------- --- Some common MachReps - --- A 'wordRep' is a machine word on the target architecture --- Specifically, it is the size of an Int#, Word#, Addr# --- and the unit of allocation on the stack and the heap --- Any pointer is also guaranteed to be a wordRep. - -wordRep | wORD_SIZE == 4 = I32 - | wORD_SIZE == 8 = I64 - | otherwise = panic "MachOp.wordRep: Unknown word size" - -halfWordRep | wORD_SIZE == 4 = I16 - | wORD_SIZE == 8 = I32 - | otherwise = panic "MachOp.halfWordRep: Unknown word size" - -mo_wordAdd = MO_Add wordRep -mo_wordSub = MO_Sub wordRep -mo_wordEq = MO_Eq wordRep -mo_wordNe = MO_Ne wordRep -mo_wordMul = MO_Mul wordRep -mo_wordSQuot = MO_S_Quot wordRep -mo_wordSRem = MO_S_Rem wordRep -mo_wordSNeg = MO_S_Neg wordRep -mo_wordUQuot = MO_U_Quot wordRep -mo_wordURem = MO_U_Rem wordRep - -mo_wordSGe = MO_S_Ge wordRep -mo_wordSLe = MO_S_Le wordRep -mo_wordSGt = MO_S_Gt wordRep -mo_wordSLt = MO_S_Lt wordRep - -mo_wordUGe = MO_U_Ge wordRep -mo_wordULe = MO_U_Le wordRep -mo_wordUGt = MO_U_Gt wordRep -mo_wordULt = MO_U_Lt wordRep - -mo_wordAnd = MO_And wordRep -mo_wordOr = MO_Or wordRep -mo_wordXor = MO_Xor wordRep -mo_wordNot = MO_Not wordRep -mo_wordShl = MO_Shl wordRep -mo_wordSShr = MO_S_Shr wordRep -mo_wordUShr = MO_U_Shr wordRep - -mo_u_8To32 = MO_U_Conv I8 I32 -mo_s_8To32 = MO_S_Conv I8 I32 -mo_u_16To32 = MO_U_Conv I16 I32 -mo_s_16To32 = MO_S_Conv I16 I32 - -mo_u_8ToWord = MO_U_Conv I8 wordRep -mo_s_8ToWord = MO_S_Conv I8 wordRep -mo_u_16ToWord = MO_U_Conv I16 wordRep -mo_s_16ToWord = MO_S_Conv I16 wordRep -mo_s_32ToWord = MO_S_Conv I32 wordRep -mo_u_32ToWord = MO_U_Conv I32 wordRep - -mo_WordTo8 = MO_U_Conv wordRep I8 -mo_WordTo16 = MO_U_Conv wordRep I16 -mo_WordTo32 = MO_U_Conv wordRep I32 - -mo_32To8 = MO_U_Conv I32 I8 -mo_32To16 = MO_U_Conv I32 I16 - --- cIntRep is the MachRep for a C-language 'int' -#if SIZEOF_INT == 4 -cIntRep = I32 -#elif SIZEOF_INT == 8 -cIntRep = I64 -#endif - -#if SIZEOF_LONG == 4 -cLongRep = I32 -#elif SIZEOF_LONG == 8 -cLongRep = I64 -#endif - --- ---------------------------------------------------------------------------- --- isCommutableMachOp - -{- | -Returns @True@ if the MachOp has commutable arguments. This is used -in the platform-independent Cmm optimisations. - -If in doubt, return @False@. This generates worse code on the -native routes, but is otherwise harmless. --} -isCommutableMachOp :: MachOp -> Bool -isCommutableMachOp mop = - case mop of - MO_Add _ -> True - MO_Eq _ -> True - MO_Ne _ -> True - MO_Mul _ -> True - MO_S_MulMayOflo _ -> True - MO_U_MulMayOflo _ -> True - MO_And _ -> True - MO_Or _ -> True - MO_Xor _ -> True - _other -> False - --- ---------------------------------------------------------------------------- --- isAssociativeMachOp - -{- | -Returns @True@ if the MachOp is associative (i.e. @(x+y)+z == x+(y+z)@) -This is used in the platform-independent Cmm optimisations. - -If in doubt, return @False@. This generates worse code on the -native routes, but is otherwise harmless. --} -isAssociativeMachOp :: MachOp -> Bool -isAssociativeMachOp mop = - case mop of - MO_Add r -> not (isFloatingRep r) - MO_Mul r -> not (isFloatingRep r) - MO_And _ -> True - MO_Or _ -> True - MO_Xor _ -> True - _other -> False - --- ---------------------------------------------------------------------------- --- isComparisonMachOp - -{- | -Returns @True@ if the MachOp is a comparison. - -If in doubt, return False. This generates worse code on the -native routes, but is otherwise harmless. --} -isComparisonMachOp :: MachOp -> Bool -isComparisonMachOp mop = - case mop of - MO_Eq _ -> True - MO_Ne _ -> True - MO_S_Ge _ -> True - MO_S_Le _ -> True - MO_S_Gt _ -> True - MO_S_Lt _ -> True - MO_U_Ge _ -> True - MO_U_Le _ -> True - MO_U_Gt _ -> True - MO_U_Lt _ -> True - _other -> False - --- ----------------------------------------------------------------------------- --- Inverting conditions - --- Sometimes it's useful to be able to invert the sense of a --- condition. Not all conditional tests are invertible: in --- particular, floating point conditionals cannot be inverted, because --- there exist floating-point values which return False for both senses --- of a condition (eg. !(NaN > NaN) && !(NaN /<= NaN)). - -maybeInvertComparison :: MachOp -> Maybe MachOp -maybeInvertComparison op - = case op of - MO_Eq r | not (isFloatingRep r) -> Just (MO_Ne r) - MO_Ne r | not (isFloatingRep r) -> Just (MO_Eq r) - MO_U_Lt r | not (isFloatingRep r) -> Just (MO_U_Ge r) - MO_U_Gt r | not (isFloatingRep r) -> Just (MO_U_Le r) - MO_U_Le r | not (isFloatingRep r) -> Just (MO_U_Gt r) - MO_U_Ge r | not (isFloatingRep r) -> Just (MO_U_Lt r) - MO_S_Lt r | not (isFloatingRep r) -> Just (MO_S_Ge r) - MO_S_Gt r | not (isFloatingRep r) -> Just (MO_S_Le r) - MO_S_Le r | not (isFloatingRep r) -> Just (MO_S_Gt r) - MO_S_Ge r | not (isFloatingRep r) -> Just (MO_S_Lt r) - _other -> Nothing - --- ---------------------------------------------------------------------------- --- resultRepOfMachOp - -{- | -Returns the MachRep of the result of a MachOp. --} -resultRepOfMachOp :: MachOp -> MachRep -resultRepOfMachOp mop = - case mop of - MO_Add r -> r - MO_Sub r -> r - MO_Eq r -> comparisonResultRep - MO_Ne r -> comparisonResultRep - MO_Mul r -> r - MO_S_MulMayOflo r -> r - MO_S_Quot r -> r - MO_S_Rem r -> r - MO_S_Neg r -> r - MO_U_MulMayOflo r -> r - MO_U_Quot r -> r - MO_U_Rem r -> r - - MO_S_Ge r -> comparisonResultRep - MO_S_Le r -> comparisonResultRep - MO_S_Gt r -> comparisonResultRep - MO_S_Lt r -> comparisonResultRep - - MO_U_Ge r -> comparisonResultRep - MO_U_Le r -> comparisonResultRep - MO_U_Gt r -> comparisonResultRep - MO_U_Lt r -> comparisonResultRep - - MO_And r -> r - MO_Or r -> r - MO_Xor r -> r - MO_Not r -> r - MO_Shl r -> r - MO_U_Shr r -> r - MO_S_Shr r -> r - - MO_S_Conv from to -> to - MO_U_Conv from to -> to - - -comparisonResultRep = wordRep -- is it? - - --- ----------------------------------------------------------------------------- --- machOpArgReps - --- | This function is used for debugging only: we can check whether an --- application of a MachOp is "type-correct" by checking that the MachReps of --- its arguments are the same as the MachOp expects. This is used when --- linting a CmmExpr. - -machOpArgReps :: MachOp -> [MachRep] -machOpArgReps op = - case op of - MO_Add r -> [r,r] - MO_Sub r -> [r,r] - MO_Eq r -> [r,r] - MO_Ne r -> [r,r] - MO_Mul r -> [r,r] - MO_S_MulMayOflo r -> [r,r] - MO_S_Quot r -> [r,r] - MO_S_Rem r -> [r,r] - MO_S_Neg r -> [r] - MO_U_MulMayOflo r -> [r,r] - MO_U_Quot r -> [r,r] - MO_U_Rem r -> [r,r] - - MO_S_Ge r -> [r,r] - MO_S_Le r -> [r,r] - MO_S_Gt r -> [r,r] - MO_S_Lt r -> [r,r] - - MO_U_Ge r -> [r,r] - MO_U_Le r -> [r,r] - MO_U_Gt r -> [r,r] - MO_U_Lt r -> [r,r] - - MO_And r -> [r,r] - MO_Or r -> [r,r] - MO_Xor r -> [r,r] - MO_Not r -> [r] - MO_Shl r -> [r,wordRep] - MO_U_Shr r -> [r,wordRep] - MO_S_Shr r -> [r,wordRep] - - MO_S_Conv from to -> [from] - MO_U_Conv from to -> [from] diff --git a/compiler/cmm/MkZipCfg.hs b/compiler/cmm/MkZipCfg.hs index b4053521f9..0b549fad9d 100644 --- a/compiler/cmm/MkZipCfg.hs +++ b/compiler/cmm/MkZipCfg.hs @@ -1,6 +1,7 @@ {-# LANGUAGE ScopedTypeVariables #-} module MkZipCfg ( AGraph, (<*>), catAGraphs + , freshBlockId , emptyAGraph, withFreshLabel, withUnique , mkMiddle, mkMiddles, mkLast, mkZTail, mkBranch, mkLabel, mkIfThenElse, mkWhileDo , outOfLine @@ -166,7 +167,7 @@ catAGraphs :: [AGraph m l] -> AGraph m l emptyAGraph :: AGraph m l mkLabel :: (LastNode l) => - BlockId -> AGraph m l -- graph contains the label + BlockId -> Maybe Int -> AGraph m l -- graph contains the label mkMiddle :: m -> AGraph m l -- graph contains the node mkLast :: (Outputable m, Outputable l, LastNode l) => l -> AGraph m l -- graph contains the node @@ -230,9 +231,9 @@ mkWhileDo :: (Outputable m, Outputable l, LastNode l) -- because it may require the allocation of fresh, unique labels. graphOfAGraph :: AGraph m l -> UniqSM (Graph m l) -lgraphOfAGraph :: AGraph m l -> UniqSM (LGraph m l) +lgraphOfAGraph :: Int -> AGraph m l -> UniqSM (LGraph m l) -- ^ allocate a fresh label for the entry point -labelAGraph :: BlockId -> AGraph m l -> UniqSM (LGraph m l) +labelAGraph :: BlockId -> Int -> AGraph m l -> UniqSM (LGraph m l) -- ^ use the given BlockId as the label of the entry point @@ -261,20 +262,20 @@ emptyAGraph = AGraph return graphOfAGraph (AGraph f) = f emptyGraph emptyGraph = Graph (ZLast LastExit) emptyBlockEnv -labelAGraph id g = +labelAGraph id args g = do Graph tail blocks <- graphOfAGraph g - return $ LGraph id $ insertBlock (Block id tail) blocks + return $ LGraph id args $ insertBlock (Block id Nothing tail) blocks -lgraphOfAGraph g = do id <- freshBlockId "graph entry" - labelAGraph id g +lgraphOfAGraph args g = do id <- freshBlockId "graph entry" + labelAGraph id args g ------------------------------------- -- constructors -mkLabel id = AGraph f +mkLabel id args = AGraph f where f (Graph tail blocks) = return $ Graph (ZLast (mkBranchNode id)) - (insertBlock (Block id tail) blocks) + (insertBlock (Block id args tail) blocks) mkBranch target = mkLast $ mkBranchNode target @@ -314,24 +315,21 @@ outOfLine (AGraph f) = AGraph f' note_this_code_becomes_unreachable emptyEntrance return $ Graph tail' (blocks `plusUFM` blocks') - mkIfThenElse cbranch tbranch fbranch = withFreshLabel "end of if" $ \endif -> withFreshLabel "start of then" $ \tid -> withFreshLabel "start of else" $ \fid -> cbranch tid fid <*> - mkLabel tid <*> tbranch <*> mkBranch endif <*> - mkLabel fid <*> fbranch <*> mkLabel endif - + mkLabel tid Nothing <*> tbranch <*> mkBranch endif <*> + mkLabel fid Nothing <*> fbranch <*> mkLabel endif Nothing mkWhileDo cbranch body = withFreshLabel "loop test" $ \test -> withFreshLabel "loop head" $ \head -> withFreshLabel "end while" $ \endwhile -> -- Forrest Baskett's while-loop layout - mkBranch test <*> mkLabel head <*> body <*> mkLabel test - <*> cbranch head endwhile <*> mkLabel endwhile - + mkBranch test <*> mkLabel head Nothing <*> body <*> mkLabel test Nothing + <*> cbranch head endwhile <*> mkLabel endwhile Nothing -- | Bleat if the insertion of a last node will create unreachable code note_this_code_becomes_unreachable :: @@ -360,6 +358,6 @@ Emitting a Branch at this point is fine: -- thrown away at this spot---there's no reason a BlockId couldn't one day carry -- a string. -freshBlockId :: String -> UniqSM BlockId -freshBlockId _ = do { u <- getUniqueM; return $ BlockId u } +freshBlockId :: MonadUnique m => String -> m BlockId +freshBlockId _s = getUniqueM >>= return . BlockId diff --git a/compiler/cmm/MkZipCfgCmm.hs b/compiler/cmm/MkZipCfgCmm.hs index dd29aa8bf2..1d80650858 100644 --- a/compiler/cmm/MkZipCfgCmm.hs +++ b/compiler/cmm/MkZipCfgCmm.hs @@ -13,7 +13,8 @@ module MkZipCfgCmm , (<*>), catAGraphs, mkLabel, mkBranch , emptyAGraph, withFreshLabel, withUnique, outOfLine , lgraphOfAGraph, graphOfAGraph, labelAGraph - , CmmZ, CmmTopZ, CmmGraph, CmmBlock, CmmAGraph, Middle, Last, Convention(..) + , CmmZ, CmmTopZ, CmmGraph, CmmBlock, CmmAGraph + , Middle, Last, Convention(..), ForeignConvention(..), MidCallTarget(..), Transfer(..) ) where @@ -22,10 +23,9 @@ where import BlockId import CmmExpr import Cmm ( GenCmm(..), GenCmmTop(..), CmmStatic, CmmInfo - , CmmCallTarget(..), CmmActuals, CmmFormals, CmmFormalsWithoutKinds - , CmmKinded (..) + , CmmActuals, CmmFormals ) -import MachOp (MachHint(..), wordRep) +import CmmCallConv (assignArgumentsPos, ParamLocation(..)) import ZipCfgCmmRep hiding (CmmGraph, CmmAGraph, CmmBlock, CmmZ, CmmTopZ) -- to make this module more self-contained, the above definitions are -- duplicated below @@ -34,8 +34,9 @@ import PprCmm() import ClosureInfo import FastString import ForeignCall -import ZipCfg import MkZipCfg +import Panic +import ZipCfg type CmmGraph = LGraph Middle Last type CmmAGraph = AGraph Middle Last @@ -43,6 +44,8 @@ type CmmBlock = Block Middle Last type CmmZ = GenCmm CmmStatic CmmInfo CmmGraph type CmmTopZ = GenCmmTop CmmStatic CmmInfo CmmGraph +data Transfer = Call | Jump | Ret deriving Eq + ---------- No-ops mkNop :: CmmAGraph mkComment :: FastString -> CmmAGraph @@ -55,7 +58,7 @@ mkStore :: CmmExpr -> CmmExpr -> CmmAGraph mkCall :: CmmExpr -> CCallConv -> CmmFormals -> CmmActuals -> C_SRT -> CmmAGraph mkCmmCall :: CmmExpr -> CmmFormals -> CmmActuals -> C_SRT -> CmmAGraph -- Native C-- calling convention -mkUnsafeCall :: CmmCallTarget -> CmmFormals -> CmmActuals -> CmmAGraph +mkUnsafeCall :: MidCallTarget -> CmmFormals -> CmmActuals -> CmmAGraph mkFinalCall :: CmmExpr -> CCallConv -> CmmActuals -> CmmAGraph -- Never returns; like exit() or barf() @@ -63,10 +66,10 @@ mkFinalCall :: CmmExpr -> CCallConv -> CmmActuals -> CmmAGraph mkAddToContext :: CmmExpr -> [CmmExpr] -> CmmAGraph ---------- Control transfer -mkJump :: Area -> CmmExpr -> CmmActuals -> CmmAGraph -mkCbranch :: CmmExpr -> BlockId -> BlockId -> CmmAGraph -mkSwitch :: CmmExpr -> [Maybe BlockId] -> CmmAGraph -mkReturn :: Area -> CmmActuals -> CmmAGraph +mkJump :: CmmExpr -> CmmActuals -> CmmAGraph +mkCbranch :: CmmExpr -> BlockId -> BlockId -> CmmAGraph +mkSwitch :: CmmExpr -> [Maybe BlockId] -> CmmAGraph +mkReturn :: CmmActuals -> CmmAGraph mkCmmIfThenElse :: CmmExpr -> CmmAGraph -> CmmAGraph -> CmmAGraph mkCmmIfThen :: CmmExpr -> CmmAGraph -> CmmAGraph @@ -74,7 +77,7 @@ mkCmmWhileDo :: CmmExpr -> CmmAGraph -> CmmAGraph -- Not to be forgotten, but exported by MkZipCfg: -- mkBranch :: BlockId -> CmmAGraph --- mkLabel :: BlockId -> CmmAGraph +-- mkLabel :: BlockId -> Maybe Int -> CmmAGraph -- outOfLine :: CmmAGraph -> CmmAGraph -- withUnique :: (Unique -> CmmAGraph) -> CmmAGraph -- withFreshLabel :: String -> (BlockId -> CmmAGraph) -> CmmAGraph @@ -88,8 +91,8 @@ mkCmmIfThen e tbranch = withFreshLabel "end of if" $ \endif -> withFreshLabel "start of then" $ \tid -> mkCbranch e tid endif <*> - mkLabel tid <*> tbranch <*> mkBranch endif <*> - mkLabel endif + mkLabel tid Nothing <*> tbranch <*> mkBranch endif <*> + mkLabel endif Nothing @@ -100,65 +103,89 @@ mkComment fs = mkMiddle $ MidComment fs mkAssign l r = mkMiddle $ MidAssign l r mkStore l r = mkMiddle $ MidStore l r -mkCbranch pred ifso ifnot = mkLast $ LastCondBranch pred ifso ifnot + +-- Why are we inserting extra blocks that simply branch to the successors? +-- Because in addition to the branch instruction, @mkBranch@ will insert +-- a necessary adjustment to the stack pointer. +mkCbranch pred ifso ifnot = mkLast (LastCondBranch pred ifso ifnot) mkSwitch e tbl = mkLast $ LastSwitch e tbl mkUnsafeCall tgt results actuals = mkMiddle $ MidUnsafeCall tgt results actuals mkAddToContext ra actuals = mkMiddle $ MidAddToContext ra actuals cmmResConv :: Convention -cmmResConv = ConventionStandard CmmCallConv Results - -copyIn :: Convention -> Area -> CmmFormals -> [Middle] -copyIn _ area formals = reverse $ snd $ foldl ci (1, []) formals - where ci (n, ms) v = (n+1, MidAssign (CmmLocal $ kindlessCmm v) - (CmmLoad (CmmStackSlot area n) wordRep) : ms) - -copyOut :: Convention -> Area -> CmmActuals -> [Middle] -copyOut conv area actuals = moveSP conv $ snd $ foldl co (1, []) actuals - where moveSP (ConventionStandard _ Arguments) args = - MidAssign spReg (outgoingSlot area) : reverse args - moveSP _ args = reverse $ MidAssign spReg (outgoingSlot area) : args - co (n, ms) v = (n+1, MidStore (CmmStackSlot area n) (kindlessCmm v) : ms) -mkEntry :: Area -> Convention -> CmmFormalsWithoutKinds -> [Middle] -mkEntry area conv formals = copyIn conv area fs - where fs = map (\f -> CmmKinded f NoHint) formals +cmmResConv = Native + +-- Return the number of bytes used for copying arguments, as well as the +-- instructions to copy the arguments. +copyIn :: Convention -> Bool -> Area -> CmmFormals -> (Int, [Middle]) +copyIn _ isCall area formals = + foldr ci (init_offset, []) $ assignArgumentsPos isCall localRegType formals + where ci (reg, RegisterParam r) (n, ms) = + (n, MidAssign (CmmLocal reg) (CmmReg $ CmmGlobal r) : ms) + ci (reg, StackParam off) (n, ms) = + let ty = localRegType reg + off' = off + init_offset + in (max n off', + MidAssign (CmmLocal reg) (CmmLoad (CmmStackSlot area off') ty) : ms) + init_offset = widthInBytes wordWidth + +-- The argument layout function ignores the pointer to the info table, so we slot that +-- in here. When copying-out to a young area, we set the info table for return +-- and adjust the offsets of the other parameters. +-- If this is a call instruction, we adjust the offsets of the other parameters. +copyOut :: Convention -> Transfer -> Area -> CmmActuals -> (Int, [Middle]) +copyOut _ transfer area@(CallArea a) actuals = + foldr co (init_offset, []) args' + where args = assignArgumentsPos skip_node cmmExprType actuals + skip_node = transfer /= Ret + (setRA, init_offset) = + case a of Young id -> -- set RA if making a call + if transfer == Call then + ([(CmmLit (CmmLabel (infoTblLbl id)), + StackParam init_offset)], ra_width) + else ([], 0) + Old -> ([], ra_width) + ra_width = widthInBytes wordWidth + args' = foldl adjust setRA args + where adjust rst (v, StackParam off) = (v, StackParam (off + init_offset)) : rst + adjust rst x@(_, RegisterParam _) = x : rst + co (v, RegisterParam r) (n, ms) = (n, MidAssign (CmmGlobal r) v : ms) + co (v, StackParam off) (n, ms) = + (max n off, MidStore (CmmStackSlot area off) v : ms) +copyOut _ _ (RegSlot _) _ = panic "cannot copy arguments into a register slot" + +mkEntry :: BlockId -> Convention -> CmmFormals -> (Int, CmmAGraph) +mkEntry _ conv formals = + let (off, copies) = copyIn conv False (CallArea Old) formals in + (off, mkMiddles copies) -- I'm not sure how to get the calling conventions right yet, -- and I suspect this should not be resolved until sometime after -- Simon's patch is applied. -- For now, I apply a bogus calling convention: all arguments go on the -- stack, using the same amount of stack space. -lastWithArgs' :: BlockId -> Area -> Convention -> CmmActuals -> Maybe CmmFormals -> - (BlockId -> Last) -> CmmAGraph -lastWithArgs' k area conv actuals formals toLast = - (mkMiddles $ copyOut conv area actuals) <*> - -- adjust the sp - mkLast (toLast k) <*> - case formals of - Just formals -> mkLabel k <*> (mkMiddles $ copyIn conv area formals) - Nothing -> emptyAGraph -lastWithArgs :: Convention -> CmmActuals -> Maybe CmmFormals -> (BlockId -> Last) -> CmmAGraph -lastWithArgs c a f l = - withFreshLabel "call successor" $ \k -> lastWithArgs' k (mkCallArea k a f) c a f l - -always :: a -> b -> a -always x _ = x + +lastWithArgs :: Transfer -> Area -> Convention -> CmmActuals -> (Int -> Last) -> CmmAGraph +lastWithArgs transfer area conv actuals last = + let (outArgs, copies) = copyOut conv transfer area actuals in + mkMiddles copies <*> mkLast (last outArgs) -- The area created for the jump and return arguments is the same area as the -- procedure entry. -mkJump area e actuals = - lastWithArgs' (areaId area) area cmmResConv actuals Nothing $ always $ LastJump e -mkReturn area actuals = - lastWithArgs' (areaId area) area cmmResConv actuals Nothing $ always LastReturn +mkJump e actuals = lastWithArgs Jump (CallArea Old) cmmResConv actuals $ LastJump e +mkReturn actuals = lastWithArgs Ret (CallArea Old) cmmResConv actuals $ LastJump e + where e = CmmStackSlot (CallArea Old) (widthInBytes wordWidth) -mkFinalCall f conv actuals = - lastWithArgs (ConventionStandard conv Arguments) actuals Nothing - $ always $ LastCall f Nothing --mkFinalCall f conv actuals = +mkFinalCall f _ actuals = + lastWithArgs Call (CallArea Old) Native actuals $ LastCall f Nothing mkCmmCall f results actuals srt = mkCall f CmmCallConv results actuals srt -- I'm dropping the SRT, but that should be okay: we plan to reconstruct it later. -mkCall f conv results actuals _ = - lastWithArgs (ConventionStandard conv Arguments) actuals (Just results) - $ \k -> LastCall f (Just k) +mkCall f _ results actuals _ = + withFreshLabel "call successor" $ \k -> + let area = CallArea $ Young k + (off, copyin) = copyIn Native False area results + copyout = lastWithArgs Call area Native actuals $ LastCall f (Just k) + in copyout <*> mkLabel k (Just off) <*> (mkMiddles copyin) diff --git a/compiler/cmm/OptimizationFuel.hs b/compiler/cmm/OptimizationFuel.hs index 3d5b64522d..d9e8365017 100644 --- a/compiler/cmm/OptimizationFuel.hs +++ b/compiler/cmm/OptimizationFuel.hs @@ -128,7 +128,7 @@ fuelDecrementState new_optimizer old new s = optimizer = if diffFuel old new > 0 then new_optimizer else fs_lastpass s -- lGraphOfGraph is here because we need uniques to implement it. -lGraphOfGraph :: Graph m l -> FuelMonad (LGraph m l) -lGraphOfGraph (Graph tail blocks) = +lGraphOfGraph :: Graph m l -> Int -> FuelMonad (LGraph m l) +lGraphOfGraph (Graph tail blocks) args = do entry <- liftM BlockId $ getUniqueM - return $ LGraph entry (insertBlock (Block entry tail) blocks) + return $ LGraph entry args (insertBlock (Block entry Nothing tail) blocks) diff --git a/compiler/cmm/PprC.hs b/compiler/cmm/PprC.hs index 2a01217803..fea2374a9e 100644 --- a/compiler/cmm/PprC.hs +++ b/compiler/cmm/PprC.hs @@ -37,7 +37,6 @@ import BlockId import Cmm import PprCmm () -- Instances only import CLabel -import MachOp import ForeignCall import ClosureInfo @@ -191,18 +190,15 @@ pprStmt stmt = case stmt of CmmAssign dest src -> pprAssign dest src CmmStore dest src - | rep == I64 && wordRep /= I64 - -> ptext (sLit "ASSIGN_Word64") <> - parens (mkP_ <> pprExpr1 dest <> comma <> pprExpr src) <> semi - - | rep == F64 && wordRep /= I64 - -> ptext (sLit "ASSIGN_DBL") <> - parens (mkP_ <> pprExpr1 dest <> comma <> pprExpr src) <> semi + | typeWidth rep == W64 && wordWidth /= W64 + -> (if isFloatType rep then ptext (sLit "ASSIGN_DBL") + else ptext (sLit ("ASSIGN_Word64"))) <> + parens (mkP_ <> pprExpr1 dest <> comma <> pprExpr src) <> semi | otherwise -> hsep [ pprExpr (CmmLoad dest rep), equals, pprExpr src <> semi ] where - rep = cmmExprRep src + rep = cmmExprType src CmmCall (CmmCallee fn cconv) results args safety ret -> maybe_proto $$ @@ -254,16 +250,16 @@ pprStmt stmt = case stmt of CmmJump lbl _params -> mkJMP_(pprExpr lbl) <> semi CmmSwitch arg ids -> pprSwitch arg ids -pprCFunType :: SDoc -> CCallConv -> CmmFormals -> CmmActuals -> SDoc +pprCFunType :: SDoc -> CCallConv -> HintedCmmFormals -> HintedCmmActuals -> SDoc pprCFunType ppr_fn cconv ress args = res_type ress <+> parens (text (ccallConvAttribute cconv) <> ppr_fn) <> parens (commafy (map arg_type args)) where res_type [] = ptext (sLit "void") - res_type [CmmKinded one hint] = machRepHintCType (localRegRep one) hint + res_type [CmmHinted one hint] = machRepHintCType (localRegType one) hint - arg_type (CmmKinded expr hint) = machRepHintCType (cmmExprRep expr) hint + arg_type (CmmHinted expr hint) = machRepHintCType (cmmExprType expr) hint -- --------------------------------------------------------------------- -- unconditional branches @@ -304,11 +300,11 @@ pprSwitch e maybe_ids caseify (ix:ixs, ident) = vcat (map do_fallthrough ixs) $$ final_branch ix where do_fallthrough ix = - hsep [ ptext (sLit "case") , pprHexVal ix wordRep <> colon , + hsep [ ptext (sLit "case") , pprHexVal ix wordWidth <> colon , ptext (sLit "/* fall through */") ] final_branch ix = - hsep [ ptext (sLit "case") , pprHexVal ix wordRep <> colon , + hsep [ ptext (sLit "case") , pprHexVal ix wordWidth <> colon , ptext (sLit "goto") , (pprBlockId ident) <> semi ] -- --------------------------------------------------------------------- @@ -321,7 +317,7 @@ pprSwitch e maybe_ids -- -- has a type in C which is also given by -- --- machRepCType (cmmExprRep e) +-- machRepCType (cmmExprType e) -- -- (similar invariants apply to the rest of the pretty printer). @@ -329,30 +325,8 @@ pprExpr :: CmmExpr -> SDoc pprExpr e = case e of CmmLit lit -> pprLit lit - CmmLoad e I64 | wordRep /= I64 - -> ptext (sLit "PK_Word64") <> parens (mkP_ <> pprExpr1 e) - - CmmLoad e F64 | wordRep /= I64 - -> ptext (sLit "PK_DBL") <> parens (mkP_ <> pprExpr1 e) - - CmmLoad (CmmReg r) rep - | isPtrReg r && rep == wordRep - -> char '*' <> pprAsPtrReg r - - CmmLoad (CmmRegOff r 0) rep - | isPtrReg r && rep == wordRep - -> char '*' <> pprAsPtrReg r - - CmmLoad (CmmRegOff r off) rep - | isPtrReg r && rep == wordRep && (off `rem` wORD_SIZE == 0) - -- ToDo: check that the offset is a word multiple? - -- (For tagging to work, I had to avoid unaligned loads. --ARY) - -> pprAsPtrReg r <> brackets (ppr (off `shiftR` wordShift)) - - CmmLoad expr rep -> - -- the general case: - cLoad expr rep + CmmLoad e ty -> pprLoad e ty CmmReg reg -> pprCastReg reg CmmRegOff reg 0 -> pprCastReg reg @@ -364,6 +338,32 @@ pprExpr e = case e of CmmMachOp mop args -> pprMachOpApp mop args + +pprLoad :: CmmExpr -> CmmType -> SDoc +pprLoad e ty + | width == W64, wordWidth /= W64 + = (if isFloatType ty then ptext (sLit "PK_DBL") + else ptext (sLit "PK_Word64")) + <> parens (mkP_ <> pprExpr1 e) + + | otherwise + = case e of + CmmReg r | isPtrReg r && width == wordWidth && not (isFloatType ty) + -> char '*' <> pprAsPtrReg r + + CmmRegOff r 0 | isPtrReg r && width == wordWidth && not (isFloatType ty) + -> char '*' <> pprAsPtrReg r + + CmmRegOff r off | isPtrReg r && width == wordWidth + , off `rem` wORD_SIZE == 0 && not (isFloatType ty) + -- ToDo: check that the offset is a word multiple? + -- (For tagging to work, I had to avoid unaligned loads. --ARY) + -> pprAsPtrReg r <> brackets (ppr (off `shiftR` wordShift)) + + _other -> cLoad e ty + where + width = typeWidth ty + pprExpr1 :: CmmExpr -> SDoc pprExpr1 (CmmLit lit) = pprLit1 lit pprExpr1 e@(CmmReg _reg) = pprExpr e @@ -406,8 +406,15 @@ pprMachOpApp' mop args _ -> panic "PprC.pprMachOp : machop with wrong number of args" where - pprArg e | signedOp mop = cCast (machRepSignedCType (cmmExprRep e)) e + -- Cast needed for signed integer ops + pprArg e | signedOp mop = cCast (machRep_S_CType (typeWidth (cmmExprType e))) e + | needsFCasts mop = cCast (machRep_F_CType (typeWidth (cmmExprType e))) e | otherwise = pprExpr1 e + needsFCasts (MO_F_Eq _) = False + needsFCasts (MO_F_Ne _) = False + needsFCasts (MO_F_Neg _) = True + needsFCasts (MO_F_Quot _) = True + needsFCasts mop = floatComparison mop -- -------------------------------------------------------------------------- -- Literals @@ -416,7 +423,7 @@ pprLit :: CmmLit -> SDoc pprLit lit = case lit of CmmInt i rep -> pprHexVal i rep - CmmFloat f rep -> parens (machRepCType rep) <> str + CmmFloat f w -> parens (machRep_F_CType w) <> str where d = fromRational f :: Double str | isInfinite d && d < 0 = ptext (sLit "-INFINITY") | isInfinite d = ptext (sLit "INFINITY") @@ -449,29 +456,29 @@ pprLit1 other = pprLit other pprStatics :: [CmmStatic] -> [SDoc] pprStatics [] = [] -pprStatics (CmmStaticLit (CmmFloat f F32) : rest) +pprStatics (CmmStaticLit (CmmFloat f W32) : rest) -- floats are padded to a word, see #1852 - | wORD_SIZE == 8, CmmStaticLit (CmmInt 0 I32) : rest' <- rest + | wORD_SIZE == 8, CmmStaticLit (CmmInt 0 W32) : rest' <- rest = pprLit1 (floatToWord f) : pprStatics rest' | wORD_SIZE == 4 = pprLit1 (floatToWord f) : pprStatics rest | otherwise - = pprPanic "pprStatics: float" (vcat (map (\(CmmStaticLit l) -> ppr (cmmLitRep l)) rest)) -pprStatics (CmmStaticLit (CmmFloat f F64) : rest) + = pprPanic "pprStatics: float" (vcat (map (\(CmmStaticLit l) -> ppr (cmmLitType l)) rest)) +pprStatics (CmmStaticLit (CmmFloat f W64) : rest) = map pprLit1 (doubleToWords f) ++ pprStatics rest -pprStatics (CmmStaticLit (CmmInt i I64) : rest) - | machRepByteWidth I32 == wORD_SIZE +pprStatics (CmmStaticLit (CmmInt i W64) : rest) + | wordWidth == W32 #ifdef WORDS_BIGENDIAN - = pprStatics (CmmStaticLit (CmmInt q I32) : - CmmStaticLit (CmmInt r I32) : rest) + = pprStatics (CmmStaticLit (CmmInt q W32) : + CmmStaticLit (CmmInt r W32) : rest) #else - = pprStatics (CmmStaticLit (CmmInt r I32) : - CmmStaticLit (CmmInt q I32) : rest) + = pprStatics (CmmStaticLit (CmmInt r W32) : + CmmStaticLit (CmmInt q W32) : rest) #endif where r = i .&. 0xffffffff q = i `shiftR` 32 -pprStatics (CmmStaticLit (CmmInt i rep) : rest) - | machRepByteWidth rep /= wORD_SIZE +pprStatics (CmmStaticLit (CmmInt i w) : rest) + | w /= wordWidth = panic "pprStatics: cannot emit a non-word-sized static literal" pprStatics (CmmStaticLit lit : rest) = pprLit1 lit : pprStatics rest @@ -518,18 +525,33 @@ pprMachOp_for_C mop = case mop of MO_U_Quot _ -> char '/' MO_U_Rem _ -> char '%' - -- Signed comparisons (floating-point comparisons also use these) - -- & Unsigned comparisons + -- & Floating-point operations + MO_F_Add _ -> char '+' + MO_F_Sub _ -> char '-' + MO_F_Neg _ -> char '-' + MO_F_Mul _ -> char '*' + MO_F_Quot _ -> char '/' + + -- Signed comparisons MO_S_Ge _ -> ptext (sLit ">=") MO_S_Le _ -> ptext (sLit "<=") MO_S_Gt _ -> char '>' MO_S_Lt _ -> char '<' + -- & Unsigned comparisons MO_U_Ge _ -> ptext (sLit ">=") MO_U_Le _ -> ptext (sLit "<=") MO_U_Gt _ -> char '>' MO_U_Lt _ -> char '<' + -- & Floating-point comparisons + MO_F_Eq _ -> ptext (sLit "==") + MO_F_Ne _ -> ptext (sLit "!=") + MO_F_Ge _ -> ptext (sLit ">=") + MO_F_Le _ -> ptext (sLit "<=") + MO_F_Gt _ -> char '>' + MO_F_Lt _ -> char '<' + -- Bitwise operations. Not all of these may be supported at all -- sizes, and only integral MachReps are valid. MO_And _ -> char '&' @@ -540,29 +562,31 @@ pprMachOp_for_C mop = case mop of MO_U_Shr _ -> ptext (sLit ">>") -- unsigned shift right MO_S_Shr _ -> ptext (sLit ">>") -- signed shift right --- Conversions. Some of these will be NOPs. +-- Conversions. Some of these will be NOPs, but never those that convert +-- between ints and floats. -- Floating-point conversions use the signed variant. -- We won't know to generate (void*) casts here, but maybe from -- context elsewhere -- noop casts - MO_U_Conv I8 I8 -> empty - MO_U_Conv I16 I16 -> empty - MO_U_Conv I32 I32 -> empty - MO_U_Conv I64 I64 -> empty - MO_U_Conv I128 I128 -> empty - MO_S_Conv I8 I8 -> empty - MO_S_Conv I16 I16 -> empty - MO_S_Conv I32 I32 -> empty - MO_S_Conv I64 I64 -> empty - MO_S_Conv I128 I128 -> empty - - MO_U_Conv _from to -> parens (machRepCType to) - MO_S_Conv _from to -> parens (machRepSignedCType to) - - _ -> panic "PprC.pprMachOp_for_C: unknown machop" - -signedOp :: MachOp -> Bool + MO_UU_Conv from to | from == to -> empty + MO_UU_Conv _from to -> parens (machRep_U_CType to) + + MO_SS_Conv from to | from == to -> empty + MO_SS_Conv _from to -> parens (machRep_S_CType to) + + -- TEMPORARY: the old code didn't check this case, so let's leave it out + -- to facilitate comparisons against the old output code. + --MO_FF_Conv from to | from == to -> empty + MO_FF_Conv _from to -> parens (machRep_F_CType to) + + MO_SF_Conv _from to -> parens (machRep_F_CType to) + MO_FS_Conv _from to -> parens (machRep_S_CType to) + + _ -> pprTrace "offending mop" (ptext $ sLit $ show mop) $ + panic "PprC.pprMachOp_for_C: unknown machop" + +signedOp :: MachOp -> Bool -- Argument type(s) are signed ints signedOp (MO_S_Quot _) = True signedOp (MO_S_Rem _) = True signedOp (MO_S_Neg _) = True @@ -571,9 +595,19 @@ signedOp (MO_S_Le _) = True signedOp (MO_S_Gt _) = True signedOp (MO_S_Lt _) = True signedOp (MO_S_Shr _) = True -signedOp (MO_S_Conv _ _) = True +signedOp (MO_SS_Conv _ _) = True +signedOp (MO_SF_Conv _ _) = True signedOp _ = False +floatComparison :: MachOp -> Bool -- comparison between float args +floatComparison (MO_F_Eq _) = True +floatComparison (MO_F_Ne _) = True +floatComparison (MO_F_Ge _) = True +floatComparison (MO_F_Le _) = True +floatComparison (MO_F_Gt _) = True +floatComparison (MO_F_Lt _) = True +floatComparison _ = False + -- --------------------------------------------------------------------- -- tend to be implemented by foreign calls @@ -692,9 +726,13 @@ isFixedPtrReg (CmmLocal _) = False isFixedPtrReg (CmmGlobal r) = isFixedPtrGlobalReg r -- True if (pprAsPtrReg reg) will give an expression with type StgPtr +-- JD: THIS IS HORRIBLE AND SHOULD BE RENAMED, AT THE VERY LEAST. +-- THE GARBAGE WITH THE VNonGcPtr HELPS MATCH THE OLD CODE GENERATOR'S OUTPUT; +-- I'M NOT SURE IF IT SHOULD REALLY STAY THAT WAY. isPtrReg :: CmmReg -> Bool isPtrReg (CmmLocal _) = False -isPtrReg (CmmGlobal (VanillaReg n)) = True -- if we print via pprAsPtrReg +isPtrReg (CmmGlobal (VanillaReg n VGcPtr)) = True -- if we print via pprAsPtrReg +isPtrReg (CmmGlobal (VanillaReg n VNonGcPtr)) = False --if we print via pprAsPtrReg isPtrReg (CmmGlobal reg) = isFixedPtrGlobalReg reg -- True if this global reg has type StgPtr @@ -706,7 +744,7 @@ isFixedPtrGlobalReg SpLim = True isFixedPtrGlobalReg _ = False -- True if in C this register doesn't have the type given by --- (machRepCType (cmmRegRep reg)), so it has to be cast. +-- (machRepCType (cmmRegType reg)), so it has to be cast. isStrangeTypeReg :: CmmReg -> Bool isStrangeTypeReg (CmmLocal _) = False isStrangeTypeReg (CmmGlobal g) = isStrangeTypeGlobal g @@ -731,12 +769,16 @@ pprReg r = case r of CmmGlobal global -> pprGlobalReg global pprAsPtrReg :: CmmReg -> SDoc -pprAsPtrReg (CmmGlobal (VanillaReg n)) = char 'R' <> int n <> ptext (sLit ".p") +pprAsPtrReg (CmmGlobal (VanillaReg n gcp)) + = WARN( gcp /= VGcPtr, ppr n ) char 'R' <> int n <> ptext (sLit ".p") pprAsPtrReg other_reg = pprReg other_reg pprGlobalReg :: GlobalReg -> SDoc pprGlobalReg gr = case gr of - VanillaReg n -> char 'R' <> int n <> ptext (sLit ".w") + VanillaReg n _ -> char 'R' <> int n <> ptext (sLit ".w") + -- pprGlobalReg prints a VanillaReg as a .w regardless + -- Example: R1.w = R1.w & (-0x8UL); + -- JMP_(*R1.p); FloatReg n -> char 'F' <> int n DoubleReg n -> char 'D' <> int n LongReg n -> char 'L' <> int n @@ -753,12 +795,12 @@ pprGlobalReg gr = case gr of GCFun -> ptext (sLit "stg_gc_fun") pprLocalReg :: LocalReg -> SDoc -pprLocalReg (LocalReg uniq _ _) = char '_' <> ppr uniq +pprLocalReg (LocalReg uniq _) = char '_' <> ppr uniq -- ----------------------------------------------------------------------------- -- Foreign Calls -pprCall :: SDoc -> CCallConv -> CmmFormals -> CmmActuals -> CmmSafety +pprCall :: SDoc -> CCallConv -> HintedCmmFormals -> HintedCmmActuals -> CmmSafety -> SDoc pprCall ppr_fn cconv results args _ @@ -781,26 +823,27 @@ pprCall ppr_fn cconv results args _ ppr_assign results (ppr_fn <> parens (commafy (map pprArg args))) <> semi where ppr_assign [] rhs = rhs - ppr_assign [CmmKinded one hint] rhs + ppr_assign [CmmHinted one hint] rhs = pprLocalReg one <> ptext (sLit " = ") - <> pprUnHint hint (localRegRep one) <> rhs + <> pprUnHint hint (localRegType one) <> rhs ppr_assign _other _rhs = panic "pprCall: multiple results" - pprArg (CmmKinded expr hint) - | hint `elem` [PtrHint,SignedHint] - = cCast (machRepHintCType (cmmExprRep expr) hint) expr + pprArg (CmmHinted expr AddrHint) + = cCast (ptext (sLit "void *")) expr -- see comment by machRepHintCType below - pprArg (CmmKinded expr _other) - = pprExpr expr + pprArg (CmmHinted expr SignedHint) + = cCast (machRep_S_CType $ typeWidth $ cmmExprType expr) expr + pprArg (CmmHinted expr _other) + = pprExpr expr - pprUnHint PtrHint rep = parens (machRepCType rep) + pprUnHint AddrHint rep = parens (machRepCType rep) pprUnHint SignedHint rep = parens (machRepCType rep) pprUnHint _ _ = empty pprGlobalRegName :: GlobalReg -> SDoc pprGlobalRegName gr = case gr of - VanillaReg n -> char 'R' <> int n -- without the .w suffix - _ -> pprGlobalReg gr + VanillaReg n _ -> char 'R' <> int n -- without the .w suffix + _ -> pprGlobalReg gr -- Currently we only have these two calling conventions, but this might -- change in the future... @@ -823,7 +866,7 @@ pprDataExterns statics where (_, lbls) = runTE (mapM_ te_Static statics) pprTempDecl :: LocalReg -> SDoc -pprTempDecl l@(LocalReg _ rep _) +pprTempDecl l@(LocalReg _ rep) = hcat [ machRepCType rep, space, pprLocalReg l, semi ] pprExternDecl :: Bool -> CLabel -> SDoc @@ -832,11 +875,11 @@ pprExternDecl in_srt lbl | not (needsCDecl lbl) = empty | Just sz <- foreignLabelStdcallInfo lbl = stdcall_decl sz | otherwise = - hcat [ visibility, label_type (labelType lbl), + hcat [ visibility, label_type lbl, lparen, pprCLabel lbl, text ");" ] where - label_type CodeLabel = ptext (sLit "F_") - label_type DataLabel = ptext (sLit "I_") + label_type lbl | isCFunctionLabel lbl = ptext (sLit "F_") + | otherwise = ptext (sLit "I_") visibility | externallyVisibleCLabel lbl = char 'E' @@ -847,7 +890,7 @@ pprExternDecl in_srt lbl -- add the @n suffix to the label (#2276) stdcall_decl sz = ptext (sLit "extern __attribute__((stdcall)) void ") <> pprCLabel lbl - <> parens (commafy (replicate (sz `quot` wORD_SIZE) (machRepCType wordRep))) + <> parens (commafy (replicate (sz `quot` wORD_SIZE) (machRep_U_CType wordWidth))) <> semi type TEState = (UniqSet LocalReg, FiniteMap CLabel ()) @@ -882,8 +925,8 @@ te_Lit _ = return () te_Stmt :: CmmStmt -> TE () te_Stmt (CmmAssign r e) = te_Reg r >> te_Expr e te_Stmt (CmmStore l r) = te_Expr l >> te_Expr r -te_Stmt (CmmCall _ rs es _ _) = mapM_ (te_temp.kindlessCmm) rs >> - mapM_ (te_Expr.kindlessCmm) es +te_Stmt (CmmCall _ rs es _ _) = mapM_ (te_temp.hintlessCmm) rs >> + mapM_ (te_Expr.hintlessCmm) es te_Stmt (CmmCondBranch e _) = te_Expr e te_Stmt (CmmSwitch e _) = te_Expr e te_Stmt (CmmJump e _) = te_Expr e @@ -907,7 +950,7 @@ te_Reg _ = return () cCast :: SDoc -> CmmExpr -> SDoc cCast ty expr = parens ty <> pprExpr1 expr -cLoad :: CmmExpr -> MachRep -> SDoc +cLoad :: CmmExpr -> CmmType -> SDoc #ifdef BEWARE_LOAD_STORE_ALIGNMENT cLoad expr rep = let decl = machRepCType rep <+> ptext (sLit "x") <> semi @@ -919,41 +962,50 @@ cLoad expr rep = cLoad expr rep = char '*' <> parens (cCast (machRepPtrCType rep) expr) #endif +isCmmWordType :: CmmType -> Bool +-- True of GcPtrReg/NonGcReg of native word size +isCmmWordType ty = not (isFloatType ty) + && typeWidth ty == wordWidth + -- This is for finding the types of foreign call arguments. For a pointer -- argument, we always cast the argument to (void *), to avoid warnings from -- the C compiler. -machRepHintCType :: MachRep -> MachHint -> SDoc -machRepHintCType rep PtrHint = ptext (sLit "void *") -machRepHintCType rep SignedHint = machRepSignedCType rep +machRepHintCType :: CmmType -> ForeignHint -> SDoc +machRepHintCType rep AddrHint = ptext (sLit "void *") +machRepHintCType rep SignedHint = machRep_S_CType (typeWidth rep) machRepHintCType rep _other = machRepCType rep -machRepPtrCType :: MachRep -> SDoc -machRepPtrCType r | r == wordRep = ptext (sLit "P_") - | otherwise = machRepCType r <> char '*' - -machRepCType :: MachRep -> SDoc -machRepCType r | r == wordRep = ptext (sLit "W_") - | otherwise = sized_type - where sized_type = case r of - I8 -> ptext (sLit "StgWord8") - I16 -> ptext (sLit "StgWord16") - I32 -> ptext (sLit "StgWord32") - I64 -> ptext (sLit "StgWord64") - F32 -> ptext (sLit "StgFloat") -- ToDo: correct? - F64 -> ptext (sLit "StgDouble") - _ -> panic "machRepCType" - -machRepSignedCType :: MachRep -> SDoc -machRepSignedCType r | r == wordRep = ptext (sLit "I_") - | otherwise = sized_type - where sized_type = case r of - I8 -> ptext (sLit "StgInt8") - I16 -> ptext (sLit "StgInt16") - I32 -> ptext (sLit "StgInt32") - I64 -> ptext (sLit "StgInt64") - F32 -> ptext (sLit "StgFloat") -- ToDo: correct? - F64 -> ptext (sLit "StgDouble") - _ -> panic "machRepCType" +machRepPtrCType :: CmmType -> SDoc +machRepPtrCType r | isCmmWordType r = ptext (sLit "P_") + | otherwise = machRepCType r <> char '*' + +machRepCType :: CmmType -> SDoc +machRepCType ty | isFloatType ty = machRep_F_CType w + | otherwise = machRep_U_CType w + where + w = typeWidth ty + +machRep_F_CType :: Width -> SDoc +machRep_F_CType W32 = ptext (sLit "StgFloat") -- ToDo: correct? +machRep_F_CType W64 = ptext (sLit "StgDouble") +machRep_F_CType _ = panic "machRep_F_CType" + +machRep_U_CType :: Width -> SDoc +machRep_U_CType w | w == wordWidth = ptext (sLit "W_") +machRep_U_CType W8 = ptext (sLit "StgWord8") +machRep_U_CType W16 = ptext (sLit "StgWord16") +machRep_U_CType W32 = ptext (sLit "StgWord32") +machRep_U_CType W64 = ptext (sLit "StgWord64") +machRep_U_CType _ = panic "machRep_U_CType" + +machRep_S_CType :: Width -> SDoc +machRep_S_CType w | w == wordWidth = ptext (sLit "I_") +machRep_S_CType W8 = ptext (sLit "StgInt8") +machRep_S_CType W16 = ptext (sLit "StgInt16") +machRep_S_CType W32 = ptext (sLit "StgInt32") +machRep_S_CType W64 = ptext (sLit "StgInt64") +machRep_S_CType _ = panic "machRep_S_CType" + -- --------------------------------------------------------------------- -- print strings as valid C strings @@ -982,8 +1034,8 @@ charToC w = -- can safely initialise to static locations. big_doubles - | machRepByteWidth F64 == 2 * wORD_SIZE = True - | machRepByteWidth F64 == wORD_SIZE = False + | widthInBytes W64 == 2 * wORD_SIZE = True + | widthInBytes W64 == wORD_SIZE = False | otherwise = panic "big_doubles" castFloatToIntArray :: STUArray s Int Float -> ST s (STUArray s Int Int) @@ -1000,7 +1052,7 @@ floatToWord r writeArray arr 0 (fromRational r) arr' <- castFloatToIntArray arr i <- readArray arr' 0 - return (CmmInt (toInteger i) wordRep) + return (CmmInt (toInteger i) wordWidth) ) doubleToWords :: Rational -> [CmmLit] @@ -1012,8 +1064,8 @@ doubleToWords r arr' <- castDoubleToIntArray arr i1 <- readArray arr' 0 i2 <- readArray arr' 1 - return [ CmmInt (toInteger i1) wordRep - , CmmInt (toInteger i2) wordRep + return [ CmmInt (toInteger i1) wordWidth + , CmmInt (toInteger i2) wordWidth ] ) | otherwise -- doubles are 1 word @@ -1022,20 +1074,20 @@ doubleToWords r writeArray arr 0 (fromRational r) arr' <- castDoubleToIntArray arr i <- readArray arr' 0 - return [ CmmInt (toInteger i) wordRep ] + return [ CmmInt (toInteger i) wordWidth ] ) -- --------------------------------------------------------------------------- -- Utils wordShift :: Int -wordShift = machRepLogWidth wordRep +wordShift = widthInLog wordWidth commafy :: [SDoc] -> SDoc commafy xs = hsep $ punctuate comma xs -- Print in C hex format: 0x13fa -pprHexVal :: Integer -> MachRep -> SDoc +pprHexVal :: Integer -> Width -> SDoc pprHexVal 0 _ = ptext (sLit "0x0") pprHexVal w rep | w < 0 = parens (char '-' <> ptext (sLit "0x") <> go (-w) <> repsuffix rep) @@ -1048,9 +1100,9 @@ pprHexVal w rep -- warnings about integer overflow from gcc. -- on 32-bit platforms, add "ULL" to 64-bit literals - repsuffix I64 | wORD_SIZE == 4 = ptext (sLit "ULL") + repsuffix W64 | wORD_SIZE == 4 = ptext (sLit "ULL") -- on 64-bit platforms with 32-bit int, add "L" to 64-bit literals - repsuffix I64 | cINT_SIZE == 4 = ptext (sLit "UL") + repsuffix W64 | cINT_SIZE == 4 = ptext (sLit "UL") repsuffix _ = char 'U' go 0 = empty diff --git a/compiler/cmm/PprCmm.hs b/compiler/cmm/PprCmm.hs index e801aeee26..4478dfd966 100644 --- a/compiler/cmm/PprCmm.hs +++ b/compiler/cmm/PprCmm.hs @@ -33,16 +33,17 @@ -- module PprCmm - ( writeCmms, pprCmms, pprCmm, pprStmt, pprExpr, pprSection, pprStatic, pprLit + ( writeCmms, pprCmms, pprCmm, pprStmt, pprExpr, + pprSection, pprStatic, pprLit ) where import BlockId import Cmm import CmmUtils -import MachOp import CLabel + import ForeignCall import Unique import Outputable @@ -52,6 +53,12 @@ import Data.List import System.IO import Data.Maybe +-- Temp Jan08 +import SMRep +import ClosureInfo +#include "../includes/StgFun.h" + + pprCmms :: (Outputable info, Outputable g) => [GenCmm CmmStatic info g] -> SDoc pprCmms cmms = pprCode CStyle (vcat (intersperse separator $ map ppr cmms)) where @@ -167,11 +174,13 @@ pprTypeInfo (ConstrInfo layout constr descr) = ptext (sLit "nptrs: ") <> integer (toInteger (snd layout)), ptext (sLit "constructor: ") <> integer (toInteger constr), pprLit descr] -pprTypeInfo (FunInfo layout srt fun_type arity _args slow_entry) = +pprTypeInfo (FunInfo layout srt arity _args slow_entry) = vcat [ptext (sLit "ptrs: ") <> integer (toInteger (fst layout)), ptext (sLit "nptrs: ") <> integer (toInteger (snd layout)), ptext (sLit "srt: ") <> ppr srt, - ptext (sLit "fun_type: ") <> integer (toInteger fun_type), +-- Temp Jan08 + ptext (sLit ("fun_type: ")) <> integer (toInteger (argDescrType _args)), + ptext (sLit "arity: ") <> integer (toInteger arity), --ptext (sLit "args: ") <> ppr args, -- TODO: needs to be printed ptext (sLit "slow: ") <> pprLit slow_entry @@ -187,6 +196,20 @@ pprTypeInfo (ContInfo stack srt) = vcat [ptext (sLit "stack: ") <> ppr stack, ptext (sLit "srt: ") <> ppr srt] +-- Temp Jan08 +argDescrType :: ArgDescr -> StgHalfWord +-- The "argument type" RTS field type +argDescrType (ArgSpec n) = n +argDescrType (ArgGen liveness) + | isBigLiveness liveness = ARG_GEN_BIG + | otherwise = ARG_GEN + +-- Temp Jan08 +isBigLiveness :: Liveness -> Bool +isBigLiveness (BigLiveness _) = True +isBigLiveness (SmallLiveness _) = False + + pprUpdateFrame :: UpdateFrame -> SDoc pprUpdateFrame (UpdateFrame expr args) = hcat [ ptext (sLit "jump") @@ -225,39 +248,28 @@ pprStmt stmt = case stmt of -- rep[lv] = expr; CmmStore lv expr -> rep <> brackets(ppr lv) <+> equals <+> ppr expr <> semi where - rep = ppr ( cmmExprRep expr ) + rep = ppr ( cmmExprType expr ) -- call "ccall" foo(x, y)[r1, r2]; -- ToDo ppr volatile CmmCall (CmmCallee fn cconv) results args safety ret -> - hcat [ if null results - then empty - else parens (commafy $ map ppr results) <> - ptext (sLit " = "), - ptext (sLit "foreign"), space, - doubleQuotes(ppr cconv), space, - target fn, parens ( commafy $ map ppr args ), - brackets (ppr safety), - case ret of CmmMayReturn -> empty - CmmNeverReturns -> ptext (sLit " never returns"), - semi ] + sep [ pp_lhs <+> pp_conv + , nest 2 (pprExpr9 fn <> + parens (commafy (map ppr_ar args))) + <> brackets (ppr safety) + , case ret of CmmMayReturn -> empty + CmmNeverReturns -> ptext $ sLit (" never returns") + ] <> semi where - ---- With the following three functions, I was going somewhere - ---- useful, but I don't remember where. Probably making - ---- emitted Cmm output look better. ---NR, 2 May 2008 - _pp_lhs | null results = empty - | otherwise = commafy (map ppr_ar results) <+> equals + pp_lhs | null results = empty + | otherwise = commafy (map ppr_ar results) <+> equals -- Don't print the hints on a native C-- call - ppr_ar arg = case cconv of - CmmCallConv -> ppr (kindlessCmm arg) - _ -> doubleQuotes (ppr $ cmmKind arg) <+> - ppr (kindlessCmm arg) - _pp_conv = case cconv of + ppr_ar (CmmHinted ar k) = case cconv of + CmmCallConv -> ppr ar + _ -> ppr (ar,k) + pp_conv = case cconv of CmmCallConv -> empty - _ -> ptext (sLit "foreign") <+> doubleQuotes (ppr cconv) - - target (CmmLit lit) = pprLit lit - target fn' = parens (ppr fn') + _ -> ptext (sLit("foreign")) <+> doubleQuotes (ppr cconv) CmmCall (CmmPrim op) results args safety ret -> pprStmt (CmmCall (CmmCallee (CmmLit lbl) CCallConv) @@ -271,6 +283,18 @@ pprStmt stmt = case stmt of CmmReturn params -> genReturn params CmmSwitch arg ids -> genSwitch arg ids +instance Outputable ForeignHint where + ppr NoHint = empty + ppr SignedHint = quotes(text "signed") +-- ppr AddrHint = quotes(text "address") +-- Temp Jan08 + ppr AddrHint = (text "PtrHint") + +-- Just look like a tuple, since it was a tuple before +-- ... is that a good idea? --Isaac Dupree +instance (Outputable a) => Outputable (CmmHinted a) where + ppr (CmmHinted a k) = ppr (a, k) + -- -------------------------------------------------------------------------- -- goto local label. [1], section 6.6 -- @@ -297,9 +321,8 @@ genCondBranch expr ident = -- -- jump foo(a, b, c); -- -genJump :: CmmExpr -> [CmmKinded CmmExpr] -> SDoc +genJump :: CmmExpr -> [CmmHinted CmmExpr] -> SDoc genJump expr args = - hcat [ ptext (sLit "jump") , space , if isTrivialCmmExpr expr @@ -308,23 +331,17 @@ genJump expr args = CmmLoad (CmmReg _) _ -> pprExpr expr _ -> parens (pprExpr expr) , space - , parens ( commafy $ map pprKinded args ) + , parens ( commafy $ map ppr args ) , semi ] -pprKinded :: Outputable a => (CmmKinded a) -> SDoc -pprKinded (CmmKinded a NoHint) = ppr a -pprKinded (CmmKinded a PtrHint) = quotes(text "address") <+> ppr a -pprKinded (CmmKinded a SignedHint) = quotes(text "signed") <+> ppr a -pprKinded (CmmKinded a FloatHint) = quotes(text "float") <+> ppr a -- -------------------------------------------------------------------------- -- Return from a function. [1], Section 6.8.2 of version 1.128 -- -- return (a, b, c); -- -genReturn :: [CmmKinded CmmExpr] -> SDoc +genReturn :: [CmmHinted CmmExpr] -> SDoc genReturn args = - hcat [ ptext (sLit "return") , space , parens ( commafy $ map ppr args ) @@ -376,7 +393,7 @@ pprExpr e CmmRegOff reg i -> pprExpr (CmmMachOp (MO_Add rep) [CmmReg reg, CmmLit (CmmInt (fromIntegral i) rep)]) - where rep = cmmRegRep reg + where rep = typeWidth (cmmRegType reg) CmmLit lit -> pprLit lit _other -> pprExpr1 e @@ -488,7 +505,7 @@ pprLit :: CmmLit -> SDoc pprLit lit = case lit of CmmInt i rep -> hcat [ (if i < 0 then parens else id)(integer i) - , (if rep == wordRep + , (if rep == wordWidth then empty else space <> dcolon <+> ppr rep) ] @@ -534,27 +551,37 @@ pprReg r -- We only print the type of the local reg if it isn't wordRep -- pprLocalReg :: LocalReg -> SDoc -pprLocalReg (LocalReg uniq rep follow) - = hcat [ char '_', ppr uniq, ty ] where - ty = if rep == wordRep && follow == GCKindNonPtr - then empty - else dcolon <> ptr <> ppr rep - ptr = if follow == GCKindNonPtr - then empty - else doubleQuotes (text "ptr") +pprLocalReg (LocalReg uniq rep) +-- = ppr rep <> char '_' <> ppr uniq +-- Temp Jan08 + = char '_' <> ppr uniq <> + (if isWord32 rep -- && not (isGcPtrType rep) -- Temp Jan08 -- sigh + then dcolon <> ptr <> ppr rep + else dcolon <> ptr <> ppr rep) + where + ptr = empty + --if isGcPtrType rep + -- then doubleQuotes (text "ptr") + -- else empty -- Stack areas pprArea :: Area -> SDoc -pprArea (RegSlot r) = hcat [ text "slot<", ppr r, text ">" ] -pprArea (CallArea id n n') = - hcat [ text "callslot<", ppr id, char '+', ppr n, char '/', ppr n', text ">" ] +pprArea (RegSlot r) = hcat [ text "slot<", ppr r, text ">" ] +pprArea (CallArea id) = pprAreaId id + +pprAreaId :: AreaId -> SDoc +pprAreaId Old = text "old" +pprAreaId (Young id) = hcat [ text "young<", ppr id, text ">" ] -- needs to be kept in syn with Cmm.hs.GlobalReg -- pprGlobalReg :: GlobalReg -> SDoc pprGlobalReg gr = case gr of - VanillaReg n -> char 'R' <> int n + VanillaReg n _ -> char 'R' <> int n +-- Temp Jan08 +-- VanillaReg n VNonGcPtr -> char 'R' <> int n +-- VanillaReg n VGcPtr -> char 'P' <> int n FloatReg n -> char 'F' <> int n DoubleReg n -> char 'D' <> int n LongReg n -> char 'L' <> int n diff --git a/compiler/cmm/PprCmmZ.hs b/compiler/cmm/PprCmmZ.hs index 1e5f52f45f..c588466051 100644 --- a/compiler/cmm/PprCmmZ.hs +++ b/compiler/cmm/PprCmmZ.hs @@ -7,7 +7,6 @@ where import BlockId import Cmm import CmmExpr -import ForeignCall import PprCmm import Outputable import qualified ZipCfgCmmRep as G @@ -28,34 +27,28 @@ pprCmmGraphLikeCmm g = vcat (swallow blocks) where blocks = Z.postorder_dfs g swallow :: [G.CmmBlock] -> [SDoc] swallow [] = [] - swallow (Z.Block id t : rest) = tail id [] Nothing t rest - tail id prev' out (Z.ZTail (G.CopyOut conv args) t) rest = - if isJust out then panic "multiple CopyOut nodes in one basic block" - else - tail id (prev') (Just (conv, args)) t rest + swallow (Z.Block id off t : rest) = tail (id, off) [] Nothing t rest tail id prev' out (Z.ZTail m t) rest = tail id (mid m : prev') out t rest - tail id prev' out (Z.ZLast Z.LastExit) rest = exit id prev' out rest tail id prev' out (Z.ZLast (Z.LastOther l)) rest = last id prev' out l rest - mid (G.CopyIn _ [] _) = text "// proc point (no parameters)" - mid m@(G.CopyIn {}) = ppr m <+> text "(proc point)" + tail id prev' _ (Z.ZLast Z.LastExit) rest = exit id prev' rest mid m = ppr m - block' id prev' + block' (id, off) prev' | id == Z.lg_entry g, entry_has_no_pred = - vcat (text "<entry>" : reverse prev') - | otherwise = hang (ppr id <> colon) 4 (vcat (reverse prev')) + vcat (text "<entry>" <> parens (ppr off) : reverse prev') + | otherwise = hang (ppr id <> parens (ppr off) <> colon) 4 (vcat (reverse prev')) last id prev' out l n = let endblock stmt = block' id (stmt : prev') : swallow n in case l of G.LastBranch tgt -> case n of - Z.Block id' t : bs + Z.Block id' _ t : bs | tgt == id', unique_pred id' -> tail id prev' out t bs -- optimize out redundant labels _ -> endblock (ppr $ CmmBranch tgt) l@(G.LastCondBranch expr tid fid) -> let ft id = text "// fall through to " <> ppr id in case n of - Z.Block id' t : bs + Z.Block id' _ t : bs | id' == fid, isNothing out -> tail id (ft fid : ppr (CmmCondBranch expr tid) : prev') Nothing t bs | id' == tid, Just e' <- maybeInvertCmmExpr expr, isNothing out-> @@ -64,35 +57,10 @@ pprCmmGraphLikeCmm g = vcat (swallow blocks) l@(G.LastJump {}) -> endblock $ with_out out l l@(G.LastReturn {}) -> endblock $ with_out out l l@(G.LastSwitch {}) -> endblock $ with_out out l - l@(G.LastCall _ Nothing) -> endblock $ with_out out l - l@(G.LastCall tgt (Just k)) - | Z.Block id' (Z.ZTail (G.CopyIn _ ress srt) t) : bs <- n, - Just (conv, args) <- out, - id' == k -> - let call = CmmCall tgt' ress args (CmmSafe srt) CmmMayReturn - tgt' = CmmCallee tgt (cconv_of_conv conv) - ppcall = ppr call <+> parens (text "ret to" <+> ppr k) - in if unique_pred k then - tail id (ppcall : prev') Nothing t bs - else - endblock (ppcall) - | Z.Block id' t : bs <- n, id' == k, unique_pred k, - Just (conv, args) <- out, - Just (ress, srt) <- findCopyIn t -> - let call = CmmCall tgt' ress args (CmmSafe srt) CmmMayReturn - tgt' = CmmCallee tgt (cconv_of_conv conv) - delayed = - ptext (sLit "// delayed CopyIn follows previous call") - in tail id (delayed : ppr call : prev') Nothing t bs - | otherwise -> endblock $ with_out out l - findCopyIn (Z.ZTail (G.CopyIn _ ress srt) _) = Just (ress, srt) - findCopyIn (Z.ZTail _ t) = findCopyIn t - findCopyIn (Z.ZLast _) = Nothing - exit id prev' out n = -- highly irregular (assertion violation?) + l@(G.LastCall _ _ _)-> endblock $ with_out out l + exit id prev' n = -- highly irregular (assertion violation?) let endblock stmt = block' id (stmt : prev') : swallow n in - case out of Nothing -> endblock (text "// <exit>") - Just (conv, args) -> endblock (ppr (G.CopyOut conv args) $$ - text "// <exit>") + endblock (text "// <exit>") preds = zipPreds g entry_has_no_pred = case lookupBlockEnv preds (Z.lg_entry g) of Nothing -> True @@ -107,13 +75,11 @@ pprCmmGraphLikeCmm g = vcat (swallow blocks) else single in Z.fold_blocks add emptyBlockSet g unique_pred id = elemBlockSet id single_preds - cconv_of_conv (G.ConventionStandard conv _) = conv - cconv_of_conv (G.ConventionPrivate {}) = CmmCallConv -- XXX totally bogus with_out :: Maybe (G.Convention, CmmActuals) -> G.Last -> SDoc with_out Nothing l = ptext (sLit "??no-arguments??") <+> ppr l with_out (Just (conv, args)) l = last l - where last (G.LastCall e k) = + where last (G.LastCall e k _) = hcat [ptext (sLit "... = foreign "), doubleQuotes(ppr conv), space, ppr_target e, parens ( commafy $ map ppr args ), @@ -121,9 +87,13 @@ with_out (Just (conv, args)) l = last l case k of Nothing -> ptext (sLit " never returns") Just _ -> empty, semi ] - last (G.LastReturn) = ppr (CmmReturn args) - last (G.LastJump e) = ppr (CmmJump e args) - last l = ppr (G.CopyOut conv args) $$ ppr l + last (G.LastReturn _) = ppr (CmmReturn $ noHints args) + last (G.LastJump e _) = ppr (CmmJump e $ noHints args) + last l = ppr l ppr_target (CmmLit lit) = pprLit lit ppr_target fn' = parens (ppr fn') commafy xs = hsep $ punctuate comma xs + +-- Anything that uses this is bogus! +noHints :: [a] -> [CmmHinted a] +noHints = map (\v -> CmmHinted v NoHint) diff --git a/compiler/cmm/README b/compiler/cmm/README index c0d1c68f0c..fd87e88748 100644 --- a/compiler/cmm/README +++ b/compiler/cmm/README @@ -45,7 +45,6 @@ Sketch of the new arrivals: DFMonad Support for dataflow analysis and dataflow-based transformation. This module needs work. Includes DataflowLattice - for tracking dataflow facts (good) - DFA - monad for iterative dataflow analysis (OK) DFM - monad for iterative dataflow analysis and rewriting (OK) DFTx - monad to track Whalley/Davidson transactions (ugly) type class DataflowAnalysis - operations common to DFA, DFM @@ -89,8 +88,6 @@ Sketch of the new arrivals: CmmContFlowOpt Branch-chain elimination and elimination of unreachable code. - CmmCvt Conversion to and from the new format. - CmmOpt Changed optimization to use 'foldRegsUsed'; eliminated significant duplication of code. diff --git a/compiler/cmm/StackColor.hs b/compiler/cmm/StackColor.hs index 4d544bdbc8..f3c1c32cdb 100644 --- a/compiler/cmm/StackColor.hs +++ b/compiler/cmm/StackColor.hs @@ -8,7 +8,6 @@ import CmmExpr import CmmSpillReload import DFMonad import qualified GraphOps -import MachOp import ZipCfg import ZipCfgCmmRep import ZipDataflow @@ -19,8 +18,6 @@ import UniqSet import Data.List -type M = ExtendWithSpills Middle - fold_edge_facts_b :: LastNode l => (DualLive -> a -> a) -> BackwardTransfers m l DualLive -> LGraph m l -> (BlockId -> DualLive) -> a -> a @@ -32,12 +29,12 @@ fold_edge_facts_b f comp graph env z = last_in _ LastExit = fact_bot dualLiveLattice last_in env (LastOther l) = bt_last_in comp env l in head_fold h (last_in env l) z - head_fold (ZHead h m) out z = head_fold h (bt_middle_in comp out m) (f out z) - head_fold (ZFirst id) out z = f (bt_first_in comp out id) (f out z) + head_fold (ZHead h m) out z = head_fold h (bt_middle_in comp out m) (f out z) + head_fold (ZFirst id _) out z = f (bt_first_in comp out id) (f out z) -foldConflicts :: (RegSet -> a -> a) -> a -> LGraph M Last -> FuelMonad a +foldConflicts :: (RegSet -> a -> a) -> a -> LGraph Middle Last -> FuelMonad a foldConflicts f z g = - do env <- dualLiveness emptyBlockSet $ graphOfLGraph g + do env <- dualLiveness emptyBlockSet g let lookup id = lookupBlockEnv env id `orElse` fact_bot dualLiveLattice f' dual z = f (on_stack dual) z return $ fold_edge_facts_b f' (dualLiveTransfers emptyBlockSet) g lookup z @@ -50,7 +47,7 @@ foldConflicts f z g = type IGraph = Color.Graph LocalReg SlotClass StackPlacement type ClassCount = [(SlotClass, Int)] -buildIGraphAndCounts :: LGraph M Last -> FuelMonad (IGraph, ClassCount) +buildIGraphAndCounts :: LGraph Middle Last -> FuelMonad (IGraph, ClassCount) buildIGraphAndCounts g = igraph_and_counts where igraph_and_counts = foldConflicts add (Color.initGraph, zero) g zero = map (\c -> (c, 0)) allSlotClasses @@ -73,16 +70,14 @@ graphAddConflictSet :: RegSet -> IGraph -> IGraph graphAddConflictSet set graph = GraphOps.addConflicts set slotClass graph slotClass :: LocalReg -> SlotClass -slotClass (LocalReg _ machRep _) = - case machRep of -- the horror, the horror - I8 -> SlotClass32 - I16 -> SlotClass32 - I32 -> SlotClass32 - I64 -> SlotClass64 - I128 -> SlotClass128 - F32 -> SlotClass32 - F64 -> SlotClass64 - F80 -> SlotClass64 +slotClass (LocalReg _ ty) = + case typeWidth ty of -- the horror, the horror + W8 -> SlotClass32 + W16 -> SlotClass32 + W32 -> SlotClass32 + W64 -> SlotClass64 + W128 -> SlotClass128 + W80 -> SlotClass64 {- colorMe :: (IGraph, ClassCount) -> (IGraph, UniqSet LocalReg) diff --git a/compiler/cmm/ZipCfg.hs b/compiler/cmm/ZipCfg.hs index 78eeaed3e6..634bc8cccf 100644 --- a/compiler/cmm/ZipCfg.hs +++ b/compiler/cmm/ZipCfg.hs @@ -12,7 +12,7 @@ module ZipCfg , splice_tail, splice_head, splice_head_only', splice_head' , of_block_list, to_block_list , graphOfLGraph - , map_blocks, map_nodes, mapM_blocks + , map_blocks, map_one_block, map_nodes, mapM_blocks , postorder_dfs, postorder_dfs_from, postorder_dfs_from_except , fold_layout , fold_blocks, fold_fwd_block @@ -38,7 +38,8 @@ where import BlockId ( BlockId, BlockEnv, emptyBlockEnv, lookupBlockEnv, extendBlockEnv , BlockSet, emptyBlockSet, elemBlockSet, extendBlockSet) -import CmmExpr ( UserOfLocalRegs(..) ) --for an instance +import CmmExpr ( UserOfLocalRegs(..) ) +import PprCmm() import Outputable hiding (empty) import Panic @@ -150,18 +151,22 @@ instance UserOfLocalRegs a => UserOfLocalRegs (ZLast a) where foldRegsUsed _f z LastExit = z -data ZHead m = ZFirst BlockId | ZHead (ZHead m) m +data ZHead m = ZFirst BlockId (Maybe Int) + | ZHead (ZHead m) m -- ZHead is a (reversed) sequence of middle nodes labeled by a BlockId data ZTail m l = ZLast (ZLast l) | ZTail m (ZTail m l) -- ZTail is a sequence of middle nodes followed by a last node -- | Blocks and flow graphs; see Note [Kinds of graphs] -data Block m l = Block BlockId (ZTail m l) +-- In addition to its id, the block carries the number of bytes of stack space +-- used for incoming parameters on entry to the block. +data Block m l = Block BlockId (Maybe Int) (ZTail m l) data Graph m l = Graph { g_entry :: (ZTail m l), g_blocks :: (BlockEnv (Block m l)) } -data LGraph m l = LGraph { lg_entry :: BlockId - , lg_blocks :: BlockEnv (Block m l) } +data LGraph m l = LGraph { lg_entry :: BlockId + , lg_argoffset :: Int -- space (bytes) for incoming args + , lg_blocks :: BlockEnv (Block m l)} -- Invariant: lg_entry is in domain( lg_blocks ) -- | And now the zipper. The focus is between the head and tail. @@ -220,13 +225,13 @@ ht_to_last :: ZHead m -> ZTail m l -> (ZHead m, ZLast l) -- , (???, [<blocks>, -- N: y:=x; return (y,x)]) -splice_head :: ZHead m -> LGraph m l -> (LGraph m l, ZHead m) -splice_head' :: ZHead m -> Graph m l -> (BlockEnv (Block m l), ZHead m) +splice_head :: ZHead m -> LGraph m l -> (LGraph m l, ZHead m) +splice_head' :: ZHead m -> Graph m l -> (BlockEnv (Block m l), ZHead m) splice_tail :: Graph m l -> ZTail m l -> Graph m l -- | We can also splice a single-entry, no-exit Graph into a head. -splice_head_only :: ZHead m -> LGraph m l -> LGraph m l -splice_head_only' :: ZHead m -> Graph m l -> LGraph m l +splice_head_only :: ZHead m -> LGraph m l -> LGraph m l +splice_head_only' :: ZHead m -> Graph m l -> LGraph m l -- | A safe operation @@ -235,12 +240,12 @@ splice_head_only' :: ZHead m -> Graph m l -> LGraph m l -- layout or dataflow, however, one will want to use 'postorder_dfs' -- in order to get the blocks in an order that relates to the control -- flow in the procedure. -of_block_list :: BlockId -> [Block m l] -> LGraph m l -- N log N +of_block_list :: BlockId -> Int -> [Block m l] -> LGraph m l -- N log N to_block_list :: LGraph m l -> [Block m l] -- N log N -- | Conversion from LGraph to Graph graphOfLGraph :: LastNode l => LGraph m l -> Graph m l -graphOfLGraph (LGraph eid blocks) = Graph (ZLast $ mkBranchNode eid) blocks +graphOfLGraph (LGraph eid _ blocks) = Graph (ZLast $ mkBranchNode eid) blocks -- | Traversal: 'postorder_dfs' returns a list of blocks reachable @@ -282,6 +287,8 @@ fold_blocks :: (Block m l -> a -> a) -> a -> LGraph m l -> a fold_fwd_block :: (BlockId -> a -> a) -> (m -> a -> a) -> (ZLast l -> a -> a) -> Block m l -> a -> a +map_one_block :: (BlockId -> BlockId) -> (m -> m') -> (l -> l') -> Block m l -> Block m' l' + map_nodes :: (BlockId -> BlockId) -> (m -> m') -> (l -> l') -> LGraph m l -> LGraph m' l' -- mapping includes the entry id! @@ -350,14 +357,14 @@ instance LastNode l => HavingSuccessors (ZTail m l) where ----- block manipulations -blockId (Block id _) = id +blockId (Block id _ _) = id -- | Convert block between forms. -- These functions are tail-recursive, so we can go as deep as we like -- without fear of stack overflow. ht_to_block head tail = case head of - ZFirst id -> Block id tail + ZFirst id off -> Block id off tail ZHead h m -> ht_to_block h (ZTail m tail) ht_to_last head (ZLast l) = (head, l) @@ -367,11 +374,11 @@ zipht h t = ht_to_block h t zip (ZBlock h t) = ht_to_block h t goto_end (ZBlock h t) = ht_to_last h t -unzip (Block id t) = ZBlock (ZFirst id) t +unzip (Block id off t) = ZBlock (ZFirst id off) t head_id :: ZHead m -> BlockId -head_id (ZFirst id) = id -head_id (ZHead h _) = head_id h +head_id (ZFirst id _) = id +head_id (ZHead h _) = head_id h last (ZBlock _ t) = lastTail t @@ -385,13 +392,13 @@ tailOfLast l = ZLast (LastOther l) -- tedious to write in every client ------------------ simple graph manipulations focus :: BlockId -> LGraph m l -> FGraph m l -- focus on edge out of node with id -focus id (LGraph entry blocks) = +focus id (LGraph entry _ blocks) = case lookupBlockEnv blocks id of Just b -> FGraph entry (unzip b) (delFromUFM blocks id) Nothing -> panic "asked for nonexistent block in flow graph" entry :: LGraph m l -> FGraph m l -- focus on edge out of entry node -entry g@(LGraph eid _) = focus eid g +entry g@(LGraph eid _ _) = focus eid g -- | pull out a block satisfying the predicate, if any splitp_blocks :: (Block m l -> Bool) -> BlockEnv (Block m l) -> @@ -452,7 +459,7 @@ single_exitg (Graph tail blocks) = foldUFM add (exit_count (lastTail tail)) bloc -- Better to geot [A,B,C,D] -postorder_dfs g@(LGraph _ blockenv) = +postorder_dfs g@(LGraph _ _ blockenv) = let FGraph id eblock _ = entry g in zip eblock : postorder_dfs_from_except blockenv eblock (unitUniqSet id) @@ -463,7 +470,7 @@ postorder_dfs_from_except blocks b visited = where -- vnode :: -- Block m l -> ([Block m l] -> BlockSet -> a) -> [Block m l] -> BlockSet -> a - vnode block@(Block id _) cont acc visited = + vnode block@(Block id _ _) cont acc visited = if elemBlockSet id visited then cont acc visited else @@ -489,42 +496,42 @@ postorder_dfs_from blocks b = postorder_dfs_from_except blocks b emptyBlockSet -- 'b1' what its inline successor is going to be, so that if 'b1' ends with -- 'goto b2', the goto can be omitted. -fold_layout f z g@(LGraph eid _) = fold (postorder_dfs g) z +fold_layout f z g@(LGraph eid _ _) = fold (postorder_dfs g) z where fold blocks z = case blocks of [] -> z [b] -> f b Nothing z b1 : b2 : bs -> fold (b2 : bs) (f b1 (nextlabel b2) z) - nextlabel (Block id _) = + nextlabel (Block id _ _) = if id == eid then panic "entry as successor" else Just id -- | The rest of the traversals are straightforward -map_blocks f (LGraph eid blocks) = LGraph eid (mapUFM f blocks) +map_blocks f (LGraph eid off blocks) = LGraph eid off (mapUFM f blocks) + +map_nodes idm middle last (LGraph eid off blocks) = + LGraph (idm eid) off (mapUFM (map_one_block idm middle last) blocks) -map_nodes idm middle last (LGraph eid blocks) = LGraph (idm eid) (mapUFM block blocks) - where block (Block id t) = Block (idm id) (tail t) - tail (ZTail m t) = ZTail (middle m) (tail t) +map_one_block idm middle last (Block id off t) = Block (idm id) off (tail t) + where tail (ZTail m t) = ZTail (middle m) (tail t) tail (ZLast LastExit) = ZLast LastExit tail (ZLast (LastOther l)) = ZLast (LastOther (last l)) -mapM_blocks f (LGraph eid blocks) = blocks' >>= return . LGraph eid +mapM_blocks f (LGraph eid off blocks) = blocks' >>= return . LGraph eid off where blocks' = foldUFM (\b mblocks -> do { blocks <- mblocks ; b <- f b ; return $ insertBlock b blocks }) (return emptyBlockEnv) blocks -fold_blocks f z (LGraph _ blocks) = foldUFM f z blocks -fold_fwd_block first middle last (Block id t) z = tail t (first id z) +fold_blocks f z (LGraph _ _ blocks) = foldUFM f z blocks +fold_fwd_block first middle last (Block id _ t) z = tail t (first id z) where tail (ZTail m t) z = tail t (middle m z) tail (ZLast l) z = last l z -of_block_list e blocks = LGraph e $ foldr insertBlock emptyBlockEnv blocks -to_block_list (LGraph _ blocks) = eltsUFM blocks - - +of_block_list e off blocks = LGraph e off $ foldr insertBlock emptyBlockEnv blocks +to_block_list (LGraph _ _ blocks) = eltsUFM blocks -- We want to be able to scrutinize a single-entry, single-exit 'LGraph' for @@ -568,15 +575,15 @@ prepare_for_splicing' (Graph etail gblocks) single multi = is_exit :: Block m l -> Bool is_exit b = case last (unzip b) of { LastExit -> True; _ -> False } -splice_head head g = +splice_head head g@(LGraph _ off _) = ASSERT (single_exit g) prepare_for_splicing g splice_one_block splice_many_blocks where eid = head_id head splice_one_block tail' = case ht_to_last head tail' of - (head, LastExit) -> (LGraph eid emptyBlockEnv, head) + (head, LastExit) -> (LGraph eid off emptyBlockEnv, head) _ -> panic "spliced LGraph without exit" splice_many_blocks entry exit others = - (LGraph eid (insertBlock (zipht head entry) others), exit) + (LGraph eid off (insertBlock (zipht head entry) others), exit) splice_head' head g = ASSERT (single_exitg g) prepare_for_splicing' g splice_one_block splice_many_blocks @@ -614,25 +621,27 @@ splice_tail g tail = splice_head_only head g = let FGraph eid gentry gblocks = entry g in case gentry of - ZBlock (ZFirst _) tail -> LGraph eid (insertBlock (zipht head tail) gblocks) + ZBlock (ZFirst _ _) tail -> + LGraph eid 0 (insertBlock (zipht head tail) gblocks) _ -> panic "entry not at start of block?!" splice_head_only' head (Graph tail gblocks) = let eblock = zipht head tail in - LGraph (blockId eblock) (insertBlock eblock gblocks) + LGraph (blockId eblock) 0 (insertBlock eblock gblocks) + -- the offset probably should never be used, but well, it's correct for this LGraph --- Translation -translate txm txl (LGraph eid blocks) = +translate txm txl (LGraph eid off blocks) = do blocks' <- foldUFM txblock (return emptyBlockEnv) blocks - return $ LGraph eid blocks' + return $ LGraph eid off blocks' where -- txblock :: -- Block m l -> tm (BlockEnv (Block m' l')) -> tm (BlockEnv (Block m' l')) - txblock (Block id t) expanded = + txblock (Block id boff t) expanded = do blocks' <- expanded - txtail (ZFirst id) t blocks' + txtail (ZFirst id boff) t blocks' -- txtail :: ZHead m' -> ZTail m l -> BlockEnv (Block m' l') -> -- tm (BlockEnv (Block m' l')) txtail h (ZTail m t) blocks' = @@ -675,10 +684,11 @@ pprLast LastExit = text "<exit>" pprLast (LastOther l) = ppr l pprBlock :: (Outputable m, Outputable l, LastNode l) => Block m l -> SDoc -pprBlock (Block id tail) = ppr id <> colon $$ ppr tail +pprBlock (Block id args tail) = ppr id <> parens (ppr args) <> colon $$ ppr tail pprLgraph :: (Outputable m, Outputable l, LastNode l) => LGraph m l -> SDoc -pprLgraph g = text "{" $$ nest 2 (vcat $ map ppr blocks) $$ text "}" +pprLgraph g = text "{" <> text "offset" <> parens (ppr $ lg_argoffset g) $$ + nest 2 (vcat $ map ppr blocks) $$ text "}" where blocks = postorder_dfs g pprGraph :: (Outputable m, Outputable l, LastNode l) => Graph m l -> SDoc diff --git a/compiler/cmm/ZipCfgCmmRep.hs b/compiler/cmm/ZipCfgCmmRep.hs index af6216835c..e030f4bc58 100644 --- a/compiler/cmm/ZipCfgCmmRep.hs +++ b/compiler/cmm/ZipCfgCmmRep.hs @@ -1,4 +1,4 @@ - + -- This module is pure representation and should be imported only by -- clients that need to manipulate representation and know what @@ -6,29 +6,32 @@ -- instead import MkZipCfgCmm. module ZipCfgCmmRep - ( CmmZ, CmmTopZ, CmmGraph, CmmBlock, CmmAGraph, Middle(..), Last(..), Convention(..) - , ValueDirection(..), CmmBackwardFixedPoint, CmmForwardFixedPoint - , insertBetween, pprCmmGraphLikeCmm + ( CmmZ, CmmTopZ, CmmGraph, CmmBlock, CmmAGraph + , Middle(..), Last(..), MidCallTarget(..) + , Convention(..), ForeignConvention(..) + , ValueDirection(..), ForeignHint(..) + , CmmBackwardFixedPoint, CmmForwardFixedPoint, pprHinted + , insertBetween, mapExpMiddle, mapExpLast, mapExpDeepMiddle, mapExpDeepLast + , foldExpMiddle, foldExpLast, foldExpDeepMiddle, foldExpDeepLast + , joinOuts ) where import BlockId import CmmExpr import Cmm ( GenCmm(..), GenCmmTop(..), CmmStatic, CmmInfo - , CmmCallTarget(..), CmmActuals, CmmFormals, CmmKinded(..) + , CallishMachOp(..), ForeignHint(..) + , CmmActuals, CmmFormals, CmmHinted(..) , CmmStmt(..) -- imported in order to call ppr on Switch and to -- implement pprCmmGraphLikeCmm - , CmmSafety(CmmSafe) -- for pprCmmGraphLikeCmm - , CmmReturnInfo(CmmMayReturn) -- for pprCmmGraphLikeCmm ) +import DFMonad import PprCmm() +import CmmTx import CLabel -import CmmZipUtil -import ClosureInfo import FastString import ForeignCall -import MachOp import qualified ZipCfg as Z import qualified ZipDataflow as DF import ZipCfg @@ -39,7 +42,7 @@ import Maybes import Monad import Outputable import Prelude hiding (zip, unzip, last) -import UniqSet +import qualified Data.List as L import UniqSupply ---------------------------------------------------------------------- @@ -59,36 +62,20 @@ data Middle | MidAssign CmmReg CmmExpr -- Assign to register | MidStore CmmExpr CmmExpr -- Assign to memory location. Size is - -- given by cmmExprRep of the rhs. + -- given by cmmExprType of the rhs. | MidUnsafeCall -- An "unsafe" foreign call; - CmmCallTarget -- just a fat machine instruction + MidCallTarget -- just a fat machine instructoin CmmFormals -- zero or more results CmmActuals -- zero or more arguments - | MidAddToContext -- push a frame on the stack; - -- I will return to this frame + | MidAddToContext -- Push a frame on the stack; + -- I will return to this frame CmmExpr -- The frame's return address; it must be -- preceded by an info table that describes the -- live variables. [CmmExpr] -- The frame's live variables, to go on the -- stack with the first one at the young end - - | CopyIn -- Move incoming parameters or results from conventional - -- locations to registers. Note [CopyIn invariant] - Convention - CmmFormals -- eventually [CmmKind] will be used only for foreign - -- calls and will migrate into 'Convention' (helping to - -- drain "the swamp"), leaving this as [LocalReg] - C_SRT -- Static things kept alive by this block - - | CopyOut Convention CmmActuals - -- Move outgoing parameters or results from registers to - -- conventional locations. Every 'LastReturn', - -- 'LastJump', or 'LastCall' must be dominated by a - -- matching 'CopyOut' in the same basic block. - -- As above, '[CmmKind]' will migrate into the foreign calling - -- convention, leaving the actuals as '[CmmExpr]'. deriving Eq data Last @@ -98,47 +85,54 @@ data Last cml_pred :: CmmExpr, cml_true, cml_false :: BlockId } - - | LastReturn -- Return from a function; values in a previous CopyOut node - - | LastJump CmmExpr -- Tail call to another procedure; args in a CopyOut node - - | LastCall { -- A call (native or safe foreign); args in CopyOut node - cml_target :: CmmExpr, -- never a CmmPrim to a CallishMachOp! - cml_cont :: Maybe BlockId } -- BlockId of continuation, if call returns - | LastSwitch CmmExpr [Maybe BlockId] -- Table branch -- The scrutinee is zero-based; -- zero -> first block -- one -> second block etc -- Undefined outside range, and when there's a Nothing + | LastReturn Int -- Return from a function; values in previous copy middles + | LastJump CmmExpr Int -- Tail call to another procedure; args in a copy middles + | LastCall { -- A call (native or safe foreign); args in copy middles + cml_target :: CmmExpr, -- never a CmmPrim to a CallishMachOp! + cml_cont :: Maybe BlockId,-- BlockId of continuation, if call returns + cml_args :: Int } -- liveness info for outgoing args + -- All the last nodes that pass arguments carry the size of the outgoing CallArea + +data MidCallTarget -- The target of a MidUnsafeCall + = ForeignTarget -- A foreign procedure + CmmExpr -- Its address + ForeignConvention -- Its calling convention + + | PrimTarget -- A possibly-side-effecting machine operation + CallishMachOp -- Which one + deriving Eq data Convention - = ConventionStandard CCallConv ValueDirection - | ConventionPrivate - -- Used for control transfers within a (pre-CPS) procedure All - -- jump sites known, never pushed on the stack (hence no SRT) - -- You can choose whatever calling convention you please - -- (provided you make sure all the call sites agree)! - -- This data type eventually to be extended to record the convention. - - deriving Eq + = Native -- Native C-- call/return + + | Foreign -- Foreign call/return + ForeignConvention + + | Private + -- Used for control transfers within a (pre-CPS) procedure All + -- jump sites known, never pushed on the stack (hence no SRT) + -- You can choose whatever calling convention you please + -- (provided you make sure all the call sites agree)! + -- This data type eventually to be extended to record the convention. + deriving( Eq ) + +data ForeignConvention + = ForeignConvention + CCallConv -- Which foreign-call convention + [ForeignHint] -- Extra info about the args + [ForeignHint] -- Extra info about the result + deriving Eq data ValueDirection = Arguments | Results -- Arguments go with procedure definitions, jumps, and arguments to calls -- Results go with returns and with results of calls. deriving Eq -{- -Note [CopyIn invariant] -~~~~~~~~~~~~~~~~~~~~~~~ -One might wish for CopyIn to be a First node, but in practice, the -possibility raises all sorts of hairy issues with graph splicing, -rewriting, and so on. In the end, NR finds it better to make the -placement of CopyIn a dynamic invariant; it should normally be the first -Middle node in the basic block in which it occurs. --} - ---------------------------------------------------------------------- ----- Splicing between blocks -- Given a middle node, a block, and a successor BlockId, @@ -151,41 +145,35 @@ Middle node in the basic block in which it occurs. -- a fresh basic block, enabling some common blockification. -- o For a conditional branch, switch statement, or call, we must insert -- a new basic block. --- o For a jump, or return, this operation is impossible. +-- o For a jump or return, this operation is impossible. insertBetween :: MonadUnique m => CmmBlock -> [Middle] -> BlockId -> m (CmmBlock, [CmmBlock]) insertBetween b ms succId = insert $ goto_end $ unzip b where insert (h, LastOther (LastBranch bid)) = if bid == succId then do (bid', bs) <- newBlocks - return (zipht h $ ZLast $ LastOther (LastBranch bid'), bs) - else panic "tried to insert between non-adjacent blocks" + return (zipht h (ZLast (LastOther (LastBranch bid'))), bs) + else panic "tried invalid block insertBetween" insert (h, LastOther (LastCondBranch c t f)) = do (t', tbs) <- if t == succId then newBlocks else return $ (t, []) (f', fbs) <- if f == succId then newBlocks else return $ (f, []) return (zipht h $ ZLast $ LastOther (LastCondBranch c t' f'), tbs ++ fbs) - insert (h, LastOther (LastCall e (Just k))) = - if k == succId then - do (id', bs) <- newBlocks - return (zipht h $ ZLast $ LastOther (LastCall e (Just id')), bs) - else panic "tried to insert between non-adjacent blocks" - insert (_, LastOther (LastCall _ Nothing)) = - panic "cannot insert after non-returning call" insert (h, LastOther (LastSwitch e ks)) = do (ids, bs) <- mapAndUnzipM mbNewBlocks ks return (zipht h $ ZLast $ LastOther (LastSwitch e ids), join bs) - insert (_, LastOther LastReturn) = panic "cannot insert after return" - insert (_, LastOther (LastJump _)) = panic "cannot insert after jump" + insert (_, LastOther (LastCall _ _ _)) = + panic "unimp: insertBetween after a call -- probably not a good idea" + insert (_, LastOther (LastReturn _)) = panic "cannot insert after return" + insert (_, LastOther (LastJump _ _)) = panic "cannot insert after jump" insert (_, LastExit) = panic "cannot insert after exit" newBlocks = do id <- liftM BlockId $ getUniqueM - return $ (id, [Block id $ + return $ (id, [Block id Nothing $ foldr ZTail (ZLast (LastOther (LastBranch succId))) ms]) mbNewBlocks (Just k) = if k == succId then liftM lift newBlocks else return (Just k, []) mbNewBlocks Nothing = return (Nothing, []) lift (id, bs) = (Just id, bs) - ---------------------------------------------------------------------- ----- Instance declarations for control flow @@ -201,20 +189,20 @@ instance LastNode Last where branchNodeTarget _ = panic "asked for target of non-branch" cmmSuccs :: Last -> [BlockId] -cmmSuccs (LastReturn {}) = [] -cmmSuccs (LastJump {}) = [] -cmmSuccs (LastBranch id) = [id] -cmmSuccs (LastCall _ (Just id)) = [id] -cmmSuccs (LastCall _ Nothing) = [] -cmmSuccs (LastCondBranch _ t f) = [f, t] -- meets layout constraint -cmmSuccs (LastSwitch _ edges) = catMaybes edges +cmmSuccs (LastReturn _) = [] +cmmSuccs (LastJump {}) = [] +cmmSuccs (LastBranch id) = [id] +cmmSuccs (LastCall _ (Just id) _) = [id] +cmmSuccs (LastCall _ Nothing _) = [] +cmmSuccs (LastCondBranch _ t f) = [f, t] -- meets layout constraint +cmmSuccs (LastSwitch _ edges) = catMaybes edges fold_cmm_succs :: (BlockId -> a -> a) -> Last -> a -> a -fold_cmm_succs _f (LastReturn {}) z = z +fold_cmm_succs _f (LastReturn _) z = z fold_cmm_succs _f (LastJump {}) z = z fold_cmm_succs f (LastBranch id) z = f id z -fold_cmm_succs f (LastCall _ (Just id)) z = f id z -fold_cmm_succs _f (LastCall _ Nothing) z = z +fold_cmm_succs f (LastCall _ (Just id) _) z = f id z +fold_cmm_succs _f (LastCall _ Nothing _) z = z fold_cmm_succs f (LastCondBranch _ te fe) z = f te (f fe z) fold_cmm_succs f (LastSwitch _ edges) z = foldl (flip f) z $ catMaybes edges @@ -223,43 +211,164 @@ fold_cmm_succs f (LastSwitch _ edges) z = foldl (flip f) z $ catMaybes edge instance UserOfLocalRegs Middle where foldRegsUsed f z m = middle m - where middle (MidComment {}) = z - middle (MidAssign _lhs expr) = fold f z expr - middle (MidStore addr rval) = fold f (fold f z addr) rval - middle (MidUnsafeCall tgt _ress args) = fold f (fold f z tgt) args - middle (MidAddToContext ra args) = fold f (fold f z ra) args - middle (CopyIn _ _formals _) = z - middle (CopyOut _ actuals) = fold f z actuals + where middle (MidComment {}) = z + middle (MidAssign _lhs expr) = fold f z expr + middle (MidStore addr rval) = fold f (fold f z addr) rval + middle (MidUnsafeCall tgt _ args) = fold f (fold f z tgt) args + middle (MidAddToContext ra args) = fold f (fold f z ra) args fold f z m = foldRegsUsed f z m -- avoid monomorphism restriction +instance UserOfLocalRegs MidCallTarget where + foldRegsUsed _f z (PrimTarget _) = z + foldRegsUsed f z (ForeignTarget e _) = foldRegsUsed f z e + +instance UserOfSlots MidCallTarget where + foldSlotsUsed _f z (PrimTarget _) = z + foldSlotsUsed f z (ForeignTarget e _) = foldSlotsUsed f z e + instance UserOfLocalRegs Last where foldRegsUsed f z l = last l - where last (LastReturn) = z - last (LastJump e) = foldRegsUsed f z e + where last (LastReturn _) = z + last (LastJump e _) = foldRegsUsed f z e last (LastBranch _id) = z - last (LastCall tgt _) = foldRegsUsed f z tgt + last (LastCall tgt _ _) = foldRegsUsed f z tgt last (LastCondBranch e _ _) = foldRegsUsed f z e last (LastSwitch e _tbl) = foldRegsUsed f z e instance DefinerOfLocalRegs Middle where foldRegsDefd f z m = middle m + where middle (MidComment {}) = z + middle (MidAssign _lhs _) = fold f z _lhs + middle (MidStore _ _) = z + middle (MidUnsafeCall _ _ _) = z + middle (MidAddToContext _ _) = z + fold f z m = foldRegsDefd f z m -- avoid monomorphism restriction + +instance DefinerOfLocalRegs Last where + foldRegsDefd _ z _ = z + + +---------------------------------------------------------------------- +----- Instance declarations for stack slot use + +instance UserOfSlots Middle where + foldSlotsUsed f z m = middle m + where middle (MidComment {}) = z + middle (MidAssign _lhs expr) = fold f z expr + middle (MidStore addr rval) = fold f (fold f z addr) rval + middle (MidUnsafeCall tgt _ress args) = fold f (fold f z tgt) args + middle (MidAddToContext ra args) = fold f (fold f z ra) args + fold f z e = foldSlotsUsed f z e -- avoid monomorphism restriction + +instance UserOfSlots Last where + foldSlotsUsed f z l = last l + where last (LastReturn _) = z + last (LastJump e _) = foldSlotsUsed f z e + last (LastBranch _id) = z + last (LastCall tgt _ _) = foldSlotsUsed f z tgt + last (LastCondBranch e _ _) = foldSlotsUsed f z e + last (LastSwitch e _tbl) = foldSlotsUsed f z e + +instance UserOfSlots l => UserOfSlots (ZLast l) where + foldSlotsUsed f z (LastOther l) = foldSlotsUsed f z l + foldSlotsUsed _ z LastExit = z + +instance DefinerOfSlots Middle where + foldSlotsDefd f z m = middle m where middle (MidComment {}) = z - middle (MidAssign _lhs _) = fold f z _lhs + middle (MidAssign _ _) = z + middle (MidStore (CmmStackSlot a i) e) = + f z (a, i, widthInBytes $ typeWidth $ cmmExprType e) middle (MidStore _ _) = z middle (MidUnsafeCall _ _ _) = z middle (MidAddToContext _ _) = z - middle (CopyIn _ _formals _) = fold f z _formals - middle (CopyOut _ _) = z - fold f z m = foldRegsDefd f z m -- avoid monomorphism restriction -instance DefinerOfLocalRegs Last where - foldRegsDefd _ z l = last l - where last (LastReturn) = z - last (LastJump _) = z - last (LastBranch _) = z - last (LastCall _ _) = z - last (LastCondBranch _ _ _) = z - last (LastSwitch _ _) = z +instance DefinerOfSlots Last where + foldSlotsDefd _ z _ = z + +instance DefinerOfSlots l => DefinerOfSlots (ZLast l) where + foldSlotsDefd f z (LastOther l) = foldSlotsDefd f z l + foldSlotsDefd _ z LastExit = z + +---------------------------------------------------------------------- +----- Code for manipulating Middle and Last nodes + +mapExpMiddle :: (CmmExpr -> CmmExpr) -> Middle -> Middle +mapExpMiddle _ m@(MidComment _) = m +mapExpMiddle exp (MidAssign r e) = MidAssign r (exp e) +mapExpMiddle exp (MidStore addr e) = MidStore (exp addr) (exp e) +mapExpMiddle exp (MidUnsafeCall tgt fs as) = + MidUnsafeCall (mapExpMidcall exp tgt) fs (map exp as) +mapExpMiddle exp (MidAddToContext e es) = MidAddToContext (exp e) (map exp es) + +foldExpMiddle :: (CmmExpr -> z -> z) -> Middle -> z -> z +foldExpMiddle _ (MidComment _) z = z +foldExpMiddle exp (MidAssign _ e) z = exp e z +foldExpMiddle exp (MidStore addr e) z = exp addr $ exp e z +foldExpMiddle exp (MidUnsafeCall tgt _ as) z = foldExpMidcall exp tgt $ foldr exp z as +foldExpMiddle exp (MidAddToContext e es) z = exp e $ foldr exp z es + +mapExpLast :: (CmmExpr -> CmmExpr) -> Last -> Last +mapExpLast _ l@(LastBranch _) = l +mapExpLast exp (LastCondBranch e ti fi) = LastCondBranch (exp e) ti fi +mapExpLast exp (LastSwitch e tbl) = LastSwitch (exp e) tbl +mapExpLast exp (LastCall tgt mb_id s) = LastCall (exp tgt) mb_id s +mapExpLast exp (LastJump e s) = LastJump (exp e) s +mapExpLast _ (LastReturn s) = LastReturn s + +foldExpLast :: (CmmExpr -> z -> z) -> Last -> z -> z +foldExpLast _ (LastBranch _) z = z +foldExpLast exp (LastCondBranch e _ _) z = exp e z +foldExpLast exp (LastSwitch e _) z = exp e z +foldExpLast exp (LastCall tgt _ _) z = exp tgt z +foldExpLast exp (LastJump e _) z = exp e z +foldExpLast _ (LastReturn _) z = z + +mapExpMidcall :: (CmmExpr -> CmmExpr) -> MidCallTarget -> MidCallTarget +mapExpMidcall exp (ForeignTarget e c) = ForeignTarget (exp e) c +mapExpMidcall _ m@(PrimTarget _) = m + +foldExpMidcall :: (CmmExpr -> z -> z) -> MidCallTarget -> z -> z +foldExpMidcall exp (ForeignTarget e _) z = exp e z +foldExpMidcall _ (PrimTarget _) z = z + +-- Take a transformer on expressions and apply it recursively. +wrapRecExp :: (CmmExpr -> CmmExpr) -> CmmExpr -> CmmExpr +wrapRecExp f (CmmMachOp op es) = f (CmmMachOp op $ map f es) +wrapRecExp f (CmmLoad addr ty) = f (CmmLoad (f addr) ty) +wrapRecExp f e = f e + +mapExpDeepMiddle :: (CmmExpr -> CmmExpr) -> Middle -> Middle +mapExpDeepLast :: (CmmExpr -> CmmExpr) -> Last -> Last +mapExpDeepMiddle f = mapExpMiddle $ wrapRecExp f +mapExpDeepLast f = mapExpLast $ wrapRecExp f + +-- Take a folder on expressions and apply it recursively. +wrapRecExpf :: (CmmExpr -> z -> z) -> CmmExpr -> z -> z +wrapRecExpf f e@(CmmMachOp _ es) z = foldr f (f e z) es +wrapRecExpf f e@(CmmLoad addr _) z = f addr (f e z) +wrapRecExpf f e z = f e z + +foldExpDeepMiddle :: (CmmExpr -> z -> z) -> Middle -> z -> z +foldExpDeepLast :: (CmmExpr -> z -> z) -> Last -> z -> z +foldExpDeepMiddle f = foldExpMiddle $ wrapRecExpf f +foldExpDeepLast f = foldExpLast $ wrapRecExpf f + +---------------------------------------------------------------------- +-- Compute the join of facts live out of a Last node. Useful for most backward +-- analyses. +joinOuts :: DataflowLattice a -> (BlockId -> a) -> Last -> a +joinOuts lattice env l = + let bot = fact_bot lattice + join x y = txVal $ fact_add_to lattice x y + in case l of + (LastReturn _) -> bot + (LastJump _ _) -> bot + (LastBranch id) -> env id + (LastCall _ Nothing _) -> bot + (LastCall _ (Just k) _) -> env k + (LastCondBranch _ t f) -> join (env t) (env f) + (LastSwitch _ tbl) -> foldr join bot (map env $ catMaybes tbl) ---------------------------------------------------------------------- ----- Instance declarations for prettyprinting (avoids recursive imports) @@ -273,6 +382,13 @@ instance Outputable Last where instance Outputable Convention where ppr = pprConvention +instance Outputable ForeignConvention where + ppr = pprForeignConvention + +instance Outputable ValueDirection where + ppr Arguments = ptext $ sLit "args" + ppr Results = ptext $ sLit "results" + instance DF.DebugNodes Middle Last debugPpr :: Bool @@ -280,94 +396,78 @@ debugPpr = debugIsOn pprMiddle :: Middle -> SDoc pprMiddle stmt = pp_stmt <+> pp_debug - where - pp_stmt = case stmt of - - CopyIn conv args _ -> - if null args then ptext (sLit "empty CopyIn") - else commafy (map pprKinded args) <+> equals <+> - ptext (sLit "foreign") <+> doubleQuotes(ppr conv) <+> ptext (sLit "...") - - CopyOut conv args -> - ptext (sLit "PreCopyOut: next, pass") <+> doubleQuotes(ppr conv) <+> - parens (commafy (map pprKinded args)) - - -- // text - MidComment s -> text "//" <+> ftext s - - -- reg = expr; - MidAssign reg expr -> ppr reg <+> equals <+> ppr expr <> semi - - -- rep[lv] = expr; - MidStore lv expr -> rep <> brackets(ppr lv) <+> equals <+> ppr expr <> semi - where - rep = ppr ( cmmExprRep expr ) - - -- call "ccall" foo(x, y)[r1, r2]; - -- ToDo ppr volatile - MidUnsafeCall (CmmCallee fn cconv) results args -> - hcat [ if null results - then empty - else parens (commafy $ map ppr results) <> - ptext (sLit " = "), - ptext (sLit "call"), space, - doubleQuotes(ppr cconv), space, - ppr_target fn, parens ( commafy $ map ppr args ), - semi ] - - MidUnsafeCall (CmmPrim op) results args -> - pprMiddle (MidUnsafeCall (CmmCallee (CmmLit lbl) CCallConv) results args) - where - lbl = CmmLabel (mkForeignLabel (mkFastString (show op)) Nothing False) - - MidAddToContext ra args -> - hcat [ ptext (sLit "return via ") - , ppr_target ra, parens (commafy $ map ppr args), semi ] - - pp_debug = - if not debugPpr then empty - else text " //" <+> - case stmt of - CopyIn {} -> text "CopyIn" - CopyOut {} -> text "CopyOut" - MidComment {} -> text "MidComment" - MidAssign {} -> text "MidAssign" - MidStore {} -> text "MidStore" - MidUnsafeCall {} -> text "MidUnsafeCall" - MidAddToContext {} -> text "MidAddToContext" - + where + pp_stmt = case stmt of + -- // text + MidComment s -> text "//" <+> ftext s + + -- reg = expr; + MidAssign reg expr -> ppr reg <+> equals <+> ppr expr <> semi + + -- rep[lv] = expr; + MidStore lv expr -> rep <> brackets(ppr lv) <+> equals <+> ppr expr <> semi + where + rep = ppr ( cmmExprType expr ) + + -- call "ccall" foo(x, y)[r1, r2]; + -- ToDo ppr volatile + MidUnsafeCall target results args -> + hsep [ if null results + then empty + else parens (commafy $ map ppr results) <+> equals, + ptext $ sLit "call", + ppr_call_target target <> parens (commafy $ map ppr args) <> semi] + + MidAddToContext ra args -> + hcat [ ptext $ sLit "return via " + , ppr_target ra, parens (commafy $ map ppr args), semi ] + + pp_debug = + if not debugPpr then empty + else text " //" <+> + case stmt of + MidComment {} -> text "MidComment" + MidAssign {} -> text "MidAssign" + MidStore {} -> text "MidStore" + MidUnsafeCall {} -> text "MidUnsafeCall" + MidAddToContext {} -> text "MidAddToContext" + +ppr_fc :: ForeignConvention -> SDoc +ppr_fc (ForeignConvention c _ _) = doubleQuotes (ppr c) + +ppr_call_target :: MidCallTarget -> SDoc +ppr_call_target (ForeignTarget fn c) = ppr_fc c <+> ppr_target fn +ppr_call_target (PrimTarget op) = ppr (CmmLabel (mkForeignLabel (mkFastString (show op)) Nothing False)) ppr_target :: CmmExpr -> SDoc ppr_target t@(CmmLit _) = ppr t ppr_target fn' = parens (ppr fn') - -pprKinded :: Outputable a => CmmKinded a -> SDoc -pprKinded (CmmKinded a NoHint) = ppr a -pprKinded (CmmKinded a PtrHint) = doubleQuotes (text "address") <+> ppr a -pprKinded (CmmKinded a SignedHint) = doubleQuotes (text "signed") <+> ppr a -pprKinded (CmmKinded a FloatHint) = doubleQuotes (text "float") <+> ppr a +pprHinted :: Outputable a => CmmHinted a -> SDoc +pprHinted (CmmHinted a NoHint) = ppr a +pprHinted (CmmHinted a AddrHint) = doubleQuotes (text "address") <+> ppr a +pprHinted (CmmHinted a SignedHint) = doubleQuotes (text "signed") <+> ppr a pprLast :: Last -> SDoc -pprLast stmt = (case stmt of - LastBranch ident -> ptext (sLit "goto") <+> ppr ident <> semi - LastCondBranch expr t f -> genFullCondBranch expr t f - LastJump expr -> hcat [ ptext (sLit "jump"), space, pprFun expr - , ptext (sLit "(...)"), semi] - LastReturn -> hcat [ ptext (sLit "return"), space - , ptext (sLit "(...)"), semi] - LastSwitch arg ids -> ppr $ CmmSwitch arg ids - LastCall tgt k -> genBareCall tgt k - ) <> - if debugPpr then empty - else text " //" <+> - case stmt of - LastBranch {} -> text "LastBranch" - LastCondBranch {} -> text "LastCondBranch" - LastJump {} -> text "LastJump" - LastReturn {} -> text "LastReturn" - LastSwitch {} -> text "LastSwitch" - LastCall {} -> text "LastCall" +pprLast stmt = pp_stmt <+> pp_debug + where + pp_stmt = case stmt of + LastBranch ident -> ptext (sLit "goto") <+> ppr ident <> semi + LastCondBranch expr t f -> genFullCondBranch expr t f + LastJump expr _ -> hcat [ ptext (sLit "jump"), space, pprFun expr + , ptext (sLit "(...)"), semi] + LastReturn _ -> hcat [ ptext (sLit "return"), space + , ptext (sLit "(...)"), semi] + LastSwitch arg ids -> ppr $ CmmSwitch arg ids + LastCall tgt k _ -> genBareCall tgt k + + pp_debug = text " //" <+> case stmt of + LastBranch {} -> text "LastBranch" + LastCondBranch {} -> text "LastCondBranch" + LastJump {} -> text "LastJump" + LastReturn {} -> text "LastReturn" + LastSwitch {} -> text "LastSwitch" + LastCall {} -> text "LastCall" genBareCall :: CmmExpr -> Maybe BlockId -> SDoc genBareCall fn k = @@ -393,119 +493,12 @@ genFullCondBranch expr t f = ] pprConvention :: Convention -> SDoc -pprConvention (ConventionStandard c _) = ppr c -pprConvention (ConventionPrivate {} ) = text "<private-convention>" +pprConvention (Native {}) = empty +pprConvention (Foreign c) = ppr c +pprConvention (Private {}) = text "<private-convention>" + +pprForeignConvention :: ForeignConvention -> SDoc +pprForeignConvention (ForeignConvention c as rs) = ppr c <> ppr as <> ppr rs commafy :: [SDoc] -> SDoc commafy xs = hsep $ punctuate comma xs - - ----------------------------------------------------------------- --- | The purpose of this function is to print a Cmm zipper graph "as if it were" --- a Cmm program. The objective is dodgy, so it's unsurprising parts of the --- code are dodgy as well. - -pprCmmGraphLikeCmm :: CmmGraph -> SDoc -pprCmmGraphLikeCmm g = vcat (swallow blocks) - where blocks = Z.postorder_dfs g - swallow :: [CmmBlock] -> [SDoc] - swallow [] = [] - swallow (Z.Block id t : rest) = tail id [] Nothing t rest - tail id prev' out (Z.ZTail (CopyOut conv args) t) rest = - if isJust out then panic "multiple CopyOut nodes in one basic block" - else - tail id (prev') (Just (conv, args)) t rest - tail id prev' out (Z.ZTail m t) rest = tail id (mid m : prev') out t rest - tail id prev' out (Z.ZLast Z.LastExit) rest = exit id prev' out rest - tail id prev' out (Z.ZLast (Z.LastOther l)) rest = last id prev' out l rest - mid (CopyIn _ [] _) = text "// proc point (no parameters)" - mid m@(CopyIn {}) = ppr m <+> text "(proc point)" - mid m = ppr m - block' id prev' - | id == Z.lg_entry g, entry_has_no_pred = - vcat (text "<entry>" : reverse prev') - | otherwise = hang (ppr id <> colon) 4 (vcat (reverse prev')) - last id prev' out l n = - let endblock stmt = block' id (stmt : prev') : swallow n in - case l of - LastBranch tgt -> - case n of - Z.Block id' t : bs - | tgt == id', unique_pred id' - -> tail id prev' out t bs -- optimize out redundant labels - _ -> endblock (ppr $ CmmBranch tgt) - l@(LastCondBranch expr tid fid) -> - let ft id = text "// fall through to " <> ppr id in - case n of - Z.Block id' t : bs - | id' == fid, isNothing out -> - tail id (ft fid : ppr (CmmCondBranch expr tid) : prev') Nothing t bs - | id' == tid, Just e' <- maybeInvertCmmExpr expr, isNothing out-> - tail id (ft tid : ppr (CmmCondBranch e' fid) : prev') Nothing t bs - _ -> endblock $ with_out out l - l@(LastJump {}) -> endblock $ with_out out l - l@(LastReturn {}) -> endblock $ with_out out l - l@(LastSwitch {}) -> endblock $ with_out out l - l@(LastCall _ Nothing) -> endblock $ with_out out l - l@(LastCall tgt (Just k)) - | Z.Block id' (Z.ZTail (CopyIn _ ress srt) t) : bs <- n, - Just (conv, args) <- out, - id' == k -> - let call = CmmCall tgt' ress args (CmmSafe srt) CmmMayReturn - tgt' = CmmCallee tgt (cconv_of_conv conv) - ppcall = ppr call <+> parens (text "ret to" <+> ppr k) - in if unique_pred k then - tail id (ppcall : prev') Nothing t bs - else - endblock (ppcall) - | Z.Block id' t : bs <- n, id' == k, unique_pred k, - Just (conv, args) <- out, - Just (ress, srt) <- findCopyIn t -> - let call = CmmCall tgt' ress args (CmmSafe srt) CmmMayReturn - tgt' = CmmCallee tgt (cconv_of_conv conv) - delayed = - ptext (sLit "// delayed CopyIn follows previous call") - in tail id (delayed : ppr call : prev') Nothing t bs - | otherwise -> endblock $ with_out out l - findCopyIn (Z.ZTail (CopyIn _ ress srt) _) = Just (ress, srt) - findCopyIn (Z.ZTail _ t) = findCopyIn t - findCopyIn (Z.ZLast _) = Nothing - exit id prev' out n = -- highly irregular (assertion violation?) - let endblock stmt = block' id (stmt : prev') : swallow n in - case out of Nothing -> endblock (text "// <exit>") - Just (conv, args) -> endblock (ppr (CopyOut conv args) $$ - text "// <exit>") - preds = zipPreds g - entry_has_no_pred = case lookupBlockEnv preds (Z.lg_entry g) of - Nothing -> True - Just s -> isEmptyUniqSet s - single_preds = - let add b single = - let id = Z.blockId b - in case lookupBlockEnv preds id of - Nothing -> single - Just s -> if sizeUniqSet s == 1 then - extendBlockSet single id - else single - in Z.fold_blocks add emptyBlockSet g - unique_pred id = elemBlockSet id single_preds - cconv_of_conv (ConventionStandard conv _) = conv - cconv_of_conv (ConventionPrivate {}) = CmmCallConv -- XXX totally bogus - -with_out :: Maybe (Convention, CmmActuals) -> Last -> SDoc -with_out Nothing l = ptext (sLit "??no-arguments??") <+> ppr l -with_out (Just (conv, args)) l = last l - where last (LastCall e k) = - hcat [ptext (sLit "... = foreign "), - doubleQuotes(ppr conv), space, - ppr_target e, parens ( commafy $ map ppr args ), - ptext (sLit " \"safe\""), - case k of Nothing -> ptext (sLit " never returns") - Just _ -> empty, - semi ] - last (LastReturn) = ppr (CmmReturn args) - last (LastJump e) = ppr (CmmJump e args) - last l = ppr (CopyOut conv args) $$ ppr l - ppr_target (CmmLit lit) = ppr lit - ppr_target fn' = parens (ppr fn') - commafy xs = hsep $ punctuate comma xs diff --git a/compiler/cmm/ZipCfgExtras.hs b/compiler/cmm/ZipCfgExtras.hs index bd26aca976..acddbae58b 100644 --- a/compiler/cmm/ZipCfgExtras.hs +++ b/compiler/cmm/ZipCfgExtras.hs @@ -24,7 +24,7 @@ exit :: LGraph m l -> FGraph m l -- focus on edge into default exit n -- (fails if there isn't one) focusp :: (Block m l -> Bool) -> LGraph m l -> Maybe (FGraph m l) -- focus on start of block satisfying predicate -unfocus :: FGraph m l -> LGraph m l -- lose focus +-- unfocus :: FGraph m l -> LGraph m l -- lose focus -- | We can insert a single-entry, single-exit subgraph at -- the current focus. @@ -37,16 +37,16 @@ splice_focus_exit :: FGraph m l -> LGraph m l -> FGraph m l _unused :: () _unused = all `seq` () - where all = ( exit, focusp, unfocus {- , splice_focus_entry, splice_focus_exit -} + where all = ( exit, focusp --, unfocus {- , splice_focus_entry, splice_focus_exit -} , foldM_fwd_block (\_ a -> Just a) ) -unfocus (FGraph e bz bs) = LGraph e (insertBlock (zip bz) bs) +--unfocus (FGraph e bz bs) = LGraph e (insertBlock (zip bz) bs) -focusp p (LGraph entry blocks) = +focusp p (LGraph entry _ blocks) = fmap (\(b, bs) -> FGraph entry (unzip b) bs) (splitp_blocks p blocks) -exit g@(LGraph eid _) = FGraph eid (ZBlock h (ZLast l)) others +exit g@(LGraph eid _ _) = FGraph eid (ZBlock h (ZLast l)) others where FGraph _ b others = focusp is_exit g `orElse` panic "no exit in flow graph" (h, l) = goto_end b @@ -65,7 +65,7 @@ splice_focus_exit (FGraph eid (ZBlock head tail) blocks) g = foldM_fwd_block :: Monad m => (BlockId -> a -> m a) -> (mid -> a -> m a) -> (ZLast l -> a -> m a) -> Block mid l -> a -> m a -foldM_fwd_block first middle last (Block id t) z = do { z <- first id z; tail t z } +foldM_fwd_block first middle last (Block id _ t) z = do { z <- first id z; tail t z } where tail (ZTail m t) z = do { z <- middle m z; tail t z } tail (ZLast l) z = last l z diff --git a/compiler/cmm/ZipDataflow.hs b/compiler/cmm/ZipDataflow.hs index 97b146c0ff..de2f53d640 100644 --- a/compiler/cmm/ZipDataflow.hs +++ b/compiler/cmm/ZipDataflow.hs @@ -5,6 +5,7 @@ module ZipDataflow ( DebugNodes(), RewritingDepth(..), LastOutFacts(..) , zdfSolveFrom, zdfRewriteFrom + , zdfSolveFromL , ForwardTransfers(..), BackwardTransfers(..) , ForwardRewrites(..), BackwardRewrites(..) , ForwardFixedPoint, BackwardFixedPoint @@ -14,12 +15,14 @@ module ZipDataflow , zdfDecoratedGraph -- not yet implemented , zdfFpContents , zdfFpLastOuts + , zdfBRewriteFromL, zdfFRewriteFromL ) where import BlockId import CmmTx import DFMonad +import OptimizationFuel as F import MkZipCfg import ZipCfg import qualified ZipCfg as G @@ -263,6 +266,15 @@ class DataflowSolverDirection transfers fixedpt where -> a -- ^ Fact flowing in (at entry or exit) -> Graph m l -- ^ Graph to be analyzed -> FuelMonad (fixedpt m l a ()) -- ^ Answers + zdfSolveFromL :: (DebugNodes m l, Outputable a) + => BlockEnv a -- Initial facts (unbound == bottom) + -> PassName + -> DataflowLattice a -- Lattice + -> transfers m l a -- Dataflow transfer functions + -> a -- Fact flowing in (at entry or exit) + -> LGraph m l -- Graph to be analyzed + -> FuelMonad (fixedpt m l a ()) -- Answers + zdfSolveFromL b p l t a g = zdfSolveFrom b p l t a $ quickGraph g -- There are exactly two instances: forward and backward instance DataflowSolverDirection ForwardTransfers ForwardFixedPoint @@ -307,6 +319,59 @@ class DataflowSolverDirection transfers fixedpt => -> Graph m l -> FuelMonad (fixedpt m l a (Graph m l)) +-- Temporarily lifting from Graph to LGraph -- an experiment to see how we +-- can eliminate some hysteresis between Graph and LGraph. +-- Perhaps Graph should be confined to dataflow code. +-- Trading space for time +quickGraph :: LastNode l => LGraph m l -> Graph m l +quickGraph g = Graph (ZLast $ mkBranchNode $ lg_entry g) $ lg_blocks g + +quickLGraph :: LastNode l => Int -> Graph m l -> FuelMonad (LGraph m l) +quickLGraph args (Graph (ZLast (LastOther l)) blockenv) + | isBranchNode l = return $ LGraph (branchNodeTarget l) args blockenv +quickLGraph args g = F.lGraphOfGraph g args + +fixptWithLGraph :: LastNode l => Int -> CommonFixedPoint m l fact (Graph m l) -> + FuelMonad (CommonFixedPoint m l fact (LGraph m l)) +fixptWithLGraph args cfp = + do fp_c <- quickLGraph args $ fp_contents cfp + return $ cfp {fp_contents = fp_c} + +ffixptWithLGraph :: LastNode l => Int -> ForwardFixedPoint m l fact (Graph m l) -> + FuelMonad (ForwardFixedPoint m l fact (LGraph m l)) +ffixptWithLGraph args fp = + do common <- fixptWithLGraph args $ ffp_common fp + return $ fp {ffp_common = common} + +zdfFRewriteFromL :: (DebugNodes m l, Outputable a) + => RewritingDepth -- whether to rewrite a rewritten graph + -> BlockEnv a -- initial facts (unbound == botton) + -> PassName + -> DataflowLattice a + -> ForwardTransfers m l a + -> ForwardRewrites m l a + -> a -- fact flowing in (at entry or exit) + -> LGraph m l + -> FuelMonad (ForwardFixedPoint m l a (LGraph m l)) +zdfFRewriteFromL d b p l t r a g@(LGraph _ args _) = + do fp <- zdfRewriteFrom d b p l t r a $ quickGraph g + ffixptWithLGraph args fp + +zdfBRewriteFromL :: (DebugNodes m l, Outputable a) + => RewritingDepth -- whether to rewrite a rewritten graph + -> BlockEnv a -- initial facts (unbound == botton) + -> PassName + -> DataflowLattice a + -> BackwardTransfers m l a + -> BackwardRewrites m l a + -> a -- fact flowing in (at entry or exit) + -> LGraph m l + -> FuelMonad (BackwardFixedPoint m l a (LGraph m l)) +zdfBRewriteFromL d b p l t r a g@(LGraph _ args _) = + do fp <- zdfRewriteFrom d b p l t r a $ quickGraph g + fixptWithLGraph args fp + + data RewritingDepth = RewriteShallow | RewriteDeep -- When a transformation proposes to rewrite a node, -- you can either ask the system to @@ -363,25 +428,15 @@ rewrite_f_agraph depth start_facts name lattice transfers rewrites in_fact g = areturn :: AGraph m l -> DFM a (Graph m l) areturn g = liftToDFM $ liftUniq $ graphOfAGraph g - -{- -graphToLGraph :: LastNode l => Graph m l -> DFM a (LGraph m l) -graphToLGraph (Graph (ZLast (LastOther l)) blockenv) - | isBranchNode l = return $ LGraph (branchNodeTarget l) blockenv -graphToLGraph (Graph tail blockenv) = - do id <- freshBlockId "temporary entry label" - return $ LGraph id $ insertBlock (Block id tail) blockenv --} - -- | Here we prefer not simply to slap on 'goto eid' because this -- introduces an unnecessary basic block at each rewrite, and we don't -- want to stress out the finite map more than necessary lgraphToGraph :: LastNode l => LGraph m l -> Graph m l -lgraphToGraph (LGraph eid blocks) = +lgraphToGraph (LGraph eid _ blocks) = if flip any (eltsUFM blocks) $ \block -> any (== eid) (succs block) then Graph (ZLast (mkBranchNode eid)) blocks else -- common case: entry is not a branch target - let Block _ entry = lookupBlockEnv blocks eid `orElse` panic "missing entry!" + let Block _ _ entry = lookupBlockEnv blocks eid `orElse` panic "missing entry!" in Graph entry (delFromUFM blocks eid) @@ -473,7 +528,7 @@ forward_sol check_maybe = forw solve finish in_fact (Graph entry blockenv) fuel = let blocks = G.postorder_dfs_from blockenv entry set_or_save = mk_set_or_save (isJust . lookupBlockEnv blockenv) - set_successor_facts (Block id tail) fuel = + set_successor_facts (Block id _ tail) fuel = do { idfact <- getFact id ; (last_outs, fuel) <- case check_maybe fuel $ fr_first rewrites idfact id of @@ -588,10 +643,10 @@ forward_rew check_maybe = forw in do { solve depth name start transfers rewrites in_fact g fuel ; eid <- freshBlockId "temporary entry id" ; (rewritten, fuel) <- - rew_tail (ZFirst eid) in_fact entry emptyBlockEnv fuel + rew_tail (ZFirst eid Nothing) in_fact entry emptyBlockEnv fuel ; (rewritten, fuel) <- rewrite_blocks blocks rewritten fuel ; a <- finish - ; return (a, lgraphToGraph (LGraph eid rewritten), fuel) + ; return (a, lgraphToGraph (LGraph eid 0 rewritten), fuel) } don't_rewrite facts finish in_fact g fuel = do { solve depth name facts transfers rewrites in_fact g fuel @@ -614,8 +669,8 @@ forward_rew check_maybe = forw rewrite_blocks :: [Block m l] -> (BlockEnv (Block m l)) -> Fuel -> DFM a (BlockEnv (Block m l), Fuel) rewrite_blocks [] rewritten fuel = return (rewritten, fuel) - rewrite_blocks (G.Block id t : bs) rewritten fuel = - do let h = ZFirst id + rewrite_blocks (G.Block id off t : bs) rewritten fuel = + do let h = ZFirst id off a <- getFact id case check_maybe fuel $ fr_first rewrites a id of Nothing -> do { (rewritten, fuel) <- @@ -625,7 +680,7 @@ forward_rew check_maybe = forw Just g -> do { markGraphRewritten ; g <- areturn g ; (outfact, g, fuel) <- inner_rew getExitFact a g fuel - ; let (blocks, h) = splice_head' (ZFirst id) g + ; let (blocks, h) = splice_head' h g ; (rewritten, fuel) <- rew_tail h outfact t (blocks `plusUFM` rewritten) fuel ; rewrite_blocks bs rewritten fuel } @@ -756,15 +811,16 @@ backward_sol check_maybe = back in do { fuel <- run "backward" name set_block_fact blocks fuel ; eid <- freshBlockId "temporary entry id" - ; fuel <- set_block_fact (Block eid entry) fuel + ; fuel <- set_block_fact (Block eid Nothing entry) fuel ; a <- getFact eid ; forgetFact eid ; return (a, fuel) } - set_head_fact (G.ZFirst id) a fuel = + set_head_fact (G.ZFirst id _) a fuel = case check_maybe fuel $ br_first rewrites a id of - Nothing -> do { my_trace "set_head_fact" (ppr id) $ + Nothing -> do { my_trace "set_head_fact" (ppr id <+> text "=" <+> + ppr (bt_first_in transfers a id)) $ setFact id $ bt_first_in transfers a id ; return fuel } Just g -> do { (a, fuel) <- subsolve g a fuel @@ -839,16 +895,19 @@ backward_rew check_maybe = back rewrite start g exit_fact fuel = let Graph entry blockenv = g blocks = reverse $ G.postorder_dfs_from blockenv entry - in do { solve depth name start transfers rewrites g exit_fact fuel - ; env <- getAllFacts + in do { (FP env in_fact _ _ _, _) <- -- don't drop the entry fact! + solve depth name start transfers rewrites g exit_fact fuel + --; env <- getAllFacts ; my_trace "facts after solving" (ppr env) $ return () ; eid <- freshBlockId "temporary entry id" ; (rewritten, fuel) <- rewrite_blocks True blocks emptyBlockEnv fuel -- We can't have the fact check fail on the bogus entry, which _may_ change - ; (rewritten, fuel) <- rewrite_blocks False [Block eid entry] rewritten fuel - ; a <- getFact eid - ; return (a, lgraphToGraph (LGraph eid rewritten), fuel) - } + ; (rewritten, fuel) <- rewrite_blocks False [Block eid Nothing entry] rewritten fuel + ; my_trace "eid" (ppr eid) $ return () + ; my_trace "exit_fact" (ppr exit_fact) $ return () + ; my_trace "in_fact" (ppr in_fact) $ return () + ; return (in_fact, lgraphToGraph (LGraph eid 0 rewritten), fuel) + } -- Remember: the entry fact computed by @solve@ accounts for rewriting don't_rewrite facts g exit_fact fuel = do { (fp, _) <- solve depth name facts transfers rewrites g exit_fact fuel @@ -901,12 +960,13 @@ backward_rew check_maybe = back return () ; (a, g, fuel) <- inner_rew g a fuel ; let Graph t newblocks = G.splice_tail g tail - ; propagate check fuel h a t (newblocks `plusUFM` rewritten) } - propagate check fuel (ZFirst id) a tail rewritten = + ; my_trace "propagating facts" (ppr a) $ + propagate check fuel h a t (newblocks `plusUFM` rewritten) } + propagate check fuel (ZFirst id off) a tail rewritten = case maybeRewriteWithFuel fuel $ br_first rewrites a id of Nothing -> do { if check then checkFactMatch id $ bt_first_in transfers a id else return () - ; return (insertBlock (Block id tail) rewritten, fuel) } + ; return (insertBlock (Block id off tail) rewritten, fuel) } Just g -> do { markGraphRewritten ; g <- areturn g @@ -915,7 +975,7 @@ backward_rew check_maybe = back ; (a, g, fuel) <- inner_rew g a fuel ; if check then checkFactMatch id a else return () ; let Graph t newblocks = G.splice_tail g tail - ; let r = insertBlock (Block id t) (newblocks `plusUFM` rewritten) + ; let r = insertBlock (Block id off t) (newblocks `plusUFM` rewritten) ; return (r, fuel) } in fixed_pt_and_fuel @@ -978,13 +1038,14 @@ run dir name do_block blocks b = unchanged depth = my_nest depth (text "facts for" <+> graphId <+> text "are unchanged") - graphId = case blocks of { Block id _ : _ -> ppr id ; [] -> text "<empty>" } + graphId = case blocks of { Block id _ _ : _ -> ppr id ; [] -> text "<empty>" } show_blocks = my_trace "Blocks:" (vcat (map pprBlock blocks)) - pprBlock (Block id t) = nest 2 (pprFact (id, t)) + pprBlock (Block id off t) = nest 2 (pprFact' (id, off, t)) pprFacts depth n env = my_nest depth (text "facts for iteration" <+> pp_i n <+> text "are:" $$ (nest 2 $ vcat $ map pprFact $ ufmToList env)) - pprFact (id, a) = hang (ppr id <> colon) 4 (ppr a) + pprFact (id, a) = hang (ppr id <> colon) 4 (ppr a) + pprFact' (id, off, a) = hang (ppr id <> parens (ppr off) <> colon) 4 (ppr a) f4sep :: [SDoc] -> SDoc |