diff options
author | Andreas Klebinger <klebinger.andreas@gmx.at> | 2021-05-10 22:06:51 +0200 |
---|---|---|
committer | Matthew Pickering <matthewtpickering@gmail.com> | 2022-02-12 13:59:41 +0000 |
commit | 0e93023eef174262310737004d398bc7a606939a (patch) | |
tree | 091a34f78b7911d8b38f414ff8eab90796581c47 /compiler | |
parent | 90a26f8b0dd99129d3fd7fe28127cb69abd46328 (diff) | |
download | haskell-0e93023eef174262310737004d398bc7a606939a.tar.gz |
Tag inference work.
This does three major things:
* Enforce the invariant that all strict fields must contain tagged
pointers.
* Try to predict the tag on bindings in order to omit tag checks.
* Allows functions to pass arguments unlifted (call-by-value).
The former is "simply" achieved by wrapping any constructor allocations with
a case which will evaluate the respective strict bindings.
The prediction is done by a new data flow analysis based on the STG
representation of a program. This also helps us to avoid generating
redudant cases for the above invariant.
StrictWorkers are created by W/W directly and SpecConstr indirectly.
See the Note [Strict Worker Ids]
Other minor changes:
* Add StgUtil module containing a few functions needed by, but
not specific to the tag analysis.
-------------------------
Metric Decrease:
T12545
T18698b
T18140
T18923
LargeRecord
Metric Increase:
LargeRecord
ManyAlternatives
ManyConstructors
T10421
T12425
T12707
T13035
T13056
T13253
T13253-spj
T13379
T15164
T18282
T18304
T18698a
T1969
T20049
T3294
T4801
T5321FD
T5321Fun
T783
T9233
T9675
T9961
T19695
WWRec
-------------------------
Diffstat (limited to 'compiler')
63 files changed, 2909 insertions, 278 deletions
diff --git a/compiler/GHC/Cmm/CLabel.hs b/compiler/GHC/Cmm/CLabel.hs index 3acace8be2..dd7e1f14f5 100644 --- a/compiler/GHC/Cmm/CLabel.hs +++ b/compiler/GHC/Cmm/CLabel.hs @@ -25,6 +25,7 @@ module GHC.Cmm.CLabel ( mkInfoTableLabel, mkEntryLabel, mkRednCountsLabel, + mkTagHitLabel, mkConInfoTableLabel, mkApEntryLabel, mkApInfoTableLabel, @@ -301,7 +302,7 @@ isIdLabel _ = False -- Used in SRT analysis. See Note [Ticky labels in SRT analysis] in -- GHC.Cmm.Info.Build. isTickyLabel :: CLabel -> Bool -isTickyLabel (IdLabel _ _ RednCounts) = True +isTickyLabel (IdLabel _ _ IdTickyInfo{}) = True isTickyLabel _ = False -- | Indicate if "GHC.CmmToC" has to generate an extern declaration for the @@ -447,6 +448,26 @@ pprDebugCLabel platform lbl = pprCLabel platform AsmStyle lbl <> parens extra _ -> text "other CLabel" +-- Dynamic ticky info for the id. +data TickyIdInfo + = TickyRednCounts -- ^ Used for dynamic allocations + | TickyInferedTag !Unique -- ^ Used to track dynamic hits of tag inference. + deriving (Eq,Show) + +instance Outputable TickyIdInfo where + ppr TickyRednCounts = text "ct_rdn" + ppr (TickyInferedTag unique) = text "ct_tag[" <> ppr unique <> char ']' + +-- | Don't depend on this if you need determinism. +-- No determinism in the ncg backend, so we use the unique for Ord. +-- Even if it pains me slightly. +instance Ord TickyIdInfo where + compare TickyRednCounts TickyRednCounts = EQ + compare TickyRednCounts _ = LT + compare _ TickyRednCounts = GT + compare (TickyInferedTag unique1) (TickyInferedTag unique2) = + nonDetCmpUnique unique1 unique2 + data IdLabelInfo = Closure -- ^ Label for closure @@ -457,7 +478,7 @@ data IdLabelInfo | LocalInfoTable -- ^ Like InfoTable but not externally visible | LocalEntry -- ^ Like Entry but not externally visible - | RednCounts -- ^ Label of place to keep Ticky-ticky info for this Id + | IdTickyInfo !TickyIdInfo -- ^ Label of place to keep Ticky-ticky hit info for this Id | ConEntry ConInfoTableLocation -- ^ Constructor entry point, when `-fdistinct-info-tables` is enabled then @@ -504,12 +525,12 @@ instance Outputable IdLabelInfo where ppr LocalInfoTable = text "LocalInfoTable" ppr LocalEntry = text "LocalEntry" - ppr RednCounts = text "RednCounts" ppr (ConEntry mn) = text "ConEntry" <+> ppr mn ppr (ConInfoTable mn) = text "ConInfoTable" <+> ppr mn ppr ClosureTable = text "ClosureTable" ppr Bytes = text "Bytes" ppr BlockInfoTable = text "BlockInfoTable" + ppr (IdTickyInfo info) = text "IdTickyInfo" <+> ppr info data RtsLabelInfo @@ -559,8 +580,12 @@ data DynamicLinkerLabelInfo mkSRTLabel :: Unique -> CLabel mkSRTLabel u = SRTLabel u +-- See Note [ticky for LNE] mkRednCountsLabel :: Name -> CLabel -mkRednCountsLabel name = IdLabel name NoCafRefs RednCounts -- Note [ticky for LNE] +mkRednCountsLabel name = IdLabel name NoCafRefs (IdTickyInfo TickyRednCounts) + +mkTagHitLabel :: Name -> Unique -> CLabel +mkTagHitLabel name !uniq = IdLabel name NoCafRefs (IdTickyInfo (TickyInferedTag uniq)) -- These have local & (possibly) external variants: mkLocalClosureLabel :: Name -> CafInfo -> CLabel @@ -892,7 +917,7 @@ hasIdLabelInfo _ = Nothing -- ----------------------------------------------------------------------------- -- Does a CLabel's referent itself refer to a CAF? hasCAF :: CLabel -> Bool -hasCAF (IdLabel _ _ RednCounts) = False -- Note [ticky for LNE] +hasCAF (IdLabel _ _ (IdTickyInfo TickyRednCounts)) = False -- See Note [ticky for LNE] hasCAF (IdLabel _ MayHaveCafRefs _) = True hasCAF _ = False @@ -1146,7 +1171,7 @@ idInfoLabelType info = Closure -> GcPtrLabel ConInfoTable {} -> DataLabel ClosureTable -> DataLabel - RednCounts -> DataLabel + IdTickyInfo{} -> DataLabel Bytes -> DataLabel _ -> CodeLabel @@ -1503,7 +1528,10 @@ ppIdFlavor x = pp_cSEP <> case x of Entry -> text "entry" LocalEntry -> text "entry" Slow -> text "slow" - RednCounts -> text "ct" + IdTickyInfo TickyRednCounts + -> text "ct" + IdTickyInfo (TickyInferedTag unique) + -> text "ct_inf_tag" <> char '_' <> ppr unique ConEntry loc -> case loc of DefinitionSite -> text "con_entry" diff --git a/compiler/GHC/Cmm/Liveness.hs b/compiler/GHC/Cmm/Liveness.hs index f047ea4367..a1526be099 100644 --- a/compiler/GHC/Cmm/Liveness.hs +++ b/compiler/GHC/Cmm/Liveness.hs @@ -71,6 +71,8 @@ cmmGlobalLiveness platform graph = analyzeCmmBwd liveLattice (xferLive platform) graph mapEmpty -- | On entry to the procedure, there had better not be any LocalReg's live-in. +-- If you see this error it most likely means you are trying to use a variable +-- without it being defined in the given scope. noLiveOnEntry :: BlockId -> CmmLive LocalReg -> a -> a noLiveOnEntry bid in_fact x = if nullRegSet in_fact then x diff --git a/compiler/GHC/Cmm/Utils.hs b/compiler/GHC/Cmm/Utils.hs index 9e9566b334..2060be5bda 100644 --- a/compiler/GHC/Cmm/Utils.hs +++ b/compiler/GHC/Cmm/Utils.hs @@ -48,7 +48,7 @@ module GHC.Cmm.Utils( currentTSOExpr, currentNurseryExpr, cccsExpr, -- Tagging - cmmTagMask, cmmPointerMask, cmmUntag, cmmIsTagged, + cmmTagMask, cmmPointerMask, cmmUntag, cmmIsTagged, cmmIsNotTagged, cmmConstrTag1, mAX_PTR_TAG, tAG_MASK, -- Overlap and usage @@ -447,13 +447,14 @@ cmmPointerMask platform = mkIntExpr platform (complement (tAG_MASK platform)) -- Used to untag a possibly tagged pointer -- A static label need not be untagged -cmmUntag, cmmIsTagged, cmmConstrTag1 :: Platform -> CmmExpr -> CmmExpr +cmmUntag, cmmIsTagged, cmmIsNotTagged, cmmConstrTag1 :: Platform -> CmmExpr -> CmmExpr cmmUntag _ e@(CmmLit (CmmLabel _)) = e -- Default case cmmUntag platform e = cmmAndWord platform e (cmmPointerMask platform) --- Test if a closure pointer is untagged +-- Test if a closure pointer is untagged/tagged. cmmIsTagged platform e = cmmNeWord platform (cmmAndWord platform e (cmmTagMask platform)) (zeroExpr platform) +cmmIsNotTagged platform e = cmmEqWord platform (cmmAndWord platform e (cmmTagMask platform)) (zeroExpr platform) -- Get constructor tag, but one based. cmmConstrTag1 platform e = cmmAndWord platform e (cmmTagMask platform) diff --git a/compiler/GHC/Core/DataCon.hs b/compiler/GHC/Core/DataCon.hs index feca5d3754..07419b9c5c 100644 --- a/compiler/GHC/Core/DataCon.hs +++ b/compiler/GHC/Core/DataCon.hs @@ -58,7 +58,7 @@ module GHC.Core.DataCon ( isUnboxedSumDataCon, isVanillaDataCon, isNewDataCon, classDataCon, dataConCannotMatch, dataConUserTyVarsArePermuted, - isBanged, isMarkedStrict, eqHsBang, isSrcStrict, isSrcUnpacked, + isBanged, isMarkedStrict, cbvFromStrictMark, eqHsBang, isSrcStrict, isSrcUnpacked, specialPromotedDc, -- ** Promotion related functions @@ -745,9 +745,10 @@ data SrcUnpackedness = SrcUnpack -- ^ {-# UNPACK #-} specified ------------------------- --- StrictnessMark is internal only, used to indicate strictness +-- StrictnessMark is used to indicate strictness -- of the DataCon *worker* fields data StrictnessMark = MarkedStrict | NotMarkedStrict + deriving Eq -- | An 'EqSpec' is a tyvar/type pair representing an equality made in -- rejigging a GADT constructor @@ -944,6 +945,16 @@ instance Outputable StrictnessMark where ppr MarkedStrict = text "!" ppr NotMarkedStrict = empty +instance Binary StrictnessMark where + put_ bh NotMarkedStrict = putByte bh 0 + put_ bh MarkedStrict = putByte bh 1 + get bh = + do h <- getByte bh + case h of + 0 -> return NotMarkedStrict + 1 -> return MarkedStrict + _ -> panic "Invalid binary format" + instance Binary SrcStrictness where put_ bh SrcLazy = putByte bh 0 put_ bh SrcStrict = putByte bh 1 @@ -994,6 +1005,11 @@ isMarkedStrict :: StrictnessMark -> Bool isMarkedStrict NotMarkedStrict = False isMarkedStrict _ = True -- All others are strict +cbvFromStrictMark :: StrictnessMark -> CbvMark +cbvFromStrictMark NotMarkedStrict = NotMarkedCbv +cbvFromStrictMark MarkedStrict = MarkedCbv + + {- ********************************************************************* * * \subsection{Construction} diff --git a/compiler/GHC/Core/Opt/CprAnal.hs b/compiler/GHC/Core/Opt/CprAnal.hs index 65468cd037..64551f9498 100644 --- a/compiler/GHC/Core/Opt/CprAnal.hs +++ b/compiler/GHC/Core/Opt/CprAnal.hs @@ -179,7 +179,7 @@ cprAnalProgram logger fam_envs binds = do let env = emptyAnalEnv fam_envs let binds_plus_cpr = snd $ mapAccumL cprAnalTopBind env binds putDumpFileMaybe logger Opt_D_dump_cpr_signatures "Cpr signatures" FormatText $ - dumpIdInfoOfProgram (ppr . cprSigInfo) binds_plus_cpr + dumpIdInfoOfProgram False (ppr . cprSigInfo) binds_plus_cpr -- See Note [Stamp out space leaks in demand analysis] in GHC.Core.Opt.DmdAnal seqBinds binds_plus_cpr `seq` return binds_plus_cpr diff --git a/compiler/GHC/Core/Opt/DmdAnal.hs b/compiler/GHC/Core/Opt/DmdAnal.hs index e6b404ff61..cd58ff89d7 100644 --- a/compiler/GHC/Core/Opt/DmdAnal.hs +++ b/compiler/GHC/Core/Opt/DmdAnal.hs @@ -1466,7 +1466,7 @@ finaliseArgBoxities env fn arity rhs go_arg :: Budgets -> (Type,StrictnessMark,Demand) -> (Budgets, Demand) go_arg bg@(MkB bg_top bg_inner) (ty, str_mark, dmd@(n :* _)) - = case wantToUnboxArg fam_envs ty dmd of + = case wantToUnboxArg False fam_envs ty dmd of DropAbsent -> (bg, dmd) StopUnboxing -> (MkB (bg_top-1) bg_inner, trimBoxity dmd) @@ -1487,6 +1487,7 @@ finaliseArgBoxities env fn arity rhs = (bg_inner', dmd') | otherwise = (bg_inner, trimBoxity dmd) + Unlift -> panic "No unlifting in DmdAnal" add_demands :: [Demand] -> CoreExpr -> CoreExpr -- Attach the demands to the outer lambdas of this expression @@ -1509,7 +1510,7 @@ finaliseLetBoxity env ty dmd = go ty NotMarkedStrict dmd where go ty mark dmd@(n :* _) = - case wantToUnboxArg env ty dmd of + case wantToUnboxArg False env ty dmd of DropAbsent -> dmd StopUnboxing -> trimBoxity dmd Unbox DataConPatContext{dcpc_dc=dc, dcpc_tc_args=tc_args} dmds @@ -1520,6 +1521,7 @@ finaliseLetBoxity env ty dmd -> n :* (mkProd Unboxed $! dmds') | otherwise -> trimBoxity dmd + Unlift -> panic "No unlifting in DmdAnal" {- ********************************************************************* diff --git a/compiler/GHC/Core/Opt/Pipeline.hs b/compiler/GHC/Core/Opt/Pipeline.hs index 741552f815..276bfee45d 100644 --- a/compiler/GHC/Core/Opt/Pipeline.hs +++ b/compiler/GHC/Core/Opt/Pipeline.hs @@ -1079,6 +1079,6 @@ dmdAnal logger dflags fam_envs rules binds = do } binds_plus_dmds = dmdAnalProgram opts fam_envs rules binds Logger.putDumpFileMaybe logger Opt_D_dump_str_signatures "Strictness signatures" FormatText $ - dumpIdInfoOfProgram (ppr . zapDmdEnvSig . dmdSigInfo) binds_plus_dmds + dumpIdInfoOfProgram (hasPprDebug dflags) (ppr . zapDmdEnvSig . dmdSigInfo) binds_plus_dmds -- See Note [Stamp out space leaks in demand analysis] in GHC.Core.Opt.DmdAnal seqBinds binds_plus_dmds `seq` return binds_plus_dmds diff --git a/compiler/GHC/Core/Opt/Simplify/Monad.hs b/compiler/GHC/Core/Opt/Simplify/Monad.hs index c730a3e981..83d27f4fe5 100644 --- a/compiler/GHC/Core/Opt/Simplify/Monad.hs +++ b/compiler/GHC/Core/Opt/Simplify/Monad.hs @@ -215,6 +215,7 @@ newId :: FastString -> Mult -> Type -> SimplM Id newId fs w ty = do uniq <- getUniqueM return (mkSysLocalOrCoVar fs uniq w ty) +-- | Make a join id with given type and arity but without call-by-value annotations. newJoinId :: [Var] -> Type -> SimplM Id newJoinId bndrs body_ty = do { uniq <- getUniqueM @@ -223,7 +224,7 @@ newJoinId bndrs body_ty arity = count isId bndrs -- arity: See Note [Invariants on join points] invariant 2b, in GHC.Core join_arity = length bndrs - details = JoinId join_arity + details = JoinId join_arity Nothing id_info = vanillaIdInfo `setArityInfo` arity -- `setOccInfo` strongLoopBreaker diff --git a/compiler/GHC/Core/Opt/SpecConstr.hs b/compiler/GHC/Core/Opt/SpecConstr.hs index afd8afc5ea..14fe9bec00 100644 --- a/compiler/GHC/Core/Opt/SpecConstr.hs +++ b/compiler/GHC/Core/Opt/SpecConstr.hs @@ -656,6 +656,11 @@ containers package we have a merge function with this specialization: We give sc_s5lZ and sc_s5m0 a evaluated unfolding since they come out of strict field fields in the Bin constructor. +This is especially important since tag inference can then use this +information to adjust the calling convention of +`$wmerge0_s4UK` to enforce arguments being passed fully evaluated+tagged. +See Note [Tag Inference], Note [Strict Worker Ids] for more information on +how we can take advantage of this. ----------------------------------------------------- Stuff not yet handled @@ -1771,11 +1776,15 @@ spec_one env fn arg_bndrs body (call_pat, rule_number) -- Annotate the variables with the strictness information from -- the function (see Note [Strictness information in worker binders]) - (spec_lam_args, spec_call_args) = mkWorkerArgs fn False - spec_lam_args1 + (spec_lam_args, spec_call_args,_) = mkWorkerArgs fn False + spec_lam_args1 [] spec_body_ty -- mkWorkerArgs: usual w/w hack to avoid generating -- a spec_rhs of unlifted type and no args. + -- Unlike W/W we don't turn functions into strict workers + -- immediately here instead letting tidy handle this. + -- For this reason we can ignore the cbv marks. + -- See Note [Strict Worker Ids]. See Note [Tag Inference]. spec_id = mkLocalId spec_name Many (mkLamTypes spec_lam_args spec_body_ty) diff --git a/compiler/GHC/Core/Opt/WorkWrap.hs b/compiler/GHC/Core/Opt/WorkWrap.hs index 3e4770a997..f07e8dde37 100644 --- a/compiler/GHC/Core/Opt/WorkWrap.hs +++ b/compiler/GHC/Core/Opt/WorkWrap.hs @@ -763,11 +763,12 @@ splitFun ww_opts fn_id rhs mkWWBindPair :: WwOpts -> Id -> IdInfo -> [Var] -> CoreExpr -> Unique -> Divergence - -> ([Demand], JoinArity, Id -> CoreExpr, Expr CoreBndr -> CoreExpr) + -> ([Demand],[CbvMark], JoinArity, Id -> CoreExpr, Expr CoreBndr -> CoreExpr) -> [(Id, CoreExpr)] mkWWBindPair ww_opts fn_id fn_info fn_args fn_body work_uniq div - (work_demands, join_arity, wrap_fn, work_fn) - = [(work_id, work_rhs), (wrap_id, wrap_rhs)] + (work_demands, cbv_marks :: [CbvMark], join_arity, wrap_fn, work_fn) + = -- pprTrace "mkWWBindPair" (ppr fn_id <+> ppr wrap_id <+> ppr work_id $$ ppr wrap_rhs) $ + [(work_id, work_rhs), (wrap_id, wrap_rhs)] -- Worker first, because wrapper mentions it where arity = arityInfo fn_info @@ -819,9 +820,13 @@ mkWWBindPair ww_opts fn_id fn_info fn_args fn_body work_uniq div -- Set the arity so that the Core Lint check that the -- arity is consistent with the demand type goes -- through + + `setIdCbvMarks` cbv_marks + `asJoinId_maybe` work_join_arity + -- `setIdThing` (undefined cbv_marks) - work_arity = length work_demands + work_arity = length work_demands :: Int -- See Note [Demand on the worker] single_call = saturatedByOneShots arity (demandInfo fn_info) @@ -1014,7 +1019,8 @@ splitThunk :: WwOpts -> RecFlag -> Var -> Expr Var -> UniqSM [(Var, Expr Var)] splitThunk ww_opts is_rec x rhs = assert (not (isJoinId x)) $ do { let x' = localiseId x -- See comment above - ; (useful,_, wrap_fn, fn_arg) <- mkWWstr_one ww_opts x' + ; (useful,_args, wrap_fn, fn_arg) + <- mkWWstr_one ww_opts x' NotMarkedCbv ; let res = [ (x, Let (NonRec x' rhs) (wrap_fn fn_arg)) ] ; if useful then assertPpr (isNonRec is_rec) (ppr x) -- The thunk must be non-recursive return res diff --git a/compiler/GHC/Core/Opt/WorkWrap/Utils.hs b/compiler/GHC/Core/Opt/WorkWrap/Utils.hs index 1b2d3ca1ba..8936ccdfe5 100644 --- a/compiler/GHC/Core/Opt/WorkWrap/Utils.hs +++ b/compiler/GHC/Core/Opt/WorkWrap/Utils.hs @@ -6,6 +6,7 @@ A library for the ``worker\/wrapper'' back-end to the strictness analyser {-# LANGUAGE ViewPatterns #-} +{-# LANGUAGE GeneralizedNewtypeDeriving #-} module GHC.Core.Opt.WorkWrap.Utils ( WwOpts(..), initWwOpts, mkWwBodies, mkWWstr, mkWWstr_one, mkWorkerArgs @@ -14,6 +15,7 @@ module GHC.Core.Opt.WorkWrap.Utils , findTypeShape, IsRecDataConResult(..), isRecDataCon , mkAbsentFiller , isWorkerSmallEnough, dubiousDataConInstArgTys + , isGoodWorker, badWorker , goodWorker ) where @@ -144,6 +146,9 @@ data WwOpts -- Used for absent argument error message , wo_module :: !Module + , wo_unlift_strict :: !Bool -- Generate workers even if the only effect is some args + -- get passed unlifted. + -- See Note [WW for calling convention] } initWwOpts :: Module -> DynFlags -> FamInstEnvs -> WwOpts @@ -153,10 +158,12 @@ initWwOpts this_mod dflags fam_envs = MkWwOpts , wo_cpr_anal = gopt Opt_CprAnal dflags , wo_fun_to_thunk = gopt Opt_FunToThunk dflags , wo_module = this_mod + , wo_unlift_strict = gopt Opt_WorkerWrapperUnlift dflags } type WwResult = ([Demand], -- Demands for worker (value) args + [CbvMark], -- Cbv semantics for worker (value) args JoinArity, -- Number of worker (type OR value) args Id -> CoreExpr, -- Wrapper body, lacking only the worker Id CoreExpr -> CoreExpr) -- Worker body, lacking the original function rhs @@ -226,25 +233,29 @@ mkWwBodies opts fun_id arg_vars res_ty demands res_cpr zapped_arg_vars = map zap_var arg_vars (subst, cloned_arg_vars) = cloneBndrs empty_subst uniq_supply zapped_arg_vars res_ty' = GHC.Core.Subst.substTy subst res_ty + init_cbv_marks = map (const NotMarkedCbv) cloned_arg_vars - ; (useful1, work_args, wrap_fn_str, fn_args) - <- mkWWstr opts cloned_arg_vars + ; (useful1, work_args_cbv, wrap_fn_str, fn_args) + <- mkWWstr opts cloned_arg_vars init_cbv_marks + + ; let (work_args, work_marks) = unzip work_args_cbv -- Do CPR w/w. See Note [Always do CPR w/w] ; (useful2, wrap_fn_cpr, work_fn_cpr, cpr_res_ty) <- mkWWcpr_entry opts res_ty' res_cpr - ; let (work_lam_args, work_call_args) = mkWorkerArgs fun_id (wo_fun_to_thunk opts) - work_args cpr_res_ty + ; let (work_lam_args, work_call_args, work_call_cbv) = mkWorkerArgs fun_id (wo_fun_to_thunk opts) + work_args work_marks cpr_res_ty + call_work work_fn = mkVarApps (Var work_fn) work_call_args call_rhs fn_rhs = mkAppsBeta fn_rhs fn_args -- See Note [Join points and beta-redexes] wrapper_body = mkLams cloned_arg_vars . wrap_fn_cpr . wrap_fn_str . call_work worker_body = mkLams work_lam_args . work_fn_cpr . call_rhs - worker_args_dmds = [idDemandInfo v | v <- work_call_args, isId v] + (worker_args_dmds, work_val_cbvs)= unzip [(idDemandInfo v,cbv) | (v,cbv) <- zipEqual "mkWwBodies" work_call_args work_call_cbv, isId v] ; if ((useful1 && not only_one_void_argument) || useful2) - then return (Just (worker_args_dmds, length work_call_args, + then return (Just (worker_args_dmds, work_val_cbvs, length work_call_args, wrapper_body, worker_body)) else return Nothing } @@ -359,20 +370,25 @@ add a void argument. E.g. We use the state-token type which generates no code. -} +-- | Prevent a function from becoming a thunk by adding a void argument if +-- required. mkWorkerArgs :: Id -- The wrapper Id - -> Bool + -> Bool -- Allow fun->thunk conversion. -> [Var] + -> [CbvMark] -> Type -- Type of body -> ([Var], -- Lambda bound args - [Var]) -- Args at call site -mkWorkerArgs wrap_id fun_to_thunk args res_ty + [Var], -- Args at call site + [CbvMark] -- cbv semantics for the worker args. + ) +mkWorkerArgs wrap_id fun_to_thunk args cbv_marks res_ty | not (isJoinId wrap_id) -- Join Ids never need an extra arg , not (any isId args) -- No existing value lambdas , needs_a_value_lambda -- and we need to add one - = (args ++ [voidArgId], args ++ [voidPrimId]) + = (args ++ [voidArgId], args ++ [voidPrimId], cbv_marks ++ [NotMarkedCbv]) | otherwise - = (args, args) + = (args, args, cbv_marks) where -- If fun_to_thunk is False we always keep at least one value -- argument: see Note [Protecting the last value argument] @@ -512,17 +528,38 @@ data UnboxingDecision s -- instantiation with 'dataConRepInstPat'. -- The @[s]@ carries the bits of information with which we can continue -- unboxing, e.g. @s@ will be 'Demand' or 'Cpr'. + | Unlift + -- ^ The argument can't be unboxed, but we want it to be passed evaluated to the worker. + +-- Do we want to create workers just for unlifting? +wwForUnlifting :: WwOpts -> Bool +wwForUnlifting !opts + -- Always unlift if possible + | wo_unlift_strict opts = goodWorker + -- Don't unlift it would cause additional W/W splits. + | otherwise = badWorker + +badWorker :: Bool +badWorker = False + +goodWorker :: Bool +goodWorker = True + +isGoodWorker :: Bool -> Bool +isGoodWorker = id + -- | Unwraps the 'Boxity' decision encoded in the given 'SubDemand' and returns -- a 'DataConPatContext' as well the nested demands on fields of the 'DataCon' -- to unbox. wantToUnboxArg - :: FamInstEnvs + :: Bool -- ^ Consider unlifting + -> FamInstEnvs -> Type -- ^ Type of the argument -> Demand -- ^ How the arg was used -> UnboxingDecision Demand -- See Note [Which types are unboxed?] -wantToUnboxArg fam_envs ty (n :* sd) +wantToUnboxArg do_unlifting fam_envs ty dmd@(n :* sd) | isAbs n = DropAbsent @@ -530,10 +567,17 @@ wantToUnboxArg fam_envs ty (n :* sd) , Just dc <- tyConSingleAlgDataCon_maybe tc , let arity = dataConRepArity dc , Just (Unboxed, ds) <- viewProd arity sd -- See Note [Boxity analysis] - -- NB: No strictness or evaluatedness checks here. + -- NB: No strictness or evaluatedness checks for unboxing here. -- That is done by 'finaliseArgBoxities'! = Unbox (DataConPatContext dc tc_args co) ds + -- See Note [Strict Worker Ids] + | do_unlifting + , isStrUsedDmd dmd + , not (isFunTy ty) + , not (isUnliftedType ty) -- Already unlifted! + = Unlift + | otherwise = StopUnboxing @@ -637,6 +681,65 @@ other cases where something went avoidably wrong. This warning also triggers for the stream fusion library within `text`. We can'easily W/W constructed results like `Stream` because we have no simple way to express existential types in the worker's type signature. + +Note [WW for calling convention] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +If we know a function f will always evaluate a particular argument +we might decide that it should rather get evaluated by the caller. +We call this "unlifting" the argument. +Sometimes the caller knows that the argument is already evaluated, +so we won't generate any code to enter/evaluate the argument. +This evaluation avoidance can be quite beneficial. +Especially for recursive functions who pass the same lifted argument +along on each iteration or walk over strict data structures. + +One way to achieve this is to do a W/W split, where the wrapper does +the evaluation, and the worker can treat its arguments as unlifted. +The wrapper is small and will be inlined at almost all call sites and +the evaluation code in the wrapper can then cancel out with evaluation +done by the calling context if the argument is evaluated there. +Same idea as W/W to avoid allocation really, just for a different kind +of work. + +Performing W/W might not always be a win. In particular it's easy to break +(badly written, but common) rule frameworks by doing additional W/W splits. +See #20364 for a more detailed explaination. + +Hence we have the following strategies with different trade-offs: +A) Never do W/W *just* for unlifting of arguments. + + Very conservative - doesn't break any rules + - Lot's of performance left on the table +B) Do W/W on just about anything where it might be + beneficial. + + Exploits pretty much every oppertunity for unlifting. + - A bit of compile time/code size cost for all the wrappers. + - Can break rules which would otherwise fire. See #20364. +C) Unlift *any* (non-boot exported) functions arguments if they are strict. + That is instead of creating a Worker with the new calling convention we + change the calling convention of the binding itself. + + Exploits every opportunity for unlifting. + + Maybe less bad interactions with rules. + - Requires tracking of boot-exported definitions. + - Requires either: + ~ Eta-expansion at *all* call sites in order to generate + an impedance matcher function. Leading to massive code bloat. + Essentially we end up creating a imprompto wrapper function + wherever we wouldn't inline the wrapper with a W/W approach. + ~ There is the option of achieving this without eta-expansion if we instead expand + the partial application code to check for demands on the calling convention and + for it to evaluate the arguments. The main downsides there would be the complexity + of the implementation and that it carries a certain overhead even for functions who + don't take advantage of this functionality. I haven't tried this approach because it's + not trivial to implement and doing W/W splits seems to work well enough. + +Currently we use the first approach A) by default, with a flag that allows users to fall back to the +more aggressive approach B). +I also tried the third approach C) using eta-expansion at call sites to avoid modifying the PAP-handling +code which wasn't fruitful. See https://gitlab.haskell.org/ghc/ghc/-/merge_requests/5614#note_389903. +We could still try to do C) in the future by having PAP calls which will evaluate the required arguments +before calling the partially applied function. But this would be neither a small nor simple change so we +stick with A) and a flag for B) for now. +See also Note [Tag Inference] and Note [Strict Worker Ids] -} {- @@ -650,26 +753,29 @@ way to express existential types in the worker's type signature. mkWWstr :: WwOpts -> [Var] -- Wrapper args; have their demand info on them -- *Includes type variables* - -> UniqSM (Bool, -- Is this useful - [Var], -- Worker args + -> [CbvMark] -- cbv info for arguments + -> UniqSM (Bool, -- Will this result in a useful worker + [(Var,CbvMark)], -- Worker args/their call-by-value semantics. CoreExpr -> CoreExpr, -- Wrapper body, lacking the worker call -- and without its lambdas -- This fn adds the unboxing [CoreExpr]) -- Reboxed args for the call to the -- original RHS. Corresponds one-to-one -- with the wrapper arg vars -mkWWstr opts args - = go args +mkWWstr opts args cbv_info + = go args cbv_info where - go_one arg = mkWWstr_one opts arg + go_one arg cbv = mkWWstr_one opts arg cbv - go [] = return (False, [], nop_fn, []) - go (arg : args) = do { (useful1, args1, wrap_fn1, wrap_arg) <- go_one arg - ; (useful2, args2, wrap_fn2, wrap_args) <- go args + go [] _ = return (badWorker, [], nop_fn, []) + go (arg : args) (cbv:cbvs) + = do { (useful1, args1, wrap_fn1, wrap_arg) <- go_one arg cbv + ; (useful2, args2, wrap_fn2, wrap_args) <- go args cbvs ; return ( useful1 || useful2 , args1 ++ args2 , wrap_fn1 . wrap_fn2 , wrap_arg:wrap_args ) } + go _ _ = panic "mkWWstr: Impossible - cbv/arg length missmatch" ---------------------- -- mkWWstr_one wrap_var = (useful, work_args, wrap_fn, wrap_arg) @@ -678,19 +784,28 @@ mkWWstr opts args -- * wrap_arg assumes work_args are in scope, and builds a ConApp that -- reconstructs the RHS of wrap_var that we pass to the original RHS -- See Note [Worker/wrapper for Strictness and Absence] -mkWWstr_one :: WwOpts -> Var -> UniqSM (Bool, [Var], CoreExpr -> CoreExpr, CoreExpr) -mkWWstr_one opts arg = - case wantToUnboxArg fam_envs arg_ty arg_dmd of +mkWWstr_one :: WwOpts + -> Var + -> CbvMark + -> UniqSM (Bool, [(Var,CbvMark)], CoreExpr -> CoreExpr, CoreExpr) +mkWWstr_one opts arg marked_cbv = + case wantToUnboxArg True fam_envs arg_ty arg_dmd of _ | isTyVar arg -> do_nothing DropAbsent | Just absent_filler <- mkAbsentFiller opts arg - -- Absent case. We can't always handle absence for arbitrary + -- Absent case. Dropt the argument from the worker. + -- We can't always handle absence for arbitrary -- unlifted types, so we need to choose just the cases we can -- (that's what mkAbsentFiller does) - -> return (True, [], nop_fn, absent_filler) + -> return (goodWorker, [], nop_fn, absent_filler) + + Unbox dcpc ds -> unbox_one_arg opts arg ds dcpc marked_cbv - Unbox dcpc ds -> unbox_one_arg opts arg ds dcpc + Unlift -> return ( wwForUnlifting opts + , [(setIdUnfolding arg evaldUnfolding, MarkedCbv)] + , nop_fn + , varToCoreExpr arg) _ -> do_nothing -- Other cases, like StopUnboxing @@ -698,27 +813,44 @@ mkWWstr_one opts arg = fam_envs = wo_fam_envs opts arg_ty = idType arg arg_dmd = idDemandInfo arg - do_nothing = return (False, [arg], nop_fn, varToCoreExpr arg) + -- Type args don't get cbv marks + arg_cbv = if isTyVar arg then NotMarkedCbv else marked_cbv + do_nothing = return (badWorker, [(arg,arg_cbv)], nop_fn, varToCoreExpr arg) unbox_one_arg :: WwOpts -> Var -> [Demand] -> DataConPatContext - -> UniqSM (Bool, [Var], CoreExpr -> CoreExpr, CoreExpr) + -> CbvMark + -> UniqSM (Bool, [(Var,CbvMark)], CoreExpr -> CoreExpr, CoreExpr) unbox_one_arg opts arg_var ds DataConPatContext { dcpc_dc = dc, dcpc_tc_args = tc_args , dcpc_co = co } + _marked_cbv = do { pat_bndrs_uniqs <- getUniquesM ; let ex_name_fss = map getOccFS $ dataConExTyCoVars dc + -- Create new arguments we get when unboxing dc (ex_tvs', arg_ids) = dataConRepFSInstPat (ex_name_fss ++ repeat ww_prefix) pat_bndrs_uniqs (idMult arg_var) dc tc_args + -- Apply str info to new args. arg_ids' = zipWithEqual "unbox_one_arg" setIdDemandInfo arg_ids ds unbox_fn = mkUnpackCase (Var arg_var) co (idMult arg_var) dc (ex_tvs' ++ arg_ids') - ; (_, worker_args, wrap_fn, wrap_args) <- mkWWstr opts (ex_tvs' ++ arg_ids') + -- Mark arguments coming out of strict fields as evaluated and give them cbv semantics. See Note [Strict Worker Ids] + cbv_arg_marks = zipWithEqual "unbox_one_arg" bangToMark (dataConRepStrictness dc) arg_ids' + unf_args = zipWith setEvald arg_ids' cbv_arg_marks + cbv_marks = (map (const NotMarkedCbv) ex_tvs') ++ cbv_arg_marks + ; (_sub_args_quality, worker_args, wrap_fn, wrap_args) <- mkWWstr opts (ex_tvs' ++ unf_args) cbv_marks ; let wrap_arg = mkConApp dc (map Type tc_args ++ wrap_args) `mkCast` mkSymCo co - ; return (True, worker_args, unbox_fn . wrap_fn, wrap_arg) } + ; return (goodWorker, worker_args, unbox_fn . wrap_fn, wrap_arg) } -- Don't pass the arg, rebox instead + where bangToMark :: StrictnessMark -> Id -> CbvMark + bangToMark NotMarkedStrict _ = NotMarkedCbv + bangToMark MarkedStrict v + | isUnliftedType (idType v) = NotMarkedCbv + | otherwise = MarkedCbv + setEvald var NotMarkedCbv = var + setEvald var MarkedCbv = setIdUnfolding var evaldUnfolding -- | Tries to find a suitable absent filler to bind the given absent identifier -- to. See Note [Absent fillers]. @@ -795,7 +927,7 @@ function is worthy for splitting: g c p = case p of (a,b) -> $gw c a b $gw c a b = if c then a else b -2a But do /not/ split if Boxity Analysis said "Boxed". +2a But do /not/ unbox if Boxity Analysis said "Boxed". In this case, 'wantToUnboxArg' returns 'StopUnboxing'. Otherwise we risk decomposing and reboxing a massive tuple which is barely used. Example: @@ -809,6 +941,11 @@ function is worthy for splitting: Imagine that it had millions of fields. This actually happened in GHC itself where the tuple was DynFlags +2b But if e.g. a large tuple or product type is always demanded we might + decide to "unlift" it. That is tighten the calling convention for that + argument to require it to be passed as a pointer to the value itself. + See Note [WW for calling convention]. + 3. In all other cases (e.g., lazy, used demand and not eval'd), 'finaliseArgBoxities' will have cleared the Boxity flag to 'Boxed' (see Note [Finalising boxity for demand signatures] in GHC.Core.Opt.DmdAnal) @@ -1163,14 +1300,14 @@ mkWWcpr_entry :: WwOpts -> Type -- function body -> Cpr -- CPR analysis results - -> UniqSM (Bool, -- Is w/w'ing useful? + -> UniqSM (Bool, -- Is w/w'ing useful? CoreExpr -> CoreExpr, -- New wrapper. 'nop_fn' if not useful CoreExpr -> CoreExpr, -- New worker. 'nop_fn' if not useful Type) -- Type of worker's body. -- Just the input body_ty if not useful -- ^ Entrypoint to CPR W/W. See Note [Worker/wrapper for CPR] for an overview. mkWWcpr_entry opts body_ty body_cpr - | not (wo_cpr_anal opts) = return (False, nop_fn, nop_fn, body_ty) + | not (wo_cpr_anal opts) = return (badWorker, nop_fn, nop_fn, body_ty) | otherwise = do -- Part (1) res_bndr <- mk_res_bndr body_ty @@ -1188,8 +1325,8 @@ mkWWcpr_entry opts body_ty body_cpr work_fn body = bind_res_bndr body (work_unpack_res transit_tup) -- 1 2 3 work_body_ty = exprType transit_tup return $ if not useful - then (False, nop_fn, nop_fn, body_ty) - else (True, wrap_fn, work_fn, work_body_ty) + then (badWorker, nop_fn, nop_fn, body_ty) + else (goodWorker, wrap_fn, work_fn, work_body_ty) -- | Part (1) of Note [Worker/wrapper for CPR]. mk_res_bndr :: Type -> UniqSM Id @@ -1212,7 +1349,7 @@ mkWWcpr :: WwOpts -> [Id] -> [Cpr] -> UniqSM CprWwResultMany mkWWcpr _opts vars [] = -- special case: No CPRs means all top (for example from FlatConCpr), -- hence stop WW. - return (False, toOL vars, map varToCoreExpr vars, nop_fn) + return (badWorker, toOL vars, map varToCoreExpr vars, nop_fn) mkWWcpr opts vars cprs = do -- No existentials in 'vars'. 'wantToUnboxResult' should have checked that. massertPpr (not (any isTyVar vars)) (ppr vars $$ ppr cprs) @@ -1231,7 +1368,7 @@ mkWWcpr_one opts res_bndr cpr , Unbox dcpc arg_cprs <- wantToUnboxResult (wo_fam_envs opts) (idType res_bndr) cpr = unbox_one_result opts res_bndr arg_cprs dcpc | otherwise - = return (False, unitOL res_bndr, varToCoreExpr res_bndr, nop_fn) + = return (badWorker, unitOL res_bndr, varToCoreExpr res_bndr, nop_fn) unbox_one_result :: WwOpts -> Id -> [Cpr] -> DataConPatContext -> UniqSM CprWwResultOne @@ -1260,8 +1397,8 @@ unbox_one_result opts res_bndr arg_cprs -- Don't try to WW an unboxed tuple return type when there's nothing inside -- to unbox further. return $ if isUnboxedTupleDataCon dc && not nested_useful - then ( False, unitOL res_bndr, Var res_bndr, nop_fn ) - else ( True + then ( badWorker, unitOL res_bndr, Var res_bndr, nop_fn ) + else ( goodWorker , transit_vars , rebuilt_result , this_work_unbox_res . work_unbox_res diff --git a/compiler/GHC/Core/Rules.hs b/compiler/GHC/Core/Rules.hs index 2eabc1db0c..14bcd4564f 100644 --- a/compiler/GHC/Core/Rules.hs +++ b/compiler/GHC/Core/Rules.hs @@ -387,8 +387,10 @@ pprRuleBase rules = pprUFM rules $ \rss -> -- successful. lookupRule :: RuleOpts -> InScopeEnv -> (Activation -> Bool) -- When rule is active - -> Id -> [CoreExpr] - -> [CoreRule] -> Maybe (CoreRule, CoreExpr) + -> Id -- Function head + -> [CoreExpr] -- Args + -> [CoreRule] -- Rules + -> Maybe (CoreRule, CoreExpr) -- See Note [Extra args in the target] -- See comments on matchRule diff --git a/compiler/GHC/Core/Tidy.hs b/compiler/GHC/Core/Tidy.hs index aaf42eafd2..63473ca68a 100644 --- a/compiler/GHC/Core/Tidy.hs +++ b/compiler/GHC/Core/Tidy.hs @@ -10,26 +10,30 @@ The code for *top-level* bindings is in GHC.Iface.Tidy. {-# OPTIONS_GHC -Wno-incomplete-record-updates #-} module GHC.Core.Tidy ( - tidyExpr, tidyRules, tidyUnfolding + tidyExpr, tidyRules, tidyUnfolding, tidyCbvInfoTop ) where import GHC.Prelude import GHC.Core +import GHC.Core.Type + import GHC.Core.Seq ( seqUnfolding ) +import GHC.Core.Utils ( computeCbvInfo ) import GHC.Types.Id import GHC.Types.Id.Info import GHC.Types.Demand ( zapDmdEnvSig ) -import GHC.Core.Type ( tidyType, tidyVarBndr ) import GHC.Core.Coercion ( tidyCo ) import GHC.Types.Var import GHC.Types.Var.Env import GHC.Types.Unique (getUnique) import GHC.Types.Unique.FM import GHC.Types.Name hiding (tidyNameOcc) +import GHC.Types.Name.Set import GHC.Types.SrcLoc import GHC.Types.Tickish import GHC.Data.Maybe +import GHC.Utils.Misc import Data.List (mapAccumL) {- @@ -45,18 +49,67 @@ tidyBind :: TidyEnv -> (TidyEnv, CoreBind) tidyBind env (NonRec bndr rhs) - = tidyLetBndr env env bndr =: \ (env', bndr') -> - (env', NonRec bndr' (tidyExpr env' rhs)) + = -- pprTrace "tidyBindNonRec" (ppr bndr) $ + let cbv_bndr = (tidyCbvInfoLocal bndr rhs) + (env', bndr') = tidyLetBndr env env cbv_bndr + tidy_rhs = (tidyExpr env' rhs) + in (env', NonRec bndr' tidy_rhs) tidyBind env (Rec prs) - = let - (bndrs, rhss) = unzip prs - (env', bndrs') = mapAccumL (tidyLetBndr env') env bndrs + = -- pprTrace "tidyBindRec" (ppr $ map fst prs) $ + let + cbv_bndrs = map ((\(bnd,rhs) -> tidyCbvInfoLocal bnd rhs)) prs + (_bndrs, rhss) = unzip prs + (env', bndrs') = mapAccumL (tidyLetBndr env') env cbv_bndrs in map (tidyExpr env') rhss =: \ rhss' -> (env', Rec (zip bndrs' rhss')) +-- Note [Attaching CBV Marks to ids] +-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +-- Before tidy, function arguments which have a call-by-value semantics are identified +-- by having an `OtherCon[]` unfolding. During tidy, we transform this information into CBV (call-by-value) +-- marks. The marks themselves then are put onto the function id itself. +-- This means the code generator can get the full calling convention by only looking at the function +-- itself without having to inspect the RHS for potential argument unfoldings. +-- +-- The actual logic is in tidyCbvInfo and takes: +-- * The function id +-- * The functions rhs +-- And gives us back the function annotated with the marks. +-- We call it in: +-- * tidyTopPair for top level bindings +-- * tidyBind for local bindings. +-- +-- Not that we *have* to look at the untidied rhs. +-- During tidying some knot-tying occurs which can blow up +-- if we look at the types of the arguments, but here we dont: +-- we only check if the manifest lambdas have OtherCon unfoldings +-- and these remain valid post tidy. +-- +-- If the id is boot-exported we don't use a cbv calling convention via marks, +-- as the boot file won't contain them. Which means code calling boot-exported +-- ids might expect these ids to have a vanilla calling convention even if we +-- determine a different one here. +-- To be able to avoid this we pass a set of boot exported ids for this module around. +-- For non top level ids we can skip this. Local ids are never boot-exported +-- as boot files don't have unfoldings. So there this isn't a concern. +-- See also Note [Strict Worker Ids] + + +-- See Note [Attaching CBV Marks to ids] +tidyCbvInfoTop :: HasDebugCallStack => NameSet -> Id -> CoreExpr -> Id +tidyCbvInfoTop boot_exports id rhs + -- Can't change calling convention for boot exported things + | elemNameSet (idName id) boot_exports = id + | otherwise = computeCbvInfo id rhs + +-- See Note [Attaching CBV Marks to ids] +tidyCbvInfoLocal :: HasDebugCallStack => Id -> CoreExpr -> Id +tidyCbvInfoLocal id rhs + | otherwise = computeCbvInfo id rhs + ------------ Expressions -------------- tidyExpr :: TidyEnv -> CoreExpr -> CoreExpr tidyExpr env (Var v) = Var (tidyVarOcc env v) diff --git a/compiler/GHC/Core/Type.hs b/compiler/GHC/Core/Type.hs index bd3b0122d5..1cacfca468 100644 --- a/compiler/GHC/Core/Type.hs +++ b/compiler/GHC/Core/Type.hs @@ -126,6 +126,7 @@ module GHC.Core.Type ( isBoxedRuntimeRep, isLiftedLevity, isUnliftedLevity, isUnliftedType, isBoxedType, mightBeUnliftedType, isUnboxedTupleType, isUnboxedSumType, + isStateType, isAlgType, isDataFamilyAppType, isPrimitiveType, isStrictType, isLevityTy, isLevityVar, @@ -321,6 +322,8 @@ import Control.Monad ( guard ) -- -- [Primitive] Iff it is a built-in type that can't be expressed in Haskell. -- +-- [Unlifted] Anything that isn't lifted is considered unlifted. +-- -- Currently, all primitive types are unlifted, but that's not necessarily -- the case: for example, @Int@ could be primitive. -- @@ -2424,6 +2427,7 @@ buildSynTyCon name binders res_kind roles rhs isLiftedType_maybe :: HasDebugCallStack => Type -> Maybe Bool isLiftedType_maybe ty = case coreFullView (getRuntimeRep ty) of ty' | isLiftedRuntimeRep ty' -> Just True + | isUnliftedRuntimeRep ty' -> Just False TyConApp {} -> Just False -- Everything else is unlifted _ -> Nothing -- representation-polymorphic @@ -2441,6 +2445,13 @@ isUnliftedType ty = not (isLiftedType_maybe ty `orElse` pprPanic "isUnliftedType" (ppr ty <+> dcolon <+> ppr (typeKind ty))) +-- | State token type. +isStateType :: Type -> Bool +isStateType ty + = case tyConAppTyCon_maybe ty of + Just tycon -> tycon == statePrimTyCon + _ -> False + -- | Returns: -- -- * 'False' if the type is /guaranteed/ lifted or diff --git a/compiler/GHC/Core/Utils.hs b/compiler/GHC/Core/Utils.hs index 411628c261..3116b6bd04 100644 --- a/compiler/GHC/Core/Utils.hs +++ b/compiler/GHC/Core/Utils.hs @@ -56,6 +56,9 @@ module GHC.Core.Utils ( -- * Join points isJoinBind, + -- * Tag inference + computeCbvInfo, + -- * unsafeEqualityProof isUnsafeEqualityProof, @@ -93,7 +96,7 @@ import GHC.Types.Tickish import GHC.Types.Id import GHC.Types.Id.Info import GHC.Types.Unique -import GHC.Types.Basic ( Arity ) +import GHC.Types.Basic ( Arity, CbvMark(..), isMarkedCbv ) import GHC.Types.Unique.Set import GHC.Data.FastString @@ -2443,7 +2446,11 @@ tryEtaReduce bndrs body -- We always want args for join points so -- we should never eta-reduce to a trivial expression. -- See Note [Invariants on join points] in GHC.Core, and #20599 - not (isJoinId fun) + not (isJoinId fun) && + -- And the function doesn't require visible arguments as part of + -- it's calling convention. See Note [Strict Worker Ids] + idCbvMarkArity fun == 0 + --------------- fun_arity fun -- See Note [Arity care] @@ -2591,15 +2598,89 @@ isJoinBind (NonRec b _) = isJoinId b isJoinBind (Rec ((b, _) : _)) = isJoinId b isJoinBind _ = False -dumpIdInfoOfProgram :: (IdInfo -> SDoc) -> CoreProgram -> SDoc -dumpIdInfoOfProgram ppr_id_info binds = vcat (map printId ids) +dumpIdInfoOfProgram :: Bool -> (IdInfo -> SDoc) -> CoreProgram -> SDoc +dumpIdInfoOfProgram dump_locals ppr_id_info binds = vcat (map printId ids) where ids = sortBy (stableNameCmp `on` getName) (concatMap getIds binds) getIds (NonRec i _) = [ i ] getIds (Rec bs) = map fst bs - printId id | isExportedId id = ppr id <> colon <+> (ppr_id_info (idInfo id)) + -- By default only include full info for exported ids, unless we run in the verbose + -- pprDebug mode. + printId id | isExportedId id || dump_locals = ppr id <> colon <+> (ppr_id_info (idInfo id)) | otherwise = empty +{- +************************************************************************ +* * +\subsection{Tag inference things} +* * +************************************************************************ +-} + +-- | For a binding we: +-- * Look at the args +-- * Mark any with Unf=OtherCon[] as call-by-value, unless it's an unlifted type already. +-- * Potentially combine it with existing call-by-value marks (from ww) +-- * Update the id +-- See Note [Attaching CBV Marks to ids]. +computeCbvInfo :: HasCallStack + => Id -- The function + -> CoreExpr -- It's RHS + -> Id +computeCbvInfo id rhs = + -- pprTrace "computeCbv" (hang (ppr id) 2 (ppr dmd $$ ppr dmds)) $ + -- TODO: For perf reasons we could skip looking at non VanillaId/StrictWorkerId/JoinId bindings + cbv_bndr + where + (_,val_args,_body) = collectTyAndValBinders rhs + new_marks = mkCbvMarks val_args + cbv_marks = assertPpr (checkMarks id new_marks) + (ppr id <+> ppr (idType id) $$ text "old:" <> ppr (idCbvMarks_maybe id) $$ text "new:" <> ppr new_marks $$ text "rhs:" <> ppr rhs) + new_marks + cbv_bndr + | valid_unlifted_worker val_args + -- Avoid retaining the original rhs + = cbv_marks `seqList` setIdCbvMarks id cbv_marks + | otherwise = + -- pprTraceDebug "tidyCbvInfo: Worker seems to take unboxed tuple/sum types!" (ppr id <+> ppr rhs) + id + -- We don't set CBV marks on workers which take unboxed tuples or sums as arguments. + -- Doing so would require us to compute the result of unarise here in order to properly determine + -- argument positions at runtime. + -- In practice this doesn't matter much. Most "interesting" functions will get a W/W split which will eliminate + -- unboxed tuple arguments, and unboxed sums are rarely used. + valid_unlifted_worker args = + -- pprTrace "valid_unlifted" (ppr id $$ ppr args) $ + not $ (any (\arg -> isMultiValArg arg) args) + isMultiValArg id = + let ty = idType id + in not (isStateType ty) && (isUnboxedTupleType ty || isUnboxedSumType ty) + -- Only keep relevant marks. We *don't* have to cover all arguments. Only these + -- that we might want to pass call-by-value. + trimMarks :: [CbvMark] -> [Id] -> [CbvMark] + trimMarks marks val_args = + map fst . + -- Starting at the end, drop all non-cbv marks, and marks applied to unlifted types + dropWhileEndLE (\(m,v) -> not (isMarkedCbv m) || isUnliftedType (idType v)) $ + zip marks val_args + + mkCbvMarks :: ([Id]) -> [CbvMark] + mkCbvMarks = map mkMark + where + cbv_arg arg = isEvaldUnfolding (idUnfolding arg) + mkMark arg = if cbv_arg arg && (not $ isUnliftedType (idType arg)) + then MarkedCbv + else NotMarkedCbv + -- If we determined earlier one an argument should be passed cbv it should + -- still be so here. + checkMarks id new_marks + | Just old_marks <- idCbvMarks_maybe id + = length (trimMarks old_marks val_args) <= length new_marks && + and (zipWith checkNewMark old_marks new_marks) + | otherwise = True + checkNewMark old new = + isMarkedCbv new || (not $ isMarkedCbv old) + {- ********************************************************************* * * diff --git a/compiler/GHC/CoreToIface.hs b/compiler/GHC/CoreToIface.hs index 426dcae9cf..11fd63e0bc 100644 --- a/compiler/GHC/CoreToIface.hs +++ b/compiler/GHC/CoreToIface.hs @@ -441,6 +441,7 @@ toIfaceLetBndr id = IfLetBndr (occNameFS (getOccName id)) toIfaceIdDetails :: IdDetails -> IfaceIdDetails toIfaceIdDetails VanillaId = IfVanillaId +toIfaceIdDetails (StrictWorkerId dmds) = IfStrictWorkerId dmds toIfaceIdDetails (DFunId {}) = IfDFunId toIfaceIdDetails (RecSelId { sel_naughty = n , sel_tycon = tc }) = diff --git a/compiler/GHC/CoreToStg.hs b/compiler/GHC/CoreToStg.hs index 431d831125..6e0e4600dd 100644 --- a/compiler/GHC/CoreToStg.hs +++ b/compiler/GHC/CoreToStg.hs @@ -30,6 +30,7 @@ import GHC.Core.DataCon import GHC.Stg.Syntax import GHC.Stg.Debug +import GHC.Stg.Utils import GHC.Types.RepType import GHC.Types.Id.Make ( coercionTokenId ) diff --git a/compiler/GHC/CoreToStg/Prep.hs b/compiler/GHC/CoreToStg/Prep.hs index 99c4cd4e8c..ac626931cd 100644 --- a/compiler/GHC/CoreToStg/Prep.hs +++ b/compiler/GHC/CoreToStg/Prep.hs @@ -1024,7 +1024,7 @@ cpeApp top_env expr go terminal as = (terminal, as) cpe_app :: CorePrepEnv - -> CoreExpr + -> CoreExpr -- The thing we are calling -> [ArgInfo] -> UniqSM (Floats, CpeRhs) cpe_app env (Var f) (CpeApp Type{} : CpeApp arg : args) @@ -1170,7 +1170,7 @@ cpeApp top_env expr rebuild_app :: CorePrepEnv -> [ArgInfo] -- The arguments (inner to outer) - -> CpeApp + -> CpeApp -- The function -> Floats -> [Demand] -> Maybe Arity @@ -1519,7 +1519,8 @@ Note [Eta expansion of hasNoBinding things in CorePrep] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ maybeSaturate deals with eta expanding to saturate things that can't deal with unsaturated applications (identified by 'hasNoBinding', currently just -foreign calls and unboxed tuple/sum constructors). +foreign calls, unboxed tuple/sum constructors and strict workers). +See Note [Strict Worker Ids] Historical Note: Note that eta expansion in CorePrep used to be very fragile due to the "prediction" of CAFfyness that we used to make during tidying. @@ -1533,14 +1534,27 @@ maybeSaturate fn expr n_args unsat_ticks | hasNoBinding fn -- There's no binding = return $ wrapLamBody (\body -> foldr mkTick body unsat_ticks) sat_expr + | mark_arity > 0 -- A strict worker. See Note [Strict Worker Ids] + , not applied_marks + = assertPpr + ( not (isJoinId fn)) -- See Note [Do not eta-expand join points] + ( ppr fn $$ text "expr:" <+> ppr expr $$ text "n_args:" <+> ppr n_args $$ + text "marks:" <+> ppr (idCbvMarks_maybe fn) $$ + text "join_arity" <+> ppr (idJoinArity fn) + ) $ + return sat_expr + | otherwise = assert (null unsat_ticks) $ return expr where - fn_arity = idArity fn - excess_arity = fn_arity - n_args - sat_expr = cpeEtaExpand excess_arity expr - + mark_arity = idCbvMarkArity fn + fn_arity = idArity fn + excess_arity = (max fn_arity mark_arity) - n_args + sat_expr = cpeEtaExpand excess_arity expr + applied_marks = n_args >= (length . dropWhile (not . isMarkedCbv) . reverse . expectJust "maybeSaturate" $ (idCbvMarks_maybe fn)) + -- For join points we never eta-expand (See Note [Do not eta-expand join points]) + -- so we assert all arguments that need to be passed cbv are visible so that the backend can evalaute them if required.. {- ************************************************************************ * * @@ -1620,13 +1634,19 @@ tryEtaReducePrep bndrs expr@(App _ _) , not (any (`elemVarSet` fvs_remaining) bndrs) , exprIsHNF remaining_expr -- Don't turn value into a non-value -- else the behaviour with 'seq' changes - = Just remaining_expr + = + -- pprTrace "prep-reduce" ( + -- text "reduced:" <> ppr remaining_expr $$ + -- ppr (remaining_args) + -- ) $ + Just remaining_expr where (f, args) = collectArgs expr remaining_expr = mkApps f remaining_args fvs_remaining = exprFreeVars remaining_expr (remaining_args, last_args) = splitAt n_remaining args n_remaining = length args - length bndrs + n_remaining_vals = length $ filter isRuntimeArg remaining_args ok bndr (Var arg) = bndr == arg ok _ _ = False @@ -1634,9 +1654,11 @@ tryEtaReducePrep bndrs expr@(App _ _) -- We can't eta reduce something which must be saturated. ok_to_eta_reduce (Var f) = not (hasNoBinding f) && not (isLinearType (idType f)) && -- Unsure why this is unsafe. - (not (isJoinId f) || idJoinArity f <= n_remaining) + (not (isJoinId f) || idJoinArity f <= n_remaining) && -- Don't undersaturate join points. -- See Note [Invariants on join points] in GHC.Core, and #20599 + (idCbvMarkArity f <= n_remaining_vals) + -- Similar for StrictWorkerIds. See Note [Strict Worker Ids] ok_to_eta_reduce _ = False -- Safe. ToDo: generalise diff --git a/compiler/GHC/Driver/Config/StgToCmm.hs b/compiler/GHC/Driver/Config/StgToCmm.hs index ae59e41fdf..ab64076a79 100644 --- a/compiler/GHC/Driver/Config/StgToCmm.hs +++ b/compiler/GHC/Driver/Config/StgToCmm.hs @@ -43,6 +43,7 @@ initStgToCmmConfig dflags mod = StgToCmmConfig , stgToCmmPIE = gopt Opt_PIE dflags , stgToCmmExtDynRefs = gopt Opt_ExternalDynamicRefs dflags , stgToCmmDoBoundsCheck = gopt Opt_DoBoundsChecking dflags + , stgToCmmDoTagCheck = gopt Opt_DoTagInferenceChecks dflags -- backend flags , stgToCmmAllowBigArith = not ncg , stgToCmmAllowQuotRemInstr = ncg && (x86ish || ppc) diff --git a/compiler/GHC/Driver/Flags.hs b/compiler/GHC/Driver/Flags.hs index e95b181743..759d8b48c0 100644 --- a/compiler/GHC/Driver/Flags.hs +++ b/compiler/GHC/Driver/Flags.hs @@ -92,7 +92,9 @@ data DumpFlag | Opt_D_dump_prep | Opt_D_dump_stg_from_core -- ^ Initial STG (CoreToStg output) | Opt_D_dump_stg_unarised -- ^ STG after unarise - | Opt_D_dump_stg_final -- ^ Final STG (after stg2stg) + | Opt_D_dump_stg_cg -- ^ STG (after stg2stg) + | Opt_D_dump_stg_tags -- ^ Result of tag inference analysis. + | Opt_D_dump_stg_final -- ^ Final STG (before cmm gen) | Opt_D_dump_call_arity | Opt_D_dump_exitify | Opt_D_dump_stranal @@ -224,6 +226,7 @@ data GeneralFlag | Opt_WeightlessBlocklayout -- ^ Layout based on last instruction per block. | Opt_CprAnal | Opt_WorkerWrapper + | Opt_WorkerWrapperUnlift -- ^ Do W/W split for unlifting even if we won't unbox anything. | Opt_SolveConstantDicts | Opt_AlignmentSanitisation | Opt_CatchNonexhaustiveCases @@ -231,6 +234,9 @@ data GeneralFlag | Opt_CoreConstantFolding | Opt_FastPAPCalls -- #6084 + -- Inference flags + | Opt_DoTagInferenceChecks + -- PreInlining is on by default. The option is there just to see how -- bad things get if you turn it off! | Opt_SimplPreInlining @@ -443,6 +449,7 @@ optimisationFlags = EnumSet.fromList , Opt_WeightlessBlocklayout , Opt_CprAnal , Opt_WorkerWrapper + , Opt_WorkerWrapperUnlift , Opt_SolveConstantDicts , Opt_CatchNonexhaustiveCases , Opt_IgnoreAsserts diff --git a/compiler/GHC/Driver/GenerateCgIPEStub.hs b/compiler/GHC/Driver/GenerateCgIPEStub.hs index e1a751e762..96bf352e51 100644 --- a/compiler/GHC/Driver/GenerateCgIPEStub.hs +++ b/compiler/GHC/Driver/GenerateCgIPEStub.hs @@ -27,8 +27,10 @@ import GHC.Settings (Platform, platformUnregisterised) import GHC.StgToCmm.Monad (getCmm, initC, runC, initFCodeState) import GHC.StgToCmm.Prof (initInfoTableProv) import GHC.StgToCmm.Types (CgInfos (..), ModuleLFInfos) +import GHC.Stg.InferTags.TagSig (TagSig) import GHC.Types.IPE (InfoTableProvMap (provInfoTables), IpeSourceLocation) import GHC.Types.Name.Set (NonCaffySet) +import GHC.Types.Name.Env (NameEnv) import GHC.Types.Tickish (GenTickish (SourceNote)) import GHC.Unit.Types (Module) import GHC.Utils.Misc @@ -178,8 +180,8 @@ The find the tick: remembered in a `Maybe`. -} -generateCgIPEStub :: HscEnv -> Module -> InfoTableProvMap -> Stream IO CmmGroupSRTs (NonCaffySet, ModuleLFInfos) -> Stream IO CmmGroupSRTs CgInfos -generateCgIPEStub hsc_env this_mod denv s = do +generateCgIPEStub :: HscEnv -> Module -> InfoTableProvMap -> NameEnv TagSig -> Stream IO CmmGroupSRTs (NonCaffySet, ModuleLFInfos) -> Stream IO CmmGroupSRTs CgInfos +generateCgIPEStub hsc_env this_mod denv tag_sigs s = do let dflags = hsc_dflags hsc_env platform = targetPlatform dflags fstate = initFCodeState platform @@ -196,7 +198,7 @@ generateCgIPEStub hsc_env this_mod denv s = do (_, ipeCmmGroupSRTs) <- liftIO $ cmmPipeline hsc_env (emptySRT this_mod) ipeCmmGroup Stream.yield ipeCmmGroupSRTs - return CgInfos {cgNonCafs = nonCaffySet, cgLFInfos = moduleLFInfos, cgIPEStub = ipeStub} + return CgInfos {cgNonCafs = nonCaffySet, cgLFInfos = moduleLFInfos, cgIPEStub = ipeStub, cgTagSigs = tag_sigs} where collect :: Platform -> [(Label, CmmInfoTable, Maybe IpeSourceLocation)] -> CmmGroupSRTs -> IO ([(Label, CmmInfoTable, Maybe IpeSourceLocation)], CmmGroupSRTs) collect platform acc cmmGroupSRTs = do diff --git a/compiler/GHC/Driver/Main.hs b/compiler/GHC/Driver/Main.hs index 4c8e494321..b07d566a5b 100644 --- a/compiler/GHC/Driver/Main.hs +++ b/compiler/GHC/Driver/Main.hs @@ -246,6 +246,8 @@ import GHC.Driver.GenerateCgIPEStub (generateCgIPEStub) import Data.List.NonEmpty (NonEmpty ((:|))) +import GHC.Stg.InferTags + {- ********************************************************************** %* * Initialisation @@ -1801,7 +1803,15 @@ doCodeGen hsc_env this_mod denv data_tycons tmpfs = hsc_tmpfs hsc_env platform = targetPlatform dflags - putDumpFileMaybe logger Opt_D_dump_stg_final "Final STG:" FormatSTG (pprGenStgTopBindings (initStgPprOpts dflags) stg_binds_w_fvs) + putDumpFileMaybe logger Opt_D_dump_stg_cg "CodeGenInput STG:" FormatSTG + (pprGenStgTopBindings (initStgPprOpts dflags) stg_binds_w_fvs) + + -- Do tag inference on optimized STG + (!stg_post_infer,export_tag_info) <- + {-# SCC "StgTagFields" #-} inferTags dflags logger this_mod stg_binds_w_fvs + + putDumpFileMaybe logger Opt_D_dump_stg_final "Final STG:" FormatSTG + (pprGenStgTopBindings (initStgPprOpts dflags) stg_post_infer) let stg_to_cmm dflags mod = case stgToCmmHook hooks of Nothing -> StgToCmm.codeGen logger tmpfs (initStgToCmmConfig dflags mod) @@ -1809,8 +1819,8 @@ doCodeGen hsc_env this_mod denv data_tycons let cmm_stream :: Stream IO CmmGroup ModuleLFInfos -- See Note [Forcing of stg_binds] - cmm_stream = stg_binds_w_fvs `seqList` {-# SCC "StgToCmm" #-} - stg_to_cmm dflags this_mod denv data_tycons cost_centre_info stg_binds_w_fvs hpc_info + cmm_stream = stg_post_infer `seqList` {-# SCC "StgToCmm" #-} + stg_to_cmm dflags this_mod denv data_tycons cost_centre_info stg_post_infer hpc_info -- codegen consumes a stream of CmmGroup, and produces a new -- stream of CmmGroup (not necessarily synchronised: one @@ -1839,7 +1849,7 @@ doCodeGen hsc_env this_mod denv data_tycons putDumpFileMaybe logger Opt_D_dump_cmm "Output Cmm" FormatCMM (pdoc platform a) return a - return $ Stream.mapM dump2 $ generateCgIPEStub hsc_env this_mod denv pipeline_stream + return $ Stream.mapM dump2 $ generateCgIPEStub hsc_env this_mod denv export_tag_info pipeline_stream myCoreToStgExpr :: Logger -> DynFlags -> InteractiveContext -> Bool diff --git a/compiler/GHC/Driver/Session.hs b/compiler/GHC/Driver/Session.hs index 1f5626cec8..91bf2adf39 100644 --- a/compiler/GHC/Driver/Session.hs +++ b/compiler/GHC/Driver/Session.hs @@ -1611,7 +1611,7 @@ setSafeHaskell s = updM f safeM <- combineSafeFlags sf s case s of Sf_Safe -> return $ dfs { safeHaskell = safeM, safeInfer = False } - -- leave safe inferrence on in Trustworthy mode so we can warn + -- leave safe inference on in Trustworthy mode so we can warn -- if it could have been inferred safe. Sf_Trustworthy -> do l <- getCurLoc @@ -2485,9 +2485,13 @@ dynamic_flags_deps = [ (setDumpFlag Opt_D_dump_stg_unarised) , make_ord_flag defGhcFlag "ddump-stg-final" (setDumpFlag Opt_D_dump_stg_final) + , make_ord_flag defGhcFlag "ddump-stg-cg" + (setDumpFlag Opt_D_dump_stg_cg) , make_dep_flag defGhcFlag "ddump-stg" (setDumpFlag Opt_D_dump_stg_from_core) "Use `-ddump-stg-from-core` or `-ddump-stg-final` instead" + , make_ord_flag defGhcFlag "ddump-stg-tags" + (setDumpFlag Opt_D_dump_stg_tags) , make_ord_flag defGhcFlag "ddump-call-arity" (setDumpFlag Opt_D_dump_call_arity) , make_ord_flag defGhcFlag "ddump-exitify" @@ -2580,6 +2584,8 @@ dynamic_flags_deps = [ (NoArg (setGeneralFlag Opt_DoAsmLinting)) , make_ord_flag defGhcFlag "dannot-lint" (NoArg (setGeneralFlag Opt_DoAnnotationLinting)) + , make_ord_flag defGhcFlag "dtag-inference-checks" + (NoArg (setGeneralFlag Opt_DoTagInferenceChecks)) , make_ord_flag defGhcFlag "dshow-passes" (NoArg $ forceRecompile >> (setVerbosity $ Just 2)) , make_ord_flag defGhcFlag "dfaststring-stats" @@ -3434,6 +3440,7 @@ fFlagsDeps = [ flagSpec "unbox-strict-fields" Opt_UnboxStrictFields, flagSpec "version-macros" Opt_VersionMacros, flagSpec "worker-wrapper" Opt_WorkerWrapper, + flagSpec "worker-wrapper-cbv" Opt_WorkerWrapperUnlift, flagSpec "solve-constant-dicts" Opt_SolveConstantDicts, flagSpec "catch-nonexhaustive-cases" Opt_CatchNonexhaustiveCases, flagSpec "alignment-sanitisation" Opt_AlignmentSanitisation, diff --git a/compiler/GHC/HsToCore.hs b/compiler/GHC/HsToCore.hs index c32f03eb97..0a210e0871 100644 --- a/compiler/GHC/HsToCore.hs +++ b/compiler/GHC/HsToCore.hs @@ -133,7 +133,8 @@ deSugar hsc_env tcg_insts = insts, tcg_fam_insts = fam_insts, tcg_hpc = other_hpc_info, - tcg_complete_matches = complete_matches + tcg_complete_matches = complete_matches, + tcg_self_boot = self_boot }) = do { let dflags = hsc_dflags hsc_env @@ -242,6 +243,7 @@ deSugar hsc_env mg_fam_insts = fam_insts, mg_inst_env = inst_env, mg_fam_inst_env = fam_inst_env, + mg_boot_exports = bootExports self_boot, mg_patsyns = patsyns, mg_rules = ds_rules_for_imps, mg_binds = ds_binds, diff --git a/compiler/GHC/Iface/Make.hs b/compiler/GHC/Iface/Make.hs index 28ddcac422..c0471cd413 100644 --- a/compiler/GHC/Iface/Make.hs +++ b/compiler/GHC/Iface/Make.hs @@ -159,16 +159,19 @@ mkFullIface hsc_env partial_iface mb_cg_infos = do updateDecl :: [IfaceDecl] -> Maybe CgInfos -> [IfaceDecl] updateDecl decls Nothing = decls -updateDecl decls (Just CgInfos{ cgNonCafs = NonCaffySet non_cafs, cgLFInfos = lf_infos }) = map update_decl decls +updateDecl decls (Just CgInfos{ cgNonCafs = NonCaffySet non_cafs, cgLFInfos = lf_infos, cgTagSigs = tag_sigs }) + = map update_decl decls where update_decl (IfaceId nm ty details infos) | let not_caffy = elemNameSet nm non_cafs , let mb_lf_info = lookupNameEnv lf_infos nm - , warnPprTrace (isNothing mb_lf_info) "Name without LFInfo" (ppr nm) True + , let sig = lookupNameEnv tag_sigs nm + , warnPprTrace (isNothing mb_lf_info) "updateDecl" (text "Name without LFInfo:" <+> ppr nm) True -- Only allocate a new IfaceId if we're going to update the infos - , isJust mb_lf_info || not_caffy + , isJust mb_lf_info || not_caffy || isJust sig = IfaceId nm ty details $ - (if not_caffy then (HsNoCafRefs :) else id) + (if not_caffy then (HsNoCafRefs :) else id) $ + (if isJust sig then (HsTagSig (fromJust sig):) else id) $ (case mb_lf_info of Nothing -> infos -- LFInfos not available when building .cmm files Just lf_info -> HsLFInfo (toIfaceLFInfo nm lf_info) : infos) diff --git a/compiler/GHC/Iface/Syntax.hs b/compiler/GHC/Iface/Syntax.hs index 39f0bd5336..c735a2f94f 100644 --- a/compiler/GHC/Iface/Syntax.hs +++ b/compiler/GHC/Iface/Syntax.hs @@ -67,6 +67,7 @@ import GHC.Types.Var( VarBndr(..), binderVar, tyVarSpecToBinders ) import GHC.Core.TyCon ( Role (..), Injectivity(..), tyConBndrVisArgFlag ) import GHC.Core.DataCon (SrcStrictness(..), SrcUnpackedness(..)) import GHC.Builtin.Types ( constraintKindTyConName ) +import GHC.Stg.InferTags.TagSig import GHC.Utils.Lexeme (isLexSym) import GHC.Utils.Fingerprint @@ -352,6 +353,7 @@ data IfaceInfoItem | HsNoCafRefs | HsLevity -- Present <=> never representation-polymorphic | HsLFInfo IfaceLFInfo + | HsTagSig TagSig -- NB: Specialisations and rules come in separately and are -- only later attached to the Id. Partial reason: some are orphans. @@ -380,6 +382,7 @@ data IfaceUnfolding data IfaceIdDetails = IfVanillaId + | IfStrictWorkerId [CbvMark] | IfRecSelId (Either IfaceTyCon IfaceDecl) Bool | IfDFunId @@ -1459,6 +1462,7 @@ instance Outputable IfaceConAlt where ------------------ instance Outputable IfaceIdDetails where ppr IfVanillaId = Outputable.empty + ppr (IfStrictWorkerId dmd) = text "StrWork" <> parens (ppr dmd) ppr (IfRecSelId tc b) = text "RecSel" <+> ppr tc <+> if b then text "<naughty>" @@ -1476,6 +1480,7 @@ instance Outputable IfaceInfoItem where ppr HsNoCafRefs = text "HasNoCafRefs" ppr HsLevity = text "Never levity-polymorphic" ppr (HsLFInfo lf_info) = text "LambdaFormInfo:" <+> ppr lf_info + ppr (HsTagSig tag_sig) = text "TagSig:" <+> ppr tag_sig instance Outputable IfaceJoinInfo where ppr IfaceNotJoinPoint = empty @@ -2223,12 +2228,14 @@ instance Binary IfaceAnnotation where instance Binary IfaceIdDetails where put_ bh IfVanillaId = putByte bh 0 put_ bh (IfRecSelId a b) = putByte bh 1 >> put_ bh a >> put_ bh b - put_ bh IfDFunId = putByte bh 2 + put_ bh (IfStrictWorkerId dmds) = putByte bh 2 >> put_ bh dmds + put_ bh IfDFunId = putByte bh 3 get bh = do h <- getByte bh case h of 0 -> return IfVanillaId 1 -> do { a <- get bh; b <- get bh; return (IfRecSelId a b) } + 2 -> do { dmds <- get bh; return (IfStrictWorkerId dmds) } _ -> return IfDFunId instance Binary IfaceInfoItem where @@ -2240,6 +2247,7 @@ instance Binary IfaceInfoItem where put_ bh HsLevity = putByte bh 5 put_ bh (HsCprSig cpr) = putByte bh 6 >> put_ bh cpr put_ bh (HsLFInfo lf_info) = putByte bh 7 >> put_ bh lf_info + put_ bh (HsTagSig sig) = putByte bh 8 >> put_ bh sig get bh = do h <- getByte bh @@ -2253,7 +2261,8 @@ instance Binary IfaceInfoItem where 4 -> return HsNoCafRefs 5 -> return HsLevity 6 -> HsCprSig <$> get bh - _ -> HsLFInfo <$> get bh + 7 -> HsLFInfo <$> get bh + _ -> HsTagSig <$> get bh instance Binary IfaceUnfolding where put_ bh (IfCoreUnfold s e) = do @@ -2586,6 +2595,7 @@ instance NFData IfaceBang where instance NFData IfaceIdDetails where rnf = \case IfVanillaId -> () + IfStrictWorkerId dmds -> dmds `seqList` () IfRecSelId (Left tycon) b -> rnf tycon `seq` rnf b IfRecSelId (Right decl) b -> rnf decl `seq` rnf b IfDFunId -> () @@ -2600,6 +2610,7 @@ instance NFData IfaceInfoItem where HsLevity -> () HsCprSig cpr -> cpr `seq` () HsLFInfo lf_info -> lf_info `seq` () -- TODO: seq further? + HsTagSig sig -> sig `seq` () instance NFData IfaceUnfolding where rnf = \case diff --git a/compiler/GHC/Iface/Tidy.hs b/compiler/GHC/Iface/Tidy.hs index b1a079205e..c453cc5336 100644 --- a/compiler/GHC/Iface/Tidy.hs +++ b/compiler/GHC/Iface/Tidy.hs @@ -365,6 +365,7 @@ tidyProgram hsc_env (ModGuts { mg_module = mod , mg_foreign_files = foreign_files , mg_hpc_info = hpc_info , mg_modBreaks = modBreaks + , mg_boot_exports = boot_exports }) = Err.withTiming logger @@ -384,7 +385,7 @@ tidyProgram hsc_env (ModGuts { mg_module = mod ; let uf_opts = unfoldingOpts dflags ; (tidy_env, tidy_binds) - <- tidyTopBinds uf_opts unfold_env tidy_occ_env trimmed_binds + <- tidyTopBinds uf_opts unfold_env boot_exports tidy_occ_env trimmed_binds -- See Note [Grand plan for static forms] in GHC.Iface.Tidy.StaticPtrTable. ; (spt_entries, tidy_binds') <- @@ -1180,39 +1181,41 @@ tidyTopName mod name_cache maybe_ref occ_env id tidyTopBinds :: UnfoldingOpts -> UnfoldEnv + -> NameSet -> TidyOccEnv -> CoreProgram -> IO (TidyEnv, CoreProgram) -tidyTopBinds uf_opts unfold_env init_occ_env binds +tidyTopBinds uf_opts unfold_env boot_exports init_occ_env binds = do let result = tidy init_env binds seqBinds (snd result) `seq` return result -- This seqBinds avoids a spike in space usage (see #13564) where init_env = (init_occ_env, emptyVarEnv) - tidy = mapAccumL (tidyTopBind uf_opts unfold_env) + tidy = mapAccumL (tidyTopBind uf_opts unfold_env boot_exports) ------------------------ tidyTopBind :: UnfoldingOpts -> UnfoldEnv + -> NameSet -> TidyEnv -> CoreBind -> (TidyEnv, CoreBind) -tidyTopBind uf_opts unfold_env +tidyTopBind uf_opts unfold_env boot_exports (occ_env,subst1) (NonRec bndr rhs) = (tidy_env2, NonRec bndr' rhs') where Just (name',show_unfold) = lookupVarEnv unfold_env bndr - (bndr', rhs') = tidyTopPair uf_opts show_unfold tidy_env2 name' (bndr, rhs) + (bndr', rhs') = tidyTopPair uf_opts show_unfold boot_exports tidy_env2 name' (bndr, rhs) subst2 = extendVarEnv subst1 bndr bndr' tidy_env2 = (occ_env, subst2) -tidyTopBind uf_opts unfold_env (occ_env, subst1) (Rec prs) +tidyTopBind uf_opts unfold_env boot_exports (occ_env, subst1) (Rec prs) = (tidy_env2, Rec prs') where - prs' = [ tidyTopPair uf_opts show_unfold tidy_env2 name' (id,rhs) + prs' = [ tidyTopPair uf_opts show_unfold boot_exports tidy_env2 name' (id,rhs) | (id,rhs) <- prs, let (name',show_unfold) = expectJust "tidyTopBind" $ lookupVarEnv unfold_env id @@ -1226,6 +1229,7 @@ tidyTopBind uf_opts unfold_env (occ_env, subst1) (Rec prs) ----------------------------------------------------------- tidyTopPair :: UnfoldingOpts -> Bool -- show unfolding + -> NameSet -> TidyEnv -- The TidyEnv is used to tidy the IdInfo -- It is knot-tied: don't look at it! -> Name -- New name @@ -1237,14 +1241,17 @@ tidyTopPair :: UnfoldingOpts -- group, a variable late in the group might be mentioned -- in the IdInfo of one early in the group -tidyTopPair uf_opts show_unfold rhs_tidy_env name' (bndr, rhs) - = (bndr1, rhs1) +tidyTopPair uf_opts show_unfold boot_exports rhs_tidy_env name' (bndr, rhs) + = -- pprTrace "tidyTop" (ppr name' <+> ppr details <+> ppr rhs) $ + (bndr1, rhs1) + where + !cbv_bndr = tidyCbvInfoTop boot_exports bndr rhs bndr1 = mkGlobalId details name' ty' idinfo' - details = idDetails bndr -- Preserve the IdDetails - ty' = tidyTopType (idType bndr) + details = idDetails cbv_bndr -- Preserve the IdDetails + ty' = tidyTopType (idType cbv_bndr) rhs1 = tidyExpr rhs_tidy_env rhs - idinfo' = tidyTopIdInfo uf_opts rhs_tidy_env name' rhs rhs1 (idInfo bndr) + idinfo' = tidyTopIdInfo uf_opts rhs_tidy_env name' rhs rhs1 (idInfo cbv_bndr) show_unfold -- tidyTopIdInfo creates the final IdInfo for top-level diff --git a/compiler/GHC/IfaceToCore.hs b/compiler/GHC/IfaceToCore.hs index 3a11c30e79..2982df668d 100644 --- a/compiler/GHC/IfaceToCore.hs +++ b/compiler/GHC/IfaceToCore.hs @@ -1618,6 +1618,7 @@ tcIfaceDataAlt mult con inst_tys arg_strs rhs tcIdDetails :: Type -> IfaceIdDetails -> IfL IdDetails tcIdDetails _ IfVanillaId = return VanillaId +tcIdDetails _ (IfStrictWorkerId dmds) = return $ StrictWorkerId dmds tcIdDetails ty IfDFunId = return (DFunId (isNewTyCon (classTyCon cls))) where @@ -1665,6 +1666,9 @@ tcIdInfo ignore_prags toplvl name ty info = do lf_info <- tcLFInfo lf_info return (info `setLFInfo` lf_info) + tcPrag info (HsTagSig sig) = do + return (info `setTagSig` sig) + -- The next two are lazy, so they don't transitively suck stuff in tcPrag info (HsUnfold lb if_unf) = do { unf <- tcUnfolding toplvl name ty info if_unf diff --git a/compiler/GHC/Prelude.hs b/compiler/GHC/Prelude.hs index f61dad9517..6a810ac200 100644 --- a/compiler/GHC/Prelude.hs +++ b/compiler/GHC/Prelude.hs @@ -1,5 +1,6 @@ {-# LANGUAGE CPP #-} {-# OPTIONS_HADDOCK not-home #-} +{-# OPTIONS_GHC -O2 #-} -- See Note [-O2 Prelude] -- | Custom GHC "Prelude" -- @@ -18,6 +19,17 @@ module GHC.Prelude ) where +{- Note [-O2 Prelude] +~~~~~~~~~~~~~~~~~~~~~ +There is some code in GHC that is *always* compiled with -O[2] because +of it's impact on compile time performance. Some of this code might depend +on the definitions like shiftL being defined here being performant. + +So we always compile this module with -O2. It's (currently) tiny so I +have little reason to suspect this impacts overall GHC compile times +negatively. + +-} -- We export the 'Semigroup' class but w/o the (<>) operator to avoid -- clashing with the (Outputable.<>) operator which is heavily used -- through GHC's code-base. diff --git a/compiler/GHC/Stg/FVs.hs b/compiler/GHC/Stg/FVs.hs index 00b4bdcc0b..636ec5eb61 100644 --- a/compiler/GHC/Stg/FVs.hs +++ b/compiler/GHC/Stg/FVs.hs @@ -47,6 +47,7 @@ module GHC.Stg.FVs ( import GHC.Prelude hiding (mod) import GHC.Stg.Syntax +import GHC.Stg.Utils (bindersOf) import GHC.Types.Id import GHC.Types.Name (Name, nameIsLocalOrFrom) import GHC.Types.Tickish ( GenTickish(Breakpoint) ) diff --git a/compiler/GHC/Stg/InferTags.hs b/compiler/GHC/Stg/InferTags.hs new file mode 100644 index 0000000000..477650da3c --- /dev/null +++ b/compiler/GHC/Stg/InferTags.hs @@ -0,0 +1,631 @@ +{-# LANGUAGE TypeFamilies, DataKinds, GADTs, FlexibleInstances #-} +{-# LANGUAGE ConstraintKinds #-} +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE UndecidableInstances #-} + -- To permit: type instance XLet 'InferTaggedBinders = XLet 'SomePass + +{-# OPTIONS_GHC -Wname-shadowing #-} +module GHC.Stg.InferTags ( inferTags ) where + +import GHC.Prelude hiding (id) + +import GHC.Core.DataCon +import GHC.Core.Type +import GHC.Types.Id +import GHC.Types.Id.Info (tagSigInfo) +import GHC.Types.Name +import GHC.Stg.Syntax +import GHC.Types.Basic ( CbvMark (..) ) +import GHC.Types.Unique.Supply (mkSplitUniqSupply) +import GHC.Types.RepType (dataConRuntimeRepStrictness) +import GHC.Core (AltCon(..)) +import Data.List (mapAccumL) +import GHC.Utils.Outputable +import GHC.Utils.Misc( zipWithEqual, zipEqual, notNull ) + +import GHC.Stg.InferTags.Types +import GHC.Stg.InferTags.Rewrite (rewriteTopBinds) +import Data.Maybe +import GHC.Types.Name.Env (mkNameEnv, NameEnv) +import GHC.Driver.Config.Stg.Ppr +import GHC.Driver.Session +import GHC.Utils.Logger +import qualified GHC.Unit.Types + +{- Note [Tag Inference] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +The purpose of this pass is to attach to every binder a flag +to indicate whether or not it is "properly tagged". A binder +is properly tagged if it is guaranteed: + - to point to a heap-allocated *value* + - and to have the tag of the value encoded in the pointer + +For example + let x = Just y in ... + +Here x will be properly tagged: it will point to the heap-allocated +values for (Just y), and the tag-bits of the pointer will encode +the tag for Just so there is no need to re-enter the closure or even +check for the presence of tag bits. The impacts of this can be very large. + +For containers the reduction in runtimes with this optimization was as follows: + +intmap-benchmarks: 89.30% +intset-benchmarks: 90.87% +map-benchmarks: 88.00% +sequence-benchmarks: 99.84% +set-benchmarks: 85.00% +set-operations-intmap:88.64% +set-operations-map: 74.23% +set-operations-set: 76.50% +lookupge-intmap: 89.57% +lookupge-map: 70.95% + +With nofib being ~0.3% faster as well. + +See Note [Tag inference passes] for how we proceed to generate and use this information. + +Note [Strict Field Invariant] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +As part of tag inference we introduce the Strict Field Invariant. +Which consists of us saying that: + +* Pointers in strict fields must (a) point directly to the value, and + (b) must be properly tagged. + +For example, given + data T = MkT ![Int] + +the Strict Field Invariant guarantees that the first field of any `MkT` constructor +will either point directly to nil, or directly to a cons cell; +and will be tagged with `001` or `010` respectively. +It will never point to a thunk, nor will it be tagged `000` (meaning "might be a thunk"). +NB: Note that the proper tag for some objects is indeed `000`. Currently this is the case for PAPs. + +Why do we care? Because if we have code like: + +case strictPair of + SP x y -> + case x of ... + +It allows us to safely omit the code to enter x and the check +for the presence of a tag that goes along with it. +However we might still branch on the tag as usual. +See Note [Tag Inference] for how much impact this can have for +some code. + +This is enforced by the code GHC.Stg.InferTags.Rewrite +where we: + +* Look at all constructor allocations. +* Check if arguments to their strict fields are known to be properly tagged +* If not we convert `StrictJust x` into `case x of x' -> StrictJust x'` + +This is usually very beneficial but can cause regressions in rare edge cases where +we fail to proof that x is properly tagged, or where it simply isn't. +See Note [How untagged pointers can end up in strict fields] for how the second case +can arise. + +For a full example of the worst case consider this code: + +foo ... = ... + let c = StrictJust x + in ... + +Here we would rewrite `let c = StrictJust x` into `let c = case x of x' -> StrictJust x'` +However that is horrible! We end up allocating a thunk for `c` first, which only when +evaluated will allocate the constructor. + +So we do our best to establish that `x` is already tagged (which it almost always is) +to avoid this cost. In my benchmarks I haven't seen any cases where this causes regressions. + +Note [How untagged pointers can end up in strict fields] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Consider + data Set a = Tip | Bin !a (Set a) (Set a) + +We make a wrapper for Bin that evaluates its arguments + $WBin x a b = case x of xv -> Bin xv a b +Here `xv` will always be evaluated and properly tagged, just as the +Strict Field Invariant requires. + +But alas the Simplifier can destroy the invariant: see #15696. +We start with + thk = f () + g x = ...(case thk of xv -> Bin xv Tip Tip)... + +So far so good; the argument to Bin (which is strict) is evaluated. +Now we do float-out. And in doing so we do a reverse binder-swap (see +Note [Binder-swap during float-out] in SetLevels) thus + + g x = ...(case thk of xv -> Bin thk Nil Nil)... + +The goal of the reverse binder-swap is to allow more floating -- and +indeed it does! We float the Bin to top level: + + lvl = Bin thk Tip Tip + g x = ...(case thk of xv -> lvl)... + +Now you can see that the argument of Bin, namely thk, points to the +thunk, not to the value as it did before. + +In short, although it may be rare, the output of optimisation passes +cannot guarantee to obey the Strict Field Invariant. For this reason +we run tag inference. See Note [Tag inference passes]. + +Note [Tag inference passes] +~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Tag inference proceeds in two passes: +* The first pass is an analysis to compute which binders are properly tagged. + The result is then attached to /binders/. + This is implemented by `inferTagsAnal` in GHC.Stg.InferTags +* The second pass walks over the AST checking if the Strict Field Invariant is upheld. + See Note [Strict Field Invariant]. + If required this pass modifies the program to uphold this invariant. + Tag information is also moved from /binders/ to /occurrences/ during this pass. + This is done by `GHC.Stg.InferTags.Rewrite (rewriteTopBinds)`. +* Finally the code generation uses this information to skip the thunk check when branching on + values. This is done by `cgExpr`/`cgCase` in the backend. + +Last but not least we also export the tag sigs of top level bindings to allow this optimization + to work across module boundaries. + +Note [TagInfo of functions] +~~~~~~~~~~~~~~~~~~~~~~~~~~~ +The purpose of tag inference is really to figure out when we don't have to enter +value closures. There the meaning of the tag is fairly obvious. +For functions we never make use of the tag info so we have two choices: +* Treat them as TagDunno +* Treat them as TagProper (as they *are* tagged with their arity) and be really + careful to make sure we still enter them when needed. +As it makes little difference for runtime performance I've treated functions as TagDunno in a few places where +it made the code simpler. But besides implementation complexity there isn't any reason +why we couldn't be more rigourous in dealing with functions. + +Note [Tag inference debugging] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +There is a flag -dtag-inference-checks which inserts various +compile/runtime checks in order to ensure the Strict Field Invariant +holds. It should cover all places +where tags matter and disable optimizations which interfere with checking +the invariant like generation of AP-Thunks. + +Note [Polymorphic StgPass for inferTagExpr] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +In order to reach a fixpoint we sometimes have to re-analyse an expression +multiple times. But after the initial run the Ast will be parameterized by +a different StgPass! To handle this a large part of the analysis is polymorphic +over the exact StgPass we are using. Which allows us to run the analysis on +the output of itself. + +-} + +{- ********************************************************************* +* * + Tag inference pass +* * +********************************************************************* -} + +-- doCodeGen :: HscEnv -> Module -> InfoTableProvMap -> [TyCon] +-- -> CollectedCCs +-- -> [CgStgTopBinding] -- ^ Bindings come already annotated with fvs +-- -> HpcInfo +-- -> IO (Stream IO CmmGroupSRTs CgInfos) +-- -- Note we produce a 'Stream' of CmmGroups, so that the +-- -- backend can be run incrementally. Otherwise it generates all +-- -- the C-- up front, which has a significant space cost. +inferTags :: DynFlags -> Logger -> (GHC.Unit.Types.Module) -> [CgStgTopBinding] -> IO ([TgStgTopBinding], NameEnv TagSig) +inferTags dflags logger this_mod stg_binds = do + + -- Annotate binders with tag information. + let (!stg_binds_w_tags) = {-# SCC "StgTagFields" #-} + inferTagsAnal stg_binds + putDumpFileMaybe logger Opt_D_dump_stg_tags "CodeGenAnal STG:" FormatSTG (pprGenStgTopBindings (initStgPprOpts dflags) stg_binds_w_tags) + + let export_tag_info = collectExportInfo stg_binds_w_tags + + -- Rewrite STG to uphold the strict field invariant + us_t <- mkSplitUniqSupply 't' + let rewritten_binds = {-# SCC "StgTagRewrite" #-} rewriteTopBinds this_mod us_t stg_binds_w_tags :: [TgStgTopBinding] + + return (rewritten_binds,export_tag_info) + +{- ********************************************************************* +* * + Main inference algorithm +* * +********************************************************************* -} + +type OutputableInferPass p = (Outputable (TagEnv p) + , Outputable (GenStgExpr p) + , Outputable (BinderP p) + , Outputable (GenStgRhs p)) + +-- | This constraint encodes the fact that no matter what pass +-- we use the Let/Closure extension points are the same as these for +-- 'InferTaggedBinders. +type InferExtEq i = ( XLet i ~ XLet 'InferTaggedBinders + , XLetNoEscape i ~ XLetNoEscape 'InferTaggedBinders + , XRhsClosure i ~ XRhsClosure 'InferTaggedBinders) + +inferTagsAnal :: [GenStgTopBinding 'CodeGen] -> [GenStgTopBinding 'InferTaggedBinders] +inferTagsAnal binds = + -- pprTrace "Binds" (pprGenStgTopBindings shortStgPprOpts $ binds) $ + snd (mapAccumL inferTagTopBind initEnv binds) + +----------------------- +inferTagTopBind :: TagEnv 'CodeGen -> GenStgTopBinding 'CodeGen + -> (TagEnv 'CodeGen, GenStgTopBinding 'InferTaggedBinders) +inferTagTopBind env (StgTopStringLit id bs) + = (env, StgTopStringLit id bs) +inferTagTopBind env (StgTopLifted bind) + = (env', StgTopLifted bind') + where + (env', bind') = inferTagBind env bind + + +-- Why is this polymorphic over the StgPass? See Note [Polymorphic StgPass for inferTagExpr] +----------------------- +inferTagExpr :: forall p. (OutputableInferPass p, InferExtEq p) + => TagEnv p -> GenStgExpr p -> (TagInfo, GenStgExpr 'InferTaggedBinders) +inferTagExpr env (StgApp fun args) + = --pprTrace "inferTagExpr1" + -- (ppr fun <+> ppr args $$ ppr info $$ + -- text "deadEndInfo:" <> ppr (isDeadEndId fun, idArity fun, length args) + -- ) + (info, StgApp fun args) + where + !fun_arity = idArity fun + info | fun_arity == 0 -- Unknown arity => Thunk or unknown call + = TagDunno + + | isDeadEndId fun + , fun_arity == length args -- Implies we will simply call the function. + = TagTagged -- See Note [Bottom functions are TagTagged] + + | Just (TagSig res_info) <- tagSigInfo (idInfo fun) + , fun_arity == length args -- Saturated + = res_info + + | Just (TagSig res_info) <- lookupSig env fun + , fun_arity == length args -- Saturated + = res_info + + | otherwise + = --pprTrace "inferAppUnknown" (ppr fun) $ + TagDunno +-- TODO: +-- If we have something like: +-- let x = thunk in +-- f g = case g of g' -> (# x, g' #) +-- then we *do* know that g' will be properly tagged, +-- so we should return TagTagged [TagDunno,TagProper] but currently we infer +-- TagTagged [TagDunno,TagDunno] because of the unknown arity case in inferTagExpr. +-- Seems not to matter much but should be changed eventually. + +inferTagExpr env (StgConApp con cn args tys) + = (inferConTag env con args, StgConApp con cn args tys) + +inferTagExpr _ (StgLit l) + = (TagTagged, StgLit l) + +inferTagExpr env (StgTick tick body) + = (info, StgTick tick body') + where + (info, body') = inferTagExpr env body + +inferTagExpr _ (StgOpApp op args ty) + = -- Do any primops guarantee to return a properly tagged value? + -- I think not. Ditto foreign calls. + (TagDunno, StgOpApp op args ty) + +inferTagExpr env (StgLet ext bind body) + = (info, StgLet ext bind' body') + where + (env', bind') = inferTagBind env bind + (info, body') = inferTagExpr env' body + +inferTagExpr env (StgLetNoEscape ext bind body) + = (info, StgLetNoEscape ext bind' body') + where + (env', bind') = inferTagBind env bind + (info, body') = inferTagExpr env' body + +inferTagExpr in_env (StgCase scrut bndr ty alts) + -- Unboxed tuples get their info from the expression we scrutinise if any + | [(DataAlt con, bndrs, rhs)] <- alts + , isUnboxedTupleDataCon con + , Just infos <- scrut_infos bndrs + , let bndrs' = zipWithEqual "inferTagExpr" mk_bndr bndrs infos + mk_bndr :: BinderP p -> TagInfo -> (Id, TagSig) + mk_bndr tup_bndr tup_info = + -- pprTrace "mk_ubx_bndr_info" ( ppr bndr <+> ppr info ) $ + (getBinderId in_env tup_bndr, TagSig tup_info) + -- no case binder in alt_env here, unboxed tuple binders are dead after unarise + alt_env = extendSigEnv in_env bndrs' + (info, rhs') = inferTagExpr alt_env rhs + = + -- pprTrace "inferCase1" ( + -- text "scrut:" <> ppr scrut $$ + -- text "bndr:" <> ppr bndr $$ + -- text "infos" <> ppr infos $$ + -- text "out_bndrs" <> ppr bndrs') $ + (info, StgCase scrut' (noSig in_env bndr) ty [(DataAlt con, bndrs', rhs')]) + + | null alts -- Empty case, but I might just be paranoid. + = -- pprTrace "inferCase2" empty $ + (TagDunno, StgCase scrut' bndr' ty []) + -- More than one alternative OR non-TagTuple single alternative. + | otherwise + = + let + case_env = extendSigEnv in_env [bndr'] + + (infos, alts') + = unzip [ (info, (con, bndrs', rhs')) + | (con, bndrs, rhs) <- alts + , let (alt_env,bndrs') = addAltBndrInfo case_env con bndrs + (info, rhs') = inferTagExpr alt_env rhs + ] + alt_info = foldr combineAltInfo TagTagged infos + in ( alt_info, StgCase scrut' bndr' ty alts') + where + -- Single unboxed tuple alternative + scrut_infos bndrs = case scrut_info of + TagTagged -> Just $ replicate (length bndrs) TagProper + TagTuple infos -> Just infos + _ -> Nothing + (scrut_info, scrut') = inferTagExpr in_env scrut + bndr' = (getBinderId in_env bndr, TagSig TagProper) + +-- Compute binder sigs based on the constructors strict fields. +-- NB: Not used if we have tuple info from the scrutinee. +addAltBndrInfo :: forall p. TagEnv p -> AltCon -> [BinderP p] -> (TagEnv p, [BinderP 'InferTaggedBinders]) +addAltBndrInfo env (DataAlt con) bndrs + | not (isUnboxedTupleDataCon con || isUnboxedSumDataCon con) + = (out_env, out_bndrs) + where + marks = dataConRuntimeRepStrictness con :: [StrictnessMark] + out_bndrs = zipWith mk_bndr bndrs marks + out_env = extendSigEnv env out_bndrs + + mk_bndr :: (BinderP p -> StrictnessMark -> (Id, TagSig)) + mk_bndr bndr mark + | isUnliftedType (idType id) || isMarkedStrict mark + = (id, TagSig TagProper) + | otherwise + = noSig env bndr + where + id = getBinderId env bndr + +addAltBndrInfo env _ bndrs = (env, map (noSig env) bndrs) + +----------------------------- +inferTagBind :: (OutputableInferPass p, InferExtEq p) + => TagEnv p -> GenStgBinding p -> (TagEnv p, GenStgBinding 'InferTaggedBinders) +inferTagBind in_env (StgNonRec bndr rhs) + = + -- pprTrace "inferBindNonRec" ( + -- ppr bndr $$ + -- ppr (isDeadEndId id) $$ + -- ppr sig) + (env', StgNonRec (id, sig) rhs') + where + id = getBinderId in_env bndr + env' = extendSigEnv in_env [(id, sig)] + (sig,rhs') = inferTagRhs id in_env rhs + +inferTagBind in_env (StgRec pairs) + = -- pprTrace "rec" (ppr (map fst pairs) $$ ppr (in_env { te_env = out_env }, StgRec pairs')) $ + (in_env { te_env = out_env }, StgRec pairs') + where + (bndrs, rhss) = unzip pairs + in_ids = map (getBinderId in_env) bndrs + init_sigs = map (initSig) $ zip in_ids rhss + (out_env, pairs') = go in_env init_sigs rhss + + go :: forall q. (OutputableInferPass q , InferExtEq q) => TagEnv q -> [TagSig] -> [GenStgRhs q] + -> (TagSigEnv, [((Id,TagSig), GenStgRhs 'InferTaggedBinders)]) + go go_env in_sigs go_rhss + -- | pprTrace "go" (ppr in_ids $$ ppr in_sigs $$ ppr out_sigs $$ ppr rhss') False + -- = undefined + | in_sigs == out_sigs = (te_env rhs_env, out_bndrs `zip` rhss') + | otherwise = go env' out_sigs rhss' + where + out_bndrs = map updateBndr in_bndrs -- TODO: Keeps in_ids alive + in_bndrs = in_ids `zip` in_sigs + rhs_env = extendSigEnv go_env in_bndrs + (out_sigs, rhss') = unzip (zipWithEqual "inferTagBind" anaRhs in_ids go_rhss) + env' = makeTagged go_env + + anaRhs :: Id -> GenStgRhs q -> (TagSig, GenStgRhs 'InferTaggedBinders) + anaRhs bnd rhs = inferTagRhs bnd rhs_env rhs + + updateBndr :: (Id,TagSig) -> (Id,TagSig) + updateBndr (v,sig) = (setIdTagSig v sig, sig) + +initSig :: forall p. (Id, GenStgRhs p) -> TagSig +-- Initial signature for the fixpoint loop +initSig (_bndr, StgRhsCon {}) = TagSig TagTagged +initSig (bndr, StgRhsClosure _ _ _ _ _) = + fromMaybe defaultSig (idTagSig_maybe bndr) + where defaultSig = (TagSig TagTagged) + +{- Note [Bottom functions are TagTagged] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +If we have a function with two branches with one +being bottom, and the other returning a tagged +unboxed tuple what is the result? We give it TagTagged! +To answer why consider this function: + +foo :: Bool -> (# Bool, Bool #) +foo x = case x of + True -> (# True,True #) + False -> undefined + +The true branch is obviously tagged. The other branch isn't. +We want to treat the *result* of foo as tagged as well so that +the combination of the branches also is tagged if all non-bottom +branches are tagged. +This is safe because the function is still always called/entered as long +as it's applied to arguments. Since the function will never return we can give +it safely any tag sig we like. +So we give it TagTagged, as it allows the combined tag sig of the case expression +to be the combination of all non-bottoming branches. + +-} + +----------------------------- +inferTagRhs :: forall p. + (OutputableInferPass p, InferExtEq p) + => Id -- ^ Id we are binding to. + -> TagEnv p -- ^ + -> GenStgRhs p -- ^ + -> (TagSig, GenStgRhs 'InferTaggedBinders) +inferTagRhs bnd_id in_env (StgRhsClosure ext cc upd bndrs body) + | isDeadEndId bnd_id && (notNull) bndrs + -- See Note [Bottom functions are TagTagged] + = (TagSig TagTagged, StgRhsClosure ext cc upd out_bndrs body') + | otherwise + = --pprTrace "inferTagRhsClosure" (ppr (_top, _grp_ids, env,info')) $ + (TagSig info', StgRhsClosure ext cc upd out_bndrs body') + where + out_bndrs + | Just marks <- idCbvMarks_maybe bnd_id + -- Sometimes an we eta-expand foo with additional arguments after ww, and we also trim + -- the list of marks to the last strict entry. So we can conservatively + -- assume these are not strict + = zipWith (mkArgSig) bndrs (marks ++ repeat NotMarkedCbv) + | otherwise = map (noSig env') bndrs :: [(Id,TagSig)] + + env' = extendSigEnv in_env out_bndrs + (info, body') = inferTagExpr env' body + info' + -- It's a thunk + | null bndrs + = TagDunno + -- TODO: We could preserve tuple fields for thunks + -- as well. But likely not worth the complexity. + + | otherwise = info + + mkArgSig :: BinderP p -> CbvMark -> (Id,TagSig) + mkArgSig bndp mark = + let id = getBinderId in_env bndp + tag = case mark of + MarkedCbv -> TagProper + _ + | isUnliftedType (idType id) -> TagProper + | otherwise -> TagDunno + in (id, TagSig tag) + +inferTagRhs _ env _rhs@(StgRhsCon cc con cn ticks args) +-- Constructors, which have untagged arguments to strict fields +-- become thunks. We encode this by giving changing RhsCon nodes the info TagDunno + = --pprTrace "inferTagRhsCon" (ppr grp_ids) $ + (TagSig (inferConTag env con args), StgRhsCon cc con cn ticks args) + +{- Note [Constructor TagSigs] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@inferConTag@ will infer the proper tag signature for a binding who's RHS is a constructor +or a StgConApp expression. +Usually these will simply be TagProper. But there are exceptions. +If any of the fields in the constructor are strict, but any argument to these +fields is not tagged then we will have to case on the argument before storing +in the constructor. Which means for let bindings the RHS turns into a thunk +which obviously is no longer properly tagged. +For example we might start with: + + let x<TagDunno> = f ... + let c<TagProper> = StrictPair x True + +But we know during the rewrite stage x will need to be evaluated in the RHS +of `c` so we will infer: + + let x<TagDunno> = f ... + let c<TagDunno> = StrictPair x True + +Which in the rewrite stage will then be rewritten into: + + let x<TagDunno> = f ... + let c<TagDunno> = case x of x' -> StrictPair x' True + +The other exception is unboxed tuples. These will get a TagTuple +signature with a list of TagInfo about their individual binders +as argument. As example: + + let c<TagProper> = True + let x<TagDunno> = ... + let f<?> z = case z of z'<TagProper> -> (# c, x #) + +Here we will infer for f the Signature <TagTuple[TagProper,TagDunno]>. +This information will be used if we scrutinze a saturated application of +`f` in order to determine the taggedness of the result. +That is for `case f x of (# r1,r2 #) -> rhs` we can infer +r1<TagProper> and r2<TagDunno> which allows us to skip all tag checks on `r1` +in `rhs`. + +Things get a bit more complicated with nesting: + + let closeFd<TagTuple[...]> = ... + let f x = ... + case x of + _ -> Solo# closeFd + +The "natural" signature for the Solo# branch in `f` would be <TagTuple[TagTuple[...]]>. +But we flatten this out to <TagTuple[TagDunno]> for the time being as it improves compile +time and there doesn't seem to huge benefit to doing differently. + + -} + +-- See Note [Constructor TagSigs] +inferConTag :: TagEnv p -> DataCon -> [StgArg] -> TagInfo +inferConTag env con args + | isUnboxedTupleDataCon con + = TagTuple $ map (flatten_arg_tag . lookupInfo env) args + | otherwise = + -- pprTrace "inferConTag" + -- ( text "con:" <> ppr con $$ + -- text "args:" <> ppr args $$ + -- text "marks:" <> ppr (dataConRuntimeRepStrictness con) $$ + -- text "arg_info:" <> ppr (map (lookupInfo env) args) $$ + -- text "info:" <> ppr info) $ + info + where + info = if any arg_needs_eval strictArgs then TagDunno else TagProper + strictArgs = zipEqual "inferTagRhs" args (dataConRuntimeRepStrictness con) :: ([(StgArg, StrictnessMark)]) + arg_needs_eval (arg,strict) + -- lazy args + | not (isMarkedStrict strict) = False + | tag <- (lookupInfo env arg) + -- banged args need to be tagged, or require eval + = not (isTaggedInfo tag) + + flatten_arg_tag (TagTagged) = TagProper + flatten_arg_tag (TagProper ) = TagProper + flatten_arg_tag (TagTuple _) = TagDunno -- See Note [Constructor TagSigs] + flatten_arg_tag (TagDunno) = TagDunno + + +collectExportInfo :: [GenStgTopBinding 'InferTaggedBinders] -> NameEnv TagSig +collectExportInfo binds = + mkNameEnv bndr_info + where + bndr_info = concatMap collect binds :: [(Name,TagSig)] + + collect (StgTopStringLit {}) = [] + collect (StgTopLifted bnd) = + case bnd of + StgNonRec (id,sig) _rhs + | TagSig TagDunno <- sig -> [] + | otherwise -> [(idName id,sig)] + StgRec bnds -> collectRec bnds + + collectRec :: [(BinderP 'InferTaggedBinders, rhs)] -> [(Name,TagSig)] + collectRec [] = [] + collectRec (bnd:bnds) + | (p,_rhs) <- bnd + , (id,sig) <- p + , TagSig TagDunno <- sig + = (idName id,sig) : collectRec bnds + | otherwise = collectRec bnds diff --git a/compiler/GHC/Stg/InferTags/Rewrite.hs b/compiler/GHC/Stg/InferTags/Rewrite.hs new file mode 100644 index 0000000000..8b2c24cbdb --- /dev/null +++ b/compiler/GHC/Stg/InferTags/Rewrite.hs @@ -0,0 +1,495 @@ +-- +-- Copyright (c) 2019 Andreas Klebinger +-- + +{-# LANGUAGE CPP #-} +{-# LANGUAGE TypeFamilies #-} +{-# LANGUAGE RankNTypes #-} +{-# LANGUAGE MultiParamTypeClasses #-} +{-# LANGUAGE GADTs #-} +{-# LANGUAGE TupleSections #-} +{-# LANGUAGE DataKinds #-} +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE FlexibleInstances #-} +{-# LANGUAGE BangPatterns #-} +{-# LANGUAGE GeneralisedNewtypeDeriving #-} + +module GHC.Stg.InferTags.Rewrite (rewriteTopBinds) +where + +import GHC.Prelude + +import GHC.Types.Id +import GHC.Types.Name +import GHC.Types.Unique.Supply +import GHC.Types.Unique.FM +import GHC.Types.RepType +import GHC.Unit.Types (Module) + +import GHC.Core.DataCon +import GHC.Core (AltCon(..) ) +import GHC.Core.Type + +import GHC.StgToCmm.Types + +import GHC.Stg.Utils +import GHC.Stg.Syntax as StgSyn + +import GHC.Data.Maybe +import GHC.Utils.Panic +import GHC.Utils.Panic.Plain + +import GHC.Utils.Outputable +import GHC.Utils.Monad.State.Strict +import GHC.Utils.Misc + +import GHC.Stg.InferTags.Types + +import Control.Monad +import GHC.Types.Basic (CbvMark (NotMarkedCbv, MarkedCbv), isMarkedCbv, TopLevelFlag(..), isTopLevel) +import GHC.Types.Var.Set + +-- import GHC.Utils.Trace +-- import GHC.Driver.Ppr + +newtype RM a = RM { unRM :: (State (UniqFM Id TagSig, UniqSupply, Module, IdSet) a) } + deriving (Functor, Monad, Applicative) + +------------------------------------------------------------ +-- Add cases around strict fields where required. +------------------------------------------------------------ +{- +The work of this pass is simple: +* We traverse the STG AST looking for constructor allocations. +* For all allocations we check if there are strict fields in the constructor. +* For any strict field we check if the argument is known to be properly tagged. +* If it's not known to be properly tagged, we wrap the whole thing in a case, + which will force the argument before allocation. +This is described in detail in Note [Strict Field Invariant]. + +The only slight complication is that we have to make sure not to invalidate free +variable analysis in the process. + +Note [Partially applied workers] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Sometimes we will get a function f of the form + -- Arity 1 + f :: Dict a -> a -> b -> (c -> d) + f dict a b = case dict of + C m1 m2 -> m1 a b + +Which will result in a W/W split along the lines of + -- Arity 1 + f :: Dict a -> a -> b -> (c -> d) + f dict a = case dict of + C m1 m2 -> $wf m1 a b + + -- Arity 4 + $wf :: (a -> b -> d -> c) -> a -> b -> c -> d + $wf m1 a b c = m1 a b c + +It's notable that the worker is called *undersatured* in the wrapper. +At runtime what happens is that the wrapper will allocate a PAP which +once fully applied will call the worker. And all is fine. + +But what about a strict worker! Well the function returned by `f` would +be a unknown call, so we lose the ability to enfore the invariant that +cbv marked arguments from StictWorkerId's are actually properly tagged +as the annotations would be unavailable at the (unknown) call site. + +The fix is easy. We eta-expand all calls to functions taking call-by-value +arguments during CorePrep just like we do with constructor allocations. + +Note [Upholding free variable annotations] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +The code generator requires us to maintain exact information +about free variables about closures. Since we convert some +RHSs from constructor allocations to closures we have to provide +fvs of these closures. Not all constructor arguments will become +free variables. Only these which are not bound at the top level +have to be captured. +To facilitate this we keep track of a set of locally bound variables in +the current context which we then use to filter constructor arguments +when building the free variable list. +-} + +-------------------------------- +-- Utilities +-------------------------------- + +instance MonadUnique RM where + getUniqueSupplyM = RM $ do + (m, us, mod,lcls) <- get + let (us1, us2) = splitUniqSupply us + (put) (m,us2,mod,lcls) + return us1 + +getMap :: RM (UniqFM Id TagSig) +getMap = RM $ ((\(fst,_,_,_) -> fst) <$> get) + +setMap :: (UniqFM Id TagSig) -> RM () +setMap m = RM $ do + (_,us,mod,lcls) <- get + put (m, us,mod,lcls) + +getMod :: RM Module +getMod = RM $ ( (\(_,_,thrd,_) -> thrd) <$> get) + +getFVs :: RM IdSet +getFVs = RM $ ((\(_,_,_,lcls) -> lcls) <$> get) + +setFVs :: IdSet -> RM () +setFVs fvs = RM $ do + (tag_map,us,mod,_lcls) <- get + put (tag_map, us,mod,fvs) + +-- Rewrite the RHS(s) while making the id and it's sig available +-- to determine if things are tagged/need to be captured as FV. +withBind :: TopLevelFlag -> GenStgBinding 'InferTaggedBinders -> RM a -> RM a +withBind top_flag (StgNonRec bnd _) cont = withBinder top_flag bnd cont +withBind top_flag (StgRec binds) cont = do + let (bnds,_rhss) = unzip binds :: ([(Id, TagSig)], [GenStgRhs 'InferTaggedBinders]) + withBinders top_flag bnds cont + +addTopBind :: GenStgBinding 'InferTaggedBinders -> RM () +addTopBind (StgNonRec (id, tag) _) = do + s <- getMap + -- pprTraceM "AddBind" (ppr id) + setMap $ addToUFM s id tag + return () +addTopBind (StgRec binds) = do + let (bnds,_rhss) = unzip binds + !s <- getMap + -- pprTraceM "AddBinds" (ppr $ map fst bnds) + setMap $! addListToUFM s bnds + +withBinder :: TopLevelFlag -> (Id, TagSig) -> RM a -> RM a +withBinder top_flag (id,sig) cont = do + oldMap <- getMap + setMap $ addToUFM oldMap id sig + a <- if isTopLevel top_flag + then cont + else withLcl id cont + setMap oldMap + return a + +withBinders :: TopLevelFlag -> [(Id, TagSig)] -> RM a -> RM a +withBinders TopLevel sigs cont = do + oldMap <- getMap + setMap $ addListToUFM oldMap sigs + a <- cont + setMap oldMap + return a +withBinders NotTopLevel sigs cont = do + oldMap <- getMap + oldFvs <- getFVs + setMap $ addListToUFM oldMap sigs + setFVs $ extendVarSetList oldFvs (map fst sigs) + a <- cont + setMap oldMap + setFVs oldFvs + return a + +-- | Compute the argument with the given set of ids treated as requiring capture +-- as free variables. +withClosureLcls :: DIdSet -> RM a -> RM a +withClosureLcls fvs act = do + old_fvs <- getFVs + let fvs' = nonDetStrictFoldDVarSet (flip extendVarSet) old_fvs fvs + setFVs fvs' + r <- act + setFVs old_fvs + return r + +-- | Compute the argument with the given id treated as requiring capture +-- as free variables in closures. +withLcl :: Id -> RM a -> RM a +withLcl fv act = do + old_fvs <- getFVs + let fvs' = extendVarSet old_fvs fv + setFVs fvs' + r <- act + setFVs old_fvs + return r + +isTagged :: Id -> RM Bool +isTagged v = do + this_mod <- getMod + case nameIsLocalOrFrom this_mod (idName v) of + True + | isUnliftedType (idType v) + -> return True + | otherwise -> do -- Local binding + !s <- getMap + let !sig = lookupWithDefaultUFM s (pprPanic "unknown Id:" (ppr v)) v + return $ case sig of + TagSig info -> + case info of + TagDunno -> False + TagProper -> True + TagTagged -> True + TagTuple _ -> True -- Consider unboxed tuples tagged. + False -- Imported + | Just con <- (isDataConWorkId_maybe v) + , isNullaryRepDataCon con + -> return True + | Just lf_info <- idLFInfo_maybe v + -> return $ + -- Can we treat the thing as tagged based on it's LFInfo? + case lf_info of + -- Function, applied not entered. + LFReEntrant {} + -> True + -- Thunks need to be entered. + LFThunk {} + -> False + -- LFCon means we already know the tag, and it's tagged. + LFCon {} + -> True + LFUnknown {} + -> False + LFUnlifted {} + -> True + LFLetNoEscape {} + -- Shouldn't be possible. I don't think we can export letNoEscapes + -> True + + | otherwise + -> return False + + +isArgTagged :: StgArg -> RM Bool +isArgTagged (StgLitArg _) = return True +isArgTagged (StgVarArg v) = isTagged v + +mkLocalArgId :: Id -> RM Id +mkLocalArgId id = do + !u <- getUniqueM + return $! setIdUnique (localiseId id) u + +--------------------------- +-- Actual rewrite pass +--------------------------- + + +rewriteTopBinds :: Module -> UniqSupply -> [GenStgTopBinding 'InferTaggedBinders] -> [TgStgTopBinding] +rewriteTopBinds mod us binds = + let doBinds = mapM rewriteTop binds + + in evalState (unRM doBinds) (mempty, us, mod, mempty) + +rewriteTop :: InferStgTopBinding -> RM TgStgTopBinding +rewriteTop (StgTopStringLit v s) = return $! (StgTopStringLit v s) +rewriteTop (StgTopLifted bind) = do + -- Top level bindings can, and must remain in scope + addTopBind bind + (StgTopLifted) <$!> (rewriteBinds TopLevel bind) + +-- For top level binds, the wrapper is guaranteed to be `id` +rewriteBinds :: TopLevelFlag -> InferStgBinding -> RM (TgStgBinding) +rewriteBinds _top_flag (StgNonRec v rhs) = do + (!rhs) <- rewriteRhs v rhs + return $! (StgNonRec (fst v) rhs) +rewriteBinds top_flag b@(StgRec binds) = + -- Bring sigs of binds into scope for all rhss + withBind top_flag b $ do + (rhss) <- mapM (uncurry rewriteRhs) binds + return $! (mkRec rhss) + where + mkRec :: [TgStgRhs] -> TgStgBinding + mkRec rhss = StgRec (zip (map (fst . fst) binds) rhss) + +-- Rewrite a RHS +rewriteRhs :: (Id,TagSig) -> InferStgRhs + -> RM (TgStgRhs) +rewriteRhs (_id, _tagSig) (StgRhsCon ccs con cn ticks args) = {-# SCC rewriteRhs_ #-} do + -- pprTraceM "rewriteRhs" (ppr _id) + + -- Look up the nodes representing the constructor arguments. + fieldInfos <- mapM isArgTagged args + + -- Filter out non-strict fields. + let strictFields = + getStrictConArgs con (zip args fieldInfos) :: [(StgArg,Bool)] -- (nth-argument, tagInfo) + -- Filter out already tagged arguments. + let needsEval = map fst . --get the actual argument + filter (not . snd) $ -- Keep untagged (False) elements. + strictFields :: [StgArg] + let evalArgs = [v | StgVarArg v <- needsEval] :: [Id] + + if (null evalArgs) + then return $! (StgRhsCon ccs con cn ticks args) + else do + --assert not (isTaggedSig tagSig) + -- pprTraceM "CreatingSeqs for " $ ppr _id <+> ppr node_id + + -- At this point iff we have possibly untagged arguments to strict fields + -- we convert the RHS into a RhsClosure which will evaluate the arguments + -- before allocating the constructor. + let ty_stub = panic "mkSeqs shouldn't use the type arg" + conExpr <- mkSeqs args evalArgs (\taggedArgs -> StgConApp con cn taggedArgs ty_stub) + + fvs <- fvArgs args + -- lcls <- getFVs + -- pprTraceM "RhsClosureConversion" (ppr (StgRhsClosure fvs ccs ReEntrant [] $! conExpr) $$ text "lcls:" <> ppr lcls) + return $! (StgRhsClosure fvs ccs ReEntrant [] $! conExpr) +rewriteRhs _binding (StgRhsClosure fvs ccs flag args body) = do + withBinders NotTopLevel args $ + withClosureLcls fvs $ + StgRhsClosure fvs ccs flag (map fst args) <$> rewriteExpr False body + -- return (closure) + +fvArgs :: [StgArg] -> RM DVarSet +fvArgs args = do + fv_lcls <- getFVs + -- pprTraceM "fvArgs" (text "args:" <> ppr args $$ text "lcls:" <> pprVarSet (fv_lcls) (braces . fsep . map ppr) ) + return $ mkDVarSet [ v | StgVarArg v <- args, elemVarSet v fv_lcls] + +type IsScrut = Bool + +rewriteExpr :: IsScrut -> InferStgExpr -> RM TgStgExpr +rewriteExpr _ (e@StgCase {}) = rewriteCase e +rewriteExpr _ (e@StgLet {}) = rewriteLet e +rewriteExpr _ (e@StgLetNoEscape {}) = rewriteLetNoEscape e +rewriteExpr isScrut (StgTick t e) = StgTick t <$!> rewriteExpr isScrut e +rewriteExpr _ e@(StgConApp {}) = rewriteConApp e + +rewriteExpr isScrut e@(StgApp {}) = rewriteApp isScrut e +rewriteExpr _ (StgLit lit) = return $! (StgLit lit) +rewriteExpr _ (StgOpApp op args res_ty) = return $! (StgOpApp op args res_ty) + +rewriteCase :: InferStgExpr -> RM TgStgExpr +rewriteCase (StgCase scrut bndr alt_type alts) = + withBinder NotTopLevel bndr $ + pure StgCase <*> + rewriteExpr True scrut <*> + pure (fst bndr) <*> + pure alt_type <*> + mapM rewriteAlt alts + +rewriteCase _ = panic "Impossible: nodeCase" + +rewriteAlt :: InferStgAlt -> RM TgStgAlt +rewriteAlt (altCon, bndrs, rhs) = do + withBinders NotTopLevel bndrs $ do + !rhs' <- rewriteExpr False rhs + return $! (altCon, map fst bndrs, rhs') + +rewriteLet :: InferStgExpr -> RM TgStgExpr +rewriteLet (StgLet xt bind expr) = do + (!bind') <- rewriteBinds NotTopLevel bind + withBind NotTopLevel bind $ do + -- pprTraceM "withBindLet" (ppr $ bindersOfX bind) + !expr' <- rewriteExpr False expr + return $! (StgLet xt bind' expr') +rewriteLet _ = panic "Impossible" + +rewriteLetNoEscape :: InferStgExpr -> RM TgStgExpr +rewriteLetNoEscape (StgLetNoEscape xt bind expr) = do + (!bind') <- rewriteBinds NotTopLevel bind + withBind NotTopLevel bind $ do + !expr' <- rewriteExpr False expr + return $! (StgLetNoEscape xt bind' expr') +rewriteLetNoEscape _ = panic "Impossible" + +rewriteConApp :: InferStgExpr -> RM TgStgExpr +rewriteConApp (StgConApp con cn args tys) = do + -- We check if the strict field arguments are already known to be tagged. + -- If not we evaluate them first. + fieldInfos <- mapM isArgTagged args + let strictIndices = getStrictConArgs con (zip fieldInfos args) :: [(Bool, StgArg)] + let needsEval = map snd . filter (not . fst) $ strictIndices :: [StgArg] + let evalArgs = [v | StgVarArg v <- needsEval] :: [Id] + if (not $ null evalArgs) + then do + -- pprTraceM "Creating conAppSeqs for " $ ppr nodeId <+> parens ( ppr evalArgs ) -- <+> parens ( ppr fieldInfos ) + mkSeqs args evalArgs (\taggedArgs -> StgConApp con cn taggedArgs tys) + else return $! (StgConApp con cn args tys) + +rewriteConApp _ = panic "Impossible" + +-- Special case: Expressions like `case x of { ... }` +rewriteApp :: IsScrut -> InferStgExpr -> RM TgStgExpr +rewriteApp True (StgApp f []) = do + -- pprTraceM "rewriteAppScrut" (ppr f) + f_tagged <- isTagged f + -- isTagged looks at more than the result of our analysis. + -- So always update here if useful. + let f' = if f_tagged + then setIdTagSig f (TagSig TagProper) + else f + return $! StgApp f' [] + where +rewriteApp _ (StgApp f args) + -- | pprTrace "rewriteAppOther" (ppr f <+> ppr args) False + -- = undefined + | Just marks <- idCbvMarks_maybe f + , relevant_marks <- dropWhileEndLE (not . isMarkedCbv) marks + , any isMarkedCbv relevant_marks + = assert (length relevant_marks <= length args) + unliftArg relevant_marks + + where + -- If the function expects any argument to be call-by-value ensure the argument is already + -- evaluated. + unliftArg relevant_marks = do + argTags <- mapM isArgTagged args + let argInfo = zipWith3 ((,,)) args (relevant_marks++repeat NotMarkedCbv) argTags :: [(StgArg, CbvMark, Bool)] + + -- untagged cbv argument positions + cbvArgInfo = filter (\x -> sndOf3 x == MarkedCbv && thdOf3 x == False) argInfo + cbvArgIds = [x | StgVarArg x <- map fstOf3 cbvArgInfo] :: [Id] + mkSeqs args cbvArgIds (\cbv_args -> StgApp f cbv_args) + +rewriteApp _ (StgApp f args) = return $ StgApp f args +rewriteApp _ _ = panic "Impossible" + +-- `mkSeq` x x' e generates `case x of x' -> e` +-- We could also substitute x' for x in e but that's so rarely beneficial +-- that we don't bother. +mkSeq :: Id -> Id -> TgStgExpr -> TgStgExpr +mkSeq id bndr !expr = + -- pprTrace "mkSeq" (ppr (id,bndr)) $ + let altTy = mkStgAltTypeFromStgAlts bndr [(DEFAULT, [], panic "Not used")] + in + StgCase (StgApp id []) bndr altTy [(DEFAULT, [], expr)] + +-- `mkSeqs args vs mkExpr` will force all vs, and construct +-- an argument list args' where each v is replaced by it's evaluated +-- counterpart v'. +-- That is if we call `mkSeqs [StgVar x, StgLit l] [x] mkExpr` then +-- the result will be (case x of x' { _DEFAULT -> <mkExpr [StgVar x', StgLit l]>} +{-# INLINE mkSeqs #-} -- We inline to avoid allocating mkExpr +mkSeqs :: [StgArg] -- ^ Original arguments + -> [Id] -- ^ var args to be evaluated ahead of time + -> ([StgArg] -> TgStgExpr) + -- ^ Function that reconstructs the expressions when passed + -- the list of evaluated arguments. + -> RM TgStgExpr +mkSeqs args untaggedIds mkExpr = do + argMap <- mapM (\arg -> (arg,) <$> mkLocalArgId arg ) untaggedIds :: RM [(InId, OutId)] + -- mapM_ (pprTraceM "Forcing strict args before allocation:" . ppr) argMap + let taggedArgs :: [StgArg] + = map (\v -> case v of + StgVarArg v' -> StgVarArg $ fromMaybe v' $ lookup v' argMap + lit -> lit) + args + + let conBody = mkExpr taggedArgs + let body = foldr (\(v,bndr) expr -> mkSeq v bndr expr) conBody argMap + return $! body + +-- Out of all arguments passed at runtime only return these ending up in a +-- strict field +getStrictConArgs :: DataCon -> [a] -> [a] +getStrictConArgs con args + -- These are always lazy in their arguments. + | isUnboxedTupleDataCon con = [] + | isUnboxedSumDataCon con = [] + -- For proper data cons we have to check. + | otherwise = + [ arg | (arg,MarkedStrict) + <- zipEqual "getStrictConArgs" + args + (dataConRuntimeRepStrictness con)] diff --git a/compiler/GHC/Stg/InferTags/TagSig.hs b/compiler/GHC/Stg/InferTags/TagSig.hs new file mode 100644 index 0000000000..a1381881f1 --- /dev/null +++ b/compiler/GHC/Stg/InferTags/TagSig.hs @@ -0,0 +1,66 @@ +{-# LANGUAGE TypeFamilies, DataKinds, GADTs, FlexibleInstances #-} +{-# LANGUAGE CPP #-} +{-# LANGUAGE ConstraintKinds #-} + +-- We export this type from this module instead of GHC.Stg.InferTags.Types +-- because it's used by more than the analysis itself. For example in interface +-- files where we record a tag signature for bindings. +-- By putting the sig into it's own module we can avoid module loops. +module GHC.Stg.InferTags.TagSig + +where + +import GHC.Prelude + +import GHC.Types.Var +import GHC.Utils.Outputable +import GHC.Utils.Binary +import GHC.Utils.Panic.Plain + +data TagInfo + = TagDunno -- We don't know anything about the tag. + | TagTuple [TagInfo] -- Represents a function/thunk which when evaluated + -- will return a Unboxed tuple whos components have + -- the given TagInfos. + | TagProper -- Heap pointer to properly-tagged value + | TagTagged -- Bottom of the domain. + deriving (Eq) + +instance Outputable TagInfo where + ppr TagTagged = text "TagTagged" + ppr TagDunno = text "TagDunno" + ppr TagProper = text "TagProper" + ppr (TagTuple tis) = text "TagTuple" <> brackets (pprWithCommas ppr tis) + +instance Binary TagInfo where + put_ bh TagDunno = putByte bh 1 + put_ bh (TagTuple flds) = putByte bh 2 >> put_ bh flds + put_ bh TagProper = putByte bh 3 + put_ bh TagTagged = putByte bh 4 + + get bh = do tag <- getByte bh + case tag of 1 -> return TagDunno + 2 -> TagTuple <$> get bh + 3 -> return TagProper + 4 -> return TagTagged + _ -> panic ("get TagInfo " ++ show tag) + +newtype TagSig -- The signature for each binding, this is a newtype as we might + -- want to track more information in the future. + = TagSig TagInfo + deriving (Eq) + +instance Outputable TagSig where + ppr (TagSig ti) = char '<' <> ppr ti <> char '>' +instance OutputableBndr (Id,TagSig) where + pprInfixOcc = ppr + pprPrefixOcc = ppr + +instance Binary TagSig where + put_ bh (TagSig sig) = put_ bh sig + get bh = pure TagSig <*> get bh + +isTaggedSig :: TagSig -> Bool +isTaggedSig (TagSig TagProper) = True +isTaggedSig (TagSig TagTagged) = True +isTaggedSig _ = False diff --git a/compiler/GHC/Stg/InferTags/Types.hs b/compiler/GHC/Stg/InferTags/Types.hs new file mode 100644 index 0000000000..bcb1f3300b --- /dev/null +++ b/compiler/GHC/Stg/InferTags/Types.hs @@ -0,0 +1,137 @@ +{-# LANGUAGE TypeFamilies, DataKinds, GADTs, FlexibleInstances #-} +{-# LANGUAGE CPP #-} +{-# LANGUAGE ConstraintKinds #-} + +{-# LANGUAGE UndecidableInstances #-} + -- To permit: type instance XLet 'InferTaggedBinders = XLet 'CodeGen + +module GHC.Stg.InferTags.Types + ( module GHC.Stg.InferTags.Types + , module TagSig) +where + +import GHC.Prelude + +import GHC.Core.DataCon +import GHC.Core.Type (isUnliftedType) +import GHC.Types.Id +import GHC.Stg.Syntax +import GHC.Stg.InferTags.TagSig as TagSig +import GHC.Types.Var.Env +import GHC.Utils.Outputable +import GHC.Utils.Misc( zipWithEqual ) +import GHC.Utils.Panic + +import GHC.StgToCmm.Types + +{- ********************************************************************* +* * + Supporting data types +* * +********************************************************************* -} + +type instance BinderP 'InferTaggedBinders = (Id, TagSig) +type instance XLet 'InferTaggedBinders = XLet 'CodeGen +type instance XLetNoEscape 'InferTaggedBinders = XLetNoEscape 'CodeGen +type instance XRhsClosure 'InferTaggedBinders = XRhsClosure 'CodeGen + +type InferStgTopBinding = GenStgTopBinding 'InferTaggedBinders +type InferStgBinding = GenStgBinding 'InferTaggedBinders +type InferStgExpr = GenStgExpr 'InferTaggedBinders +type InferStgRhs = GenStgRhs 'InferTaggedBinders +type InferStgAlt = GenStgAlt 'InferTaggedBinders + +combineAltInfo :: TagInfo -> TagInfo -> TagInfo +combineAltInfo TagDunno _ = TagDunno +combineAltInfo _ TagDunno = TagDunno +combineAltInfo (TagTuple {}) TagProper = panic "Combining unboxed tuple with non-tuple result" +combineAltInfo TagProper (TagTuple {}) = panic "Combining unboxed tuple with non-tuple result" +combineAltInfo TagProper TagProper = TagProper +combineAltInfo (TagTuple is1) (TagTuple is2) = TagTuple (zipWithEqual "combineAltInfo" combineAltInfo is1 is2) +combineAltInfo (TagTagged) ti = ti +combineAltInfo ti TagTagged = ti + +type TagSigEnv = IdEnv TagSig +data TagEnv p = TE { te_env :: TagSigEnv + , te_get :: BinderP p -> Id + } + +instance Outputable (TagEnv p) where + ppr te = ppr (te_env te) + + +getBinderId :: TagEnv p -> BinderP p -> Id +getBinderId = te_get + +initEnv :: TagEnv 'CodeGen +initEnv = TE { te_env = emptyVarEnv + , te_get = \x -> x} + +-- | Simple convert env to a env of the 'InferTaggedBinders pass +-- with no other changes. +makeTagged :: TagEnv p -> TagEnv 'InferTaggedBinders +makeTagged env = TE { te_env = te_env env + , te_get = fst } + +noSig :: TagEnv p -> BinderP p -> (Id, TagSig) +noSig env bndr + | isUnliftedType (idType var) = (var, TagSig TagProper) + | otherwise = (var, TagSig TagDunno) + where + var = getBinderId env bndr + +lookupSig :: TagEnv p -> Id -> Maybe TagSig +lookupSig env fun = lookupVarEnv (te_env env) fun + +lookupInfo :: TagEnv p -> StgArg -> TagInfo +lookupInfo env (StgVarArg var) + -- Nullary data constructors like True, False + | Just dc <- isDataConWorkId_maybe var + , isNullaryRepDataCon dc + = TagProper + + | isUnliftedType (idType var) + = TagProper + + -- Variables in the environment. + | Just (TagSig info) <- lookupVarEnv (te_env env) var + = info + + | Just lf_info <- idLFInfo_maybe var + = case lf_info of + -- Function, tagged (with arity) + LFReEntrant {} + -> TagProper + -- Thunks need to be entered. + LFThunk {} + -> TagDunno + -- Constructors, already tagged. + LFCon {} + -> TagProper + LFUnknown {} + -> TagDunno + LFUnlifted {} + -> TagProper + -- Shouldn't be possible. I don't think we can export letNoEscapes + LFLetNoEscape {} -> panic "LFLetNoEscape exported" + + | otherwise + = TagDunno + +lookupInfo _ (StgLitArg {}) + = TagProper + +isDunnoSig :: TagSig -> Bool +isDunnoSig (TagSig TagDunno) = True +isDunnoSig (TagSig TagProper) = False +isDunnoSig (TagSig TagTuple{}) = False +isDunnoSig (TagSig TagTagged{}) = False + +isTaggedInfo :: TagInfo -> Bool +isTaggedInfo TagProper = True +isTaggedInfo TagTagged = True +isTaggedInfo _ = False + +extendSigEnv :: TagEnv p -> [(Id,TagSig)] -> TagEnv p +extendSigEnv env@(TE { te_env = sig_env }) bndrs + = env { te_env = extendVarEnvList sig_env bndrs } diff --git a/compiler/GHC/Stg/Lint.hs b/compiler/GHC/Stg/Lint.hs index aa7d03cacc..405abdd1f4 100644 --- a/compiler/GHC/Stg/Lint.hs +++ b/compiler/GHC/Stg/Lint.hs @@ -40,6 +40,7 @@ module GHC.Stg.Lint ( lintStgTopBindings ) where import GHC.Prelude import GHC.Stg.Syntax +import GHC.Stg.Utils import GHC.Core.Lint ( interactiveInScope ) import GHC.Core.DataCon @@ -67,6 +68,7 @@ import GHC.Data.Bag ( Bag, emptyBag, isEmptyBag, snocBag, bagToList ) import Control.Applicative ((<|>)) import Control.Monad +import Data.Maybe lintStgTopBindings :: forall a . (OutputablePass a, BinderP a ~ Id) => Logger @@ -188,10 +190,24 @@ lintStgExpr :: (OutputablePass a, BinderP a ~ Id) => GenStgExpr a -> LintM () lintStgExpr (StgLit _) = return () -lintStgExpr (StgApp fun args) = do +lintStgExpr e@(StgApp fun args) = do lintStgVar fun mapM_ lintStgArg args + lf <- getLintFlags + when (lf_unarised lf) $ do + -- A function which expects a unlifted argument as n'th argument + -- always needs to be applied to n arguments. + -- See Note [Strict Worker Ids]. + let marks = fromMaybe [] $ idCbvMarks_maybe fun + if length marks > length args + then addErrL $ hang (text "Undersatured cbv marked ID in App" <+> ppr e ) 2 $ + (text "marks" <> ppr marks $$ + text "args" <> ppr args $$ + text "arity" <> ppr (idArity fun) $$ + text "join_arity" <> ppr (isJoinId_maybe fun)) + else return () + lintStgExpr app@(StgConApp con _n args _arg_tys) = do -- unboxed sums should vanish during unarise lf <- getLintFlags diff --git a/compiler/GHC/Stg/Stats.hs b/compiler/GHC/Stg/Stats.hs index a4d36136bf..7c7df34f27 100644 --- a/compiler/GHC/Stg/Stats.hs +++ b/compiler/GHC/Stg/Stats.hs @@ -146,7 +146,7 @@ statExpr :: StgExpr -> StatEnv statExpr (StgApp _ _) = countOne Applications statExpr (StgLit _) = countOne Literals -statExpr (StgConApp _ _ _ _)= countOne ConstructorApps +statExpr (StgConApp {}) = countOne ConstructorApps statExpr (StgOpApp _ _ _) = countOne PrimitiveApps statExpr (StgTick _ e) = statExpr e diff --git a/compiler/GHC/Stg/Syntax.hs b/compiler/GHC/Stg/Syntax.hs index bf6bac3853..6726bbe526 100644 --- a/compiler/GHC/Stg/Syntax.hs +++ b/compiler/GHC/Stg/Syntax.hs @@ -39,6 +39,9 @@ module GHC.Stg.Syntax ( -- a set of synonyms for the code gen parameterisation CgStgTopBinding, CgStgBinding, CgStgExpr, CgStgRhs, CgStgAlt, + -- Same for taggedness + TgStgTopBinding, TgStgBinding, TgStgExpr, TgStgRhs, TgStgAlt, + -- a set of synonyms for the lambda lifting parameterisation LlStgTopBinding, LlStgBinding, LlStgExpr, LlStgRhs, LlStgAlt, @@ -53,9 +56,7 @@ module GHC.Stg.Syntax ( stgRhsArity, freeVarsOfRhs, isDllConApp, stgArgType, - stripStgTicksTop, stripStgTicksTopE, stgCaseBndrInScope, - bindersOf, bindersOfTop, bindersOfTopBinds, -- ppr StgPprOpts(..), @@ -176,21 +177,6 @@ stgArgType :: StgArg -> Type stgArgType (StgVarArg v) = idType v stgArgType (StgLitArg lit) = literalType lit - --- | Strip ticks of a given type from an STG expression. -stripStgTicksTop :: (StgTickish -> Bool) -> GenStgExpr p -> ([StgTickish], GenStgExpr p) -stripStgTicksTop p = go [] - where go ts (StgTick t e) | p t = go (t:ts) e - -- This special case avoid building a thunk for "reverse ts" when there are no ticks - go [] other = ([], other) - go ts other = (reverse ts, other) - --- | Strip ticks of a given type from an STG expression returning only the expression. -stripStgTicksTopE :: (StgTickish -> Bool) -> GenStgExpr p -> GenStgExpr p -stripStgTicksTopE p = go - where go (StgTick t e) | p t = go e - go other = other - -- | Given an alt type and whether the program is unarised, return whether the -- case binder is in scope. -- @@ -428,36 +414,6 @@ important): [StgTickish] [StgArg] -- Args -{- -Note Stg Passes -~~~~~~~~~~~~~~~ -Here is a short summary of the STG pipeline and where we use the different -StgPass data type indexes: - - 1. CoreToStg.Prep performs several transformations that prepare the desugared - and simplified core to be converted to STG. One of these transformations is - making it so that value lambdas only exist as the RHS of a binding. - - 2. CoreToStg converts the prepared core to STG, specifically GenStg* - parameterised by 'Vanilla. - - 3. Stg.Pipeline does a number of passes on the generated STG. One of these is - the lambda-lifting pass, which internally uses the 'LiftLams - parameterisation to store information for deciding whether or not to lift - each binding. - - 4. Stg.FVs annotates closures with their free variables. To store these - annotations we use the 'CodeGen parameterisation. - - 5. Stg.StgToCmm generates Cmm from the annotated STG. --} - --- | Used as a data type index for the stgSyn AST -data StgPass - = Vanilla - | LiftLams - | CodeGen - -- | Like 'GHC.Hs.Extension.NoExtField', but with an 'Outputable' instance that -- returns 'empty'. data NoExtFieldSilent = NoExtFieldSilent @@ -473,40 +429,11 @@ noExtFieldSilent = NoExtFieldSilent -- TODO: Maybe move this to GHC.Hs.Extension? I'm not sure about the -- implications on build time... --- TODO: Do we really want to the extension point type families to have a closed --- domain? -type family BinderP (pass :: StgPass) -type instance BinderP 'Vanilla = Id -type instance BinderP 'CodeGen = Id - -type family XRhsClosure (pass :: StgPass) -type instance XRhsClosure 'Vanilla = NoExtFieldSilent --- | Code gen needs to track non-global free vars -type instance XRhsClosure 'CodeGen = DIdSet - -type family XLet (pass :: StgPass) -type instance XLet 'Vanilla = NoExtFieldSilent -type instance XLet 'CodeGen = NoExtFieldSilent - --- | When `-fdistinct-constructor-tables` is turned on then --- each usage of a constructor is given an unique number and --- an info table is generated for each different constructor. -data ConstructorNumber = - NoNumber | Numbered Int - -instance Outputable ConstructorNumber where - ppr NoNumber = empty - ppr (Numbered n) = text "#" <> ppr n - -type family XLetNoEscape (pass :: StgPass) -type instance XLetNoEscape 'Vanilla = NoExtFieldSilent -type instance XLetNoEscape 'CodeGen = NoExtFieldSilent - stgRhsArity :: StgRhs -> Int stgRhsArity (StgRhsClosure _ _ _ bndrs _) = assert (all isId bndrs) $ length bndrs -- The arity never includes type parameters, but they should have gone by now -stgRhsArity (StgRhsCon _ _ _ _ _) = 0 +stgRhsArity (StgRhsCon {}) = 0 freeVarsOfRhs :: (XRhsClosure pass ~ DIdSet) => GenStgRhs pass -> DIdSet freeVarsOfRhs (StgRhsCon _ _ _ _ args) = mkDVarSet [ id | StgVarArg id <- args ] @@ -550,7 +477,31 @@ The Plain STG parameterisation * * ************************************************************************ -This happens to be the only one we use at the moment. + Note [STG Extension points] + ~~~~~~~~~~~~~~~~~~~~~~~~~~~ + We now make use of extension points in STG for different passes which want + to associate information with AST nodes. + + Currently the pipeline is roughly: + + CoreToStg: Core -> Stg + StgSimpl: Stg -> Stg + CodeGen: Stg -> Cmm + + As part of StgSimpl we run late lambda lifting (Ll). + Late lambda lift: + Stg -> FvStg -> LlStg -> Stg + + CodeGen: + As part of CodeGen we run tag inference. + Tag Inference: + Stg -> Stg 'InferTaggedBinders` -> Stg + + And at a last step we add the free Variables: + Stg -> CgStg + + Which finally CgStg being used to generate Cmm. + -} type StgTopBinding = GenStgTopBinding 'Vanilla @@ -571,6 +522,12 @@ type CgStgExpr = GenStgExpr 'CodeGen type CgStgRhs = GenStgRhs 'CodeGen type CgStgAlt = GenStgAlt 'CodeGen +type TgStgTopBinding = GenStgTopBinding 'CodeGen +type TgStgBinding = GenStgBinding 'CodeGen +type TgStgExpr = GenStgExpr 'CodeGen +type TgStgRhs = GenStgRhs 'CodeGen +type TgStgAlt = GenStgAlt 'CodeGen + {- Many passes apply a substitution, and it's very handy to have type synonyms to remind us whether or not the substitution has been applied. See GHC.Core for precedence in Core land @@ -589,6 +546,79 @@ type OutStgExpr = StgExpr type OutStgRhs = StgRhs type OutStgAlt = StgAlt +-- | When `-fdistinct-constructor-tables` is turned on then +-- each usage of a constructor is given an unique number and +-- an info table is generated for each different constructor. +data ConstructorNumber = + NoNumber | Numbered Int + +instance Outputable ConstructorNumber where + ppr NoNumber = empty + ppr (Numbered n) = text "#" <> ppr n + +{- +Note Stg Passes +~~~~~~~~~~~~~~~ +Here is a short summary of the STG pipeline and where we use the different +StgPass data type indexes: + + 1. CoreToStg.Prep performs several transformations that prepare the desugared + and simplified core to be converted to STG. One of these transformations is + making it so that value lambdas only exist as the RHS of a binding. + See Note [CorePrep Overview]. + + 2. CoreToStg converts the prepared core to STG, specifically GenStg* + parameterised by 'Vanilla. See the GHC.CoreToStg Module. + + 3. Stg.Pipeline does a number of passes on the generated STG. One of these is + the lambda-lifting pass, which internally uses the 'LiftLams + parameterisation to store information for deciding whether or not to lift + each binding. + See Note [Late lambda lifting in STG]. + + 4. Tag inference takes in 'Vanilla and produces 'InferTagged STG, while using + the InferTaggedBinders annotated AST internally. + See Note [Tag Inference]. + + 5. Stg.FVs annotates closures with their free variables. To store these + annotations we use the 'CodeGen parameterisation. + See the GHC.Stg.FVs module. + + 6. The Module Stg.StgToCmm generates Cmm from the CodeGen annotated STG. +-} + + +-- | Used as a data type index for the stgSyn AST +data StgPass + = Vanilla + | LiftLams -- ^ Use internally by the lambda lifting pass + | InferTaggedBinders -- ^ Tag inference information on binders. + -- See Note [Tag inference passes] in GHC.Stg.InferTags + | InferTagged -- ^ Tag inference information put on relevant StgApp nodes + -- See Note [Tag inference passes] in GHC.Stg.InferTags + | CodeGen + +type family BinderP (pass :: StgPass) +type instance BinderP 'Vanilla = Id +type instance BinderP 'CodeGen = Id +type instance BinderP 'InferTagged = Id + +type family XRhsClosure (pass :: StgPass) +type instance XRhsClosure 'Vanilla = NoExtFieldSilent +type instance XRhsClosure 'InferTagged = NoExtFieldSilent +-- | Code gen needs to track non-global free vars +type instance XRhsClosure 'CodeGen = DIdSet + +type family XLet (pass :: StgPass) +type instance XLet 'Vanilla = NoExtFieldSilent +type instance XLet 'InferTagged = NoExtFieldSilent +type instance XLet 'CodeGen = NoExtFieldSilent + +type family XLetNoEscape (pass :: StgPass) +type instance XLetNoEscape 'Vanilla = NoExtFieldSilent +type instance XLetNoEscape 'InferTagged = NoExtFieldSilent +type instance XLetNoEscape 'CodeGen = NoExtFieldSilent + {- ************************************************************************ @@ -643,25 +673,6 @@ data StgOp {- ************************************************************************ * * -Utilities -* * -************************************************************************ --} - -bindersOf :: BinderP a ~ Id => GenStgBinding a -> [Id] -bindersOf (StgNonRec binder _) = [binder] -bindersOf (StgRec pairs) = [binder | (binder, _) <- pairs] - -bindersOfTop :: BinderP a ~ Id => GenStgTopBinding a -> [Id] -bindersOfTop (StgTopLifted bind) = bindersOf bind -bindersOfTop (StgTopStringLit binder _) = [binder] - -bindersOfTopBinds :: BinderP a ~ Id => [GenStgTopBinding a] -> [Id] -bindersOfTopBinds = foldr ((++) . bindersOfTop) [] - -{- -************************************************************************ -* * Pretty-printing * * ************************************************************************ @@ -712,6 +723,9 @@ pprGenStgBinding opts b = case b of = hang (hsep [pprBndr LetBind bndr, equals]) 4 (pprStgRhs opts expr <> semi) +instance OutputablePass pass => Outputable (GenStgBinding pass) where + ppr = pprGenStgBinding panicStgPprOpts + pprGenStgTopBindings :: (OutputablePass pass) => StgPprOpts -> [GenStgTopBinding pass] -> SDoc pprGenStgTopBindings opts binds = vcat $ intersperse blankLine (map (pprGenStgTopBinding opts) binds) @@ -732,12 +746,19 @@ pprStgArg :: StgArg -> SDoc pprStgArg (StgVarArg var) = ppr var pprStgArg (StgLitArg con) = ppr con +instance OutputablePass pass => Outputable (GenStgExpr pass) where + ppr = pprStgExpr panicStgPprOpts + pprStgExpr :: OutputablePass pass => StgPprOpts -> GenStgExpr pass -> SDoc pprStgExpr opts e = case e of -- special case StgLit lit -> ppr lit -- general case - StgApp func args -> hang (ppr func) 4 (interppSP args) + StgApp func args + | null args + , Just sig <- idTagSig_maybe func + -> ppr func <> ppr sig + | otherwise -> hang (ppr func) 4 (interppSP args) -- TODO: Print taggedness StgConApp con n args _ -> hsep [ ppr con, ppr n, brackets (interppSP args) ] StgOpApp op args _ -> hsep [ pprStgOp op, brackets (interppSP args)] @@ -850,4 +871,8 @@ pprStgRhs opts rhs = case rhs of , case mid of NoNumber -> empty Numbered n -> hcat [ppr n, space] + -- The bang indicates this is an StgRhsCon instead of an StgConApp. , ppr con, text "! ", brackets (sep (map pprStgArg args))] + +instance OutputablePass pass => Outputable (GenStgRhs pass) where + ppr = pprStgRhs panicStgPprOpts diff --git a/compiler/GHC/Stg/Unarise.hs b/compiler/GHC/Stg/Unarise.hs index 15635e754a..25b9c5e582 100644 --- a/compiler/GHC/Stg/Unarise.hs +++ b/compiler/GHC/Stg/Unarise.hs @@ -258,6 +258,7 @@ import GHC.Utils.Panic import GHC.Utils.Panic.Plain import GHC.Types.RepType import GHC.Stg.Syntax +import GHC.Stg.Utils import GHC.Core.Type import GHC.Builtin.Types.Prim (intPrimTy) import GHC.Builtin.Types @@ -802,10 +803,10 @@ unariseConArgBinder = unariseArgBinder True -------------------------------------------------------------------------------- mkIds :: FastString -> [UnaryType] -> UniqSM [Id] -mkIds fs tys = mapM (mkId fs) tys +mkIds fs tys = mkUnarisedIds fs tys mkId :: FastString -> UnaryType -> UniqSM Id -mkId s t = mkSysLocalM s Many t +mkId s t = mkUnarisedId s t isMultiValBndr :: Id -> Bool isMultiValBndr id diff --git a/compiler/GHC/Stg/Utils.hs b/compiler/GHC/Stg/Utils.hs new file mode 100644 index 0000000000..4561e25765 --- /dev/null +++ b/compiler/GHC/Stg/Utils.hs @@ -0,0 +1,124 @@ +{-# LANGUAGE CPP, ScopedTypeVariables, TypeFamilies #-} +{-# LANGUAGE BangPatterns #-} +{-# LANGUAGE DataKinds #-} + +module GHC.Stg.Utils + ( mkStgAltTypeFromStgAlts + , bindersOf, bindersOfX, bindersOfTop, bindersOfTopBinds + + , stripStgTicksTop, stripStgTicksTopE + , idArgs + + , mkUnarisedId, mkUnarisedIds + ) where + +import GHC.Prelude + +import GHC.Types.Id +import GHC.Core.Type +import GHC.Core.TyCon +import GHC.Core.DataCon +import GHC.Core ( AltCon(..) ) +import GHC.Types.Tickish +import GHC.Types.Unique.Supply + +import GHC.Types.RepType +import GHC.Stg.Syntax + +import GHC.Utils.Outputable + +import GHC.Utils.Panic.Plain +import GHC.Utils.Panic + +import GHC.Data.FastString + +mkUnarisedIds :: MonadUnique m => FastString -> [UnaryType] -> m [Id] +mkUnarisedIds fs tys = mapM (mkUnarisedId fs) tys + +mkUnarisedId :: MonadUnique m => FastString -> UnaryType -> m Id +mkUnarisedId s t = mkSysLocalM s Many t + +-- Checks if id is a top level error application. +-- isErrorAp_maybe :: Id -> + +-- | Extract the default case alternative +-- findDefaultStg :: [Alt b] -> ([Alt b], Maybe (Expr b)) +findDefaultStg :: [GenStgAlt p] -> ([(AltCon, [BinderP p], GenStgExpr p)], + Maybe (GenStgExpr p)) +findDefaultStg ((DEFAULT, args, rhs) : alts) = assert( null args ) (alts, Just rhs) +findDefaultStg alts = (alts, Nothing) + +mkStgAltTypeFromStgAlts :: forall p. Id -> [GenStgAlt p] -> AltType +mkStgAltTypeFromStgAlts bndr alts + | isUnboxedTupleType bndr_ty || isUnboxedSumType bndr_ty + = MultiValAlt (length prim_reps) -- always use MultiValAlt for unboxed tuples + + | otherwise + = case prim_reps of + [rep] | isGcPtrRep rep -> + case tyConAppTyCon_maybe (unwrapType bndr_ty) of + Just tc + | isAbstractTyCon tc -> look_for_better_tycon + | isAlgTyCon tc -> AlgAlt tc + | otherwise -> assertPpr ( _is_poly_alt_tycon tc) (ppr tc) + PolyAlt + Nothing -> PolyAlt + [non_gcd] -> PrimAlt non_gcd + not_unary -> MultiValAlt (length not_unary) + where + bndr_ty = idType bndr + prim_reps = typePrimRep bndr_ty + + _is_poly_alt_tycon tc + = isFunTyCon tc + || isPrimTyCon tc -- "Any" is lifted but primitive + || isFamilyTyCon tc -- Type family; e.g. Any, or arising from strict + -- function application where argument has a + -- type-family type + + -- Sometimes, the TyCon is a AbstractTyCon which may not have any + -- constructors inside it. Then we may get a better TyCon by + -- grabbing the one from a constructor alternative + -- if one exists. + look_for_better_tycon + | (((DataAlt con) ,_, _) : _) <- data_alts = + AlgAlt (dataConTyCon con) + | otherwise = + assert(null data_alts) + PolyAlt + where + (data_alts, _deflt) = findDefaultStg alts + +bindersOf :: BinderP a ~ Id => GenStgBinding a -> [Id] +bindersOf (StgNonRec binder _) = [binder] +bindersOf (StgRec pairs) = [binder | (binder, _) <- pairs] + +bindersOfX :: GenStgBinding a -> [BinderP a] +bindersOfX (StgNonRec binder _) = [binder] +bindersOfX (StgRec pairs) = [binder | (binder, _) <- pairs] + +bindersOfTop :: BinderP a ~ Id => GenStgTopBinding a -> [Id] +bindersOfTop (StgTopLifted bind) = bindersOf bind +bindersOfTop (StgTopStringLit binder _) = [binder] + +-- All ids we bind something to on the top level. +bindersOfTopBinds :: BinderP a ~ Id => [GenStgTopBinding a] -> [Id] +-- bindersOfTopBinds binds = mapUnionVarSet (mkVarSet . bindersOfTop) binds +bindersOfTopBinds binds = foldr ((++) . bindersOfTop) [] binds + +idArgs :: [StgArg] -> [Id] +idArgs args = [v | StgVarArg v <- args] + +-- | Strip ticks of a given type from an STG expression. +stripStgTicksTop :: (StgTickish -> Bool) -> GenStgExpr p -> ([StgTickish], GenStgExpr p) +stripStgTicksTop p = go [] + where go ts (StgTick t e) | p t = go (t:ts) e + -- This special case avoid building a thunk for "reverse ts" when there are no ticks + go [] other = ([], other) + go ts other = (reverse ts, other) + +-- | Strip ticks of a given type from an STG expression returning only the expression. +stripStgTicksTopE :: (StgTickish -> Bool) -> GenStgExpr p -> GenStgExpr p +stripStgTicksTopE p = go + where go (StgTick t e) | p t = go e + go other = other diff --git a/compiler/GHC/StgToByteCode.hs b/compiler/GHC/StgToByteCode.hs index c574327665..885af12944 100644 --- a/compiler/GHC/StgToByteCode.hs +++ b/compiler/GHC/StgToByteCode.hs @@ -661,7 +661,7 @@ schemeT _d _s _p (StgOpApp StgPrimCallOp{} _args _ty) = unsupportedCConvException -- Case 2: Unboxed tuple -schemeT d s p (StgConApp con _ext args _tys) +schemeT d s p (StgConApp con _cn args _tys) | isUnboxedTupleDataCon con || isUnboxedSumDataCon con = returnUnboxedTuple d s p args diff --git a/compiler/GHC/StgToCmm.hs b/compiler/GHC/StgToCmm.hs index 9931b81e6e..eea77198aa 100644 --- a/compiler/GHC/StgToCmm.hs +++ b/compiler/GHC/StgToCmm.hs @@ -70,6 +70,7 @@ import GHC.Utils.Misc import System.IO.Unsafe import qualified Data.ByteString as BS import Data.IORef +import GHC.Utils.Panic (assertPpr) codeGen :: Logger -> TmpFs @@ -206,7 +207,7 @@ cgTopRhs cfg _rec bndr (StgRhsCon _cc con mn _ts args) -- see Note [Post-unarisation invariants] in GHC.Stg.Unarise cgTopRhs cfg rec bndr (StgRhsClosure fvs cc upd_flag args body) - = assert (isEmptyDVarSet fvs) -- There should be no free variables + = assertPpr (isEmptyDVarSet fvs) (text "fvs:" <> ppr fvs) $ -- There should be no free variables cgTopRhsClosure (stgToCmmPlatform cfg) rec bndr cc upd_flag args body diff --git a/compiler/GHC/StgToCmm/Bind.hs b/compiler/GHC/StgToCmm/Bind.hs index 4fb3fd1fbe..25d04b323c 100644 --- a/compiler/GHC/StgToCmm/Bind.hs +++ b/compiler/GHC/StgToCmm/Bind.hs @@ -32,6 +32,7 @@ import GHC.StgToCmm.DataCon import GHC.StgToCmm.Heap import GHC.StgToCmm.Prof (ldvEnterClosure, enterCostCentreFun, enterCostCentreThunk, initUpdFrameProf) +import GHC.StgToCmm.TagCheck import GHC.StgToCmm.Ticky import GHC.StgToCmm.Layout import GHC.StgToCmm.Utils @@ -45,6 +46,7 @@ import GHC.Cmm.Info import GHC.Cmm.Utils import GHC.Cmm.CLabel +import GHC.Stg.Utils import GHC.Types.CostCentre import GHC.Types.Id import GHC.Types.Id.Info @@ -217,14 +219,19 @@ cgRhs id (StgRhsCon cc con mn _ts args) {- See Note [GC recovery] in "GHC.StgToCmm.Closure" -} cgRhs id (StgRhsClosure fvs cc upd_flag args body) - = do profile <- getProfile - mkRhsClosure profile id cc (nonVoidIds (dVarSetElems fvs)) upd_flag args body + = do + checkFunctionArgTags (text "TagCheck Failed: Rhs of" <> ppr id) id args + profile <- getProfile + check_tags <- stgToCmmDoTagCheck <$> getStgToCmmConfig + mkRhsClosure profile check_tags id cc (nonVoidIds (dVarSetElems fvs)) upd_flag args body + ------------------------------------------------------------------------ -- Non-constructor right hand sides ------------------------------------------------------------------------ -mkRhsClosure :: Profile -> Id -> CostCentreStack +mkRhsClosure :: Profile -> Bool + -> Id -> CostCentreStack -> [NonVoid Id] -- Free vars -> UpdateFlag -> [Id] -- Args @@ -267,7 +274,7 @@ for semi-obvious reasons. -} ---------- See Note [Selectors] ------------------ -mkRhsClosure profile bndr _cc +mkRhsClosure profile _check_tags bndr _cc [NonVoid the_fv] -- Just one free var upd_flag -- Updatable thunk [] -- A thunk @@ -300,7 +307,7 @@ mkRhsClosure profile bndr _cc in cgRhsStdThunk bndr lf_info [StgVarArg the_fv] ---------- See Note [Ap thunks] ------------------ -mkRhsClosure profile bndr _cc +mkRhsClosure profile check_tags bndr _cc fvs upd_flag [] -- No args; a thunk @@ -321,8 +328,8 @@ mkRhsClosure profile bndr _cc -- lose information about this particular -- thunk (e.g. its type) (#949) , idArity fun_id == unknownArity -- don't spoil a known call - -- Ha! an Ap thunk + , not check_tags -- See Note [Tag inference debugging] = cgRhsStdThunk bndr lf_info payload where @@ -333,7 +340,7 @@ mkRhsClosure profile bndr _cc payload = StgVarArg fun_id : args ---------- Default case ------------------ -mkRhsClosure profile bndr cc fvs upd_flag args body +mkRhsClosure profile _check_tags bndr cc fvs upd_flag args body = do { let lf_info = mkClosureLFInfo (profilePlatform profile) bndr NotTopLevel fvs upd_flag args ; (id_info, reg) <- rhsIdInfo bndr lf_info ; return (id_info, gen_code lf_info reg) } @@ -517,6 +524,7 @@ closureCodeBody top_lvl bndr cl_info cc args@(arg0:_) body fv_details -- Load free vars out of closure *after* -- heap check, to reduce live vars over check ; when node_points $ load_fvs node lf_info fv_bindings + ; checkFunctionArgTags (text "TagCheck failed - Argument to local function:" <> ppr bndr) bndr (map fromNonVoid nv_args) ; void $ cgExpr body }}} diff --git a/compiler/GHC/StgToCmm/Closure.hs b/compiler/GHC/StgToCmm/Closure.hs index b2f51c60fd..0d048a6be8 100644 --- a/compiler/GHC/StgToCmm/Closure.hs +++ b/compiler/GHC/StgToCmm/Closure.hs @@ -4,7 +4,6 @@ {-# LANGUAGE MultiParamTypeClasses #-} {-# LANGUAGE FlexibleInstances #-} - ----------------------------------------------------------------------------- -- -- Stg to C-- code generation: @@ -101,6 +100,7 @@ import GHC.Utils.Misc import Data.Coerce (coerce) import qualified Data.ByteString.Char8 as BS8 import GHC.StgToCmm.Config +import GHC.Stg.InferTags.TagSig (isTaggedSig) ----------------------------------------------------------------------------- -- Data types and synonyms @@ -478,20 +478,38 @@ When black-holing, single-entry closures could also be entered via node (rather than directly) to catch double-entry. -} data CallMethod - = EnterIt -- No args, not a function + = EnterIt -- ^ No args, not a function | JumpToIt BlockId [LocalReg] -- A join point or a header of a local loop | ReturnIt -- It's a value (function, unboxed value, -- or constructor), so just return it. - | SlowCall -- Unknown fun, or known fun with + | InferedReturnIt -- A properly tagged value, as determined by tag inference. + -- See Note [Tag Inference] and Note [Tag inference passes] in + -- GHC.Stg.InferTags. + -- It behaves /precisely/ like `ReturnIt`, except that when debugging is + -- enabled we emit an extra assertion to check that the returned value is + -- properly tagged. We can use this as a check that tag inference is working + -- correctly. + -- TODO: SPJ suggested we could combine this with EnterIt, but for now I decided + -- not to do so. + + | SlowCall -- Unknown fun, or known fun with -- too few args. | DirectEntry -- Jump directly, with args in regs CLabel -- The code label RepArity -- Its arity +instance Outputable CallMethod where + ppr (EnterIt) = text "Enter" + ppr (JumpToIt {}) = text "JumpToIt" + ppr (ReturnIt ) = text "ReturnIt" + ppr (InferedReturnIt) = text "InferedReturnIt" + ppr (SlowCall ) = text "SlowCall" + ppr (DirectEntry {}) = text "DirectEntry" + getCallMethod :: StgToCmmConfig -> Name -- Function being applied -> Id -- Function Id used to chech if it can refer to @@ -538,6 +556,12 @@ getCallMethod _ _name _ (LFCon _) n_args _v_args _cg_loc _self_loop_info getCallMethod cfg name id (LFThunk _ _ updatable std_form_info is_fun) n_args _v_args _cg_loc _self_loop_info + + | Just sig <- idTagSig_maybe id + , isTaggedSig sig -- Infered to be already evaluated by Tag Inference + , n_args == 0 -- See Note [Tag Inference] + = InferedReturnIt + | is_fun -- it *might* be a function, so we must "call" it (which is always safe) = SlowCall -- We cannot just enter it [in eval/apply, the entry code -- is the fast-entry code] @@ -568,12 +592,27 @@ getCallMethod cfg name id (LFThunk _ _ updatable std_form_info is_fun) DirectEntry (thunkEntryLabel (stgToCmmPlatform cfg) name (idCafInfo id) std_form_info updatable) 0 -getCallMethod _ _name _ (LFUnknown True) _n_arg _v_args _cg_locs _self_loop_info - = SlowCall -- might be a function - -getCallMethod _ name _ (LFUnknown False) n_args _v_args _cg_loc _self_loop_info - = assertPpr (n_args == 0) (ppr name <+> ppr n_args) - EnterIt -- Not a function +-- Imported(Unknown) Ids +getCallMethod cfg name id (LFUnknown might_be_a_function) n_args _v_args _cg_locs _self_loop_info + | n_args == 0 + , Just sig <- idTagSig_maybe id + , isTaggedSig sig -- Infered to be already evaluated by Tag Inference + -- When profiling we enter functions to update the SCC so we + -- can't use the infered enterInfo here. + -- See Note [Evaluating functions with profiling] in rts/Apply.cmm + , not (profileIsProfiling (stgToCmmProfile cfg) && might_be_a_function) + = InferedReturnIt -- See Note [Tag Inference] + + | might_be_a_function = SlowCall + + | otherwise = + assertPpr ( n_args == 0) ( ppr name <+> ppr n_args ) + EnterIt -- Not a function + +-- TODO: Redundant with above match? +-- getCallMethod _ name _ (LFUnknown False) n_args _v_args _cg_loc _self_loop_info +-- = assertPpr (n_args == 0) (ppr name <+> ppr n_args) +-- EnterIt -- Not a function getCallMethod _ _name _ LFLetNoEscape _n_args _v_args (LneLoc blk_id lne_regs) _self_loop_info = JumpToIt blk_id lne_regs diff --git a/compiler/GHC/StgToCmm/Config.hs b/compiler/GHC/StgToCmm/Config.hs index 8751f5ffe5..28c11e1321 100644 --- a/compiler/GHC/StgToCmm/Config.hs +++ b/compiler/GHC/StgToCmm/Config.hs @@ -52,6 +52,7 @@ data StgToCmmConfig = StgToCmmConfig -- code for linking against dynamic libraries , stgToCmmDoBoundsCheck :: !Bool -- ^ decides whether to check array bounds in StgToCmm.Prim -- or not + , stgToCmmDoTagCheck :: !Bool -- ^ Verify tag inference predictions. ------------------------------ Backend Flags ---------------------------------- , stgToCmmAllowBigArith :: !Bool -- ^ Allowed to emit larger than native size arithmetic (only LLVM and C backends) , stgToCmmAllowQuotRemInstr :: !Bool -- ^ Allowed to generate QuotRem instructions diff --git a/compiler/GHC/StgToCmm/DataCon.hs b/compiler/GHC/StgToCmm/DataCon.hs index bed2f164eb..2a7203e101 100644 --- a/compiler/GHC/StgToCmm/DataCon.hs +++ b/compiler/GHC/StgToCmm/DataCon.hs @@ -52,6 +52,8 @@ import GHC.Utils.Monad (mapMaybeM) import Control.Monad import Data.Char import GHC.StgToCmm.Config (stgToCmmPlatform) +import GHC.StgToCmm.TagCheck (checkConArgsStatic, checkConArgsDyn) +import GHC.Utils.Outputable --------------------------------------------------------------- -- Top-level constructors @@ -93,7 +95,7 @@ cgTopRhsCon cfg id con mn args -- Windows DLLs have a problem with static cross-DLL refs. massert (not (isDllConApp platform (stgToCmmExtDynRefs cfg) this_mod con (map fromNonVoid args))) ; assert (args `lengthIs` countConRepArgs con ) return () - + ; checkConArgsStatic (text "TagCheck failed - Top level con") con (map fromNonVoid args) -- LAY IT OUT ; let (tot_wds, -- #ptr_wds + #nonptr_wds @@ -210,6 +212,7 @@ buildDynCon' binder mn actually_bound ccs con args ; let ticky_name | actually_bound = Just binder | otherwise = Nothing + ; checkConArgsDyn (text "TagCheck failed - con_alloc:" <> ppr binder) con (map fromNonVoid args) ; hp_plus_n <- allocDynClosure ticky_name info_tbl lf_info use_cc blame_cc args_w_offsets ; return (mkRhsInit platform reg lf_info hp_plus_n) } diff --git a/compiler/GHC/StgToCmm/Env.hs b/compiler/GHC/StgToCmm/Env.hs index 8f82c02e8e..eef1420a72 100644 --- a/compiler/GHC/StgToCmm/Env.hs +++ b/compiler/GHC/StgToCmm/Env.hs @@ -19,7 +19,7 @@ module GHC.StgToCmm.Env ( bindArgToReg, idToReg, getCgIdInfo, maybeLetNoEscape, - ) where + ) where import GHC.Prelude @@ -44,7 +44,6 @@ import GHC.Utils.Outputable import GHC.Utils.Panic import GHC.Utils.Panic.Plain - ------------------------------------- -- Manipulating CgIdInfo ------------------------------------- @@ -116,12 +115,20 @@ addBindsC new_bindings = do new_bindings setBinds new_binds +-- Inside GHC the average module creates 385 external references +-- with noteable cgIdInfo (so not generated by mkLFArgument). +-- On average 200 of these are covered by True/False/[] +-- and nullary constructors make up ~80. +-- One would think it would be worthwhile to cache these. +-- Sadly it's not. See #16937 + getCgIdInfo :: Id -> FCode CgIdInfo getCgIdInfo id = do { platform <- getPlatform ; local_binds <- getBinds -- Try local bindings first ; case lookupVarEnv local_binds id of { - Just info -> return info ; + Just info -> -- pprTrace "getCgIdInfoLocal" (ppr id) $ + return info ; Nothing -> do { -- Should be imported; make up a CgIdInfo for it diff --git a/compiler/GHC/StgToCmm/Expr.hs b/compiler/GHC/StgToCmm/Expr.hs index 3a3c1db647..5129a45b1c 100644 --- a/compiler/GHC/StgToCmm/Expr.hs +++ b/compiler/GHC/StgToCmm/Expr.hs @@ -1,6 +1,7 @@ -{-# LANGUAGE BangPatterns #-} - {-# OPTIONS_GHC -Wno-incomplete-uni-patterns #-} +{-# LANGUAGE BangPatterns #-} +{-# LANGUAGE CPP #-} +{-# LANGUAGE TypeFamilies #-} ----------------------------------------------------------------------------- -- @@ -25,6 +26,7 @@ import GHC.StgToCmm.Layout import GHC.StgToCmm.Lit import GHC.StgToCmm.Prim import GHC.StgToCmm.Hpc +import GHC.StgToCmm.TagCheck import GHC.StgToCmm.Ticky import GHC.StgToCmm.Utils import GHC.StgToCmm.Closure @@ -36,6 +38,7 @@ import GHC.Cmm.BlockId import GHC.Cmm hiding ( succ ) import GHC.Cmm.Info import GHC.Cmm.Utils ( zeroExpr, cmmTagMask, mkWordCLit, mAX_PTR_TAG ) +import GHC.Cmm.Ppr import GHC.Core import GHC.Core.DataCon import GHC.Types.ForeignCall @@ -56,6 +59,7 @@ import GHC.Utils.Panic.Plain import Control.Monad ( unless, void ) import Control.Arrow ( first ) import Data.List ( partition ) +import GHC.Stg.InferTags.TagSig (isTaggedSig) ------------------------------------------------------------------------ -- cgExpr: the main function @@ -291,7 +295,7 @@ We adopt (b) because that is more likely to put the heap check at the entry to a function, when not many things are live. After a bunch of single-branch cases, we may have lots of things live -Hence: two basic plans for +Hence: Two basic plans for case e of r { alts } @@ -306,6 +310,13 @@ Hence: two basic plans for ...code for alts... ...alts do their own heap checks + When using GcInAlts the return point for heap checks and evaluating + the scrutinee is shared. This does mean we might execute the actual + branching code twice but it's rare enough to not matter. + The huge advantage of this pattern is that we do not require multiple + info tables for returning from gc as they can be shared between all + cases. Reducing code size nicely. + ------ Plan B: special case when --------- (i) e does not allocate or call GC (ii) either upstream code performs allocation @@ -320,6 +331,80 @@ Hence: two basic plans for ...code for alts... ...no heap check... + + There is a variant B.2 which we use if: + + (i) e is already evaluated+tagged + (ii) We have multiple alternatives + (iii) and there is no upstream allocation. + + Here we also place one heap check before the `case` which + branches on `e`. Hopefully to be absorbed by an already existing + heap check further up. However the big difference in this case is that + there is no code for e. So we are not guaranteed that the heap + checks of the alts will be combined with an heap check further up. + + Very common example: Casing on strict fields. + + ...heap check... + ...assign bindings... + + ...code for alts... + ...no heap check... + + -- Reasoning for Plan B.2: + Since the scrutinee is already evaluated there is no evaluation + call which would force a info table that we can use as a shared + return point. + This means currently if we were to do GcInAlts like in Plan A then + we would end up with one info table per alternative. + + To avoid this we unconditionally do gc outside of the alts with all + the pros and cons described in Note [Compiling case expressions]. + Rewriting the logic to generate a shared return point before the case + expression while keeping the heap checks in the alternatives would be + possible. But it's unclear to me that this would actually be an improvement. + + This means if we have code along these lines: + + g x y = case x of + True -> Left $ (y + 1,y,y-1) + False -> Right $! y - (2 :: Int) + + We get these potential heap check placements: + + f = ... + !max(L,R)!; -- Might be absorbed upstream. + case x of + True -> !L!; ...L... + False -> !R!; ...R... + + And we place a heap check at !max(L,R)! + + The downsides of using !max(L,R)! are: + + * If f is recursive, and the hot loop wouldn't allocate, but the exit branch does then we do + a redundant heap check. + * We use one more instruction to de-allocate the unused heap in the branch using less heap. (Neglible) + * A small risk of running gc slightly more often than needed especially if one branch allocates a lot. + + The upsides are: + * May save a heap overflow test if there is an upstream check already. + * If the heap check is absorbed upstream we can also eliminate its info table. + * We generate at most one heap check (versus one per alt otherwise). + * No need to save volatile vars etc across heap checks in !L!, !R! + * We can use relative addressing from a single Hp to get at all the closures so allocated. (seems neglible) + * It fits neatly in the logic we already have for handling A/B + + For containers:Data/Sequence/Internal/Sorting.o the difference is + about 10% in terms of code size compared to using Plan A for this case. + The main downside is we might put heap checks into loops, even if we + could avoid it (See Note [Compiling case expressions]). + + Potential improvement: Investigate if heap checks in alts would be an + improvement if we generate and use a shared return point that is placed + in the common path for all alts. + -} @@ -461,11 +546,13 @@ cgCase scrut bndr alt_type alts ; up_hp_usg <- getVirtHp -- Upstream heap usage ; let ret_bndrs = chooseReturnBndrs bndr alt_type alts alt_regs = map (idToReg platform) ret_bndrs + ; simple_scrut <- isSimpleScrut scrut alt_type ; let do_gc | is_cmp_op scrut = False -- See Note [GC for conditionals] | not simple_scrut = True | isSingleton alts = False | up_hp_usg > 0 = False + | evaluatedScrut = False | otherwise = True -- cf Note [Compiling case expressions] gc_plan = if do_gc then GcInAlts alt_regs else NoGcInAlts @@ -481,6 +568,13 @@ cgCase scrut bndr alt_type alts where is_cmp_op (StgOpApp (StgPrimOp op) _ _) = isComparisonPrimOp op is_cmp_op _ = False + evaluatedScrut + | (StgApp v []) <- scrut + , Just sig <- idTagSig_maybe v + , isTaggedSig sig = True + | otherwise = False + + {- Note [GC for conditionals] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -530,10 +624,13 @@ isSimpleScrut :: CgStgExpr -> AltType -> FCode Bool -- heap usage from alternatives into the stuff before the case -- NB: if you get this wrong, and claim that the expression doesn't allocate -- when it does, you'll deeply mess up allocation -isSimpleScrut (StgOpApp op args _) _ = isSimpleOp op args -isSimpleScrut (StgLit _) _ = return True -- case 1# of { 0# -> ..; ... } -isSimpleScrut (StgApp _ []) (PrimAlt _) = return True -- case x# of { 0# -> ..; ... } -isSimpleScrut _ _ = return False +isSimpleScrut (StgOpApp op args _) _ = isSimpleOp op args +isSimpleScrut (StgLit _) _ = return True -- case 1# of { 0# -> ..; ... } +isSimpleScrut (StgApp _ []) (PrimAlt _) = return True -- case x# of { 0# -> ..; ... } +isSimpleScrut (StgApp f []) _ + | Just sig <- idTagSig_maybe f + = return $! isTaggedSig sig -- case !x of { ... } +isSimpleScrut _ _ = return False isSimpleOp :: StgOp -> [StgArg] -> FCode Bool -- True iff the op cannot block or allocate @@ -890,6 +987,7 @@ cgConApp con mn stg_args cgIdApp :: Id -> [StgArg] -> FCode ReturnKind cgIdApp fun_id args = do + platform <- getPlatform fun_info <- getCgIdInfo fun_id cfg <- getStgToCmmConfig self_loop <- getSelfLoop @@ -904,8 +1002,26 @@ cgIdApp fun_id args = do -- A value in WHNF, so we can just return it. ReturnIt | isZeroBitTy (idType fun_id) -> emitReturn [] - | otherwise -> emitReturn [fun] - -- ToDo: does ReturnIt guarantee tagged? + | otherwise -> emitReturn [fun] + + -- A value infered to be in WHNF, so we can just return it. + InferedReturnIt + | isZeroBitTy (idType fun_id) -> trace >> emitReturn [] + | otherwise -> trace >> assertTag >> + emitReturn [fun] + where + trace = do + tickyTagged + use_id <- newUnique + _lbl <- emitTickyCounterTag use_id (NonVoid fun_id) + tickyTagSkip use_id fun_id + + -- pprTraceM "WHNF:" (ppr fun_id <+> ppr args ) + assertTag = whenCheckTags $ do + mod <- getModuleName + emitTagAssertion (showPprUnsafe + (text "TagCheck failed on entry in" <+> ppr mod <+> text "- value:" <> ppr fun_id <+> pprExpr platform fun)) + fun EnterIt -> assert (null args) $ -- Discarding arguments emitEnter fun diff --git a/compiler/GHC/StgToCmm/TagCheck.hs b/compiler/GHC/StgToCmm/TagCheck.hs new file mode 100644 index 0000000000..4c9fb23cc5 --- /dev/null +++ b/compiler/GHC/StgToCmm/TagCheck.hs @@ -0,0 +1,156 @@ +{-# LANGUAGE CPP #-} + +----------------------------------------------------------------------------- +-- +-- Code generator utilities; mostly monadic +-- +-- (c) The University of Glasgow 2004-2006 +-- +----------------------------------------------------------------------------- + +module GHC.StgToCmm.TagCheck + ( emitTagAssertion, emitArgTagCheck, checkArg, whenCheckTags, + checkArgStatic, checkFunctionArgTags,checkConArgsStatic,checkConArgsDyn) where + +#include "ClosureTypes.h" + +import GHC.Prelude + +import GHC.StgToCmm.Env +import GHC.StgToCmm.Monad +import GHC.StgToCmm.Utils +import GHC.Cmm +import GHC.Cmm.BlockId +import GHC.Cmm.Graph as CmmGraph + +import GHC.Core.Type +import GHC.Types.Id +import GHC.Utils.Misc +import GHC.Utils.Outputable + +import GHC.Core.DataCon +import Control.Monad +import GHC.StgToCmm.Types +import GHC.Utils.Panic (pprPanic) +import GHC.Utils.Panic.Plain (panic) +import GHC.Stg.Syntax +import GHC.StgToCmm.Closure +import GHC.Types.RepType (dataConRuntimeRepStrictness) +import GHC.Types.Basic +import GHC.Data.FastString (mkFastString) +import GHC.Cmm.Info (cmmGetClosureType) +import GHC.Cmm.Utils (mkWordCLit) + +-- | Check all arguments marked as already tagged for a function +-- are tagged by inserting runtime checks. +checkFunctionArgTags :: SDoc -> Id -> [Id] -> FCode () +checkFunctionArgTags msg f args = whenCheckTags $ do + onJust (return ()) (idCbvMarks_maybe f) $ \marks -> do + -- Only check args marked as strict, and only lifted ones. + let cbv_args = filter (isLiftedRuntimeRep . idType) $ filterByList (map isMarkedCbv marks) args + -- Get their (cmm) address + arg_infos <- mapM getCgIdInfo cbv_args + let arg_cmms = map idInfoToAmode arg_infos + mapM_ (emitTagAssertion (showPprUnsafe msg)) (arg_cmms) + +-- | Check all required-tagged arguments of a constructor are tagged *at compile time*. +checkConArgsStatic :: SDoc -> DataCon -> [StgArg] -> FCode () +checkConArgsStatic msg con args = whenCheckTags $ do + let marks = dataConRuntimeRepStrictness con + zipWithM_ (checkArgStatic msg) marks args + +-- Check all required arguments of a constructor are tagged. +-- Possible by emitting checks at runtime. +checkConArgsDyn :: SDoc -> DataCon -> [StgArg] -> FCode () +checkConArgsDyn msg con args = whenCheckTags $ do + let marks = dataConRuntimeRepStrictness con + zipWithM_ (checkArg msg) (map cbvFromStrictMark marks) args + +whenCheckTags :: FCode () -> FCode () +whenCheckTags act = do + check_tags <- stgToCmmDoTagCheck <$> getStgToCmmConfig + when check_tags act + +-- | Call barf if we failed to predict a tag correctly. +-- This is immensly useful when debugging issues in tag inference +-- as it will result in a program abort when we encounter an invalid +-- call/heap object, rather than leaving it be and segfaulting arbitrary +-- or producing invalid results. +-- We check if either: +-- * A tag is present +-- * Or the object is a PAP (for which zero is the proper tag) +emitTagAssertion :: String -> CmmExpr -> FCode () +emitTagAssertion onWhat fun = do + { platform <- getPlatform + ; lret <- newBlockId + ; lno_tag <- newBlockId + ; lbarf <- newBlockId + -- Check for presence of any tag. + ; emit $ mkCbranch (cmmIsTagged platform fun) + lret lno_tag (Just True) + -- If there is no tag check if we are dealing with a PAP + ; emitLabel lno_tag + ; emitComment (mkFastString "closereTypeCheck") + ; align_check <- stgToCmmAlignCheck <$> getStgToCmmConfig + ; profile <- getProfile + ; let closure_ty = cmmGetClosureType profile align_check fun + ; ty_reg <- newTemp (bWord platform) + ; emitAssign (CmmLocal ty_reg) closure_ty + ; emit $ mkCbranch (cmmEqWord platform + (CmmReg $ CmmLocal ty_reg) + (CmmLit $ mkWordCLit platform PAP)) + lret lbarf (Just True) + + ; emitLabel lbarf + ; emitBarf ("Tag inference failed on:" ++ onWhat) + ; emitLabel lret + } + +emitArgTagCheck :: SDoc -> [CbvMark] -> [Id] -> FCode () +emitArgTagCheck info marks args = whenCheckTags $ do + mod <- getModuleName + let cbv_args = filter (isLiftedRuntimeRep . idType) $ filterByList (map isMarkedCbv marks) args + arg_infos <- mapM getCgIdInfo cbv_args + let arg_cmms = map idInfoToAmode arg_infos + mk_msg arg = showPprUnsafe (text "Untagged arg:" <> (ppr mod) <> char ':' <> info <+> ppr arg) + zipWithM_ emitTagAssertion (map mk_msg args) (arg_cmms) + +taggedCgInfo :: CgIdInfo -> Bool +taggedCgInfo cg_info + = case lf of + LFCon {} -> True + LFReEntrant {} -> True + LFUnlifted {} -> True + LFThunk {} -> False + LFUnknown {} -> False + LFLetNoEscape -> panic "Let no escape binding passed to top level con" + where + lf = cg_lf cg_info + +-- Check that one argument is properly tagged. +checkArg :: SDoc -> CbvMark -> StgArg -> FCode () +checkArg _ NotMarkedCbv _ = return () +checkArg msg MarkedCbv arg = whenCheckTags $ + case arg of + StgLitArg _ -> return () + StgVarArg v -> do + info <- getCgIdInfo v + if taggedCgInfo info + then return () + else case (cg_loc info) of + CmmLoc loc -> emitTagAssertion (showPprUnsafe msg) loc + LneLoc {} -> panic "LNE-arg" + +-- Check that argument is properly tagged. +checkArgStatic :: SDoc -> StrictnessMark -> StgArg -> FCode () +checkArgStatic _ NotMarkedStrict _ = return () +checkArgStatic msg MarkedStrict arg = whenCheckTags $ + case arg of + StgLitArg _ -> return () + StgVarArg v -> do + info <- getCgIdInfo v + if taggedCgInfo info + then return () + else pprPanic "Arg not tagged as expectd" (ppr msg <+> ppr arg) + + diff --git a/compiler/GHC/StgToCmm/Ticky.hs b/compiler/GHC/StgToCmm/Ticky.hs index 118c05d920..57bf1a97b6 100644 --- a/compiler/GHC/StgToCmm/Ticky.hs +++ b/compiler/GHC/StgToCmm/Ticky.hs @@ -64,6 +64,15 @@ the code generator as well as the RTS because: * someone else might know how to repair it! + +Note [Ticky counters are static] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Currently GHC only supports static ticky events. That is -ticky emits +code containing labels containing counters which then get bumped at runtime. + +There are currently only *static* ticky counters. Either we bump one of the +static counters included in the RTS. Or we emit StgEntCounter structures in +the object code and bump these. -} module GHC.StgToCmm.Ticky ( @@ -72,6 +81,7 @@ module GHC.StgToCmm.Ticky ( withNewTickyCounterThunk, withNewTickyCounterStdThunk, withNewTickyCounterCon, + emitTickyCounterTag, tickyDynAlloc, tickyAllocHeap, @@ -97,7 +107,10 @@ module GHC.StgToCmm.Ticky ( tickyUnboxedTupleReturn, tickyReturnOldCon, tickyReturnNewCon, - tickySlowCall + tickyKnownCallTooFewArgs, tickyKnownCallExact, tickyKnownCallExtraArgs, + tickySlowCall, tickySlowCallPat, + + tickyTagged, tickyUntagged, tickyTagSkip ) where import GHC.Prelude @@ -260,6 +273,93 @@ emitTickyCounter cloType name args ] } +{- Note [TagSkip ticky counters] + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +These counters keep track how often we execute code where we +would have performed a tag check if we hadn't run tag inference. + +If we have some code of the form: + case v[tagged] of ... +and we want to record how often we avoid a tag check on v +through tag inference we have to emit a new StgEntCounter for +each such case statement in order to record how often it's executed. + +In theory we could emit one per *binding*. But then we +would have to either keep track of the bindings which +already have a StgEntCounter associated with them in the +code gen state or preallocate such a structure for each binding +in the code unconditionally (since ticky-code can call non-ticky code) + +The first makes the compiler slower, even when ticky is not +used (a big no no). The later is fairly complex but increases code size +unconditionally. See also Note [Ticky counters are static]. + +So instead we emit a new StgEntCounter for each use site of a binding +where we infered a tag to be present. And increment the counter whenever +this use site is executed. + +We use the fields as follows: + +entry_count: Entries avoided. +str: : Name of the id. + +We use emitTickyCounterTag to emit the counter. + +Unlike the closure counters each *use* site of v has it's own +counter. So there is no need to keep track of the closure/case we are +in. + +We also have to pass a unique for the counter. An Id might be +scrutinized in more than one place, so the ID alone isn't enough +to distinguish between use sites. +-} + +emitTickyCounterTag :: Unique -> NonVoid Id -> FCode CLabel +emitTickyCounterTag unique (NonVoid id) = + let name = idName id + ctr_lbl = mkTagHitLabel name unique in + (>> return ctr_lbl) $ + ifTicky $ do + { platform <- getPlatform + ; parent <- getTickyCtrLabel + ; mod_name <- getModuleName + + -- When printing the name of a thing in a ticky file, we + -- want to give the module name even for *local* things. We + -- print just "x (M)" rather that "M.x" to distinguish them + -- from the global kind. + ; let ppr_for_ticky_name :: SDoc + ppr_for_ticky_name = + let n = ppr name + ext = empty -- parens (text "tagged") + p = case hasHaskellName parent of + -- NB the default "top" ticky ctr does not + -- have a Haskell name + Just pname -> text "at" <+> ppr (nameSrcLoc pname) <+> + text "in" <+> pprNameUnqualified name + _ -> empty + in if isInternalName name + then n <+> parens (ppr mod_name) <+> ext <+> p + else n <+> ext <+> p + ; sdoc_context <- stgToCmmContext <$> getStgToCmmConfig + ; fun_descr_lit <- newStringCLit $ renderWithContext sdoc_context ppr_for_ticky_name + ; arg_descr_lit <- newStringCLit $ "infer" + ; emitDataLits ctr_lbl + -- Must match layout of includes/rts/Ticky.h's StgEntCounter + -- + -- krc: note that all the fields are I32 now; some were I16 + -- before, but the code generator wasn't handling that + -- properly and it led to chaos, panic and disorder. + [ mkIntCLit platform 0, -- registered? + mkIntCLit platform 0, -- Arity + mkIntCLit platform 0, -- Heap allocated for this thing + fun_descr_lit, + arg_descr_lit, + zeroCLit platform, -- Entries into this thing + zeroCLit platform, -- Heap allocated by this thing + zeroCLit platform -- Link to next StgEntCounter + ] + } -- ----------------------------------------------------------------------------- -- Ticky stack frames @@ -560,6 +660,27 @@ tickyStackCheck :: FCode () tickyStackCheck = ifTicky $ bumpTickyCounter (fsLit "STK_CHK_ctr") -- ----------------------------------------------------------------------------- +-- Ticky for tag inference characterisation + +-- | Predicted a pointer would be tagged correctly (GHC will crash if not so no miss case) +tickyTagged :: FCode () +tickyTagged = ifTicky $ bumpTickyCounter (fsLit "TAG_TAGGED_pred") + +-- | Pass a boolean expr indicating if tag was present. +tickyUntagged :: CmmExpr -> FCode () +tickyUntagged e = do + ifTicky $ bumpTickyCounter (fsLit "TAG_UNTAGGED_pred") + ifTicky $ bumpTickyCounterByE (fsLit "TAG_UNTAGGED_miss") e + +-- | Called when for `case v of ...` we can avoid entering v based on +-- tag inference information. +tickyTagSkip :: Unique -> Id -> FCode () +tickyTagSkip unique id = ifTicky $ do + let ctr_lbl = mkTagHitLabel (idName id) unique + registerTickyCtr ctr_lbl + bumpTickyTagSkip ctr_lbl + +-- ----------------------------------------------------------------------------- -- Ticky utils isEnabled :: (StgToCmmConfig -> Bool) -> FCode Bool @@ -597,7 +718,12 @@ bumpTickyEntryCount lbl = do bumpTickyAllocd :: CLabel -> Int -> FCode () bumpTickyAllocd lbl bytes = do platform <- getPlatform - bumpTickyLitBy (cmmLabelOffB lbl (pc_OFFSET_StgEntCounter_allocd (platformConstants platform))) bytes + bumpTickyLitBy (cmmLabelOffB lbl (pc_OFFSET_StgEntCounter_entry_count (platformConstants platform))) bytes + +bumpTickyTagSkip :: CLabel -> FCode () +bumpTickyTagSkip lbl = do + platform <- getPlatform + bumpTickyLitBy (cmmLabelOffB lbl (pc_OFFSET_StgEntCounter_entry_count (platformConstants platform))) 1 bumpTickyLbl :: CLabel -> FCode () bumpTickyLbl lhs = bumpTickyLitBy (cmmLabelOffB lhs 0) 1 diff --git a/compiler/GHC/StgToCmm/Types.hs b/compiler/GHC/StgToCmm/Types.hs index 1d50f0ae70..1a4d06ed9b 100644 --- a/compiler/GHC/StgToCmm/Types.hs +++ b/compiler/GHC/StgToCmm/Types.hs @@ -19,6 +19,7 @@ import GHC.Types.ForeignStubs import GHC.Core.DataCon import GHC.Types.Name.Env import GHC.Types.Name.Set +import GHC.Stg.InferTags.TagSig import GHC.Utils.Outputable @@ -90,6 +91,7 @@ data CgInfos = CgInfos -- ^ LambdaFormInfos of exported closures in the current module. , cgIPEStub :: !CStub -- ^ The C stub which is used for IPE information + , cgTagSigs :: !(NameEnv TagSig) } -------------------------------------------------------------------------------- diff --git a/compiler/GHC/StgToCmm/Utils.hs b/compiler/GHC/StgToCmm/Utils.hs index 270699b9ed..ddda97ab2a 100644 --- a/compiler/GHC/StgToCmm/Utils.hs +++ b/compiler/GHC/StgToCmm/Utils.hs @@ -12,7 +12,8 @@ module GHC.StgToCmm.Utils ( emitDataLits, emitRODataLits, emitDataCon, emitRtsCall, emitRtsCallWithResult, emitRtsCallGen, - assignTemp, + emitBarf, + assignTemp, newTemp, newUnboxedTupleRegs, @@ -50,7 +51,7 @@ import GHC.Prelude import GHC.Platform import GHC.StgToCmm.Monad import GHC.StgToCmm.Closure -import GHC.StgToCmm.Lit (mkSimpleLit) +import GHC.StgToCmm.Lit (mkSimpleLit, newStringCLit) import GHC.Cmm import GHC.Cmm.BlockId import GHC.Cmm.Graph as CmmGraph @@ -157,6 +158,11 @@ tagToClosure platform tycon tag -- ------------------------------------------------------------------------- +emitBarf :: String -> FCode () +emitBarf msg = do + strLbl <- newStringCLit msg + emitRtsCall rtsUnitId (fsLit "barf") [(CmmLit strLbl,AddrHint)] False + emitRtsCall :: UnitId -> FastString -> [(CmmExpr,ForeignHint)] -> Bool -> FCode () emitRtsCall pkg fun = emitRtsCallGen [] (mkCmmCodeLabel pkg fun) @@ -299,8 +305,6 @@ newUnboxedTupleRegs res_ty choose_regs _ (AssignTo regs _) = return regs choose_regs platform _ = mapM (newTemp . primRepCmmType platform) reps - - ------------------------------------------------------------------------- -- emitMultiAssign ------------------------------------------------------------------------- diff --git a/compiler/GHC/Tc/Module.hs b/compiler/GHC/Tc/Module.hs index 026686b8cc..d9b59b4fd8 100644 --- a/compiler/GHC/Tc/Module.hs +++ b/compiler/GHC/Tc/Module.hs @@ -2975,6 +2975,7 @@ ppr_types debug type_env | otherwise = hasTopUserName id && case idDetails id of VanillaId -> True + StrictWorkerId{} -> True RecSelId {} -> True ClassOpId {} -> True FCallId {} -> True diff --git a/compiler/GHC/Tc/Solver.hs b/compiler/GHC/Tc/Solver.hs index 6a1f2d3315..8b6ac9928d 100644 --- a/compiler/GHC/Tc/Solver.hs +++ b/compiler/GHC/Tc/Solver.hs @@ -677,7 +677,7 @@ type-class or type defined in N. Secondly, when should these heuristics be enforced? We enforced them when the type-class method call site is in a module marked `-XSafe` or `-XTrustworthy`. This allows `-XUnsafe` modules to operate without restriction, and for Safe -Haskell inferrence to infer modules with unsafe overlaps as unsafe. +Haskell inference to infer modules with unsafe overlaps as unsafe. One alternative design would be to also consider if an instance was imported as a `safe` import or not and only apply the restriction to instances imported @@ -745,7 +745,7 @@ How is this implemented? It's complicated! So we'll step through it all: IORefs called `tcg_safe_infer` and `tcg_safe_infer_reason`. 7) `GHC.Driver.Main.tcRnModule'` -- Reads `tcg_safe_infer` after type-checking, calling - `GHC.Driver.Main.markUnsafeInfer` (passing the reason along) when safe-inferrence + `GHC.Driver.Main.markUnsafeInfer` (passing the reason along) when safe-inference failed. Note [No defaulting in the ambiguity check] diff --git a/compiler/GHC/Tc/Types.hs b/compiler/GHC/Tc/Types.hs index 97f11c8a0b..e1f0400e44 100644 --- a/compiler/GHC/Tc/Types.hs +++ b/compiler/GHC/Tc/Types.hs @@ -50,7 +50,7 @@ module GHC.Tc.Types( PromotionErr(..), IdBindingInfo(..), ClosedTypeId, RhsNames, IsGroupClosed(..), - SelfBootInfo(..), + SelfBootInfo(..), bootExports, tcTyThingCategory, pprTcTyThingCategory, peCategory, pprPECategory, CompleteMatch, CompleteMatches, @@ -696,6 +696,15 @@ data SelfBootInfo -- What is sb_tcs used for? See Note [Extra dependencies from .hs-boot files] -- in GHC.Rename.Module +bootExports :: SelfBootInfo -> NameSet +bootExports boot = + case boot of + NoSelfBoot -> emptyNameSet + SelfBoot { sb_mds = mds} -> + let exports = md_exports mds + in availsToNameSet exports + + {- Note [Tracking unused binding and imports] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/compiler/GHC/Tc/Types.hs-boot b/compiler/GHC/Tc/Types.hs-boot index c6302adb57..9f107936a5 100644 --- a/compiler/GHC/Tc/Types.hs-boot +++ b/compiler/GHC/Tc/Types.hs-boot @@ -6,6 +6,8 @@ import GHC.Utils.Outputable data TcLclEnv +data SelfBootInfo + data TcIdSigInfo instance Outputable TcIdSigInfo diff --git a/compiler/GHC/Types/Basic.hs b/compiler/GHC/Types/Basic.hs index e7c384d0a4..2ba185c25e 100644 --- a/compiler/GHC/Types/Basic.hs +++ b/compiler/GHC/Types/Basic.hs @@ -45,6 +45,8 @@ module GHC.Types.Basic ( Boxity(..), isBoxed, + CbvMark(..), isMarkedCbv, + PprPrec(..), topPrec, sigPrec, opPrec, funPrec, starPrec, appPrec, maybeParen, @@ -517,6 +519,37 @@ instance Binary Boxity where -- implemented via isBoxed-isomorphism to Bool {- ************************************************************************ * * + Call by value flag +* * +************************************************************************ +-} + +-- | Should an argument be passed evaluated *and* tagged. +data CbvMark = MarkedCbv | NotMarkedCbv + deriving Eq + +instance Outputable CbvMark where + ppr MarkedCbv = text "!" + ppr NotMarkedCbv = text "~" + +instance Binary CbvMark where + put_ bh NotMarkedCbv = putByte bh 0 + put_ bh MarkedCbv = putByte bh 1 + get bh = + do h <- getByte bh + case h of + 0 -> return NotMarkedCbv + 1 -> return MarkedCbv + _ -> panic "Invalid binary format" + +isMarkedCbv :: CbvMark -> Bool +isMarkedCbv MarkedCbv = True +isMarkedCbv NotMarkedCbv = False + + +{- +************************************************************************ +* * Recursive/Non-Recursive flag * * ************************************************************************ diff --git a/compiler/GHC/Types/Id.hs b/compiler/GHC/Types/Id.hs index 1245b372af..76dc4d0721 100644 --- a/compiler/GHC/Types/Id.hs +++ b/compiler/GHC/Types/Id.hs @@ -114,17 +114,22 @@ module GHC.Types.Id ( setIdDemandInfo, setIdDmdSig, setIdCprSig, + setIdCbvMarks, + idCbvMarks_maybe, + idCbvMarkArity, idDemandInfo, idDmdSig, idCprSig, + idTagSig_maybe, + setIdTagSig ) where import GHC.Prelude import GHC.Core ( CoreRule, isStableUnfolding, evaldUnfolding, - isCompulsoryUnfolding, Unfolding( NoUnfolding ) ) + isCompulsoryUnfolding, Unfolding( NoUnfolding ), isEvaldUnfolding ) import GHC.Types.Id.Info import GHC.Types.Basic @@ -163,6 +168,7 @@ import GHC.Utils.Panic import GHC.Utils.Panic.Plain import GHC.Utils.GlobalVars import GHC.Utils.Trace +import GHC.Stg.InferTags.TagSig -- infixl so you can say (id `set` a `set` b) infixl 1 `setIdUnfolding`, @@ -181,7 +187,8 @@ infixl 1 `setIdUnfolding`, `setIdCprSig`, `asJoinId`, - `asJoinId_maybe` + `asJoinId_maybe`, + `setIdCbvMarks` {- ************************************************************************ @@ -259,6 +266,11 @@ maybeModifyIdInfo :: Maybe IdInfo -> Id -> Id maybeModifyIdInfo (Just new_info) id = lazySetIdInfo id new_info maybeModifyIdInfo Nothing id = id +-- maybeModifyIdInfo tries to avoid unnecessary thrashing +maybeModifyIdDetails :: Maybe IdDetails -> Id -> Id +maybeModifyIdDetails (Just new_details) id = setIdDetails id new_details +maybeModifyIdDetails Nothing id = id + {- ************************************************************************ * * @@ -540,11 +552,12 @@ isJoinId id _ -> False | otherwise = False +-- | Doesn't return strictness marks isJoinId_maybe :: Var -> Maybe JoinArity isJoinId_maybe id | isId id = assertPpr (isId id) (ppr id) $ case Var.idDetails id of - JoinId arity -> Just arity + JoinId arity _marks -> Just arity _ -> Nothing | otherwise = Nothing @@ -609,10 +622,12 @@ asJoinId id arity = warnPprTrace (not (isLocalId id)) warnPprTrace (not (is_vanilla_or_join id)) "asJoinId" (ppr id <+> pprIdDetails (idDetails id)) $ - id `setIdDetails` JoinId arity + id `setIdDetails` JoinId arity (idCbvMarks_maybe id) where is_vanilla_or_join id = case Var.idDetails id of VanillaId -> True + -- Can workers become join ids? Yes! + StrictWorkerId {} -> pprTraceDebug "asJoinId (strict worker)" (ppr id) True JoinId {} -> True _ -> False @@ -689,6 +704,9 @@ isStrictId id isStrUsedDmd (idDemandInfo id) -- Take the best of both strictnesses - old and new +idTagSig_maybe :: Id -> Maybe TagSig +idTagSig_maybe = tagSig . idInfo + --------------------------------- -- UNFOLDING @@ -712,6 +730,49 @@ idDemandInfo id = demandInfo (idInfo id) setIdDemandInfo :: Id -> Demand -> Id setIdDemandInfo id dmd = modifyIdInfo (`setDemandInfo` dmd) id +setIdTagSig :: Id -> TagSig -> Id +setIdTagSig id sig = modifyIdInfo (`setTagSig` sig) id + +-- | If all marks are NotMarkedStrict we just set nothing. +setIdCbvMarks :: Id -> [CbvMark] -> Id +setIdCbvMarks id marks + | not (any isMarkedCbv marks) = maybeModifyIdDetails (removeMarks $ idDetails id) id + | otherwise = + -- pprTrace "setMarks:" (ppr id <> text ":" <> ppr marks) $ + case idDetails id of + -- good ol (likely worker) function + VanillaId -> id `setIdDetails` (StrictWorkerId trimmedMarks) + JoinId arity _ -> id `setIdDetails` (JoinId arity (Just trimmedMarks)) + -- Updating an existing strict worker. + StrictWorkerId _ -> id `setIdDetails` (StrictWorkerId trimmedMarks) + -- Do nothing for these + RecSelId{} -> id + DFunId{} -> id + _ -> pprTrace "setIdCbvMarks: Unable to set cbv marks for" (ppr id $$ + text "marks:" <> ppr marks $$ + text "idDetails:" <> ppr (idDetails id)) id + + where + -- (Currently) no point in passing args beyond the arity unlifted. + -- We would have to eta expand all call sites to (length marks). + -- Perhaps that's sensible but for now be conservative. + trimmedMarks = take (idArity id) marks + removeMarks details = case details of + JoinId arity (Just _) -> Just $ JoinId arity Nothing + StrictWorkerId _ -> Just VanillaId + _ -> Nothing + +idCbvMarks_maybe :: Id -> Maybe [CbvMark] +idCbvMarks_maybe id = case idDetails id of + StrictWorkerId marks -> Just marks + JoinId _arity marks -> marks + _ -> Nothing + +-- Id must be called with at least this arity in order to allow arguments to +-- be passed unlifted. +idCbvMarkArity :: Id -> Arity +idCbvMarkArity fn = maybe 0 length (idCbvMarks_maybe fn) + setCaseBndrEvald :: StrictnessMark -> Id -> Id -- Used for variables bound by a case expressions, both the case-binder -- itself, and any pattern-bound variables that are argument of a @@ -884,6 +945,7 @@ updOneShotInfo id one_shot -- f = \x -> e -- If we change the one-shot-ness of x, f's type changes +-- Replaces the id info if the zapper returns @Just idinfo@ zapInfo :: (IdInfo -> Maybe IdInfo) -> Id -> Id zapInfo zapper id = maybeModifyIdInfo (zapper (idInfo id)) id @@ -969,7 +1031,7 @@ transferPolyIdInfo :: Id -- Original Id -> Id -- New Id -> Id transferPolyIdInfo old_id abstract_wrt new_id - = modifyIdInfo transfer new_id + = modifyIdInfo transfer new_id `setIdCbvMarks` new_cbv_marks where arity_increase = count isId abstract_wrt -- Arity increases by the -- number of value binders @@ -985,6 +1047,18 @@ transferPolyIdInfo old_id abstract_wrt new_id new_strictness = prependArgsDmdSig arity_increase old_strictness old_cpr = cprSigInfo old_info + old_cbv_marks = fromMaybe (replicate old_arity NotMarkedCbv) (idCbvMarks_maybe old_id) + abstr_cbv_marks = mapMaybe getMark abstract_wrt + new_cbv_marks = abstr_cbv_marks ++ old_cbv_marks + + getMark v + | not (isId v) + = Nothing + | isId v + , isEvaldUnfolding (idUnfolding v) + , not (isUnliftedType $ idType v) + = Just MarkedCbv + | otherwise = Just NotMarkedCbv transfer new_info = new_info `setArityInfo` new_arity `setInlinePragInfo` old_inline_prag `setOccInfo` new_occ_info diff --git a/compiler/GHC/Types/Id/Info.hs b/compiler/GHC/Types/Id/Info.hs index ec5607d40f..73e7169181 100644 --- a/compiler/GHC/Types/Id/Info.hs +++ b/compiler/GHC/Types/Id/Info.hs @@ -68,7 +68,7 @@ module GHC.Types.Id.Info ( emptyRuleInfo, isEmptyRuleInfo, ruleInfoFreeVars, ruleInfoRules, setRuleInfoHead, - ruleInfo, setRuleInfo, + ruleInfo, setRuleInfo, tagSigInfo, -- ** The CAFInfo type CafInfo(..), @@ -76,8 +76,10 @@ module GHC.Types.Id.Info ( cafInfo, setCafInfo, -- ** The LambdaFormInfo type - LambdaFormInfo(..), - lfInfo, setLFInfo, + LambdaFormInfo, + lfInfo, setLFInfo, setTagSig, + + tagSig, -- ** Tick-box Info TickBoxOp(..), TickBoxId, @@ -108,10 +110,11 @@ import GHC.Utils.Misc import GHC.Utils.Outputable import GHC.Utils.Panic import GHC.Utils.Panic.Plain +import GHC.Stg.InferTags.TagSig import Data.Word -import GHC.StgToCmm.Types (LambdaFormInfo (..)) +import GHC.StgToCmm.Types (LambdaFormInfo) -- infixl so you can say (id `set` a `set` b) infixl 1 `setRuleInfo`, @@ -126,7 +129,6 @@ infixl 1 `setRuleInfo`, `setDemandInfo`, `setNeverRepPoly`, `setLevityInfoWithType` - {- ************************************************************************ * * @@ -173,8 +175,59 @@ data IdDetails | CoVarId -- ^ A coercion variable -- This only covers /un-lifted/ coercions, of type -- (t1 ~# t2) or (t1 ~R# t2), not their lifted variants - | JoinId JoinArity -- ^ An 'Id' for a join point taking n arguments - -- Note [Join points] in "GHC.Core" + | JoinId JoinArity (Maybe [CbvMark]) + -- ^ An 'Id' for a join point taking n arguments + -- Note [Join points] in "GHC.Core" + | StrictWorkerId [CbvMark] + -- ^ An 'Id' for a worker function, which expects some arguments to be + -- passed both evaluated and tagged. + -- See Note [Strict Worker Ids] + -- See Note [Tag Inference] + +{- Note [Strict Worker Ids] +~~~~~~~~~~~~~~~~~~~~~~~~~~~ +StrictWorkerId essentially constrains the calling convention for the given Id. +It requires arguments marked as tagged to be passed properly evaluated+*tagged*. + +While we were always able to express the fact that an argument is evaluated +via attaching a evaldUnfolding to the functions arguments there used to be +no way to express that an lifted argument is already properly tagged once we jump +into the RHS. +This means when branching on a value the RHS always needed to perform +a tag check to ensure the argument wasn't an indirection (the evaldUnfolding +already ruling out thunks). + +StrictWorkerIds give us this additional expressiveness which we use to improve +runtime. This is all part of the TagInference work. See also Note [Tag Inference]. + +What we do is: +* If we think a function might benefit from passing certain arguments unlifted + for performance reasons we attach an evaldUnfolding to these arguments. +* Either during W/W, but at latest during Tidy VanillaIds with arguments that + have evaldUnfoldings are turned into StrictWorkerIds. +* During CorePrep calls to StrictWorkerIds are eta expanded. +* During Stg CodeGen: + * When we call a binding that is a StrictWorkerId: + * We check if all arguments marked to be passed unlifted are already tagged. + * If they aren't we will wrap the call in case expressions which will evaluate+tag + these arguments before jumping to the function. +* During Cmm codeGen: + * When generating code for the RHS of a StrictWorker binding + we omit tag checks when using arguments marked as tagged. + +We primarily use this for workers where we mark strictly demanded arguments +and arguments representing strict fields as call-by-value during W/W. But we +also check other functions during tidy and potentially turn some of them into +strict workers and mark some of their arguments as call-by-value by looking at +argument unfoldings. + +NB: I choose to put the information into a new Id constructor since these are loaded +at all optimization levels. This makes it trivial to ensure the additional +calling convention demands are available at all call sites. Putting it into +IdInfo would require us at the very least to always decode the IdInfo +just to decide if we need to throw it away or not after. + +-} -- | Recursive Selector Parent data RecSelParent = RecSelData TyCon | RecSelPatSyn PatSyn deriving Eq @@ -198,8 +251,8 @@ isCoVarDetails :: IdDetails -> Bool isCoVarDetails CoVarId = True isCoVarDetails _ = False -isJoinIdDetails_maybe :: IdDetails -> Maybe JoinArity -isJoinIdDetails_maybe (JoinId join_arity) = Just join_arity +isJoinIdDetails_maybe :: IdDetails -> Maybe (JoinArity, (Maybe [CbvMark])) +isJoinIdDetails_maybe (JoinId join_arity marks) = Just (join_arity, marks) isJoinIdDetails_maybe _ = Nothing instance Outputable IdDetails where @@ -210,6 +263,7 @@ pprIdDetails VanillaId = empty pprIdDetails other = brackets (pp other) where pp VanillaId = panic "pprIdDetails" + pp (StrictWorkerId dmds) = text "StrictWorker" <> parens (ppr dmds) pp (DataConWorkId _) = text "DataCon" pp (DataConWrapId _) = text "DataConWrapper" pp (ClassOpId {}) = text "ClassOp" @@ -221,7 +275,7 @@ pprIdDetails other = brackets (pp other) = brackets $ text "RecSel" <> ppWhen is_naughty (text "(naughty)") pp CoVarId = text "CoVarId" - pp (JoinId arity) = text "JoinId" <> parens (int arity) + pp (JoinId arity marks) = text "JoinId" <> parens (int arity) <> parens (ppr marks) {- ************************************************************************ @@ -274,7 +328,10 @@ data IdInfo -- 4% in some programs. See #17497 and associated MR. -- -- See documentation of the getters for what these packed fields mean. - lfInfo :: !(Maybe LambdaFormInfo) + lfInfo :: !(Maybe LambdaFormInfo), + + -- See documentation of the getters for what these packed fields mean. + tagSig :: !(Maybe TagSig) } -- | Encodes arities, OneShotInfo, CafInfo and LevityInfo. @@ -365,6 +422,9 @@ cafInfo = bitfieldGetCafInfo . bitfield callArityInfo :: IdInfo -> ArityInfo callArityInfo = bitfieldGetCallArityInfo . bitfield +tagSigInfo :: IdInfo -> Maybe TagSig +tagSigInfo = tagSig + -- Setters setRuleInfo :: IdInfo -> RuleInfo -> IdInfo @@ -414,6 +474,9 @@ setCafInfo info caf = setLFInfo :: IdInfo -> LambdaFormInfo -> IdInfo setLFInfo info lf = info { lfInfo = Just lf } +setTagSig :: IdInfo -> TagSig -> IdInfo +setTagSig info sig = info { tagSig = Just sig } + setOneShotInfo :: IdInfo -> OneShotInfo -> IdInfo setOneShotInfo info lb = info { bitfield = bitfieldSetOneShotInfo lb (bitfield info) } @@ -444,7 +507,8 @@ vanillaIdInfo bitfieldSetOneShotInfo NoOneShotInfo $ bitfieldSetLevityInfo NoLevityInfo $ emptyBitField, - lfInfo = Nothing + lfInfo = Nothing, + tagSig = Nothing } -- | More informative 'IdInfo' we can use when we know the 'Id' has no CAF references diff --git a/compiler/GHC/Types/Id/Make.hs b/compiler/GHC/Types/Id/Make.hs index 6fe9f0dafe..67f7e405a3 100644 --- a/compiler/GHC/Types/Id/Make.hs +++ b/compiler/GHC/Types/Id/Make.hs @@ -491,6 +491,10 @@ mkDictSelId name clas | otherwise = base_info `setRuleInfo` mkRuleInfo [rule] + `setInlinePragInfo` neverInlinePragma + `setUnfoldingInfo` mkInlineUnfoldingWithArity 1 + defaultSimpleOpts + (mkDictSelRhs clas val_index) -- Add a magic BuiltinRule, but no unfolding -- so that the rule is always available to fire. -- See Note [ClassOp/DFun selection] in GHC.Tc.TyCl.Instance diff --git a/compiler/GHC/Types/RepType.hs b/compiler/GHC/Types/RepType.hs index b2e8a1c3b8..cf6517bd39 100644 --- a/compiler/GHC/Types/RepType.hs +++ b/compiler/GHC/Types/RepType.hs @@ -14,7 +14,8 @@ module GHC.Types.RepType typePrimRep, typePrimRep1, typeMonoPrimRep_maybe, runtimeRepPrimRep, typePrimRepArgs, PrimRep(..), primRepToType, - countFunRepArgs, countConRepArgs, tyConPrimRep, tyConPrimRep1, + countFunRepArgs, countConRepArgs, dataConRuntimeRepStrictness, + tyConPrimRep, tyConPrimRep1, -- * Unboxed sum representation type ubxSumRepType, layoutUbxSum, typeSlotTy, SlotTy (..), @@ -127,8 +128,41 @@ countConRepArgs dc = go (dataConRepArity dc) (dataConRepType dc) | otherwise = pprPanic "countConRepArgs: arity greater than type can handle" (ppr (n, ty, typePrimRep ty)) +dataConRuntimeRepStrictness :: HasDebugCallStack => DataCon -> [StrictnessMark] +-- ^ Give the demands on the arguments of a +-- Core constructor application (Con dc args) at runtime. +-- Assumes the constructor is not levity polymorphic. For example +-- unboxed tuples won't work. +dataConRuntimeRepStrictness dc = + + -- pprTrace "dataConRuntimeRepStrictness" (ppr dc $$ ppr (dataConRepArgTys dc)) $ + + let repMarks = dataConRepStrictness dc + repTys = map irrelevantMult $ dataConRepArgTys dc + in -- todo: assert dc != unboxedTuple/unboxedSum + go repMarks repTys [] + where + go (mark:marks) (ty:types) out_marks + -- Zero-width argument, mark is irrelevant at runtime. + | -- pprTrace "VoidTy" (ppr ty) $ + (isZeroBitTy ty) + = go marks types out_marks + -- Single rep argument, e.g. Int + -- Keep mark as-is + | [_] <- reps + = go marks types (mark:out_marks) + -- Multi-rep argument, e.g. (# Int, Bool #) or (# Int | Bool #) + -- Make up one non-strict mark per runtime argument. + | otherwise -- TODO: Assert real_reps /= null + = go marks types ((replicate (length real_reps) NotMarkedStrict)++out_marks) + where + reps = typePrimRep ty + real_reps = filter (not . isVoidRep) $ reps + go [] [] out_marks = reverse out_marks + go _m _t _o = pprPanic "dataConRuntimeRepStrictness2" (ppr dc $$ ppr _m $$ ppr _t $$ ppr _o) + -- | True if the type has zero width. -isZeroBitTy :: Type -> Bool +isZeroBitTy :: HasDebugCallStack => Type -> Bool isZeroBitTy = null . typePrimRep diff --git a/compiler/GHC/Unit/Module/ModGuts.hs b/compiler/GHC/Unit/Module/ModGuts.hs index 1558e5944a..4fc683d844 100644 --- a/compiler/GHC/Unit/Module/ModGuts.hs +++ b/compiler/GHC/Unit/Module/ModGuts.hs @@ -31,6 +31,7 @@ import GHC.Types.Fixity.Env import GHC.Types.ForeignStubs import GHC.Types.HpcInfo import GHC.Types.Name.Reader +import GHC.Types.Name.Set (NameSet) import GHC.Types.SafeHaskell import GHC.Types.SourceFile ( HscSource(..), hscSourceToIsBoot ) import GHC.Types.SrcLoc @@ -88,6 +89,7 @@ data ModGuts mg_fam_inst_env :: FamInstEnv, -- ^ Type-family instance environment for -- /home-package/ modules (including this -- one); c.f. 'tcg_fam_inst_env' + mg_boot_exports :: !NameSet, -- Things that are also export via hs-boot file mg_safe_haskell :: SafeHaskellMode, -- ^ Safe Haskell mode mg_trust_pkg :: Bool, -- ^ Do we need to trust our diff --git a/compiler/GHC/Utils/Misc.hs b/compiler/GHC/Utils/Misc.hs index 8f33944b86..3bc4e6adb8 100644 --- a/compiler/GHC/Utils/Misc.hs +++ b/compiler/GHC/Utils/Misc.hs @@ -30,7 +30,7 @@ module GHC.Utils.Misc ( mapAndUnzip, mapAndUnzip3, filterOut, partitionWith, - dropWhileEndLE, spanEnd, last2, lastMaybe, + dropWhileEndLE, spanEnd, last2, lastMaybe, onJust, List.foldl1', foldl2, count, countWhile, all2, @@ -764,6 +764,10 @@ lastMaybe :: [a] -> Maybe a lastMaybe [] = Nothing lastMaybe xs = Just $ last xs +-- | @onJust x m f@ applies f to the value inside the Just or returns the default. +onJust :: b -> Maybe a -> (a->b) -> b +onJust dflt = flip (maybe dflt) + -- | Split a list into its last element and the initial part of the list. -- @snocView xs = Just (init xs, last xs)@ for non-empty lists. -- @snocView xs = Nothing@ otherwise. diff --git a/compiler/ghc.cabal.in b/compiler/ghc.cabal.in index efc4639b96..068ded4e9c 100644 --- a/compiler/ghc.cabal.in +++ b/compiler/ghc.cabal.in @@ -546,10 +546,15 @@ Library GHC.Stg.Lift.Config GHC.Stg.Lift.Monad GHC.Stg.Lint + GHC.Stg.InferTags + GHC.Stg.InferTags.Rewrite + GHC.Stg.InferTags.TagSig + GHC.Stg.InferTags.Types GHC.Stg.Pipeline GHC.Stg.Stats GHC.Stg.Subst GHC.Stg.Syntax + GHC.Stg.Utils GHC.StgToByteCode GHC.StgToCmm GHC.StgToCmm.ArgRep @@ -570,6 +575,7 @@ Library GHC.StgToCmm.Prim GHC.StgToCmm.Prof GHC.StgToCmm.Sequel + GHC.StgToCmm.TagCheck GHC.StgToCmm.Ticky GHC.StgToCmm.Types GHC.StgToCmm.Utils |