summaryrefslogtreecommitdiff
path: root/compiler
diff options
context:
space:
mode:
Diffstat (limited to 'compiler')
-rw-r--r--compiler/GHC/Cmm/CLabel.hs42
-rw-r--r--compiler/GHC/Cmm/Liveness.hs2
-rw-r--r--compiler/GHC/Cmm/Utils.hs7
-rw-r--r--compiler/GHC/Core/DataCon.hs20
-rw-r--r--compiler/GHC/Core/Opt/CprAnal.hs2
-rw-r--r--compiler/GHC/Core/Opt/DmdAnal.hs6
-rw-r--r--compiler/GHC/Core/Opt/Pipeline.hs2
-rw-r--r--compiler/GHC/Core/Opt/Simplify/Monad.hs3
-rw-r--r--compiler/GHC/Core/Opt/SpecConstr.hs13
-rw-r--r--compiler/GHC/Core/Opt/WorkWrap.hs16
-rw-r--r--compiler/GHC/Core/Opt/WorkWrap/Utils.hs219
-rw-r--r--compiler/GHC/Core/Rules.hs6
-rw-r--r--compiler/GHC/Core/Tidy.hs67
-rw-r--r--compiler/GHC/Core/Type.hs11
-rw-r--r--compiler/GHC/Core/Utils.hs91
-rw-r--r--compiler/GHC/CoreToIface.hs1
-rw-r--r--compiler/GHC/CoreToStg.hs1
-rw-r--r--compiler/GHC/CoreToStg/Prep.hs40
-rw-r--r--compiler/GHC/Driver/Config/StgToCmm.hs1
-rw-r--r--compiler/GHC/Driver/Flags.hs9
-rw-r--r--compiler/GHC/Driver/GenerateCgIPEStub.hs8
-rw-r--r--compiler/GHC/Driver/Main.hs18
-rw-r--r--compiler/GHC/Driver/Session.hs9
-rw-r--r--compiler/GHC/HsToCore.hs4
-rw-r--r--compiler/GHC/Iface/Make.hs11
-rw-r--r--compiler/GHC/Iface/Syntax.hs15
-rw-r--r--compiler/GHC/Iface/Tidy.hs31
-rw-r--r--compiler/GHC/IfaceToCore.hs4
-rw-r--r--compiler/GHC/Prelude.hs12
-rw-r--r--compiler/GHC/Stg/FVs.hs1
-rw-r--r--compiler/GHC/Stg/InferTags.hs631
-rw-r--r--compiler/GHC/Stg/InferTags/Rewrite.hs495
-rw-r--r--compiler/GHC/Stg/InferTags/TagSig.hs66
-rw-r--r--compiler/GHC/Stg/InferTags/Types.hs137
-rw-r--r--compiler/GHC/Stg/Lint.hs18
-rw-r--r--compiler/GHC/Stg/Stats.hs2
-rw-r--r--compiler/GHC/Stg/Syntax.hs221
-rw-r--r--compiler/GHC/Stg/Unarise.hs5
-rw-r--r--compiler/GHC/Stg/Utils.hs124
-rw-r--r--compiler/GHC/StgToByteCode.hs2
-rw-r--r--compiler/GHC/StgToCmm.hs3
-rw-r--r--compiler/GHC/StgToCmm/Bind.hs22
-rw-r--r--compiler/GHC/StgToCmm/Closure.hs57
-rw-r--r--compiler/GHC/StgToCmm/Config.hs1
-rw-r--r--compiler/GHC/StgToCmm/DataCon.hs5
-rw-r--r--compiler/GHC/StgToCmm/Env.hs13
-rw-r--r--compiler/GHC/StgToCmm/Expr.hs134
-rw-r--r--compiler/GHC/StgToCmm/TagCheck.hs156
-rw-r--r--compiler/GHC/StgToCmm/Ticky.hs130
-rw-r--r--compiler/GHC/StgToCmm/Types.hs2
-rw-r--r--compiler/GHC/StgToCmm/Utils.hs12
-rw-r--r--compiler/GHC/Tc/Module.hs1
-rw-r--r--compiler/GHC/Tc/Solver.hs4
-rw-r--r--compiler/GHC/Tc/Types.hs11
-rw-r--r--compiler/GHC/Tc/Types.hs-boot2
-rw-r--r--compiler/GHC/Types/Basic.hs33
-rw-r--r--compiler/GHC/Types/Id.hs84
-rw-r--r--compiler/GHC/Types/Id/Info.hs88
-rw-r--r--compiler/GHC/Types/Id/Make.hs4
-rw-r--r--compiler/GHC/Types/RepType.hs38
-rw-r--r--compiler/GHC/Unit/Module/ModGuts.hs2
-rw-r--r--compiler/GHC/Utils/Misc.hs6
-rw-r--r--compiler/ghc.cabal.in6
63 files changed, 2909 insertions, 278 deletions
diff --git a/compiler/GHC/Cmm/CLabel.hs b/compiler/GHC/Cmm/CLabel.hs
index 3acace8be2..dd7e1f14f5 100644
--- a/compiler/GHC/Cmm/CLabel.hs
+++ b/compiler/GHC/Cmm/CLabel.hs
@@ -25,6 +25,7 @@ module GHC.Cmm.CLabel (
mkInfoTableLabel,
mkEntryLabel,
mkRednCountsLabel,
+ mkTagHitLabel,
mkConInfoTableLabel,
mkApEntryLabel,
mkApInfoTableLabel,
@@ -301,7 +302,7 @@ isIdLabel _ = False
-- Used in SRT analysis. See Note [Ticky labels in SRT analysis] in
-- GHC.Cmm.Info.Build.
isTickyLabel :: CLabel -> Bool
-isTickyLabel (IdLabel _ _ RednCounts) = True
+isTickyLabel (IdLabel _ _ IdTickyInfo{}) = True
isTickyLabel _ = False
-- | Indicate if "GHC.CmmToC" has to generate an extern declaration for the
@@ -447,6 +448,26 @@ pprDebugCLabel platform lbl = pprCLabel platform AsmStyle lbl <> parens extra
_ -> text "other CLabel"
+-- Dynamic ticky info for the id.
+data TickyIdInfo
+ = TickyRednCounts -- ^ Used for dynamic allocations
+ | TickyInferedTag !Unique -- ^ Used to track dynamic hits of tag inference.
+ deriving (Eq,Show)
+
+instance Outputable TickyIdInfo where
+ ppr TickyRednCounts = text "ct_rdn"
+ ppr (TickyInferedTag unique) = text "ct_tag[" <> ppr unique <> char ']'
+
+-- | Don't depend on this if you need determinism.
+-- No determinism in the ncg backend, so we use the unique for Ord.
+-- Even if it pains me slightly.
+instance Ord TickyIdInfo where
+ compare TickyRednCounts TickyRednCounts = EQ
+ compare TickyRednCounts _ = LT
+ compare _ TickyRednCounts = GT
+ compare (TickyInferedTag unique1) (TickyInferedTag unique2) =
+ nonDetCmpUnique unique1 unique2
+
data IdLabelInfo
= Closure -- ^ Label for closure
@@ -457,7 +478,7 @@ data IdLabelInfo
| LocalInfoTable -- ^ Like InfoTable but not externally visible
| LocalEntry -- ^ Like Entry but not externally visible
- | RednCounts -- ^ Label of place to keep Ticky-ticky info for this Id
+ | IdTickyInfo !TickyIdInfo -- ^ Label of place to keep Ticky-ticky hit info for this Id
| ConEntry ConInfoTableLocation
-- ^ Constructor entry point, when `-fdistinct-info-tables` is enabled then
@@ -504,12 +525,12 @@ instance Outputable IdLabelInfo where
ppr LocalInfoTable = text "LocalInfoTable"
ppr LocalEntry = text "LocalEntry"
- ppr RednCounts = text "RednCounts"
ppr (ConEntry mn) = text "ConEntry" <+> ppr mn
ppr (ConInfoTable mn) = text "ConInfoTable" <+> ppr mn
ppr ClosureTable = text "ClosureTable"
ppr Bytes = text "Bytes"
ppr BlockInfoTable = text "BlockInfoTable"
+ ppr (IdTickyInfo info) = text "IdTickyInfo" <+> ppr info
data RtsLabelInfo
@@ -559,8 +580,12 @@ data DynamicLinkerLabelInfo
mkSRTLabel :: Unique -> CLabel
mkSRTLabel u = SRTLabel u
+-- See Note [ticky for LNE]
mkRednCountsLabel :: Name -> CLabel
-mkRednCountsLabel name = IdLabel name NoCafRefs RednCounts -- Note [ticky for LNE]
+mkRednCountsLabel name = IdLabel name NoCafRefs (IdTickyInfo TickyRednCounts)
+
+mkTagHitLabel :: Name -> Unique -> CLabel
+mkTagHitLabel name !uniq = IdLabel name NoCafRefs (IdTickyInfo (TickyInferedTag uniq))
-- These have local & (possibly) external variants:
mkLocalClosureLabel :: Name -> CafInfo -> CLabel
@@ -892,7 +917,7 @@ hasIdLabelInfo _ = Nothing
-- -----------------------------------------------------------------------------
-- Does a CLabel's referent itself refer to a CAF?
hasCAF :: CLabel -> Bool
-hasCAF (IdLabel _ _ RednCounts) = False -- Note [ticky for LNE]
+hasCAF (IdLabel _ _ (IdTickyInfo TickyRednCounts)) = False -- See Note [ticky for LNE]
hasCAF (IdLabel _ MayHaveCafRefs _) = True
hasCAF _ = False
@@ -1146,7 +1171,7 @@ idInfoLabelType info =
Closure -> GcPtrLabel
ConInfoTable {} -> DataLabel
ClosureTable -> DataLabel
- RednCounts -> DataLabel
+ IdTickyInfo{} -> DataLabel
Bytes -> DataLabel
_ -> CodeLabel
@@ -1503,7 +1528,10 @@ ppIdFlavor x = pp_cSEP <> case x of
Entry -> text "entry"
LocalEntry -> text "entry"
Slow -> text "slow"
- RednCounts -> text "ct"
+ IdTickyInfo TickyRednCounts
+ -> text "ct"
+ IdTickyInfo (TickyInferedTag unique)
+ -> text "ct_inf_tag" <> char '_' <> ppr unique
ConEntry loc ->
case loc of
DefinitionSite -> text "con_entry"
diff --git a/compiler/GHC/Cmm/Liveness.hs b/compiler/GHC/Cmm/Liveness.hs
index f047ea4367..a1526be099 100644
--- a/compiler/GHC/Cmm/Liveness.hs
+++ b/compiler/GHC/Cmm/Liveness.hs
@@ -71,6 +71,8 @@ cmmGlobalLiveness platform graph =
analyzeCmmBwd liveLattice (xferLive platform) graph mapEmpty
-- | On entry to the procedure, there had better not be any LocalReg's live-in.
+-- If you see this error it most likely means you are trying to use a variable
+-- without it being defined in the given scope.
noLiveOnEntry :: BlockId -> CmmLive LocalReg -> a -> a
noLiveOnEntry bid in_fact x =
if nullRegSet in_fact then x
diff --git a/compiler/GHC/Cmm/Utils.hs b/compiler/GHC/Cmm/Utils.hs
index 9e9566b334..2060be5bda 100644
--- a/compiler/GHC/Cmm/Utils.hs
+++ b/compiler/GHC/Cmm/Utils.hs
@@ -48,7 +48,7 @@ module GHC.Cmm.Utils(
currentTSOExpr, currentNurseryExpr, cccsExpr,
-- Tagging
- cmmTagMask, cmmPointerMask, cmmUntag, cmmIsTagged,
+ cmmTagMask, cmmPointerMask, cmmUntag, cmmIsTagged, cmmIsNotTagged,
cmmConstrTag1, mAX_PTR_TAG, tAG_MASK,
-- Overlap and usage
@@ -447,13 +447,14 @@ cmmPointerMask platform = mkIntExpr platform (complement (tAG_MASK platform))
-- Used to untag a possibly tagged pointer
-- A static label need not be untagged
-cmmUntag, cmmIsTagged, cmmConstrTag1 :: Platform -> CmmExpr -> CmmExpr
+cmmUntag, cmmIsTagged, cmmIsNotTagged, cmmConstrTag1 :: Platform -> CmmExpr -> CmmExpr
cmmUntag _ e@(CmmLit (CmmLabel _)) = e
-- Default case
cmmUntag platform e = cmmAndWord platform e (cmmPointerMask platform)
--- Test if a closure pointer is untagged
+-- Test if a closure pointer is untagged/tagged.
cmmIsTagged platform e = cmmNeWord platform (cmmAndWord platform e (cmmTagMask platform)) (zeroExpr platform)
+cmmIsNotTagged platform e = cmmEqWord platform (cmmAndWord platform e (cmmTagMask platform)) (zeroExpr platform)
-- Get constructor tag, but one based.
cmmConstrTag1 platform e = cmmAndWord platform e (cmmTagMask platform)
diff --git a/compiler/GHC/Core/DataCon.hs b/compiler/GHC/Core/DataCon.hs
index feca5d3754..07419b9c5c 100644
--- a/compiler/GHC/Core/DataCon.hs
+++ b/compiler/GHC/Core/DataCon.hs
@@ -58,7 +58,7 @@ module GHC.Core.DataCon (
isUnboxedSumDataCon,
isVanillaDataCon, isNewDataCon, classDataCon, dataConCannotMatch,
dataConUserTyVarsArePermuted,
- isBanged, isMarkedStrict, eqHsBang, isSrcStrict, isSrcUnpacked,
+ isBanged, isMarkedStrict, cbvFromStrictMark, eqHsBang, isSrcStrict, isSrcUnpacked,
specialPromotedDc,
-- ** Promotion related functions
@@ -745,9 +745,10 @@ data SrcUnpackedness = SrcUnpack -- ^ {-# UNPACK #-} specified
-------------------------
--- StrictnessMark is internal only, used to indicate strictness
+-- StrictnessMark is used to indicate strictness
-- of the DataCon *worker* fields
data StrictnessMark = MarkedStrict | NotMarkedStrict
+ deriving Eq
-- | An 'EqSpec' is a tyvar/type pair representing an equality made in
-- rejigging a GADT constructor
@@ -944,6 +945,16 @@ instance Outputable StrictnessMark where
ppr MarkedStrict = text "!"
ppr NotMarkedStrict = empty
+instance Binary StrictnessMark where
+ put_ bh NotMarkedStrict = putByte bh 0
+ put_ bh MarkedStrict = putByte bh 1
+ get bh =
+ do h <- getByte bh
+ case h of
+ 0 -> return NotMarkedStrict
+ 1 -> return MarkedStrict
+ _ -> panic "Invalid binary format"
+
instance Binary SrcStrictness where
put_ bh SrcLazy = putByte bh 0
put_ bh SrcStrict = putByte bh 1
@@ -994,6 +1005,11 @@ isMarkedStrict :: StrictnessMark -> Bool
isMarkedStrict NotMarkedStrict = False
isMarkedStrict _ = True -- All others are strict
+cbvFromStrictMark :: StrictnessMark -> CbvMark
+cbvFromStrictMark NotMarkedStrict = NotMarkedCbv
+cbvFromStrictMark MarkedStrict = MarkedCbv
+
+
{- *********************************************************************
* *
\subsection{Construction}
diff --git a/compiler/GHC/Core/Opt/CprAnal.hs b/compiler/GHC/Core/Opt/CprAnal.hs
index 65468cd037..64551f9498 100644
--- a/compiler/GHC/Core/Opt/CprAnal.hs
+++ b/compiler/GHC/Core/Opt/CprAnal.hs
@@ -179,7 +179,7 @@ cprAnalProgram logger fam_envs binds = do
let env = emptyAnalEnv fam_envs
let binds_plus_cpr = snd $ mapAccumL cprAnalTopBind env binds
putDumpFileMaybe logger Opt_D_dump_cpr_signatures "Cpr signatures" FormatText $
- dumpIdInfoOfProgram (ppr . cprSigInfo) binds_plus_cpr
+ dumpIdInfoOfProgram False (ppr . cprSigInfo) binds_plus_cpr
-- See Note [Stamp out space leaks in demand analysis] in GHC.Core.Opt.DmdAnal
seqBinds binds_plus_cpr `seq` return binds_plus_cpr
diff --git a/compiler/GHC/Core/Opt/DmdAnal.hs b/compiler/GHC/Core/Opt/DmdAnal.hs
index e6b404ff61..cd58ff89d7 100644
--- a/compiler/GHC/Core/Opt/DmdAnal.hs
+++ b/compiler/GHC/Core/Opt/DmdAnal.hs
@@ -1466,7 +1466,7 @@ finaliseArgBoxities env fn arity rhs
go_arg :: Budgets -> (Type,StrictnessMark,Demand) -> (Budgets, Demand)
go_arg bg@(MkB bg_top bg_inner) (ty, str_mark, dmd@(n :* _))
- = case wantToUnboxArg fam_envs ty dmd of
+ = case wantToUnboxArg False fam_envs ty dmd of
DropAbsent -> (bg, dmd)
StopUnboxing -> (MkB (bg_top-1) bg_inner, trimBoxity dmd)
@@ -1487,6 +1487,7 @@ finaliseArgBoxities env fn arity rhs
= (bg_inner', dmd')
| otherwise
= (bg_inner, trimBoxity dmd)
+ Unlift -> panic "No unlifting in DmdAnal"
add_demands :: [Demand] -> CoreExpr -> CoreExpr
-- Attach the demands to the outer lambdas of this expression
@@ -1509,7 +1510,7 @@ finaliseLetBoxity env ty dmd
= go ty NotMarkedStrict dmd
where
go ty mark dmd@(n :* _) =
- case wantToUnboxArg env ty dmd of
+ case wantToUnboxArg False env ty dmd of
DropAbsent -> dmd
StopUnboxing -> trimBoxity dmd
Unbox DataConPatContext{dcpc_dc=dc, dcpc_tc_args=tc_args} dmds
@@ -1520,6 +1521,7 @@ finaliseLetBoxity env ty dmd
-> n :* (mkProd Unboxed $! dmds')
| otherwise
-> trimBoxity dmd
+ Unlift -> panic "No unlifting in DmdAnal"
{- *********************************************************************
diff --git a/compiler/GHC/Core/Opt/Pipeline.hs b/compiler/GHC/Core/Opt/Pipeline.hs
index 741552f815..276bfee45d 100644
--- a/compiler/GHC/Core/Opt/Pipeline.hs
+++ b/compiler/GHC/Core/Opt/Pipeline.hs
@@ -1079,6 +1079,6 @@ dmdAnal logger dflags fam_envs rules binds = do
}
binds_plus_dmds = dmdAnalProgram opts fam_envs rules binds
Logger.putDumpFileMaybe logger Opt_D_dump_str_signatures "Strictness signatures" FormatText $
- dumpIdInfoOfProgram (ppr . zapDmdEnvSig . dmdSigInfo) binds_plus_dmds
+ dumpIdInfoOfProgram (hasPprDebug dflags) (ppr . zapDmdEnvSig . dmdSigInfo) binds_plus_dmds
-- See Note [Stamp out space leaks in demand analysis] in GHC.Core.Opt.DmdAnal
seqBinds binds_plus_dmds `seq` return binds_plus_dmds
diff --git a/compiler/GHC/Core/Opt/Simplify/Monad.hs b/compiler/GHC/Core/Opt/Simplify/Monad.hs
index c730a3e981..83d27f4fe5 100644
--- a/compiler/GHC/Core/Opt/Simplify/Monad.hs
+++ b/compiler/GHC/Core/Opt/Simplify/Monad.hs
@@ -215,6 +215,7 @@ newId :: FastString -> Mult -> Type -> SimplM Id
newId fs w ty = do uniq <- getUniqueM
return (mkSysLocalOrCoVar fs uniq w ty)
+-- | Make a join id with given type and arity but without call-by-value annotations.
newJoinId :: [Var] -> Type -> SimplM Id
newJoinId bndrs body_ty
= do { uniq <- getUniqueM
@@ -223,7 +224,7 @@ newJoinId bndrs body_ty
arity = count isId bndrs
-- arity: See Note [Invariants on join points] invariant 2b, in GHC.Core
join_arity = length bndrs
- details = JoinId join_arity
+ details = JoinId join_arity Nothing
id_info = vanillaIdInfo `setArityInfo` arity
-- `setOccInfo` strongLoopBreaker
diff --git a/compiler/GHC/Core/Opt/SpecConstr.hs b/compiler/GHC/Core/Opt/SpecConstr.hs
index afd8afc5ea..14fe9bec00 100644
--- a/compiler/GHC/Core/Opt/SpecConstr.hs
+++ b/compiler/GHC/Core/Opt/SpecConstr.hs
@@ -656,6 +656,11 @@ containers package we have a merge function with this specialization:
We give sc_s5lZ and sc_s5m0 a evaluated unfolding since they come out of
strict field fields in the Bin constructor.
+This is especially important since tag inference can then use this
+information to adjust the calling convention of
+`$wmerge0_s4UK` to enforce arguments being passed fully evaluated+tagged.
+See Note [Tag Inference], Note [Strict Worker Ids] for more information on
+how we can take advantage of this.
-----------------------------------------------------
Stuff not yet handled
@@ -1771,11 +1776,15 @@ spec_one env fn arg_bndrs body (call_pat, rule_number)
-- Annotate the variables with the strictness information from
-- the function (see Note [Strictness information in worker binders])
- (spec_lam_args, spec_call_args) = mkWorkerArgs fn False
- spec_lam_args1
+ (spec_lam_args, spec_call_args,_) = mkWorkerArgs fn False
+ spec_lam_args1 []
spec_body_ty
-- mkWorkerArgs: usual w/w hack to avoid generating
-- a spec_rhs of unlifted type and no args.
+ -- Unlike W/W we don't turn functions into strict workers
+ -- immediately here instead letting tidy handle this.
+ -- For this reason we can ignore the cbv marks.
+ -- See Note [Strict Worker Ids]. See Note [Tag Inference].
spec_id = mkLocalId spec_name Many
(mkLamTypes spec_lam_args spec_body_ty)
diff --git a/compiler/GHC/Core/Opt/WorkWrap.hs b/compiler/GHC/Core/Opt/WorkWrap.hs
index 3e4770a997..f07e8dde37 100644
--- a/compiler/GHC/Core/Opt/WorkWrap.hs
+++ b/compiler/GHC/Core/Opt/WorkWrap.hs
@@ -763,11 +763,12 @@ splitFun ww_opts fn_id rhs
mkWWBindPair :: WwOpts -> Id -> IdInfo
-> [Var] -> CoreExpr -> Unique -> Divergence
- -> ([Demand], JoinArity, Id -> CoreExpr, Expr CoreBndr -> CoreExpr)
+ -> ([Demand],[CbvMark], JoinArity, Id -> CoreExpr, Expr CoreBndr -> CoreExpr)
-> [(Id, CoreExpr)]
mkWWBindPair ww_opts fn_id fn_info fn_args fn_body work_uniq div
- (work_demands, join_arity, wrap_fn, work_fn)
- = [(work_id, work_rhs), (wrap_id, wrap_rhs)]
+ (work_demands, cbv_marks :: [CbvMark], join_arity, wrap_fn, work_fn)
+ = -- pprTrace "mkWWBindPair" (ppr fn_id <+> ppr wrap_id <+> ppr work_id $$ ppr wrap_rhs) $
+ [(work_id, work_rhs), (wrap_id, wrap_rhs)]
-- Worker first, because wrapper mentions it
where
arity = arityInfo fn_info
@@ -819,9 +820,13 @@ mkWWBindPair ww_opts fn_id fn_info fn_args fn_body work_uniq div
-- Set the arity so that the Core Lint check that the
-- arity is consistent with the demand type goes
-- through
+
+ `setIdCbvMarks` cbv_marks
+
`asJoinId_maybe` work_join_arity
+ -- `setIdThing` (undefined cbv_marks)
- work_arity = length work_demands
+ work_arity = length work_demands :: Int
-- See Note [Demand on the worker]
single_call = saturatedByOneShots arity (demandInfo fn_info)
@@ -1014,7 +1019,8 @@ splitThunk :: WwOpts -> RecFlag -> Var -> Expr Var -> UniqSM [(Var, Expr Var)]
splitThunk ww_opts is_rec x rhs
= assert (not (isJoinId x)) $
do { let x' = localiseId x -- See comment above
- ; (useful,_, wrap_fn, fn_arg) <- mkWWstr_one ww_opts x'
+ ; (useful,_args, wrap_fn, fn_arg)
+ <- mkWWstr_one ww_opts x' NotMarkedCbv
; let res = [ (x, Let (NonRec x' rhs) (wrap_fn fn_arg)) ]
; if useful then assertPpr (isNonRec is_rec) (ppr x) -- The thunk must be non-recursive
return res
diff --git a/compiler/GHC/Core/Opt/WorkWrap/Utils.hs b/compiler/GHC/Core/Opt/WorkWrap/Utils.hs
index 1b2d3ca1ba..8936ccdfe5 100644
--- a/compiler/GHC/Core/Opt/WorkWrap/Utils.hs
+++ b/compiler/GHC/Core/Opt/WorkWrap/Utils.hs
@@ -6,6 +6,7 @@ A library for the ``worker\/wrapper'' back-end to the strictness analyser
{-# LANGUAGE ViewPatterns #-}
+{-# LANGUAGE GeneralizedNewtypeDeriving #-}
module GHC.Core.Opt.WorkWrap.Utils
( WwOpts(..), initWwOpts, mkWwBodies, mkWWstr, mkWWstr_one, mkWorkerArgs
@@ -14,6 +15,7 @@ module GHC.Core.Opt.WorkWrap.Utils
, findTypeShape, IsRecDataConResult(..), isRecDataCon
, mkAbsentFiller
, isWorkerSmallEnough, dubiousDataConInstArgTys
+ , isGoodWorker, badWorker , goodWorker
)
where
@@ -144,6 +146,9 @@ data WwOpts
-- Used for absent argument error message
, wo_module :: !Module
+ , wo_unlift_strict :: !Bool -- Generate workers even if the only effect is some args
+ -- get passed unlifted.
+ -- See Note [WW for calling convention]
}
initWwOpts :: Module -> DynFlags -> FamInstEnvs -> WwOpts
@@ -153,10 +158,12 @@ initWwOpts this_mod dflags fam_envs = MkWwOpts
, wo_cpr_anal = gopt Opt_CprAnal dflags
, wo_fun_to_thunk = gopt Opt_FunToThunk dflags
, wo_module = this_mod
+ , wo_unlift_strict = gopt Opt_WorkerWrapperUnlift dflags
}
type WwResult
= ([Demand], -- Demands for worker (value) args
+ [CbvMark], -- Cbv semantics for worker (value) args
JoinArity, -- Number of worker (type OR value) args
Id -> CoreExpr, -- Wrapper body, lacking only the worker Id
CoreExpr -> CoreExpr) -- Worker body, lacking the original function rhs
@@ -226,25 +233,29 @@ mkWwBodies opts fun_id arg_vars res_ty demands res_cpr
zapped_arg_vars = map zap_var arg_vars
(subst, cloned_arg_vars) = cloneBndrs empty_subst uniq_supply zapped_arg_vars
res_ty' = GHC.Core.Subst.substTy subst res_ty
+ init_cbv_marks = map (const NotMarkedCbv) cloned_arg_vars
- ; (useful1, work_args, wrap_fn_str, fn_args)
- <- mkWWstr opts cloned_arg_vars
+ ; (useful1, work_args_cbv, wrap_fn_str, fn_args)
+ <- mkWWstr opts cloned_arg_vars init_cbv_marks
+
+ ; let (work_args, work_marks) = unzip work_args_cbv
-- Do CPR w/w. See Note [Always do CPR w/w]
; (useful2, wrap_fn_cpr, work_fn_cpr, cpr_res_ty)
<- mkWWcpr_entry opts res_ty' res_cpr
- ; let (work_lam_args, work_call_args) = mkWorkerArgs fun_id (wo_fun_to_thunk opts)
- work_args cpr_res_ty
+ ; let (work_lam_args, work_call_args, work_call_cbv) = mkWorkerArgs fun_id (wo_fun_to_thunk opts)
+ work_args work_marks cpr_res_ty
+
call_work work_fn = mkVarApps (Var work_fn) work_call_args
call_rhs fn_rhs = mkAppsBeta fn_rhs fn_args
-- See Note [Join points and beta-redexes]
wrapper_body = mkLams cloned_arg_vars . wrap_fn_cpr . wrap_fn_str . call_work
worker_body = mkLams work_lam_args . work_fn_cpr . call_rhs
- worker_args_dmds = [idDemandInfo v | v <- work_call_args, isId v]
+ (worker_args_dmds, work_val_cbvs)= unzip [(idDemandInfo v,cbv) | (v,cbv) <- zipEqual "mkWwBodies" work_call_args work_call_cbv, isId v]
; if ((useful1 && not only_one_void_argument) || useful2)
- then return (Just (worker_args_dmds, length work_call_args,
+ then return (Just (worker_args_dmds, work_val_cbvs, length work_call_args,
wrapper_body, worker_body))
else return Nothing
}
@@ -359,20 +370,25 @@ add a void argument. E.g.
We use the state-token type which generates no code.
-}
+-- | Prevent a function from becoming a thunk by adding a void argument if
+-- required.
mkWorkerArgs :: Id -- The wrapper Id
- -> Bool
+ -> Bool -- Allow fun->thunk conversion.
-> [Var]
+ -> [CbvMark]
-> Type -- Type of body
-> ([Var], -- Lambda bound args
- [Var]) -- Args at call site
-mkWorkerArgs wrap_id fun_to_thunk args res_ty
+ [Var], -- Args at call site
+ [CbvMark] -- cbv semantics for the worker args.
+ )
+mkWorkerArgs wrap_id fun_to_thunk args cbv_marks res_ty
| not (isJoinId wrap_id) -- Join Ids never need an extra arg
, not (any isId args) -- No existing value lambdas
, needs_a_value_lambda -- and we need to add one
- = (args ++ [voidArgId], args ++ [voidPrimId])
+ = (args ++ [voidArgId], args ++ [voidPrimId], cbv_marks ++ [NotMarkedCbv])
| otherwise
- = (args, args)
+ = (args, args, cbv_marks)
where
-- If fun_to_thunk is False we always keep at least one value
-- argument: see Note [Protecting the last value argument]
@@ -512,17 +528,38 @@ data UnboxingDecision s
-- instantiation with 'dataConRepInstPat'.
-- The @[s]@ carries the bits of information with which we can continue
-- unboxing, e.g. @s@ will be 'Demand' or 'Cpr'.
+ | Unlift
+ -- ^ The argument can't be unboxed, but we want it to be passed evaluated to the worker.
+
+-- Do we want to create workers just for unlifting?
+wwForUnlifting :: WwOpts -> Bool
+wwForUnlifting !opts
+ -- Always unlift if possible
+ | wo_unlift_strict opts = goodWorker
+ -- Don't unlift it would cause additional W/W splits.
+ | otherwise = badWorker
+
+badWorker :: Bool
+badWorker = False
+
+goodWorker :: Bool
+goodWorker = True
+
+isGoodWorker :: Bool -> Bool
+isGoodWorker = id
+
-- | Unwraps the 'Boxity' decision encoded in the given 'SubDemand' and returns
-- a 'DataConPatContext' as well the nested demands on fields of the 'DataCon'
-- to unbox.
wantToUnboxArg
- :: FamInstEnvs
+ :: Bool -- ^ Consider unlifting
+ -> FamInstEnvs
-> Type -- ^ Type of the argument
-> Demand -- ^ How the arg was used
-> UnboxingDecision Demand
-- See Note [Which types are unboxed?]
-wantToUnboxArg fam_envs ty (n :* sd)
+wantToUnboxArg do_unlifting fam_envs ty dmd@(n :* sd)
| isAbs n
= DropAbsent
@@ -530,10 +567,17 @@ wantToUnboxArg fam_envs ty (n :* sd)
, Just dc <- tyConSingleAlgDataCon_maybe tc
, let arity = dataConRepArity dc
, Just (Unboxed, ds) <- viewProd arity sd -- See Note [Boxity analysis]
- -- NB: No strictness or evaluatedness checks here.
+ -- NB: No strictness or evaluatedness checks for unboxing here.
-- That is done by 'finaliseArgBoxities'!
= Unbox (DataConPatContext dc tc_args co) ds
+ -- See Note [Strict Worker Ids]
+ | do_unlifting
+ , isStrUsedDmd dmd
+ , not (isFunTy ty)
+ , not (isUnliftedType ty) -- Already unlifted!
+ = Unlift
+
| otherwise
= StopUnboxing
@@ -637,6 +681,65 @@ other cases where something went avoidably wrong.
This warning also triggers for the stream fusion library within `text`.
We can'easily W/W constructed results like `Stream` because we have no simple
way to express existential types in the worker's type signature.
+
+Note [WW for calling convention]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+If we know a function f will always evaluate a particular argument
+we might decide that it should rather get evaluated by the caller.
+We call this "unlifting" the argument.
+Sometimes the caller knows that the argument is already evaluated,
+so we won't generate any code to enter/evaluate the argument.
+This evaluation avoidance can be quite beneficial.
+Especially for recursive functions who pass the same lifted argument
+along on each iteration or walk over strict data structures.
+
+One way to achieve this is to do a W/W split, where the wrapper does
+the evaluation, and the worker can treat its arguments as unlifted.
+The wrapper is small and will be inlined at almost all call sites and
+the evaluation code in the wrapper can then cancel out with evaluation
+done by the calling context if the argument is evaluated there.
+Same idea as W/W to avoid allocation really, just for a different kind
+of work.
+
+Performing W/W might not always be a win. In particular it's easy to break
+(badly written, but common) rule frameworks by doing additional W/W splits.
+See #20364 for a more detailed explaination.
+
+Hence we have the following strategies with different trade-offs:
+A) Never do W/W *just* for unlifting of arguments.
+ + Very conservative - doesn't break any rules
+ - Lot's of performance left on the table
+B) Do W/W on just about anything where it might be
+ beneficial.
+ + Exploits pretty much every oppertunity for unlifting.
+ - A bit of compile time/code size cost for all the wrappers.
+ - Can break rules which would otherwise fire. See #20364.
+C) Unlift *any* (non-boot exported) functions arguments if they are strict.
+ That is instead of creating a Worker with the new calling convention we
+ change the calling convention of the binding itself.
+ + Exploits every opportunity for unlifting.
+ + Maybe less bad interactions with rules.
+ - Requires tracking of boot-exported definitions.
+ - Requires either:
+ ~ Eta-expansion at *all* call sites in order to generate
+ an impedance matcher function. Leading to massive code bloat.
+ Essentially we end up creating a imprompto wrapper function
+ wherever we wouldn't inline the wrapper with a W/W approach.
+ ~ There is the option of achieving this without eta-expansion if we instead expand
+ the partial application code to check for demands on the calling convention and
+ for it to evaluate the arguments. The main downsides there would be the complexity
+ of the implementation and that it carries a certain overhead even for functions who
+ don't take advantage of this functionality. I haven't tried this approach because it's
+ not trivial to implement and doing W/W splits seems to work well enough.
+
+Currently we use the first approach A) by default, with a flag that allows users to fall back to the
+more aggressive approach B).
+I also tried the third approach C) using eta-expansion at call sites to avoid modifying the PAP-handling
+code which wasn't fruitful. See https://gitlab.haskell.org/ghc/ghc/-/merge_requests/5614#note_389903.
+We could still try to do C) in the future by having PAP calls which will evaluate the required arguments
+before calling the partially applied function. But this would be neither a small nor simple change so we
+stick with A) and a flag for B) for now.
+See also Note [Tag Inference] and Note [Strict Worker Ids]
-}
{-
@@ -650,26 +753,29 @@ way to express existential types in the worker's type signature.
mkWWstr :: WwOpts
-> [Var] -- Wrapper args; have their demand info on them
-- *Includes type variables*
- -> UniqSM (Bool, -- Is this useful
- [Var], -- Worker args
+ -> [CbvMark] -- cbv info for arguments
+ -> UniqSM (Bool, -- Will this result in a useful worker
+ [(Var,CbvMark)], -- Worker args/their call-by-value semantics.
CoreExpr -> CoreExpr, -- Wrapper body, lacking the worker call
-- and without its lambdas
-- This fn adds the unboxing
[CoreExpr]) -- Reboxed args for the call to the
-- original RHS. Corresponds one-to-one
-- with the wrapper arg vars
-mkWWstr opts args
- = go args
+mkWWstr opts args cbv_info
+ = go args cbv_info
where
- go_one arg = mkWWstr_one opts arg
+ go_one arg cbv = mkWWstr_one opts arg cbv
- go [] = return (False, [], nop_fn, [])
- go (arg : args) = do { (useful1, args1, wrap_fn1, wrap_arg) <- go_one arg
- ; (useful2, args2, wrap_fn2, wrap_args) <- go args
+ go [] _ = return (badWorker, [], nop_fn, [])
+ go (arg : args) (cbv:cbvs)
+ = do { (useful1, args1, wrap_fn1, wrap_arg) <- go_one arg cbv
+ ; (useful2, args2, wrap_fn2, wrap_args) <- go args cbvs
; return ( useful1 || useful2
, args1 ++ args2
, wrap_fn1 . wrap_fn2
, wrap_arg:wrap_args ) }
+ go _ _ = panic "mkWWstr: Impossible - cbv/arg length missmatch"
----------------------
-- mkWWstr_one wrap_var = (useful, work_args, wrap_fn, wrap_arg)
@@ -678,19 +784,28 @@ mkWWstr opts args
-- * wrap_arg assumes work_args are in scope, and builds a ConApp that
-- reconstructs the RHS of wrap_var that we pass to the original RHS
-- See Note [Worker/wrapper for Strictness and Absence]
-mkWWstr_one :: WwOpts -> Var -> UniqSM (Bool, [Var], CoreExpr -> CoreExpr, CoreExpr)
-mkWWstr_one opts arg =
- case wantToUnboxArg fam_envs arg_ty arg_dmd of
+mkWWstr_one :: WwOpts
+ -> Var
+ -> CbvMark
+ -> UniqSM (Bool, [(Var,CbvMark)], CoreExpr -> CoreExpr, CoreExpr)
+mkWWstr_one opts arg marked_cbv =
+ case wantToUnboxArg True fam_envs arg_ty arg_dmd of
_ | isTyVar arg -> do_nothing
DropAbsent
| Just absent_filler <- mkAbsentFiller opts arg
- -- Absent case. We can't always handle absence for arbitrary
+ -- Absent case. Dropt the argument from the worker.
+ -- We can't always handle absence for arbitrary
-- unlifted types, so we need to choose just the cases we can
-- (that's what mkAbsentFiller does)
- -> return (True, [], nop_fn, absent_filler)
+ -> return (goodWorker, [], nop_fn, absent_filler)
+
+ Unbox dcpc ds -> unbox_one_arg opts arg ds dcpc marked_cbv
- Unbox dcpc ds -> unbox_one_arg opts arg ds dcpc
+ Unlift -> return ( wwForUnlifting opts
+ , [(setIdUnfolding arg evaldUnfolding, MarkedCbv)]
+ , nop_fn
+ , varToCoreExpr arg)
_ -> do_nothing -- Other cases, like StopUnboxing
@@ -698,27 +813,44 @@ mkWWstr_one opts arg =
fam_envs = wo_fam_envs opts
arg_ty = idType arg
arg_dmd = idDemandInfo arg
- do_nothing = return (False, [arg], nop_fn, varToCoreExpr arg)
+ -- Type args don't get cbv marks
+ arg_cbv = if isTyVar arg then NotMarkedCbv else marked_cbv
+ do_nothing = return (badWorker, [(arg,arg_cbv)], nop_fn, varToCoreExpr arg)
unbox_one_arg :: WwOpts
-> Var
-> [Demand]
-> DataConPatContext
- -> UniqSM (Bool, [Var], CoreExpr -> CoreExpr, CoreExpr)
+ -> CbvMark
+ -> UniqSM (Bool, [(Var,CbvMark)], CoreExpr -> CoreExpr, CoreExpr)
unbox_one_arg opts arg_var ds
DataConPatContext { dcpc_dc = dc, dcpc_tc_args = tc_args
, dcpc_co = co }
+ _marked_cbv
= do { pat_bndrs_uniqs <- getUniquesM
; let ex_name_fss = map getOccFS $ dataConExTyCoVars dc
+ -- Create new arguments we get when unboxing dc
(ex_tvs', arg_ids) =
dataConRepFSInstPat (ex_name_fss ++ repeat ww_prefix) pat_bndrs_uniqs (idMult arg_var) dc tc_args
+ -- Apply str info to new args.
arg_ids' = zipWithEqual "unbox_one_arg" setIdDemandInfo arg_ids ds
unbox_fn = mkUnpackCase (Var arg_var) co (idMult arg_var)
dc (ex_tvs' ++ arg_ids')
- ; (_, worker_args, wrap_fn, wrap_args) <- mkWWstr opts (ex_tvs' ++ arg_ids')
+ -- Mark arguments coming out of strict fields as evaluated and give them cbv semantics. See Note [Strict Worker Ids]
+ cbv_arg_marks = zipWithEqual "unbox_one_arg" bangToMark (dataConRepStrictness dc) arg_ids'
+ unf_args = zipWith setEvald arg_ids' cbv_arg_marks
+ cbv_marks = (map (const NotMarkedCbv) ex_tvs') ++ cbv_arg_marks
+ ; (_sub_args_quality, worker_args, wrap_fn, wrap_args) <- mkWWstr opts (ex_tvs' ++ unf_args) cbv_marks
; let wrap_arg = mkConApp dc (map Type tc_args ++ wrap_args) `mkCast` mkSymCo co
- ; return (True, worker_args, unbox_fn . wrap_fn, wrap_arg) }
+ ; return (goodWorker, worker_args, unbox_fn . wrap_fn, wrap_arg) }
-- Don't pass the arg, rebox instead
+ where bangToMark :: StrictnessMark -> Id -> CbvMark
+ bangToMark NotMarkedStrict _ = NotMarkedCbv
+ bangToMark MarkedStrict v
+ | isUnliftedType (idType v) = NotMarkedCbv
+ | otherwise = MarkedCbv
+ setEvald var NotMarkedCbv = var
+ setEvald var MarkedCbv = setIdUnfolding var evaldUnfolding
-- | Tries to find a suitable absent filler to bind the given absent identifier
-- to. See Note [Absent fillers].
@@ -795,7 +927,7 @@ function is worthy for splitting:
g c p = case p of (a,b) -> $gw c a b
$gw c a b = if c then a else b
-2a But do /not/ split if Boxity Analysis said "Boxed".
+2a But do /not/ unbox if Boxity Analysis said "Boxed".
In this case, 'wantToUnboxArg' returns 'StopUnboxing'.
Otherwise we risk decomposing and reboxing a massive
tuple which is barely used. Example:
@@ -809,6 +941,11 @@ function is worthy for splitting:
Imagine that it had millions of fields. This actually happened
in GHC itself where the tuple was DynFlags
+2b But if e.g. a large tuple or product type is always demanded we might
+ decide to "unlift" it. That is tighten the calling convention for that
+ argument to require it to be passed as a pointer to the value itself.
+ See Note [WW for calling convention].
+
3. In all other cases (e.g., lazy, used demand and not eval'd),
'finaliseArgBoxities' will have cleared the Boxity flag to 'Boxed'
(see Note [Finalising boxity for demand signatures] in GHC.Core.Opt.DmdAnal)
@@ -1163,14 +1300,14 @@ mkWWcpr_entry
:: WwOpts
-> Type -- function body
-> Cpr -- CPR analysis results
- -> UniqSM (Bool, -- Is w/w'ing useful?
+ -> UniqSM (Bool, -- Is w/w'ing useful?
CoreExpr -> CoreExpr, -- New wrapper. 'nop_fn' if not useful
CoreExpr -> CoreExpr, -- New worker. 'nop_fn' if not useful
Type) -- Type of worker's body.
-- Just the input body_ty if not useful
-- ^ Entrypoint to CPR W/W. See Note [Worker/wrapper for CPR] for an overview.
mkWWcpr_entry opts body_ty body_cpr
- | not (wo_cpr_anal opts) = return (False, nop_fn, nop_fn, body_ty)
+ | not (wo_cpr_anal opts) = return (badWorker, nop_fn, nop_fn, body_ty)
| otherwise = do
-- Part (1)
res_bndr <- mk_res_bndr body_ty
@@ -1188,8 +1325,8 @@ mkWWcpr_entry opts body_ty body_cpr
work_fn body = bind_res_bndr body (work_unpack_res transit_tup) -- 1 2 3
work_body_ty = exprType transit_tup
return $ if not useful
- then (False, nop_fn, nop_fn, body_ty)
- else (True, wrap_fn, work_fn, work_body_ty)
+ then (badWorker, nop_fn, nop_fn, body_ty)
+ else (goodWorker, wrap_fn, work_fn, work_body_ty)
-- | Part (1) of Note [Worker/wrapper for CPR].
mk_res_bndr :: Type -> UniqSM Id
@@ -1212,7 +1349,7 @@ mkWWcpr :: WwOpts -> [Id] -> [Cpr] -> UniqSM CprWwResultMany
mkWWcpr _opts vars [] =
-- special case: No CPRs means all top (for example from FlatConCpr),
-- hence stop WW.
- return (False, toOL vars, map varToCoreExpr vars, nop_fn)
+ return (badWorker, toOL vars, map varToCoreExpr vars, nop_fn)
mkWWcpr opts vars cprs = do
-- No existentials in 'vars'. 'wantToUnboxResult' should have checked that.
massertPpr (not (any isTyVar vars)) (ppr vars $$ ppr cprs)
@@ -1231,7 +1368,7 @@ mkWWcpr_one opts res_bndr cpr
, Unbox dcpc arg_cprs <- wantToUnboxResult (wo_fam_envs opts) (idType res_bndr) cpr
= unbox_one_result opts res_bndr arg_cprs dcpc
| otherwise
- = return (False, unitOL res_bndr, varToCoreExpr res_bndr, nop_fn)
+ = return (badWorker, unitOL res_bndr, varToCoreExpr res_bndr, nop_fn)
unbox_one_result
:: WwOpts -> Id -> [Cpr] -> DataConPatContext -> UniqSM CprWwResultOne
@@ -1260,8 +1397,8 @@ unbox_one_result opts res_bndr arg_cprs
-- Don't try to WW an unboxed tuple return type when there's nothing inside
-- to unbox further.
return $ if isUnboxedTupleDataCon dc && not nested_useful
- then ( False, unitOL res_bndr, Var res_bndr, nop_fn )
- else ( True
+ then ( badWorker, unitOL res_bndr, Var res_bndr, nop_fn )
+ else ( goodWorker
, transit_vars
, rebuilt_result
, this_work_unbox_res . work_unbox_res
diff --git a/compiler/GHC/Core/Rules.hs b/compiler/GHC/Core/Rules.hs
index 2eabc1db0c..14bcd4564f 100644
--- a/compiler/GHC/Core/Rules.hs
+++ b/compiler/GHC/Core/Rules.hs
@@ -387,8 +387,10 @@ pprRuleBase rules = pprUFM rules $ \rss ->
-- successful.
lookupRule :: RuleOpts -> InScopeEnv
-> (Activation -> Bool) -- When rule is active
- -> Id -> [CoreExpr]
- -> [CoreRule] -> Maybe (CoreRule, CoreExpr)
+ -> Id -- Function head
+ -> [CoreExpr] -- Args
+ -> [CoreRule] -- Rules
+ -> Maybe (CoreRule, CoreExpr)
-- See Note [Extra args in the target]
-- See comments on matchRule
diff --git a/compiler/GHC/Core/Tidy.hs b/compiler/GHC/Core/Tidy.hs
index aaf42eafd2..63473ca68a 100644
--- a/compiler/GHC/Core/Tidy.hs
+++ b/compiler/GHC/Core/Tidy.hs
@@ -10,26 +10,30 @@ The code for *top-level* bindings is in GHC.Iface.Tidy.
{-# OPTIONS_GHC -Wno-incomplete-record-updates #-}
module GHC.Core.Tidy (
- tidyExpr, tidyRules, tidyUnfolding
+ tidyExpr, tidyRules, tidyUnfolding, tidyCbvInfoTop
) where
import GHC.Prelude
import GHC.Core
+import GHC.Core.Type
+
import GHC.Core.Seq ( seqUnfolding )
+import GHC.Core.Utils ( computeCbvInfo )
import GHC.Types.Id
import GHC.Types.Id.Info
import GHC.Types.Demand ( zapDmdEnvSig )
-import GHC.Core.Type ( tidyType, tidyVarBndr )
import GHC.Core.Coercion ( tidyCo )
import GHC.Types.Var
import GHC.Types.Var.Env
import GHC.Types.Unique (getUnique)
import GHC.Types.Unique.FM
import GHC.Types.Name hiding (tidyNameOcc)
+import GHC.Types.Name.Set
import GHC.Types.SrcLoc
import GHC.Types.Tickish
import GHC.Data.Maybe
+import GHC.Utils.Misc
import Data.List (mapAccumL)
{-
@@ -45,18 +49,67 @@ tidyBind :: TidyEnv
-> (TidyEnv, CoreBind)
tidyBind env (NonRec bndr rhs)
- = tidyLetBndr env env bndr =: \ (env', bndr') ->
- (env', NonRec bndr' (tidyExpr env' rhs))
+ = -- pprTrace "tidyBindNonRec" (ppr bndr) $
+ let cbv_bndr = (tidyCbvInfoLocal bndr rhs)
+ (env', bndr') = tidyLetBndr env env cbv_bndr
+ tidy_rhs = (tidyExpr env' rhs)
+ in (env', NonRec bndr' tidy_rhs)
tidyBind env (Rec prs)
- = let
- (bndrs, rhss) = unzip prs
- (env', bndrs') = mapAccumL (tidyLetBndr env') env bndrs
+ = -- pprTrace "tidyBindRec" (ppr $ map fst prs) $
+ let
+ cbv_bndrs = map ((\(bnd,rhs) -> tidyCbvInfoLocal bnd rhs)) prs
+ (_bndrs, rhss) = unzip prs
+ (env', bndrs') = mapAccumL (tidyLetBndr env') env cbv_bndrs
in
map (tidyExpr env') rhss =: \ rhss' ->
(env', Rec (zip bndrs' rhss'))
+-- Note [Attaching CBV Marks to ids]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+-- Before tidy, function arguments which have a call-by-value semantics are identified
+-- by having an `OtherCon[]` unfolding. During tidy, we transform this information into CBV (call-by-value)
+-- marks. The marks themselves then are put onto the function id itself.
+-- This means the code generator can get the full calling convention by only looking at the function
+-- itself without having to inspect the RHS for potential argument unfoldings.
+--
+-- The actual logic is in tidyCbvInfo and takes:
+-- * The function id
+-- * The functions rhs
+-- And gives us back the function annotated with the marks.
+-- We call it in:
+-- * tidyTopPair for top level bindings
+-- * tidyBind for local bindings.
+--
+-- Not that we *have* to look at the untidied rhs.
+-- During tidying some knot-tying occurs which can blow up
+-- if we look at the types of the arguments, but here we dont:
+-- we only check if the manifest lambdas have OtherCon unfoldings
+-- and these remain valid post tidy.
+--
+-- If the id is boot-exported we don't use a cbv calling convention via marks,
+-- as the boot file won't contain them. Which means code calling boot-exported
+-- ids might expect these ids to have a vanilla calling convention even if we
+-- determine a different one here.
+-- To be able to avoid this we pass a set of boot exported ids for this module around.
+-- For non top level ids we can skip this. Local ids are never boot-exported
+-- as boot files don't have unfoldings. So there this isn't a concern.
+-- See also Note [Strict Worker Ids]
+
+
+-- See Note [Attaching CBV Marks to ids]
+tidyCbvInfoTop :: HasDebugCallStack => NameSet -> Id -> CoreExpr -> Id
+tidyCbvInfoTop boot_exports id rhs
+ -- Can't change calling convention for boot exported things
+ | elemNameSet (idName id) boot_exports = id
+ | otherwise = computeCbvInfo id rhs
+
+-- See Note [Attaching CBV Marks to ids]
+tidyCbvInfoLocal :: HasDebugCallStack => Id -> CoreExpr -> Id
+tidyCbvInfoLocal id rhs
+ | otherwise = computeCbvInfo id rhs
+
------------ Expressions --------------
tidyExpr :: TidyEnv -> CoreExpr -> CoreExpr
tidyExpr env (Var v) = Var (tidyVarOcc env v)
diff --git a/compiler/GHC/Core/Type.hs b/compiler/GHC/Core/Type.hs
index bd3b0122d5..1cacfca468 100644
--- a/compiler/GHC/Core/Type.hs
+++ b/compiler/GHC/Core/Type.hs
@@ -126,6 +126,7 @@ module GHC.Core.Type (
isBoxedRuntimeRep,
isLiftedLevity, isUnliftedLevity,
isUnliftedType, isBoxedType, mightBeUnliftedType, isUnboxedTupleType, isUnboxedSumType,
+ isStateType,
isAlgType, isDataFamilyAppType,
isPrimitiveType, isStrictType,
isLevityTy, isLevityVar,
@@ -321,6 +322,8 @@ import Control.Monad ( guard )
--
-- [Primitive] Iff it is a built-in type that can't be expressed in Haskell.
--
+-- [Unlifted] Anything that isn't lifted is considered unlifted.
+--
-- Currently, all primitive types are unlifted, but that's not necessarily
-- the case: for example, @Int@ could be primitive.
--
@@ -2424,6 +2427,7 @@ buildSynTyCon name binders res_kind roles rhs
isLiftedType_maybe :: HasDebugCallStack => Type -> Maybe Bool
isLiftedType_maybe ty = case coreFullView (getRuntimeRep ty) of
ty' | isLiftedRuntimeRep ty' -> Just True
+ | isUnliftedRuntimeRep ty' -> Just False
TyConApp {} -> Just False -- Everything else is unlifted
_ -> Nothing -- representation-polymorphic
@@ -2441,6 +2445,13 @@ isUnliftedType ty
= not (isLiftedType_maybe ty `orElse`
pprPanic "isUnliftedType" (ppr ty <+> dcolon <+> ppr (typeKind ty)))
+-- | State token type.
+isStateType :: Type -> Bool
+isStateType ty
+ = case tyConAppTyCon_maybe ty of
+ Just tycon -> tycon == statePrimTyCon
+ _ -> False
+
-- | Returns:
--
-- * 'False' if the type is /guaranteed/ lifted or
diff --git a/compiler/GHC/Core/Utils.hs b/compiler/GHC/Core/Utils.hs
index 411628c261..3116b6bd04 100644
--- a/compiler/GHC/Core/Utils.hs
+++ b/compiler/GHC/Core/Utils.hs
@@ -56,6 +56,9 @@ module GHC.Core.Utils (
-- * Join points
isJoinBind,
+ -- * Tag inference
+ computeCbvInfo,
+
-- * unsafeEqualityProof
isUnsafeEqualityProof,
@@ -93,7 +96,7 @@ import GHC.Types.Tickish
import GHC.Types.Id
import GHC.Types.Id.Info
import GHC.Types.Unique
-import GHC.Types.Basic ( Arity )
+import GHC.Types.Basic ( Arity, CbvMark(..), isMarkedCbv )
import GHC.Types.Unique.Set
import GHC.Data.FastString
@@ -2443,7 +2446,11 @@ tryEtaReduce bndrs body
-- We always want args for join points so
-- we should never eta-reduce to a trivial expression.
-- See Note [Invariants on join points] in GHC.Core, and #20599
- not (isJoinId fun)
+ not (isJoinId fun) &&
+ -- And the function doesn't require visible arguments as part of
+ -- it's calling convention. See Note [Strict Worker Ids]
+ idCbvMarkArity fun == 0
+
---------------
fun_arity fun -- See Note [Arity care]
@@ -2591,15 +2598,89 @@ isJoinBind (NonRec b _) = isJoinId b
isJoinBind (Rec ((b, _) : _)) = isJoinId b
isJoinBind _ = False
-dumpIdInfoOfProgram :: (IdInfo -> SDoc) -> CoreProgram -> SDoc
-dumpIdInfoOfProgram ppr_id_info binds = vcat (map printId ids)
+dumpIdInfoOfProgram :: Bool -> (IdInfo -> SDoc) -> CoreProgram -> SDoc
+dumpIdInfoOfProgram dump_locals ppr_id_info binds = vcat (map printId ids)
where
ids = sortBy (stableNameCmp `on` getName) (concatMap getIds binds)
getIds (NonRec i _) = [ i ]
getIds (Rec bs) = map fst bs
- printId id | isExportedId id = ppr id <> colon <+> (ppr_id_info (idInfo id))
+ -- By default only include full info for exported ids, unless we run in the verbose
+ -- pprDebug mode.
+ printId id | isExportedId id || dump_locals = ppr id <> colon <+> (ppr_id_info (idInfo id))
| otherwise = empty
+{-
+************************************************************************
+* *
+\subsection{Tag inference things}
+* *
+************************************************************************
+-}
+
+-- | For a binding we:
+-- * Look at the args
+-- * Mark any with Unf=OtherCon[] as call-by-value, unless it's an unlifted type already.
+-- * Potentially combine it with existing call-by-value marks (from ww)
+-- * Update the id
+-- See Note [Attaching CBV Marks to ids].
+computeCbvInfo :: HasCallStack
+ => Id -- The function
+ -> CoreExpr -- It's RHS
+ -> Id
+computeCbvInfo id rhs =
+ -- pprTrace "computeCbv" (hang (ppr id) 2 (ppr dmd $$ ppr dmds)) $
+ -- TODO: For perf reasons we could skip looking at non VanillaId/StrictWorkerId/JoinId bindings
+ cbv_bndr
+ where
+ (_,val_args,_body) = collectTyAndValBinders rhs
+ new_marks = mkCbvMarks val_args
+ cbv_marks = assertPpr (checkMarks id new_marks)
+ (ppr id <+> ppr (idType id) $$ text "old:" <> ppr (idCbvMarks_maybe id) $$ text "new:" <> ppr new_marks $$ text "rhs:" <> ppr rhs)
+ new_marks
+ cbv_bndr
+ | valid_unlifted_worker val_args
+ -- Avoid retaining the original rhs
+ = cbv_marks `seqList` setIdCbvMarks id cbv_marks
+ | otherwise =
+ -- pprTraceDebug "tidyCbvInfo: Worker seems to take unboxed tuple/sum types!" (ppr id <+> ppr rhs)
+ id
+ -- We don't set CBV marks on workers which take unboxed tuples or sums as arguments.
+ -- Doing so would require us to compute the result of unarise here in order to properly determine
+ -- argument positions at runtime.
+ -- In practice this doesn't matter much. Most "interesting" functions will get a W/W split which will eliminate
+ -- unboxed tuple arguments, and unboxed sums are rarely used.
+ valid_unlifted_worker args =
+ -- pprTrace "valid_unlifted" (ppr id $$ ppr args) $
+ not $ (any (\arg -> isMultiValArg arg) args)
+ isMultiValArg id =
+ let ty = idType id
+ in not (isStateType ty) && (isUnboxedTupleType ty || isUnboxedSumType ty)
+ -- Only keep relevant marks. We *don't* have to cover all arguments. Only these
+ -- that we might want to pass call-by-value.
+ trimMarks :: [CbvMark] -> [Id] -> [CbvMark]
+ trimMarks marks val_args =
+ map fst .
+ -- Starting at the end, drop all non-cbv marks, and marks applied to unlifted types
+ dropWhileEndLE (\(m,v) -> not (isMarkedCbv m) || isUnliftedType (idType v)) $
+ zip marks val_args
+
+ mkCbvMarks :: ([Id]) -> [CbvMark]
+ mkCbvMarks = map mkMark
+ where
+ cbv_arg arg = isEvaldUnfolding (idUnfolding arg)
+ mkMark arg = if cbv_arg arg && (not $ isUnliftedType (idType arg))
+ then MarkedCbv
+ else NotMarkedCbv
+ -- If we determined earlier one an argument should be passed cbv it should
+ -- still be so here.
+ checkMarks id new_marks
+ | Just old_marks <- idCbvMarks_maybe id
+ = length (trimMarks old_marks val_args) <= length new_marks &&
+ and (zipWith checkNewMark old_marks new_marks)
+ | otherwise = True
+ checkNewMark old new =
+ isMarkedCbv new || (not $ isMarkedCbv old)
+
{- *********************************************************************
* *
diff --git a/compiler/GHC/CoreToIface.hs b/compiler/GHC/CoreToIface.hs
index 426dcae9cf..11fd63e0bc 100644
--- a/compiler/GHC/CoreToIface.hs
+++ b/compiler/GHC/CoreToIface.hs
@@ -441,6 +441,7 @@ toIfaceLetBndr id = IfLetBndr (occNameFS (getOccName id))
toIfaceIdDetails :: IdDetails -> IfaceIdDetails
toIfaceIdDetails VanillaId = IfVanillaId
+toIfaceIdDetails (StrictWorkerId dmds) = IfStrictWorkerId dmds
toIfaceIdDetails (DFunId {}) = IfDFunId
toIfaceIdDetails (RecSelId { sel_naughty = n
, sel_tycon = tc }) =
diff --git a/compiler/GHC/CoreToStg.hs b/compiler/GHC/CoreToStg.hs
index 431d831125..6e0e4600dd 100644
--- a/compiler/GHC/CoreToStg.hs
+++ b/compiler/GHC/CoreToStg.hs
@@ -30,6 +30,7 @@ import GHC.Core.DataCon
import GHC.Stg.Syntax
import GHC.Stg.Debug
+import GHC.Stg.Utils
import GHC.Types.RepType
import GHC.Types.Id.Make ( coercionTokenId )
diff --git a/compiler/GHC/CoreToStg/Prep.hs b/compiler/GHC/CoreToStg/Prep.hs
index 99c4cd4e8c..ac626931cd 100644
--- a/compiler/GHC/CoreToStg/Prep.hs
+++ b/compiler/GHC/CoreToStg/Prep.hs
@@ -1024,7 +1024,7 @@ cpeApp top_env expr
go terminal as = (terminal, as)
cpe_app :: CorePrepEnv
- -> CoreExpr
+ -> CoreExpr -- The thing we are calling
-> [ArgInfo]
-> UniqSM (Floats, CpeRhs)
cpe_app env (Var f) (CpeApp Type{} : CpeApp arg : args)
@@ -1170,7 +1170,7 @@ cpeApp top_env expr
rebuild_app
:: CorePrepEnv
-> [ArgInfo] -- The arguments (inner to outer)
- -> CpeApp
+ -> CpeApp -- The function
-> Floats
-> [Demand]
-> Maybe Arity
@@ -1519,7 +1519,8 @@ Note [Eta expansion of hasNoBinding things in CorePrep]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
maybeSaturate deals with eta expanding to saturate things that can't deal with
unsaturated applications (identified by 'hasNoBinding', currently just
-foreign calls and unboxed tuple/sum constructors).
+foreign calls, unboxed tuple/sum constructors and strict workers).
+See Note [Strict Worker Ids]
Historical Note: Note that eta expansion in CorePrep used to be very fragile
due to the "prediction" of CAFfyness that we used to make during tidying.
@@ -1533,14 +1534,27 @@ maybeSaturate fn expr n_args unsat_ticks
| hasNoBinding fn -- There's no binding
= return $ wrapLamBody (\body -> foldr mkTick body unsat_ticks) sat_expr
+ | mark_arity > 0 -- A strict worker. See Note [Strict Worker Ids]
+ , not applied_marks
+ = assertPpr
+ ( not (isJoinId fn)) -- See Note [Do not eta-expand join points]
+ ( ppr fn $$ text "expr:" <+> ppr expr $$ text "n_args:" <+> ppr n_args $$
+ text "marks:" <+> ppr (idCbvMarks_maybe fn) $$
+ text "join_arity" <+> ppr (idJoinArity fn)
+ ) $
+ return sat_expr
+
| otherwise
= assert (null unsat_ticks) $
return expr
where
- fn_arity = idArity fn
- excess_arity = fn_arity - n_args
- sat_expr = cpeEtaExpand excess_arity expr
-
+ mark_arity = idCbvMarkArity fn
+ fn_arity = idArity fn
+ excess_arity = (max fn_arity mark_arity) - n_args
+ sat_expr = cpeEtaExpand excess_arity expr
+ applied_marks = n_args >= (length . dropWhile (not . isMarkedCbv) . reverse . expectJust "maybeSaturate" $ (idCbvMarks_maybe fn))
+ -- For join points we never eta-expand (See Note [Do not eta-expand join points])
+ -- so we assert all arguments that need to be passed cbv are visible so that the backend can evalaute them if required..
{-
************************************************************************
* *
@@ -1620,13 +1634,19 @@ tryEtaReducePrep bndrs expr@(App _ _)
, not (any (`elemVarSet` fvs_remaining) bndrs)
, exprIsHNF remaining_expr -- Don't turn value into a non-value
-- else the behaviour with 'seq' changes
- = Just remaining_expr
+ =
+ -- pprTrace "prep-reduce" (
+ -- text "reduced:" <> ppr remaining_expr $$
+ -- ppr (remaining_args)
+ -- ) $
+ Just remaining_expr
where
(f, args) = collectArgs expr
remaining_expr = mkApps f remaining_args
fvs_remaining = exprFreeVars remaining_expr
(remaining_args, last_args) = splitAt n_remaining args
n_remaining = length args - length bndrs
+ n_remaining_vals = length $ filter isRuntimeArg remaining_args
ok bndr (Var arg) = bndr == arg
ok _ _ = False
@@ -1634,9 +1654,11 @@ tryEtaReducePrep bndrs expr@(App _ _)
-- We can't eta reduce something which must be saturated.
ok_to_eta_reduce (Var f) = not (hasNoBinding f) &&
not (isLinearType (idType f)) && -- Unsure why this is unsafe.
- (not (isJoinId f) || idJoinArity f <= n_remaining)
+ (not (isJoinId f) || idJoinArity f <= n_remaining) &&
-- Don't undersaturate join points.
-- See Note [Invariants on join points] in GHC.Core, and #20599
+ (idCbvMarkArity f <= n_remaining_vals)
+ -- Similar for StrictWorkerIds. See Note [Strict Worker Ids]
ok_to_eta_reduce _ = False -- Safe. ToDo: generalise
diff --git a/compiler/GHC/Driver/Config/StgToCmm.hs b/compiler/GHC/Driver/Config/StgToCmm.hs
index ae59e41fdf..ab64076a79 100644
--- a/compiler/GHC/Driver/Config/StgToCmm.hs
+++ b/compiler/GHC/Driver/Config/StgToCmm.hs
@@ -43,6 +43,7 @@ initStgToCmmConfig dflags mod = StgToCmmConfig
, stgToCmmPIE = gopt Opt_PIE dflags
, stgToCmmExtDynRefs = gopt Opt_ExternalDynamicRefs dflags
, stgToCmmDoBoundsCheck = gopt Opt_DoBoundsChecking dflags
+ , stgToCmmDoTagCheck = gopt Opt_DoTagInferenceChecks dflags
-- backend flags
, stgToCmmAllowBigArith = not ncg
, stgToCmmAllowQuotRemInstr = ncg && (x86ish || ppc)
diff --git a/compiler/GHC/Driver/Flags.hs b/compiler/GHC/Driver/Flags.hs
index e95b181743..759d8b48c0 100644
--- a/compiler/GHC/Driver/Flags.hs
+++ b/compiler/GHC/Driver/Flags.hs
@@ -92,7 +92,9 @@ data DumpFlag
| Opt_D_dump_prep
| Opt_D_dump_stg_from_core -- ^ Initial STG (CoreToStg output)
| Opt_D_dump_stg_unarised -- ^ STG after unarise
- | Opt_D_dump_stg_final -- ^ Final STG (after stg2stg)
+ | Opt_D_dump_stg_cg -- ^ STG (after stg2stg)
+ | Opt_D_dump_stg_tags -- ^ Result of tag inference analysis.
+ | Opt_D_dump_stg_final -- ^ Final STG (before cmm gen)
| Opt_D_dump_call_arity
| Opt_D_dump_exitify
| Opt_D_dump_stranal
@@ -224,6 +226,7 @@ data GeneralFlag
| Opt_WeightlessBlocklayout -- ^ Layout based on last instruction per block.
| Opt_CprAnal
| Opt_WorkerWrapper
+ | Opt_WorkerWrapperUnlift -- ^ Do W/W split for unlifting even if we won't unbox anything.
| Opt_SolveConstantDicts
| Opt_AlignmentSanitisation
| Opt_CatchNonexhaustiveCases
@@ -231,6 +234,9 @@ data GeneralFlag
| Opt_CoreConstantFolding
| Opt_FastPAPCalls -- #6084
+ -- Inference flags
+ | Opt_DoTagInferenceChecks
+
-- PreInlining is on by default. The option is there just to see how
-- bad things get if you turn it off!
| Opt_SimplPreInlining
@@ -443,6 +449,7 @@ optimisationFlags = EnumSet.fromList
, Opt_WeightlessBlocklayout
, Opt_CprAnal
, Opt_WorkerWrapper
+ , Opt_WorkerWrapperUnlift
, Opt_SolveConstantDicts
, Opt_CatchNonexhaustiveCases
, Opt_IgnoreAsserts
diff --git a/compiler/GHC/Driver/GenerateCgIPEStub.hs b/compiler/GHC/Driver/GenerateCgIPEStub.hs
index e1a751e762..96bf352e51 100644
--- a/compiler/GHC/Driver/GenerateCgIPEStub.hs
+++ b/compiler/GHC/Driver/GenerateCgIPEStub.hs
@@ -27,8 +27,10 @@ import GHC.Settings (Platform, platformUnregisterised)
import GHC.StgToCmm.Monad (getCmm, initC, runC, initFCodeState)
import GHC.StgToCmm.Prof (initInfoTableProv)
import GHC.StgToCmm.Types (CgInfos (..), ModuleLFInfos)
+import GHC.Stg.InferTags.TagSig (TagSig)
import GHC.Types.IPE (InfoTableProvMap (provInfoTables), IpeSourceLocation)
import GHC.Types.Name.Set (NonCaffySet)
+import GHC.Types.Name.Env (NameEnv)
import GHC.Types.Tickish (GenTickish (SourceNote))
import GHC.Unit.Types (Module)
import GHC.Utils.Misc
@@ -178,8 +180,8 @@ The find the tick:
remembered in a `Maybe`.
-}
-generateCgIPEStub :: HscEnv -> Module -> InfoTableProvMap -> Stream IO CmmGroupSRTs (NonCaffySet, ModuleLFInfos) -> Stream IO CmmGroupSRTs CgInfos
-generateCgIPEStub hsc_env this_mod denv s = do
+generateCgIPEStub :: HscEnv -> Module -> InfoTableProvMap -> NameEnv TagSig -> Stream IO CmmGroupSRTs (NonCaffySet, ModuleLFInfos) -> Stream IO CmmGroupSRTs CgInfos
+generateCgIPEStub hsc_env this_mod denv tag_sigs s = do
let dflags = hsc_dflags hsc_env
platform = targetPlatform dflags
fstate = initFCodeState platform
@@ -196,7 +198,7 @@ generateCgIPEStub hsc_env this_mod denv s = do
(_, ipeCmmGroupSRTs) <- liftIO $ cmmPipeline hsc_env (emptySRT this_mod) ipeCmmGroup
Stream.yield ipeCmmGroupSRTs
- return CgInfos {cgNonCafs = nonCaffySet, cgLFInfos = moduleLFInfos, cgIPEStub = ipeStub}
+ return CgInfos {cgNonCafs = nonCaffySet, cgLFInfos = moduleLFInfos, cgIPEStub = ipeStub, cgTagSigs = tag_sigs}
where
collect :: Platform -> [(Label, CmmInfoTable, Maybe IpeSourceLocation)] -> CmmGroupSRTs -> IO ([(Label, CmmInfoTable, Maybe IpeSourceLocation)], CmmGroupSRTs)
collect platform acc cmmGroupSRTs = do
diff --git a/compiler/GHC/Driver/Main.hs b/compiler/GHC/Driver/Main.hs
index 4c8e494321..b07d566a5b 100644
--- a/compiler/GHC/Driver/Main.hs
+++ b/compiler/GHC/Driver/Main.hs
@@ -246,6 +246,8 @@ import GHC.Driver.GenerateCgIPEStub (generateCgIPEStub)
import Data.List.NonEmpty (NonEmpty ((:|)))
+import GHC.Stg.InferTags
+
{- **********************************************************************
%* *
Initialisation
@@ -1801,7 +1803,15 @@ doCodeGen hsc_env this_mod denv data_tycons
tmpfs = hsc_tmpfs hsc_env
platform = targetPlatform dflags
- putDumpFileMaybe logger Opt_D_dump_stg_final "Final STG:" FormatSTG (pprGenStgTopBindings (initStgPprOpts dflags) stg_binds_w_fvs)
+ putDumpFileMaybe logger Opt_D_dump_stg_cg "CodeGenInput STG:" FormatSTG
+ (pprGenStgTopBindings (initStgPprOpts dflags) stg_binds_w_fvs)
+
+ -- Do tag inference on optimized STG
+ (!stg_post_infer,export_tag_info) <-
+ {-# SCC "StgTagFields" #-} inferTags dflags logger this_mod stg_binds_w_fvs
+
+ putDumpFileMaybe logger Opt_D_dump_stg_final "Final STG:" FormatSTG
+ (pprGenStgTopBindings (initStgPprOpts dflags) stg_post_infer)
let stg_to_cmm dflags mod = case stgToCmmHook hooks of
Nothing -> StgToCmm.codeGen logger tmpfs (initStgToCmmConfig dflags mod)
@@ -1809,8 +1819,8 @@ doCodeGen hsc_env this_mod denv data_tycons
let cmm_stream :: Stream IO CmmGroup ModuleLFInfos
-- See Note [Forcing of stg_binds]
- cmm_stream = stg_binds_w_fvs `seqList` {-# SCC "StgToCmm" #-}
- stg_to_cmm dflags this_mod denv data_tycons cost_centre_info stg_binds_w_fvs hpc_info
+ cmm_stream = stg_post_infer `seqList` {-# SCC "StgToCmm" #-}
+ stg_to_cmm dflags this_mod denv data_tycons cost_centre_info stg_post_infer hpc_info
-- codegen consumes a stream of CmmGroup, and produces a new
-- stream of CmmGroup (not necessarily synchronised: one
@@ -1839,7 +1849,7 @@ doCodeGen hsc_env this_mod denv data_tycons
putDumpFileMaybe logger Opt_D_dump_cmm "Output Cmm" FormatCMM (pdoc platform a)
return a
- return $ Stream.mapM dump2 $ generateCgIPEStub hsc_env this_mod denv pipeline_stream
+ return $ Stream.mapM dump2 $ generateCgIPEStub hsc_env this_mod denv export_tag_info pipeline_stream
myCoreToStgExpr :: Logger -> DynFlags -> InteractiveContext
-> Bool
diff --git a/compiler/GHC/Driver/Session.hs b/compiler/GHC/Driver/Session.hs
index 1f5626cec8..91bf2adf39 100644
--- a/compiler/GHC/Driver/Session.hs
+++ b/compiler/GHC/Driver/Session.hs
@@ -1611,7 +1611,7 @@ setSafeHaskell s = updM f
safeM <- combineSafeFlags sf s
case s of
Sf_Safe -> return $ dfs { safeHaskell = safeM, safeInfer = False }
- -- leave safe inferrence on in Trustworthy mode so we can warn
+ -- leave safe inference on in Trustworthy mode so we can warn
-- if it could have been inferred safe.
Sf_Trustworthy -> do
l <- getCurLoc
@@ -2485,9 +2485,13 @@ dynamic_flags_deps = [
(setDumpFlag Opt_D_dump_stg_unarised)
, make_ord_flag defGhcFlag "ddump-stg-final"
(setDumpFlag Opt_D_dump_stg_final)
+ , make_ord_flag defGhcFlag "ddump-stg-cg"
+ (setDumpFlag Opt_D_dump_stg_cg)
, make_dep_flag defGhcFlag "ddump-stg"
(setDumpFlag Opt_D_dump_stg_from_core)
"Use `-ddump-stg-from-core` or `-ddump-stg-final` instead"
+ , make_ord_flag defGhcFlag "ddump-stg-tags"
+ (setDumpFlag Opt_D_dump_stg_tags)
, make_ord_flag defGhcFlag "ddump-call-arity"
(setDumpFlag Opt_D_dump_call_arity)
, make_ord_flag defGhcFlag "ddump-exitify"
@@ -2580,6 +2584,8 @@ dynamic_flags_deps = [
(NoArg (setGeneralFlag Opt_DoAsmLinting))
, make_ord_flag defGhcFlag "dannot-lint"
(NoArg (setGeneralFlag Opt_DoAnnotationLinting))
+ , make_ord_flag defGhcFlag "dtag-inference-checks"
+ (NoArg (setGeneralFlag Opt_DoTagInferenceChecks))
, make_ord_flag defGhcFlag "dshow-passes"
(NoArg $ forceRecompile >> (setVerbosity $ Just 2))
, make_ord_flag defGhcFlag "dfaststring-stats"
@@ -3434,6 +3440,7 @@ fFlagsDeps = [
flagSpec "unbox-strict-fields" Opt_UnboxStrictFields,
flagSpec "version-macros" Opt_VersionMacros,
flagSpec "worker-wrapper" Opt_WorkerWrapper,
+ flagSpec "worker-wrapper-cbv" Opt_WorkerWrapperUnlift,
flagSpec "solve-constant-dicts" Opt_SolveConstantDicts,
flagSpec "catch-nonexhaustive-cases" Opt_CatchNonexhaustiveCases,
flagSpec "alignment-sanitisation" Opt_AlignmentSanitisation,
diff --git a/compiler/GHC/HsToCore.hs b/compiler/GHC/HsToCore.hs
index c32f03eb97..0a210e0871 100644
--- a/compiler/GHC/HsToCore.hs
+++ b/compiler/GHC/HsToCore.hs
@@ -133,7 +133,8 @@ deSugar hsc_env
tcg_insts = insts,
tcg_fam_insts = fam_insts,
tcg_hpc = other_hpc_info,
- tcg_complete_matches = complete_matches
+ tcg_complete_matches = complete_matches,
+ tcg_self_boot = self_boot
})
= do { let dflags = hsc_dflags hsc_env
@@ -242,6 +243,7 @@ deSugar hsc_env
mg_fam_insts = fam_insts,
mg_inst_env = inst_env,
mg_fam_inst_env = fam_inst_env,
+ mg_boot_exports = bootExports self_boot,
mg_patsyns = patsyns,
mg_rules = ds_rules_for_imps,
mg_binds = ds_binds,
diff --git a/compiler/GHC/Iface/Make.hs b/compiler/GHC/Iface/Make.hs
index 28ddcac422..c0471cd413 100644
--- a/compiler/GHC/Iface/Make.hs
+++ b/compiler/GHC/Iface/Make.hs
@@ -159,16 +159,19 @@ mkFullIface hsc_env partial_iface mb_cg_infos = do
updateDecl :: [IfaceDecl] -> Maybe CgInfos -> [IfaceDecl]
updateDecl decls Nothing = decls
-updateDecl decls (Just CgInfos{ cgNonCafs = NonCaffySet non_cafs, cgLFInfos = lf_infos }) = map update_decl decls
+updateDecl decls (Just CgInfos{ cgNonCafs = NonCaffySet non_cafs, cgLFInfos = lf_infos, cgTagSigs = tag_sigs })
+ = map update_decl decls
where
update_decl (IfaceId nm ty details infos)
| let not_caffy = elemNameSet nm non_cafs
, let mb_lf_info = lookupNameEnv lf_infos nm
- , warnPprTrace (isNothing mb_lf_info) "Name without LFInfo" (ppr nm) True
+ , let sig = lookupNameEnv tag_sigs nm
+ , warnPprTrace (isNothing mb_lf_info) "updateDecl" (text "Name without LFInfo:" <+> ppr nm) True
-- Only allocate a new IfaceId if we're going to update the infos
- , isJust mb_lf_info || not_caffy
+ , isJust mb_lf_info || not_caffy || isJust sig
= IfaceId nm ty details $
- (if not_caffy then (HsNoCafRefs :) else id)
+ (if not_caffy then (HsNoCafRefs :) else id) $
+ (if isJust sig then (HsTagSig (fromJust sig):) else id) $
(case mb_lf_info of
Nothing -> infos -- LFInfos not available when building .cmm files
Just lf_info -> HsLFInfo (toIfaceLFInfo nm lf_info) : infos)
diff --git a/compiler/GHC/Iface/Syntax.hs b/compiler/GHC/Iface/Syntax.hs
index 39f0bd5336..c735a2f94f 100644
--- a/compiler/GHC/Iface/Syntax.hs
+++ b/compiler/GHC/Iface/Syntax.hs
@@ -67,6 +67,7 @@ import GHC.Types.Var( VarBndr(..), binderVar, tyVarSpecToBinders )
import GHC.Core.TyCon ( Role (..), Injectivity(..), tyConBndrVisArgFlag )
import GHC.Core.DataCon (SrcStrictness(..), SrcUnpackedness(..))
import GHC.Builtin.Types ( constraintKindTyConName )
+import GHC.Stg.InferTags.TagSig
import GHC.Utils.Lexeme (isLexSym)
import GHC.Utils.Fingerprint
@@ -352,6 +353,7 @@ data IfaceInfoItem
| HsNoCafRefs
| HsLevity -- Present <=> never representation-polymorphic
| HsLFInfo IfaceLFInfo
+ | HsTagSig TagSig
-- NB: Specialisations and rules come in separately and are
-- only later attached to the Id. Partial reason: some are orphans.
@@ -380,6 +382,7 @@ data IfaceUnfolding
data IfaceIdDetails
= IfVanillaId
+ | IfStrictWorkerId [CbvMark]
| IfRecSelId (Either IfaceTyCon IfaceDecl) Bool
| IfDFunId
@@ -1459,6 +1462,7 @@ instance Outputable IfaceConAlt where
------------------
instance Outputable IfaceIdDetails where
ppr IfVanillaId = Outputable.empty
+ ppr (IfStrictWorkerId dmd) = text "StrWork" <> parens (ppr dmd)
ppr (IfRecSelId tc b) = text "RecSel" <+> ppr tc
<+> if b
then text "<naughty>"
@@ -1476,6 +1480,7 @@ instance Outputable IfaceInfoItem where
ppr HsNoCafRefs = text "HasNoCafRefs"
ppr HsLevity = text "Never levity-polymorphic"
ppr (HsLFInfo lf_info) = text "LambdaFormInfo:" <+> ppr lf_info
+ ppr (HsTagSig tag_sig) = text "TagSig:" <+> ppr tag_sig
instance Outputable IfaceJoinInfo where
ppr IfaceNotJoinPoint = empty
@@ -2223,12 +2228,14 @@ instance Binary IfaceAnnotation where
instance Binary IfaceIdDetails where
put_ bh IfVanillaId = putByte bh 0
put_ bh (IfRecSelId a b) = putByte bh 1 >> put_ bh a >> put_ bh b
- put_ bh IfDFunId = putByte bh 2
+ put_ bh (IfStrictWorkerId dmds) = putByte bh 2 >> put_ bh dmds
+ put_ bh IfDFunId = putByte bh 3
get bh = do
h <- getByte bh
case h of
0 -> return IfVanillaId
1 -> do { a <- get bh; b <- get bh; return (IfRecSelId a b) }
+ 2 -> do { dmds <- get bh; return (IfStrictWorkerId dmds) }
_ -> return IfDFunId
instance Binary IfaceInfoItem where
@@ -2240,6 +2247,7 @@ instance Binary IfaceInfoItem where
put_ bh HsLevity = putByte bh 5
put_ bh (HsCprSig cpr) = putByte bh 6 >> put_ bh cpr
put_ bh (HsLFInfo lf_info) = putByte bh 7 >> put_ bh lf_info
+ put_ bh (HsTagSig sig) = putByte bh 8 >> put_ bh sig
get bh = do
h <- getByte bh
@@ -2253,7 +2261,8 @@ instance Binary IfaceInfoItem where
4 -> return HsNoCafRefs
5 -> return HsLevity
6 -> HsCprSig <$> get bh
- _ -> HsLFInfo <$> get bh
+ 7 -> HsLFInfo <$> get bh
+ _ -> HsTagSig <$> get bh
instance Binary IfaceUnfolding where
put_ bh (IfCoreUnfold s e) = do
@@ -2586,6 +2595,7 @@ instance NFData IfaceBang where
instance NFData IfaceIdDetails where
rnf = \case
IfVanillaId -> ()
+ IfStrictWorkerId dmds -> dmds `seqList` ()
IfRecSelId (Left tycon) b -> rnf tycon `seq` rnf b
IfRecSelId (Right decl) b -> rnf decl `seq` rnf b
IfDFunId -> ()
@@ -2600,6 +2610,7 @@ instance NFData IfaceInfoItem where
HsLevity -> ()
HsCprSig cpr -> cpr `seq` ()
HsLFInfo lf_info -> lf_info `seq` () -- TODO: seq further?
+ HsTagSig sig -> sig `seq` ()
instance NFData IfaceUnfolding where
rnf = \case
diff --git a/compiler/GHC/Iface/Tidy.hs b/compiler/GHC/Iface/Tidy.hs
index b1a079205e..c453cc5336 100644
--- a/compiler/GHC/Iface/Tidy.hs
+++ b/compiler/GHC/Iface/Tidy.hs
@@ -365,6 +365,7 @@ tidyProgram hsc_env (ModGuts { mg_module = mod
, mg_foreign_files = foreign_files
, mg_hpc_info = hpc_info
, mg_modBreaks = modBreaks
+ , mg_boot_exports = boot_exports
})
= Err.withTiming logger
@@ -384,7 +385,7 @@ tidyProgram hsc_env (ModGuts { mg_module = mod
; let uf_opts = unfoldingOpts dflags
; (tidy_env, tidy_binds)
- <- tidyTopBinds uf_opts unfold_env tidy_occ_env trimmed_binds
+ <- tidyTopBinds uf_opts unfold_env boot_exports tidy_occ_env trimmed_binds
-- See Note [Grand plan for static forms] in GHC.Iface.Tidy.StaticPtrTable.
; (spt_entries, tidy_binds') <-
@@ -1180,39 +1181,41 @@ tidyTopName mod name_cache maybe_ref occ_env id
tidyTopBinds :: UnfoldingOpts
-> UnfoldEnv
+ -> NameSet
-> TidyOccEnv
-> CoreProgram
-> IO (TidyEnv, CoreProgram)
-tidyTopBinds uf_opts unfold_env init_occ_env binds
+tidyTopBinds uf_opts unfold_env boot_exports init_occ_env binds
= do let result = tidy init_env binds
seqBinds (snd result) `seq` return result
-- This seqBinds avoids a spike in space usage (see #13564)
where
init_env = (init_occ_env, emptyVarEnv)
- tidy = mapAccumL (tidyTopBind uf_opts unfold_env)
+ tidy = mapAccumL (tidyTopBind uf_opts unfold_env boot_exports)
------------------------
tidyTopBind :: UnfoldingOpts
-> UnfoldEnv
+ -> NameSet
-> TidyEnv
-> CoreBind
-> (TidyEnv, CoreBind)
-tidyTopBind uf_opts unfold_env
+tidyTopBind uf_opts unfold_env boot_exports
(occ_env,subst1) (NonRec bndr rhs)
= (tidy_env2, NonRec bndr' rhs')
where
Just (name',show_unfold) = lookupVarEnv unfold_env bndr
- (bndr', rhs') = tidyTopPair uf_opts show_unfold tidy_env2 name' (bndr, rhs)
+ (bndr', rhs') = tidyTopPair uf_opts show_unfold boot_exports tidy_env2 name' (bndr, rhs)
subst2 = extendVarEnv subst1 bndr bndr'
tidy_env2 = (occ_env, subst2)
-tidyTopBind uf_opts unfold_env (occ_env, subst1) (Rec prs)
+tidyTopBind uf_opts unfold_env boot_exports (occ_env, subst1) (Rec prs)
= (tidy_env2, Rec prs')
where
- prs' = [ tidyTopPair uf_opts show_unfold tidy_env2 name' (id,rhs)
+ prs' = [ tidyTopPair uf_opts show_unfold boot_exports tidy_env2 name' (id,rhs)
| (id,rhs) <- prs,
let (name',show_unfold) =
expectJust "tidyTopBind" $ lookupVarEnv unfold_env id
@@ -1226,6 +1229,7 @@ tidyTopBind uf_opts unfold_env (occ_env, subst1) (Rec prs)
-----------------------------------------------------------
tidyTopPair :: UnfoldingOpts
-> Bool -- show unfolding
+ -> NameSet
-> TidyEnv -- The TidyEnv is used to tidy the IdInfo
-- It is knot-tied: don't look at it!
-> Name -- New name
@@ -1237,14 +1241,17 @@ tidyTopPair :: UnfoldingOpts
-- group, a variable late in the group might be mentioned
-- in the IdInfo of one early in the group
-tidyTopPair uf_opts show_unfold rhs_tidy_env name' (bndr, rhs)
- = (bndr1, rhs1)
+tidyTopPair uf_opts show_unfold boot_exports rhs_tidy_env name' (bndr, rhs)
+ = -- pprTrace "tidyTop" (ppr name' <+> ppr details <+> ppr rhs) $
+ (bndr1, rhs1)
+
where
+ !cbv_bndr = tidyCbvInfoTop boot_exports bndr rhs
bndr1 = mkGlobalId details name' ty' idinfo'
- details = idDetails bndr -- Preserve the IdDetails
- ty' = tidyTopType (idType bndr)
+ details = idDetails cbv_bndr -- Preserve the IdDetails
+ ty' = tidyTopType (idType cbv_bndr)
rhs1 = tidyExpr rhs_tidy_env rhs
- idinfo' = tidyTopIdInfo uf_opts rhs_tidy_env name' rhs rhs1 (idInfo bndr)
+ idinfo' = tidyTopIdInfo uf_opts rhs_tidy_env name' rhs rhs1 (idInfo cbv_bndr)
show_unfold
-- tidyTopIdInfo creates the final IdInfo for top-level
diff --git a/compiler/GHC/IfaceToCore.hs b/compiler/GHC/IfaceToCore.hs
index 3a11c30e79..2982df668d 100644
--- a/compiler/GHC/IfaceToCore.hs
+++ b/compiler/GHC/IfaceToCore.hs
@@ -1618,6 +1618,7 @@ tcIfaceDataAlt mult con inst_tys arg_strs rhs
tcIdDetails :: Type -> IfaceIdDetails -> IfL IdDetails
tcIdDetails _ IfVanillaId = return VanillaId
+tcIdDetails _ (IfStrictWorkerId dmds) = return $ StrictWorkerId dmds
tcIdDetails ty IfDFunId
= return (DFunId (isNewTyCon (classTyCon cls)))
where
@@ -1665,6 +1666,9 @@ tcIdInfo ignore_prags toplvl name ty info = do
lf_info <- tcLFInfo lf_info
return (info `setLFInfo` lf_info)
+ tcPrag info (HsTagSig sig) = do
+ return (info `setTagSig` sig)
+
-- The next two are lazy, so they don't transitively suck stuff in
tcPrag info (HsUnfold lb if_unf)
= do { unf <- tcUnfolding toplvl name ty info if_unf
diff --git a/compiler/GHC/Prelude.hs b/compiler/GHC/Prelude.hs
index f61dad9517..6a810ac200 100644
--- a/compiler/GHC/Prelude.hs
+++ b/compiler/GHC/Prelude.hs
@@ -1,5 +1,6 @@
{-# LANGUAGE CPP #-}
{-# OPTIONS_HADDOCK not-home #-}
+{-# OPTIONS_GHC -O2 #-} -- See Note [-O2 Prelude]
-- | Custom GHC "Prelude"
--
@@ -18,6 +19,17 @@ module GHC.Prelude
) where
+{- Note [-O2 Prelude]
+~~~~~~~~~~~~~~~~~~~~~
+There is some code in GHC that is *always* compiled with -O[2] because
+of it's impact on compile time performance. Some of this code might depend
+on the definitions like shiftL being defined here being performant.
+
+So we always compile this module with -O2. It's (currently) tiny so I
+have little reason to suspect this impacts overall GHC compile times
+negatively.
+
+-}
-- We export the 'Semigroup' class but w/o the (<>) operator to avoid
-- clashing with the (Outputable.<>) operator which is heavily used
-- through GHC's code-base.
diff --git a/compiler/GHC/Stg/FVs.hs b/compiler/GHC/Stg/FVs.hs
index 00b4bdcc0b..636ec5eb61 100644
--- a/compiler/GHC/Stg/FVs.hs
+++ b/compiler/GHC/Stg/FVs.hs
@@ -47,6 +47,7 @@ module GHC.Stg.FVs (
import GHC.Prelude hiding (mod)
import GHC.Stg.Syntax
+import GHC.Stg.Utils (bindersOf)
import GHC.Types.Id
import GHC.Types.Name (Name, nameIsLocalOrFrom)
import GHC.Types.Tickish ( GenTickish(Breakpoint) )
diff --git a/compiler/GHC/Stg/InferTags.hs b/compiler/GHC/Stg/InferTags.hs
new file mode 100644
index 0000000000..477650da3c
--- /dev/null
+++ b/compiler/GHC/Stg/InferTags.hs
@@ -0,0 +1,631 @@
+{-# LANGUAGE TypeFamilies, DataKinds, GADTs, FlexibleInstances #-}
+{-# LANGUAGE ConstraintKinds #-}
+{-# LANGUAGE FlexibleContexts #-}
+{-# LANGUAGE UndecidableInstances #-}
+ -- To permit: type instance XLet 'InferTaggedBinders = XLet 'SomePass
+
+{-# OPTIONS_GHC -Wname-shadowing #-}
+module GHC.Stg.InferTags ( inferTags ) where
+
+import GHC.Prelude hiding (id)
+
+import GHC.Core.DataCon
+import GHC.Core.Type
+import GHC.Types.Id
+import GHC.Types.Id.Info (tagSigInfo)
+import GHC.Types.Name
+import GHC.Stg.Syntax
+import GHC.Types.Basic ( CbvMark (..) )
+import GHC.Types.Unique.Supply (mkSplitUniqSupply)
+import GHC.Types.RepType (dataConRuntimeRepStrictness)
+import GHC.Core (AltCon(..))
+import Data.List (mapAccumL)
+import GHC.Utils.Outputable
+import GHC.Utils.Misc( zipWithEqual, zipEqual, notNull )
+
+import GHC.Stg.InferTags.Types
+import GHC.Stg.InferTags.Rewrite (rewriteTopBinds)
+import Data.Maybe
+import GHC.Types.Name.Env (mkNameEnv, NameEnv)
+import GHC.Driver.Config.Stg.Ppr
+import GHC.Driver.Session
+import GHC.Utils.Logger
+import qualified GHC.Unit.Types
+
+{- Note [Tag Inference]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+The purpose of this pass is to attach to every binder a flag
+to indicate whether or not it is "properly tagged". A binder
+is properly tagged if it is guaranteed:
+ - to point to a heap-allocated *value*
+ - and to have the tag of the value encoded in the pointer
+
+For example
+ let x = Just y in ...
+
+Here x will be properly tagged: it will point to the heap-allocated
+values for (Just y), and the tag-bits of the pointer will encode
+the tag for Just so there is no need to re-enter the closure or even
+check for the presence of tag bits. The impacts of this can be very large.
+
+For containers the reduction in runtimes with this optimization was as follows:
+
+intmap-benchmarks: 89.30%
+intset-benchmarks: 90.87%
+map-benchmarks: 88.00%
+sequence-benchmarks: 99.84%
+set-benchmarks: 85.00%
+set-operations-intmap:88.64%
+set-operations-map: 74.23%
+set-operations-set: 76.50%
+lookupge-intmap: 89.57%
+lookupge-map: 70.95%
+
+With nofib being ~0.3% faster as well.
+
+See Note [Tag inference passes] for how we proceed to generate and use this information.
+
+Note [Strict Field Invariant]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+As part of tag inference we introduce the Strict Field Invariant.
+Which consists of us saying that:
+
+* Pointers in strict fields must (a) point directly to the value, and
+ (b) must be properly tagged.
+
+For example, given
+ data T = MkT ![Int]
+
+the Strict Field Invariant guarantees that the first field of any `MkT` constructor
+will either point directly to nil, or directly to a cons cell;
+and will be tagged with `001` or `010` respectively.
+It will never point to a thunk, nor will it be tagged `000` (meaning "might be a thunk").
+NB: Note that the proper tag for some objects is indeed `000`. Currently this is the case for PAPs.
+
+Why do we care? Because if we have code like:
+
+case strictPair of
+ SP x y ->
+ case x of ...
+
+It allows us to safely omit the code to enter x and the check
+for the presence of a tag that goes along with it.
+However we might still branch on the tag as usual.
+See Note [Tag Inference] for how much impact this can have for
+some code.
+
+This is enforced by the code GHC.Stg.InferTags.Rewrite
+where we:
+
+* Look at all constructor allocations.
+* Check if arguments to their strict fields are known to be properly tagged
+* If not we convert `StrictJust x` into `case x of x' -> StrictJust x'`
+
+This is usually very beneficial but can cause regressions in rare edge cases where
+we fail to proof that x is properly tagged, or where it simply isn't.
+See Note [How untagged pointers can end up in strict fields] for how the second case
+can arise.
+
+For a full example of the worst case consider this code:
+
+foo ... = ...
+ let c = StrictJust x
+ in ...
+
+Here we would rewrite `let c = StrictJust x` into `let c = case x of x' -> StrictJust x'`
+However that is horrible! We end up allocating a thunk for `c` first, which only when
+evaluated will allocate the constructor.
+
+So we do our best to establish that `x` is already tagged (which it almost always is)
+to avoid this cost. In my benchmarks I haven't seen any cases where this causes regressions.
+
+Note [How untagged pointers can end up in strict fields]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Consider
+ data Set a = Tip | Bin !a (Set a) (Set a)
+
+We make a wrapper for Bin that evaluates its arguments
+ $WBin x a b = case x of xv -> Bin xv a b
+Here `xv` will always be evaluated and properly tagged, just as the
+Strict Field Invariant requires.
+
+But alas the Simplifier can destroy the invariant: see #15696.
+We start with
+ thk = f ()
+ g x = ...(case thk of xv -> Bin xv Tip Tip)...
+
+So far so good; the argument to Bin (which is strict) is evaluated.
+Now we do float-out. And in doing so we do a reverse binder-swap (see
+Note [Binder-swap during float-out] in SetLevels) thus
+
+ g x = ...(case thk of xv -> Bin thk Nil Nil)...
+
+The goal of the reverse binder-swap is to allow more floating -- and
+indeed it does! We float the Bin to top level:
+
+ lvl = Bin thk Tip Tip
+ g x = ...(case thk of xv -> lvl)...
+
+Now you can see that the argument of Bin, namely thk, points to the
+thunk, not to the value as it did before.
+
+In short, although it may be rare, the output of optimisation passes
+cannot guarantee to obey the Strict Field Invariant. For this reason
+we run tag inference. See Note [Tag inference passes].
+
+Note [Tag inference passes]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Tag inference proceeds in two passes:
+* The first pass is an analysis to compute which binders are properly tagged.
+ The result is then attached to /binders/.
+ This is implemented by `inferTagsAnal` in GHC.Stg.InferTags
+* The second pass walks over the AST checking if the Strict Field Invariant is upheld.
+ See Note [Strict Field Invariant].
+ If required this pass modifies the program to uphold this invariant.
+ Tag information is also moved from /binders/ to /occurrences/ during this pass.
+ This is done by `GHC.Stg.InferTags.Rewrite (rewriteTopBinds)`.
+* Finally the code generation uses this information to skip the thunk check when branching on
+ values. This is done by `cgExpr`/`cgCase` in the backend.
+
+Last but not least we also export the tag sigs of top level bindings to allow this optimization
+ to work across module boundaries.
+
+Note [TagInfo of functions]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+The purpose of tag inference is really to figure out when we don't have to enter
+value closures. There the meaning of the tag is fairly obvious.
+For functions we never make use of the tag info so we have two choices:
+* Treat them as TagDunno
+* Treat them as TagProper (as they *are* tagged with their arity) and be really
+ careful to make sure we still enter them when needed.
+As it makes little difference for runtime performance I've treated functions as TagDunno in a few places where
+it made the code simpler. But besides implementation complexity there isn't any reason
+why we couldn't be more rigourous in dealing with functions.
+
+Note [Tag inference debugging]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+There is a flag -dtag-inference-checks which inserts various
+compile/runtime checks in order to ensure the Strict Field Invariant
+holds. It should cover all places
+where tags matter and disable optimizations which interfere with checking
+the invariant like generation of AP-Thunks.
+
+Note [Polymorphic StgPass for inferTagExpr]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+In order to reach a fixpoint we sometimes have to re-analyse an expression
+multiple times. But after the initial run the Ast will be parameterized by
+a different StgPass! To handle this a large part of the analysis is polymorphic
+over the exact StgPass we are using. Which allows us to run the analysis on
+the output of itself.
+
+-}
+
+{- *********************************************************************
+* *
+ Tag inference pass
+* *
+********************************************************************* -}
+
+-- doCodeGen :: HscEnv -> Module -> InfoTableProvMap -> [TyCon]
+-- -> CollectedCCs
+-- -> [CgStgTopBinding] -- ^ Bindings come already annotated with fvs
+-- -> HpcInfo
+-- -> IO (Stream IO CmmGroupSRTs CgInfos)
+-- -- Note we produce a 'Stream' of CmmGroups, so that the
+-- -- backend can be run incrementally. Otherwise it generates all
+-- -- the C-- up front, which has a significant space cost.
+inferTags :: DynFlags -> Logger -> (GHC.Unit.Types.Module) -> [CgStgTopBinding] -> IO ([TgStgTopBinding], NameEnv TagSig)
+inferTags dflags logger this_mod stg_binds = do
+
+ -- Annotate binders with tag information.
+ let (!stg_binds_w_tags) = {-# SCC "StgTagFields" #-}
+ inferTagsAnal stg_binds
+ putDumpFileMaybe logger Opt_D_dump_stg_tags "CodeGenAnal STG:" FormatSTG (pprGenStgTopBindings (initStgPprOpts dflags) stg_binds_w_tags)
+
+ let export_tag_info = collectExportInfo stg_binds_w_tags
+
+ -- Rewrite STG to uphold the strict field invariant
+ us_t <- mkSplitUniqSupply 't'
+ let rewritten_binds = {-# SCC "StgTagRewrite" #-} rewriteTopBinds this_mod us_t stg_binds_w_tags :: [TgStgTopBinding]
+
+ return (rewritten_binds,export_tag_info)
+
+{- *********************************************************************
+* *
+ Main inference algorithm
+* *
+********************************************************************* -}
+
+type OutputableInferPass p = (Outputable (TagEnv p)
+ , Outputable (GenStgExpr p)
+ , Outputable (BinderP p)
+ , Outputable (GenStgRhs p))
+
+-- | This constraint encodes the fact that no matter what pass
+-- we use the Let/Closure extension points are the same as these for
+-- 'InferTaggedBinders.
+type InferExtEq i = ( XLet i ~ XLet 'InferTaggedBinders
+ , XLetNoEscape i ~ XLetNoEscape 'InferTaggedBinders
+ , XRhsClosure i ~ XRhsClosure 'InferTaggedBinders)
+
+inferTagsAnal :: [GenStgTopBinding 'CodeGen] -> [GenStgTopBinding 'InferTaggedBinders]
+inferTagsAnal binds =
+ -- pprTrace "Binds" (pprGenStgTopBindings shortStgPprOpts $ binds) $
+ snd (mapAccumL inferTagTopBind initEnv binds)
+
+-----------------------
+inferTagTopBind :: TagEnv 'CodeGen -> GenStgTopBinding 'CodeGen
+ -> (TagEnv 'CodeGen, GenStgTopBinding 'InferTaggedBinders)
+inferTagTopBind env (StgTopStringLit id bs)
+ = (env, StgTopStringLit id bs)
+inferTagTopBind env (StgTopLifted bind)
+ = (env', StgTopLifted bind')
+ where
+ (env', bind') = inferTagBind env bind
+
+
+-- Why is this polymorphic over the StgPass? See Note [Polymorphic StgPass for inferTagExpr]
+-----------------------
+inferTagExpr :: forall p. (OutputableInferPass p, InferExtEq p)
+ => TagEnv p -> GenStgExpr p -> (TagInfo, GenStgExpr 'InferTaggedBinders)
+inferTagExpr env (StgApp fun args)
+ = --pprTrace "inferTagExpr1"
+ -- (ppr fun <+> ppr args $$ ppr info $$
+ -- text "deadEndInfo:" <> ppr (isDeadEndId fun, idArity fun, length args)
+ -- )
+ (info, StgApp fun args)
+ where
+ !fun_arity = idArity fun
+ info | fun_arity == 0 -- Unknown arity => Thunk or unknown call
+ = TagDunno
+
+ | isDeadEndId fun
+ , fun_arity == length args -- Implies we will simply call the function.
+ = TagTagged -- See Note [Bottom functions are TagTagged]
+
+ | Just (TagSig res_info) <- tagSigInfo (idInfo fun)
+ , fun_arity == length args -- Saturated
+ = res_info
+
+ | Just (TagSig res_info) <- lookupSig env fun
+ , fun_arity == length args -- Saturated
+ = res_info
+
+ | otherwise
+ = --pprTrace "inferAppUnknown" (ppr fun) $
+ TagDunno
+-- TODO:
+-- If we have something like:
+-- let x = thunk in
+-- f g = case g of g' -> (# x, g' #)
+-- then we *do* know that g' will be properly tagged,
+-- so we should return TagTagged [TagDunno,TagProper] but currently we infer
+-- TagTagged [TagDunno,TagDunno] because of the unknown arity case in inferTagExpr.
+-- Seems not to matter much but should be changed eventually.
+
+inferTagExpr env (StgConApp con cn args tys)
+ = (inferConTag env con args, StgConApp con cn args tys)
+
+inferTagExpr _ (StgLit l)
+ = (TagTagged, StgLit l)
+
+inferTagExpr env (StgTick tick body)
+ = (info, StgTick tick body')
+ where
+ (info, body') = inferTagExpr env body
+
+inferTagExpr _ (StgOpApp op args ty)
+ = -- Do any primops guarantee to return a properly tagged value?
+ -- I think not. Ditto foreign calls.
+ (TagDunno, StgOpApp op args ty)
+
+inferTagExpr env (StgLet ext bind body)
+ = (info, StgLet ext bind' body')
+ where
+ (env', bind') = inferTagBind env bind
+ (info, body') = inferTagExpr env' body
+
+inferTagExpr env (StgLetNoEscape ext bind body)
+ = (info, StgLetNoEscape ext bind' body')
+ where
+ (env', bind') = inferTagBind env bind
+ (info, body') = inferTagExpr env' body
+
+inferTagExpr in_env (StgCase scrut bndr ty alts)
+ -- Unboxed tuples get their info from the expression we scrutinise if any
+ | [(DataAlt con, bndrs, rhs)] <- alts
+ , isUnboxedTupleDataCon con
+ , Just infos <- scrut_infos bndrs
+ , let bndrs' = zipWithEqual "inferTagExpr" mk_bndr bndrs infos
+ mk_bndr :: BinderP p -> TagInfo -> (Id, TagSig)
+ mk_bndr tup_bndr tup_info =
+ -- pprTrace "mk_ubx_bndr_info" ( ppr bndr <+> ppr info ) $
+ (getBinderId in_env tup_bndr, TagSig tup_info)
+ -- no case binder in alt_env here, unboxed tuple binders are dead after unarise
+ alt_env = extendSigEnv in_env bndrs'
+ (info, rhs') = inferTagExpr alt_env rhs
+ =
+ -- pprTrace "inferCase1" (
+ -- text "scrut:" <> ppr scrut $$
+ -- text "bndr:" <> ppr bndr $$
+ -- text "infos" <> ppr infos $$
+ -- text "out_bndrs" <> ppr bndrs') $
+ (info, StgCase scrut' (noSig in_env bndr) ty [(DataAlt con, bndrs', rhs')])
+
+ | null alts -- Empty case, but I might just be paranoid.
+ = -- pprTrace "inferCase2" empty $
+ (TagDunno, StgCase scrut' bndr' ty [])
+ -- More than one alternative OR non-TagTuple single alternative.
+ | otherwise
+ =
+ let
+ case_env = extendSigEnv in_env [bndr']
+
+ (infos, alts')
+ = unzip [ (info, (con, bndrs', rhs'))
+ | (con, bndrs, rhs) <- alts
+ , let (alt_env,bndrs') = addAltBndrInfo case_env con bndrs
+ (info, rhs') = inferTagExpr alt_env rhs
+ ]
+ alt_info = foldr combineAltInfo TagTagged infos
+ in ( alt_info, StgCase scrut' bndr' ty alts')
+ where
+ -- Single unboxed tuple alternative
+ scrut_infos bndrs = case scrut_info of
+ TagTagged -> Just $ replicate (length bndrs) TagProper
+ TagTuple infos -> Just infos
+ _ -> Nothing
+ (scrut_info, scrut') = inferTagExpr in_env scrut
+ bndr' = (getBinderId in_env bndr, TagSig TagProper)
+
+-- Compute binder sigs based on the constructors strict fields.
+-- NB: Not used if we have tuple info from the scrutinee.
+addAltBndrInfo :: forall p. TagEnv p -> AltCon -> [BinderP p] -> (TagEnv p, [BinderP 'InferTaggedBinders])
+addAltBndrInfo env (DataAlt con) bndrs
+ | not (isUnboxedTupleDataCon con || isUnboxedSumDataCon con)
+ = (out_env, out_bndrs)
+ where
+ marks = dataConRuntimeRepStrictness con :: [StrictnessMark]
+ out_bndrs = zipWith mk_bndr bndrs marks
+ out_env = extendSigEnv env out_bndrs
+
+ mk_bndr :: (BinderP p -> StrictnessMark -> (Id, TagSig))
+ mk_bndr bndr mark
+ | isUnliftedType (idType id) || isMarkedStrict mark
+ = (id, TagSig TagProper)
+ | otherwise
+ = noSig env bndr
+ where
+ id = getBinderId env bndr
+
+addAltBndrInfo env _ bndrs = (env, map (noSig env) bndrs)
+
+-----------------------------
+inferTagBind :: (OutputableInferPass p, InferExtEq p)
+ => TagEnv p -> GenStgBinding p -> (TagEnv p, GenStgBinding 'InferTaggedBinders)
+inferTagBind in_env (StgNonRec bndr rhs)
+ =
+ -- pprTrace "inferBindNonRec" (
+ -- ppr bndr $$
+ -- ppr (isDeadEndId id) $$
+ -- ppr sig)
+ (env', StgNonRec (id, sig) rhs')
+ where
+ id = getBinderId in_env bndr
+ env' = extendSigEnv in_env [(id, sig)]
+ (sig,rhs') = inferTagRhs id in_env rhs
+
+inferTagBind in_env (StgRec pairs)
+ = -- pprTrace "rec" (ppr (map fst pairs) $$ ppr (in_env { te_env = out_env }, StgRec pairs')) $
+ (in_env { te_env = out_env }, StgRec pairs')
+ where
+ (bndrs, rhss) = unzip pairs
+ in_ids = map (getBinderId in_env) bndrs
+ init_sigs = map (initSig) $ zip in_ids rhss
+ (out_env, pairs') = go in_env init_sigs rhss
+
+ go :: forall q. (OutputableInferPass q , InferExtEq q) => TagEnv q -> [TagSig] -> [GenStgRhs q]
+ -> (TagSigEnv, [((Id,TagSig), GenStgRhs 'InferTaggedBinders)])
+ go go_env in_sigs go_rhss
+ -- | pprTrace "go" (ppr in_ids $$ ppr in_sigs $$ ppr out_sigs $$ ppr rhss') False
+ -- = undefined
+ | in_sigs == out_sigs = (te_env rhs_env, out_bndrs `zip` rhss')
+ | otherwise = go env' out_sigs rhss'
+ where
+ out_bndrs = map updateBndr in_bndrs -- TODO: Keeps in_ids alive
+ in_bndrs = in_ids `zip` in_sigs
+ rhs_env = extendSigEnv go_env in_bndrs
+ (out_sigs, rhss') = unzip (zipWithEqual "inferTagBind" anaRhs in_ids go_rhss)
+ env' = makeTagged go_env
+
+ anaRhs :: Id -> GenStgRhs q -> (TagSig, GenStgRhs 'InferTaggedBinders)
+ anaRhs bnd rhs = inferTagRhs bnd rhs_env rhs
+
+ updateBndr :: (Id,TagSig) -> (Id,TagSig)
+ updateBndr (v,sig) = (setIdTagSig v sig, sig)
+
+initSig :: forall p. (Id, GenStgRhs p) -> TagSig
+-- Initial signature for the fixpoint loop
+initSig (_bndr, StgRhsCon {}) = TagSig TagTagged
+initSig (bndr, StgRhsClosure _ _ _ _ _) =
+ fromMaybe defaultSig (idTagSig_maybe bndr)
+ where defaultSig = (TagSig TagTagged)
+
+{- Note [Bottom functions are TagTagged]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+If we have a function with two branches with one
+being bottom, and the other returning a tagged
+unboxed tuple what is the result? We give it TagTagged!
+To answer why consider this function:
+
+foo :: Bool -> (# Bool, Bool #)
+foo x = case x of
+ True -> (# True,True #)
+ False -> undefined
+
+The true branch is obviously tagged. The other branch isn't.
+We want to treat the *result* of foo as tagged as well so that
+the combination of the branches also is tagged if all non-bottom
+branches are tagged.
+This is safe because the function is still always called/entered as long
+as it's applied to arguments. Since the function will never return we can give
+it safely any tag sig we like.
+So we give it TagTagged, as it allows the combined tag sig of the case expression
+to be the combination of all non-bottoming branches.
+
+-}
+
+-----------------------------
+inferTagRhs :: forall p.
+ (OutputableInferPass p, InferExtEq p)
+ => Id -- ^ Id we are binding to.
+ -> TagEnv p -- ^
+ -> GenStgRhs p -- ^
+ -> (TagSig, GenStgRhs 'InferTaggedBinders)
+inferTagRhs bnd_id in_env (StgRhsClosure ext cc upd bndrs body)
+ | isDeadEndId bnd_id && (notNull) bndrs
+ -- See Note [Bottom functions are TagTagged]
+ = (TagSig TagTagged, StgRhsClosure ext cc upd out_bndrs body')
+ | otherwise
+ = --pprTrace "inferTagRhsClosure" (ppr (_top, _grp_ids, env,info')) $
+ (TagSig info', StgRhsClosure ext cc upd out_bndrs body')
+ where
+ out_bndrs
+ | Just marks <- idCbvMarks_maybe bnd_id
+ -- Sometimes an we eta-expand foo with additional arguments after ww, and we also trim
+ -- the list of marks to the last strict entry. So we can conservatively
+ -- assume these are not strict
+ = zipWith (mkArgSig) bndrs (marks ++ repeat NotMarkedCbv)
+ | otherwise = map (noSig env') bndrs :: [(Id,TagSig)]
+
+ env' = extendSigEnv in_env out_bndrs
+ (info, body') = inferTagExpr env' body
+ info'
+ -- It's a thunk
+ | null bndrs
+ = TagDunno
+ -- TODO: We could preserve tuple fields for thunks
+ -- as well. But likely not worth the complexity.
+
+ | otherwise = info
+
+ mkArgSig :: BinderP p -> CbvMark -> (Id,TagSig)
+ mkArgSig bndp mark =
+ let id = getBinderId in_env bndp
+ tag = case mark of
+ MarkedCbv -> TagProper
+ _
+ | isUnliftedType (idType id) -> TagProper
+ | otherwise -> TagDunno
+ in (id, TagSig tag)
+
+inferTagRhs _ env _rhs@(StgRhsCon cc con cn ticks args)
+-- Constructors, which have untagged arguments to strict fields
+-- become thunks. We encode this by giving changing RhsCon nodes the info TagDunno
+ = --pprTrace "inferTagRhsCon" (ppr grp_ids) $
+ (TagSig (inferConTag env con args), StgRhsCon cc con cn ticks args)
+
+{- Note [Constructor TagSigs]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+@inferConTag@ will infer the proper tag signature for a binding who's RHS is a constructor
+or a StgConApp expression.
+Usually these will simply be TagProper. But there are exceptions.
+If any of the fields in the constructor are strict, but any argument to these
+fields is not tagged then we will have to case on the argument before storing
+in the constructor. Which means for let bindings the RHS turns into a thunk
+which obviously is no longer properly tagged.
+For example we might start with:
+
+ let x<TagDunno> = f ...
+ let c<TagProper> = StrictPair x True
+
+But we know during the rewrite stage x will need to be evaluated in the RHS
+of `c` so we will infer:
+
+ let x<TagDunno> = f ...
+ let c<TagDunno> = StrictPair x True
+
+Which in the rewrite stage will then be rewritten into:
+
+ let x<TagDunno> = f ...
+ let c<TagDunno> = case x of x' -> StrictPair x' True
+
+The other exception is unboxed tuples. These will get a TagTuple
+signature with a list of TagInfo about their individual binders
+as argument. As example:
+
+ let c<TagProper> = True
+ let x<TagDunno> = ...
+ let f<?> z = case z of z'<TagProper> -> (# c, x #)
+
+Here we will infer for f the Signature <TagTuple[TagProper,TagDunno]>.
+This information will be used if we scrutinze a saturated application of
+`f` in order to determine the taggedness of the result.
+That is for `case f x of (# r1,r2 #) -> rhs` we can infer
+r1<TagProper> and r2<TagDunno> which allows us to skip all tag checks on `r1`
+in `rhs`.
+
+Things get a bit more complicated with nesting:
+
+ let closeFd<TagTuple[...]> = ...
+ let f x = ...
+ case x of
+ _ -> Solo# closeFd
+
+The "natural" signature for the Solo# branch in `f` would be <TagTuple[TagTuple[...]]>.
+But we flatten this out to <TagTuple[TagDunno]> for the time being as it improves compile
+time and there doesn't seem to huge benefit to doing differently.
+
+ -}
+
+-- See Note [Constructor TagSigs]
+inferConTag :: TagEnv p -> DataCon -> [StgArg] -> TagInfo
+inferConTag env con args
+ | isUnboxedTupleDataCon con
+ = TagTuple $ map (flatten_arg_tag . lookupInfo env) args
+ | otherwise =
+ -- pprTrace "inferConTag"
+ -- ( text "con:" <> ppr con $$
+ -- text "args:" <> ppr args $$
+ -- text "marks:" <> ppr (dataConRuntimeRepStrictness con) $$
+ -- text "arg_info:" <> ppr (map (lookupInfo env) args) $$
+ -- text "info:" <> ppr info) $
+ info
+ where
+ info = if any arg_needs_eval strictArgs then TagDunno else TagProper
+ strictArgs = zipEqual "inferTagRhs" args (dataConRuntimeRepStrictness con) :: ([(StgArg, StrictnessMark)])
+ arg_needs_eval (arg,strict)
+ -- lazy args
+ | not (isMarkedStrict strict) = False
+ | tag <- (lookupInfo env arg)
+ -- banged args need to be tagged, or require eval
+ = not (isTaggedInfo tag)
+
+ flatten_arg_tag (TagTagged) = TagProper
+ flatten_arg_tag (TagProper ) = TagProper
+ flatten_arg_tag (TagTuple _) = TagDunno -- See Note [Constructor TagSigs]
+ flatten_arg_tag (TagDunno) = TagDunno
+
+
+collectExportInfo :: [GenStgTopBinding 'InferTaggedBinders] -> NameEnv TagSig
+collectExportInfo binds =
+ mkNameEnv bndr_info
+ where
+ bndr_info = concatMap collect binds :: [(Name,TagSig)]
+
+ collect (StgTopStringLit {}) = []
+ collect (StgTopLifted bnd) =
+ case bnd of
+ StgNonRec (id,sig) _rhs
+ | TagSig TagDunno <- sig -> []
+ | otherwise -> [(idName id,sig)]
+ StgRec bnds -> collectRec bnds
+
+ collectRec :: [(BinderP 'InferTaggedBinders, rhs)] -> [(Name,TagSig)]
+ collectRec [] = []
+ collectRec (bnd:bnds)
+ | (p,_rhs) <- bnd
+ , (id,sig) <- p
+ , TagSig TagDunno <- sig
+ = (idName id,sig) : collectRec bnds
+ | otherwise = collectRec bnds
diff --git a/compiler/GHC/Stg/InferTags/Rewrite.hs b/compiler/GHC/Stg/InferTags/Rewrite.hs
new file mode 100644
index 0000000000..8b2c24cbdb
--- /dev/null
+++ b/compiler/GHC/Stg/InferTags/Rewrite.hs
@@ -0,0 +1,495 @@
+--
+-- Copyright (c) 2019 Andreas Klebinger
+--
+
+{-# LANGUAGE CPP #-}
+{-# LANGUAGE TypeFamilies #-}
+{-# LANGUAGE RankNTypes #-}
+{-# LANGUAGE MultiParamTypeClasses #-}
+{-# LANGUAGE GADTs #-}
+{-# LANGUAGE TupleSections #-}
+{-# LANGUAGE DataKinds #-}
+{-# LANGUAGE FlexibleContexts #-}
+{-# LANGUAGE ScopedTypeVariables #-}
+{-# LANGUAGE FlexibleInstances #-}
+{-# LANGUAGE BangPatterns #-}
+{-# LANGUAGE GeneralisedNewtypeDeriving #-}
+
+module GHC.Stg.InferTags.Rewrite (rewriteTopBinds)
+where
+
+import GHC.Prelude
+
+import GHC.Types.Id
+import GHC.Types.Name
+import GHC.Types.Unique.Supply
+import GHC.Types.Unique.FM
+import GHC.Types.RepType
+import GHC.Unit.Types (Module)
+
+import GHC.Core.DataCon
+import GHC.Core (AltCon(..) )
+import GHC.Core.Type
+
+import GHC.StgToCmm.Types
+
+import GHC.Stg.Utils
+import GHC.Stg.Syntax as StgSyn
+
+import GHC.Data.Maybe
+import GHC.Utils.Panic
+import GHC.Utils.Panic.Plain
+
+import GHC.Utils.Outputable
+import GHC.Utils.Monad.State.Strict
+import GHC.Utils.Misc
+
+import GHC.Stg.InferTags.Types
+
+import Control.Monad
+import GHC.Types.Basic (CbvMark (NotMarkedCbv, MarkedCbv), isMarkedCbv, TopLevelFlag(..), isTopLevel)
+import GHC.Types.Var.Set
+
+-- import GHC.Utils.Trace
+-- import GHC.Driver.Ppr
+
+newtype RM a = RM { unRM :: (State (UniqFM Id TagSig, UniqSupply, Module, IdSet) a) }
+ deriving (Functor, Monad, Applicative)
+
+------------------------------------------------------------
+-- Add cases around strict fields where required.
+------------------------------------------------------------
+{-
+The work of this pass is simple:
+* We traverse the STG AST looking for constructor allocations.
+* For all allocations we check if there are strict fields in the constructor.
+* For any strict field we check if the argument is known to be properly tagged.
+* If it's not known to be properly tagged, we wrap the whole thing in a case,
+ which will force the argument before allocation.
+This is described in detail in Note [Strict Field Invariant].
+
+The only slight complication is that we have to make sure not to invalidate free
+variable analysis in the process.
+
+Note [Partially applied workers]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Sometimes we will get a function f of the form
+ -- Arity 1
+ f :: Dict a -> a -> b -> (c -> d)
+ f dict a b = case dict of
+ C m1 m2 -> m1 a b
+
+Which will result in a W/W split along the lines of
+ -- Arity 1
+ f :: Dict a -> a -> b -> (c -> d)
+ f dict a = case dict of
+ C m1 m2 -> $wf m1 a b
+
+ -- Arity 4
+ $wf :: (a -> b -> d -> c) -> a -> b -> c -> d
+ $wf m1 a b c = m1 a b c
+
+It's notable that the worker is called *undersatured* in the wrapper.
+At runtime what happens is that the wrapper will allocate a PAP which
+once fully applied will call the worker. And all is fine.
+
+But what about a strict worker! Well the function returned by `f` would
+be a unknown call, so we lose the ability to enfore the invariant that
+cbv marked arguments from StictWorkerId's are actually properly tagged
+as the annotations would be unavailable at the (unknown) call site.
+
+The fix is easy. We eta-expand all calls to functions taking call-by-value
+arguments during CorePrep just like we do with constructor allocations.
+
+Note [Upholding free variable annotations]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+The code generator requires us to maintain exact information
+about free variables about closures. Since we convert some
+RHSs from constructor allocations to closures we have to provide
+fvs of these closures. Not all constructor arguments will become
+free variables. Only these which are not bound at the top level
+have to be captured.
+To facilitate this we keep track of a set of locally bound variables in
+the current context which we then use to filter constructor arguments
+when building the free variable list.
+-}
+
+--------------------------------
+-- Utilities
+--------------------------------
+
+instance MonadUnique RM where
+ getUniqueSupplyM = RM $ do
+ (m, us, mod,lcls) <- get
+ let (us1, us2) = splitUniqSupply us
+ (put) (m,us2,mod,lcls)
+ return us1
+
+getMap :: RM (UniqFM Id TagSig)
+getMap = RM $ ((\(fst,_,_,_) -> fst) <$> get)
+
+setMap :: (UniqFM Id TagSig) -> RM ()
+setMap m = RM $ do
+ (_,us,mod,lcls) <- get
+ put (m, us,mod,lcls)
+
+getMod :: RM Module
+getMod = RM $ ( (\(_,_,thrd,_) -> thrd) <$> get)
+
+getFVs :: RM IdSet
+getFVs = RM $ ((\(_,_,_,lcls) -> lcls) <$> get)
+
+setFVs :: IdSet -> RM ()
+setFVs fvs = RM $ do
+ (tag_map,us,mod,_lcls) <- get
+ put (tag_map, us,mod,fvs)
+
+-- Rewrite the RHS(s) while making the id and it's sig available
+-- to determine if things are tagged/need to be captured as FV.
+withBind :: TopLevelFlag -> GenStgBinding 'InferTaggedBinders -> RM a -> RM a
+withBind top_flag (StgNonRec bnd _) cont = withBinder top_flag bnd cont
+withBind top_flag (StgRec binds) cont = do
+ let (bnds,_rhss) = unzip binds :: ([(Id, TagSig)], [GenStgRhs 'InferTaggedBinders])
+ withBinders top_flag bnds cont
+
+addTopBind :: GenStgBinding 'InferTaggedBinders -> RM ()
+addTopBind (StgNonRec (id, tag) _) = do
+ s <- getMap
+ -- pprTraceM "AddBind" (ppr id)
+ setMap $ addToUFM s id tag
+ return ()
+addTopBind (StgRec binds) = do
+ let (bnds,_rhss) = unzip binds
+ !s <- getMap
+ -- pprTraceM "AddBinds" (ppr $ map fst bnds)
+ setMap $! addListToUFM s bnds
+
+withBinder :: TopLevelFlag -> (Id, TagSig) -> RM a -> RM a
+withBinder top_flag (id,sig) cont = do
+ oldMap <- getMap
+ setMap $ addToUFM oldMap id sig
+ a <- if isTopLevel top_flag
+ then cont
+ else withLcl id cont
+ setMap oldMap
+ return a
+
+withBinders :: TopLevelFlag -> [(Id, TagSig)] -> RM a -> RM a
+withBinders TopLevel sigs cont = do
+ oldMap <- getMap
+ setMap $ addListToUFM oldMap sigs
+ a <- cont
+ setMap oldMap
+ return a
+withBinders NotTopLevel sigs cont = do
+ oldMap <- getMap
+ oldFvs <- getFVs
+ setMap $ addListToUFM oldMap sigs
+ setFVs $ extendVarSetList oldFvs (map fst sigs)
+ a <- cont
+ setMap oldMap
+ setFVs oldFvs
+ return a
+
+-- | Compute the argument with the given set of ids treated as requiring capture
+-- as free variables.
+withClosureLcls :: DIdSet -> RM a -> RM a
+withClosureLcls fvs act = do
+ old_fvs <- getFVs
+ let fvs' = nonDetStrictFoldDVarSet (flip extendVarSet) old_fvs fvs
+ setFVs fvs'
+ r <- act
+ setFVs old_fvs
+ return r
+
+-- | Compute the argument with the given id treated as requiring capture
+-- as free variables in closures.
+withLcl :: Id -> RM a -> RM a
+withLcl fv act = do
+ old_fvs <- getFVs
+ let fvs' = extendVarSet old_fvs fv
+ setFVs fvs'
+ r <- act
+ setFVs old_fvs
+ return r
+
+isTagged :: Id -> RM Bool
+isTagged v = do
+ this_mod <- getMod
+ case nameIsLocalOrFrom this_mod (idName v) of
+ True
+ | isUnliftedType (idType v)
+ -> return True
+ | otherwise -> do -- Local binding
+ !s <- getMap
+ let !sig = lookupWithDefaultUFM s (pprPanic "unknown Id:" (ppr v)) v
+ return $ case sig of
+ TagSig info ->
+ case info of
+ TagDunno -> False
+ TagProper -> True
+ TagTagged -> True
+ TagTuple _ -> True -- Consider unboxed tuples tagged.
+ False -- Imported
+ | Just con <- (isDataConWorkId_maybe v)
+ , isNullaryRepDataCon con
+ -> return True
+ | Just lf_info <- idLFInfo_maybe v
+ -> return $
+ -- Can we treat the thing as tagged based on it's LFInfo?
+ case lf_info of
+ -- Function, applied not entered.
+ LFReEntrant {}
+ -> True
+ -- Thunks need to be entered.
+ LFThunk {}
+ -> False
+ -- LFCon means we already know the tag, and it's tagged.
+ LFCon {}
+ -> True
+ LFUnknown {}
+ -> False
+ LFUnlifted {}
+ -> True
+ LFLetNoEscape {}
+ -- Shouldn't be possible. I don't think we can export letNoEscapes
+ -> True
+
+ | otherwise
+ -> return False
+
+
+isArgTagged :: StgArg -> RM Bool
+isArgTagged (StgLitArg _) = return True
+isArgTagged (StgVarArg v) = isTagged v
+
+mkLocalArgId :: Id -> RM Id
+mkLocalArgId id = do
+ !u <- getUniqueM
+ return $! setIdUnique (localiseId id) u
+
+---------------------------
+-- Actual rewrite pass
+---------------------------
+
+
+rewriteTopBinds :: Module -> UniqSupply -> [GenStgTopBinding 'InferTaggedBinders] -> [TgStgTopBinding]
+rewriteTopBinds mod us binds =
+ let doBinds = mapM rewriteTop binds
+
+ in evalState (unRM doBinds) (mempty, us, mod, mempty)
+
+rewriteTop :: InferStgTopBinding -> RM TgStgTopBinding
+rewriteTop (StgTopStringLit v s) = return $! (StgTopStringLit v s)
+rewriteTop (StgTopLifted bind) = do
+ -- Top level bindings can, and must remain in scope
+ addTopBind bind
+ (StgTopLifted) <$!> (rewriteBinds TopLevel bind)
+
+-- For top level binds, the wrapper is guaranteed to be `id`
+rewriteBinds :: TopLevelFlag -> InferStgBinding -> RM (TgStgBinding)
+rewriteBinds _top_flag (StgNonRec v rhs) = do
+ (!rhs) <- rewriteRhs v rhs
+ return $! (StgNonRec (fst v) rhs)
+rewriteBinds top_flag b@(StgRec binds) =
+ -- Bring sigs of binds into scope for all rhss
+ withBind top_flag b $ do
+ (rhss) <- mapM (uncurry rewriteRhs) binds
+ return $! (mkRec rhss)
+ where
+ mkRec :: [TgStgRhs] -> TgStgBinding
+ mkRec rhss = StgRec (zip (map (fst . fst) binds) rhss)
+
+-- Rewrite a RHS
+rewriteRhs :: (Id,TagSig) -> InferStgRhs
+ -> RM (TgStgRhs)
+rewriteRhs (_id, _tagSig) (StgRhsCon ccs con cn ticks args) = {-# SCC rewriteRhs_ #-} do
+ -- pprTraceM "rewriteRhs" (ppr _id)
+
+ -- Look up the nodes representing the constructor arguments.
+ fieldInfos <- mapM isArgTagged args
+
+ -- Filter out non-strict fields.
+ let strictFields =
+ getStrictConArgs con (zip args fieldInfos) :: [(StgArg,Bool)] -- (nth-argument, tagInfo)
+ -- Filter out already tagged arguments.
+ let needsEval = map fst . --get the actual argument
+ filter (not . snd) $ -- Keep untagged (False) elements.
+ strictFields :: [StgArg]
+ let evalArgs = [v | StgVarArg v <- needsEval] :: [Id]
+
+ if (null evalArgs)
+ then return $! (StgRhsCon ccs con cn ticks args)
+ else do
+ --assert not (isTaggedSig tagSig)
+ -- pprTraceM "CreatingSeqs for " $ ppr _id <+> ppr node_id
+
+ -- At this point iff we have possibly untagged arguments to strict fields
+ -- we convert the RHS into a RhsClosure which will evaluate the arguments
+ -- before allocating the constructor.
+ let ty_stub = panic "mkSeqs shouldn't use the type arg"
+ conExpr <- mkSeqs args evalArgs (\taggedArgs -> StgConApp con cn taggedArgs ty_stub)
+
+ fvs <- fvArgs args
+ -- lcls <- getFVs
+ -- pprTraceM "RhsClosureConversion" (ppr (StgRhsClosure fvs ccs ReEntrant [] $! conExpr) $$ text "lcls:" <> ppr lcls)
+ return $! (StgRhsClosure fvs ccs ReEntrant [] $! conExpr)
+rewriteRhs _binding (StgRhsClosure fvs ccs flag args body) = do
+ withBinders NotTopLevel args $
+ withClosureLcls fvs $
+ StgRhsClosure fvs ccs flag (map fst args) <$> rewriteExpr False body
+ -- return (closure)
+
+fvArgs :: [StgArg] -> RM DVarSet
+fvArgs args = do
+ fv_lcls <- getFVs
+ -- pprTraceM "fvArgs" (text "args:" <> ppr args $$ text "lcls:" <> pprVarSet (fv_lcls) (braces . fsep . map ppr) )
+ return $ mkDVarSet [ v | StgVarArg v <- args, elemVarSet v fv_lcls]
+
+type IsScrut = Bool
+
+rewriteExpr :: IsScrut -> InferStgExpr -> RM TgStgExpr
+rewriteExpr _ (e@StgCase {}) = rewriteCase e
+rewriteExpr _ (e@StgLet {}) = rewriteLet e
+rewriteExpr _ (e@StgLetNoEscape {}) = rewriteLetNoEscape e
+rewriteExpr isScrut (StgTick t e) = StgTick t <$!> rewriteExpr isScrut e
+rewriteExpr _ e@(StgConApp {}) = rewriteConApp e
+
+rewriteExpr isScrut e@(StgApp {}) = rewriteApp isScrut e
+rewriteExpr _ (StgLit lit) = return $! (StgLit lit)
+rewriteExpr _ (StgOpApp op args res_ty) = return $! (StgOpApp op args res_ty)
+
+rewriteCase :: InferStgExpr -> RM TgStgExpr
+rewriteCase (StgCase scrut bndr alt_type alts) =
+ withBinder NotTopLevel bndr $
+ pure StgCase <*>
+ rewriteExpr True scrut <*>
+ pure (fst bndr) <*>
+ pure alt_type <*>
+ mapM rewriteAlt alts
+
+rewriteCase _ = panic "Impossible: nodeCase"
+
+rewriteAlt :: InferStgAlt -> RM TgStgAlt
+rewriteAlt (altCon, bndrs, rhs) = do
+ withBinders NotTopLevel bndrs $ do
+ !rhs' <- rewriteExpr False rhs
+ return $! (altCon, map fst bndrs, rhs')
+
+rewriteLet :: InferStgExpr -> RM TgStgExpr
+rewriteLet (StgLet xt bind expr) = do
+ (!bind') <- rewriteBinds NotTopLevel bind
+ withBind NotTopLevel bind $ do
+ -- pprTraceM "withBindLet" (ppr $ bindersOfX bind)
+ !expr' <- rewriteExpr False expr
+ return $! (StgLet xt bind' expr')
+rewriteLet _ = panic "Impossible"
+
+rewriteLetNoEscape :: InferStgExpr -> RM TgStgExpr
+rewriteLetNoEscape (StgLetNoEscape xt bind expr) = do
+ (!bind') <- rewriteBinds NotTopLevel bind
+ withBind NotTopLevel bind $ do
+ !expr' <- rewriteExpr False expr
+ return $! (StgLetNoEscape xt bind' expr')
+rewriteLetNoEscape _ = panic "Impossible"
+
+rewriteConApp :: InferStgExpr -> RM TgStgExpr
+rewriteConApp (StgConApp con cn args tys) = do
+ -- We check if the strict field arguments are already known to be tagged.
+ -- If not we evaluate them first.
+ fieldInfos <- mapM isArgTagged args
+ let strictIndices = getStrictConArgs con (zip fieldInfos args) :: [(Bool, StgArg)]
+ let needsEval = map snd . filter (not . fst) $ strictIndices :: [StgArg]
+ let evalArgs = [v | StgVarArg v <- needsEval] :: [Id]
+ if (not $ null evalArgs)
+ then do
+ -- pprTraceM "Creating conAppSeqs for " $ ppr nodeId <+> parens ( ppr evalArgs ) -- <+> parens ( ppr fieldInfos )
+ mkSeqs args evalArgs (\taggedArgs -> StgConApp con cn taggedArgs tys)
+ else return $! (StgConApp con cn args tys)
+
+rewriteConApp _ = panic "Impossible"
+
+-- Special case: Expressions like `case x of { ... }`
+rewriteApp :: IsScrut -> InferStgExpr -> RM TgStgExpr
+rewriteApp True (StgApp f []) = do
+ -- pprTraceM "rewriteAppScrut" (ppr f)
+ f_tagged <- isTagged f
+ -- isTagged looks at more than the result of our analysis.
+ -- So always update here if useful.
+ let f' = if f_tagged
+ then setIdTagSig f (TagSig TagProper)
+ else f
+ return $! StgApp f' []
+ where
+rewriteApp _ (StgApp f args)
+ -- | pprTrace "rewriteAppOther" (ppr f <+> ppr args) False
+ -- = undefined
+ | Just marks <- idCbvMarks_maybe f
+ , relevant_marks <- dropWhileEndLE (not . isMarkedCbv) marks
+ , any isMarkedCbv relevant_marks
+ = assert (length relevant_marks <= length args)
+ unliftArg relevant_marks
+
+ where
+ -- If the function expects any argument to be call-by-value ensure the argument is already
+ -- evaluated.
+ unliftArg relevant_marks = do
+ argTags <- mapM isArgTagged args
+ let argInfo = zipWith3 ((,,)) args (relevant_marks++repeat NotMarkedCbv) argTags :: [(StgArg, CbvMark, Bool)]
+
+ -- untagged cbv argument positions
+ cbvArgInfo = filter (\x -> sndOf3 x == MarkedCbv && thdOf3 x == False) argInfo
+ cbvArgIds = [x | StgVarArg x <- map fstOf3 cbvArgInfo] :: [Id]
+ mkSeqs args cbvArgIds (\cbv_args -> StgApp f cbv_args)
+
+rewriteApp _ (StgApp f args) = return $ StgApp f args
+rewriteApp _ _ = panic "Impossible"
+
+-- `mkSeq` x x' e generates `case x of x' -> e`
+-- We could also substitute x' for x in e but that's so rarely beneficial
+-- that we don't bother.
+mkSeq :: Id -> Id -> TgStgExpr -> TgStgExpr
+mkSeq id bndr !expr =
+ -- pprTrace "mkSeq" (ppr (id,bndr)) $
+ let altTy = mkStgAltTypeFromStgAlts bndr [(DEFAULT, [], panic "Not used")]
+ in
+ StgCase (StgApp id []) bndr altTy [(DEFAULT, [], expr)]
+
+-- `mkSeqs args vs mkExpr` will force all vs, and construct
+-- an argument list args' where each v is replaced by it's evaluated
+-- counterpart v'.
+-- That is if we call `mkSeqs [StgVar x, StgLit l] [x] mkExpr` then
+-- the result will be (case x of x' { _DEFAULT -> <mkExpr [StgVar x', StgLit l]>}
+{-# INLINE mkSeqs #-} -- We inline to avoid allocating mkExpr
+mkSeqs :: [StgArg] -- ^ Original arguments
+ -> [Id] -- ^ var args to be evaluated ahead of time
+ -> ([StgArg] -> TgStgExpr)
+ -- ^ Function that reconstructs the expressions when passed
+ -- the list of evaluated arguments.
+ -> RM TgStgExpr
+mkSeqs args untaggedIds mkExpr = do
+ argMap <- mapM (\arg -> (arg,) <$> mkLocalArgId arg ) untaggedIds :: RM [(InId, OutId)]
+ -- mapM_ (pprTraceM "Forcing strict args before allocation:" . ppr) argMap
+ let taggedArgs :: [StgArg]
+ = map (\v -> case v of
+ StgVarArg v' -> StgVarArg $ fromMaybe v' $ lookup v' argMap
+ lit -> lit)
+ args
+
+ let conBody = mkExpr taggedArgs
+ let body = foldr (\(v,bndr) expr -> mkSeq v bndr expr) conBody argMap
+ return $! body
+
+-- Out of all arguments passed at runtime only return these ending up in a
+-- strict field
+getStrictConArgs :: DataCon -> [a] -> [a]
+getStrictConArgs con args
+ -- These are always lazy in their arguments.
+ | isUnboxedTupleDataCon con = []
+ | isUnboxedSumDataCon con = []
+ -- For proper data cons we have to check.
+ | otherwise =
+ [ arg | (arg,MarkedStrict)
+ <- zipEqual "getStrictConArgs"
+ args
+ (dataConRuntimeRepStrictness con)]
diff --git a/compiler/GHC/Stg/InferTags/TagSig.hs b/compiler/GHC/Stg/InferTags/TagSig.hs
new file mode 100644
index 0000000000..a1381881f1
--- /dev/null
+++ b/compiler/GHC/Stg/InferTags/TagSig.hs
@@ -0,0 +1,66 @@
+{-# LANGUAGE TypeFamilies, DataKinds, GADTs, FlexibleInstances #-}
+{-# LANGUAGE CPP #-}
+{-# LANGUAGE ConstraintKinds #-}
+
+-- We export this type from this module instead of GHC.Stg.InferTags.Types
+-- because it's used by more than the analysis itself. For example in interface
+-- files where we record a tag signature for bindings.
+-- By putting the sig into it's own module we can avoid module loops.
+module GHC.Stg.InferTags.TagSig
+
+where
+
+import GHC.Prelude
+
+import GHC.Types.Var
+import GHC.Utils.Outputable
+import GHC.Utils.Binary
+import GHC.Utils.Panic.Plain
+
+data TagInfo
+ = TagDunno -- We don't know anything about the tag.
+ | TagTuple [TagInfo] -- Represents a function/thunk which when evaluated
+ -- will return a Unboxed tuple whos components have
+ -- the given TagInfos.
+ | TagProper -- Heap pointer to properly-tagged value
+ | TagTagged -- Bottom of the domain.
+ deriving (Eq)
+
+instance Outputable TagInfo where
+ ppr TagTagged = text "TagTagged"
+ ppr TagDunno = text "TagDunno"
+ ppr TagProper = text "TagProper"
+ ppr (TagTuple tis) = text "TagTuple" <> brackets (pprWithCommas ppr tis)
+
+instance Binary TagInfo where
+ put_ bh TagDunno = putByte bh 1
+ put_ bh (TagTuple flds) = putByte bh 2 >> put_ bh flds
+ put_ bh TagProper = putByte bh 3
+ put_ bh TagTagged = putByte bh 4
+
+ get bh = do tag <- getByte bh
+ case tag of 1 -> return TagDunno
+ 2 -> TagTuple <$> get bh
+ 3 -> return TagProper
+ 4 -> return TagTagged
+ _ -> panic ("get TagInfo " ++ show tag)
+
+newtype TagSig -- The signature for each binding, this is a newtype as we might
+ -- want to track more information in the future.
+ = TagSig TagInfo
+ deriving (Eq)
+
+instance Outputable TagSig where
+ ppr (TagSig ti) = char '<' <> ppr ti <> char '>'
+instance OutputableBndr (Id,TagSig) where
+ pprInfixOcc = ppr
+ pprPrefixOcc = ppr
+
+instance Binary TagSig where
+ put_ bh (TagSig sig) = put_ bh sig
+ get bh = pure TagSig <*> get bh
+
+isTaggedSig :: TagSig -> Bool
+isTaggedSig (TagSig TagProper) = True
+isTaggedSig (TagSig TagTagged) = True
+isTaggedSig _ = False
diff --git a/compiler/GHC/Stg/InferTags/Types.hs b/compiler/GHC/Stg/InferTags/Types.hs
new file mode 100644
index 0000000000..bcb1f3300b
--- /dev/null
+++ b/compiler/GHC/Stg/InferTags/Types.hs
@@ -0,0 +1,137 @@
+{-# LANGUAGE TypeFamilies, DataKinds, GADTs, FlexibleInstances #-}
+{-# LANGUAGE CPP #-}
+{-# LANGUAGE ConstraintKinds #-}
+
+{-# LANGUAGE UndecidableInstances #-}
+ -- To permit: type instance XLet 'InferTaggedBinders = XLet 'CodeGen
+
+module GHC.Stg.InferTags.Types
+ ( module GHC.Stg.InferTags.Types
+ , module TagSig)
+where
+
+import GHC.Prelude
+
+import GHC.Core.DataCon
+import GHC.Core.Type (isUnliftedType)
+import GHC.Types.Id
+import GHC.Stg.Syntax
+import GHC.Stg.InferTags.TagSig as TagSig
+import GHC.Types.Var.Env
+import GHC.Utils.Outputable
+import GHC.Utils.Misc( zipWithEqual )
+import GHC.Utils.Panic
+
+import GHC.StgToCmm.Types
+
+{- *********************************************************************
+* *
+ Supporting data types
+* *
+********************************************************************* -}
+
+type instance BinderP 'InferTaggedBinders = (Id, TagSig)
+type instance XLet 'InferTaggedBinders = XLet 'CodeGen
+type instance XLetNoEscape 'InferTaggedBinders = XLetNoEscape 'CodeGen
+type instance XRhsClosure 'InferTaggedBinders = XRhsClosure 'CodeGen
+
+type InferStgTopBinding = GenStgTopBinding 'InferTaggedBinders
+type InferStgBinding = GenStgBinding 'InferTaggedBinders
+type InferStgExpr = GenStgExpr 'InferTaggedBinders
+type InferStgRhs = GenStgRhs 'InferTaggedBinders
+type InferStgAlt = GenStgAlt 'InferTaggedBinders
+
+combineAltInfo :: TagInfo -> TagInfo -> TagInfo
+combineAltInfo TagDunno _ = TagDunno
+combineAltInfo _ TagDunno = TagDunno
+combineAltInfo (TagTuple {}) TagProper = panic "Combining unboxed tuple with non-tuple result"
+combineAltInfo TagProper (TagTuple {}) = panic "Combining unboxed tuple with non-tuple result"
+combineAltInfo TagProper TagProper = TagProper
+combineAltInfo (TagTuple is1) (TagTuple is2) = TagTuple (zipWithEqual "combineAltInfo" combineAltInfo is1 is2)
+combineAltInfo (TagTagged) ti = ti
+combineAltInfo ti TagTagged = ti
+
+type TagSigEnv = IdEnv TagSig
+data TagEnv p = TE { te_env :: TagSigEnv
+ , te_get :: BinderP p -> Id
+ }
+
+instance Outputable (TagEnv p) where
+ ppr te = ppr (te_env te)
+
+
+getBinderId :: TagEnv p -> BinderP p -> Id
+getBinderId = te_get
+
+initEnv :: TagEnv 'CodeGen
+initEnv = TE { te_env = emptyVarEnv
+ , te_get = \x -> x}
+
+-- | Simple convert env to a env of the 'InferTaggedBinders pass
+-- with no other changes.
+makeTagged :: TagEnv p -> TagEnv 'InferTaggedBinders
+makeTagged env = TE { te_env = te_env env
+ , te_get = fst }
+
+noSig :: TagEnv p -> BinderP p -> (Id, TagSig)
+noSig env bndr
+ | isUnliftedType (idType var) = (var, TagSig TagProper)
+ | otherwise = (var, TagSig TagDunno)
+ where
+ var = getBinderId env bndr
+
+lookupSig :: TagEnv p -> Id -> Maybe TagSig
+lookupSig env fun = lookupVarEnv (te_env env) fun
+
+lookupInfo :: TagEnv p -> StgArg -> TagInfo
+lookupInfo env (StgVarArg var)
+ -- Nullary data constructors like True, False
+ | Just dc <- isDataConWorkId_maybe var
+ , isNullaryRepDataCon dc
+ = TagProper
+
+ | isUnliftedType (idType var)
+ = TagProper
+
+ -- Variables in the environment.
+ | Just (TagSig info) <- lookupVarEnv (te_env env) var
+ = info
+
+ | Just lf_info <- idLFInfo_maybe var
+ = case lf_info of
+ -- Function, tagged (with arity)
+ LFReEntrant {}
+ -> TagProper
+ -- Thunks need to be entered.
+ LFThunk {}
+ -> TagDunno
+ -- Constructors, already tagged.
+ LFCon {}
+ -> TagProper
+ LFUnknown {}
+ -> TagDunno
+ LFUnlifted {}
+ -> TagProper
+ -- Shouldn't be possible. I don't think we can export letNoEscapes
+ LFLetNoEscape {} -> panic "LFLetNoEscape exported"
+
+ | otherwise
+ = TagDunno
+
+lookupInfo _ (StgLitArg {})
+ = TagProper
+
+isDunnoSig :: TagSig -> Bool
+isDunnoSig (TagSig TagDunno) = True
+isDunnoSig (TagSig TagProper) = False
+isDunnoSig (TagSig TagTuple{}) = False
+isDunnoSig (TagSig TagTagged{}) = False
+
+isTaggedInfo :: TagInfo -> Bool
+isTaggedInfo TagProper = True
+isTaggedInfo TagTagged = True
+isTaggedInfo _ = False
+
+extendSigEnv :: TagEnv p -> [(Id,TagSig)] -> TagEnv p
+extendSigEnv env@(TE { te_env = sig_env }) bndrs
+ = env { te_env = extendVarEnvList sig_env bndrs }
diff --git a/compiler/GHC/Stg/Lint.hs b/compiler/GHC/Stg/Lint.hs
index aa7d03cacc..405abdd1f4 100644
--- a/compiler/GHC/Stg/Lint.hs
+++ b/compiler/GHC/Stg/Lint.hs
@@ -40,6 +40,7 @@ module GHC.Stg.Lint ( lintStgTopBindings ) where
import GHC.Prelude
import GHC.Stg.Syntax
+import GHC.Stg.Utils
import GHC.Core.Lint ( interactiveInScope )
import GHC.Core.DataCon
@@ -67,6 +68,7 @@ import GHC.Data.Bag ( Bag, emptyBag, isEmptyBag, snocBag, bagToList )
import Control.Applicative ((<|>))
import Control.Monad
+import Data.Maybe
lintStgTopBindings :: forall a . (OutputablePass a, BinderP a ~ Id)
=> Logger
@@ -188,10 +190,24 @@ lintStgExpr :: (OutputablePass a, BinderP a ~ Id) => GenStgExpr a -> LintM ()
lintStgExpr (StgLit _) = return ()
-lintStgExpr (StgApp fun args) = do
+lintStgExpr e@(StgApp fun args) = do
lintStgVar fun
mapM_ lintStgArg args
+ lf <- getLintFlags
+ when (lf_unarised lf) $ do
+ -- A function which expects a unlifted argument as n'th argument
+ -- always needs to be applied to n arguments.
+ -- See Note [Strict Worker Ids].
+ let marks = fromMaybe [] $ idCbvMarks_maybe fun
+ if length marks > length args
+ then addErrL $ hang (text "Undersatured cbv marked ID in App" <+> ppr e ) 2 $
+ (text "marks" <> ppr marks $$
+ text "args" <> ppr args $$
+ text "arity" <> ppr (idArity fun) $$
+ text "join_arity" <> ppr (isJoinId_maybe fun))
+ else return ()
+
lintStgExpr app@(StgConApp con _n args _arg_tys) = do
-- unboxed sums should vanish during unarise
lf <- getLintFlags
diff --git a/compiler/GHC/Stg/Stats.hs b/compiler/GHC/Stg/Stats.hs
index a4d36136bf..7c7df34f27 100644
--- a/compiler/GHC/Stg/Stats.hs
+++ b/compiler/GHC/Stg/Stats.hs
@@ -146,7 +146,7 @@ statExpr :: StgExpr -> StatEnv
statExpr (StgApp _ _) = countOne Applications
statExpr (StgLit _) = countOne Literals
-statExpr (StgConApp _ _ _ _)= countOne ConstructorApps
+statExpr (StgConApp {}) = countOne ConstructorApps
statExpr (StgOpApp _ _ _) = countOne PrimitiveApps
statExpr (StgTick _ e) = statExpr e
diff --git a/compiler/GHC/Stg/Syntax.hs b/compiler/GHC/Stg/Syntax.hs
index bf6bac3853..6726bbe526 100644
--- a/compiler/GHC/Stg/Syntax.hs
+++ b/compiler/GHC/Stg/Syntax.hs
@@ -39,6 +39,9 @@ module GHC.Stg.Syntax (
-- a set of synonyms for the code gen parameterisation
CgStgTopBinding, CgStgBinding, CgStgExpr, CgStgRhs, CgStgAlt,
+ -- Same for taggedness
+ TgStgTopBinding, TgStgBinding, TgStgExpr, TgStgRhs, TgStgAlt,
+
-- a set of synonyms for the lambda lifting parameterisation
LlStgTopBinding, LlStgBinding, LlStgExpr, LlStgRhs, LlStgAlt,
@@ -53,9 +56,7 @@ module GHC.Stg.Syntax (
stgRhsArity, freeVarsOfRhs,
isDllConApp,
stgArgType,
- stripStgTicksTop, stripStgTicksTopE,
stgCaseBndrInScope,
- bindersOf, bindersOfTop, bindersOfTopBinds,
-- ppr
StgPprOpts(..),
@@ -176,21 +177,6 @@ stgArgType :: StgArg -> Type
stgArgType (StgVarArg v) = idType v
stgArgType (StgLitArg lit) = literalType lit
-
--- | Strip ticks of a given type from an STG expression.
-stripStgTicksTop :: (StgTickish -> Bool) -> GenStgExpr p -> ([StgTickish], GenStgExpr p)
-stripStgTicksTop p = go []
- where go ts (StgTick t e) | p t = go (t:ts) e
- -- This special case avoid building a thunk for "reverse ts" when there are no ticks
- go [] other = ([], other)
- go ts other = (reverse ts, other)
-
--- | Strip ticks of a given type from an STG expression returning only the expression.
-stripStgTicksTopE :: (StgTickish -> Bool) -> GenStgExpr p -> GenStgExpr p
-stripStgTicksTopE p = go
- where go (StgTick t e) | p t = go e
- go other = other
-
-- | Given an alt type and whether the program is unarised, return whether the
-- case binder is in scope.
--
@@ -428,36 +414,6 @@ important):
[StgTickish]
[StgArg] -- Args
-{-
-Note Stg Passes
-~~~~~~~~~~~~~~~
-Here is a short summary of the STG pipeline and where we use the different
-StgPass data type indexes:
-
- 1. CoreToStg.Prep performs several transformations that prepare the desugared
- and simplified core to be converted to STG. One of these transformations is
- making it so that value lambdas only exist as the RHS of a binding.
-
- 2. CoreToStg converts the prepared core to STG, specifically GenStg*
- parameterised by 'Vanilla.
-
- 3. Stg.Pipeline does a number of passes on the generated STG. One of these is
- the lambda-lifting pass, which internally uses the 'LiftLams
- parameterisation to store information for deciding whether or not to lift
- each binding.
-
- 4. Stg.FVs annotates closures with their free variables. To store these
- annotations we use the 'CodeGen parameterisation.
-
- 5. Stg.StgToCmm generates Cmm from the annotated STG.
--}
-
--- | Used as a data type index for the stgSyn AST
-data StgPass
- = Vanilla
- | LiftLams
- | CodeGen
-
-- | Like 'GHC.Hs.Extension.NoExtField', but with an 'Outputable' instance that
-- returns 'empty'.
data NoExtFieldSilent = NoExtFieldSilent
@@ -473,40 +429,11 @@ noExtFieldSilent = NoExtFieldSilent
-- TODO: Maybe move this to GHC.Hs.Extension? I'm not sure about the
-- implications on build time...
--- TODO: Do we really want to the extension point type families to have a closed
--- domain?
-type family BinderP (pass :: StgPass)
-type instance BinderP 'Vanilla = Id
-type instance BinderP 'CodeGen = Id
-
-type family XRhsClosure (pass :: StgPass)
-type instance XRhsClosure 'Vanilla = NoExtFieldSilent
--- | Code gen needs to track non-global free vars
-type instance XRhsClosure 'CodeGen = DIdSet
-
-type family XLet (pass :: StgPass)
-type instance XLet 'Vanilla = NoExtFieldSilent
-type instance XLet 'CodeGen = NoExtFieldSilent
-
--- | When `-fdistinct-constructor-tables` is turned on then
--- each usage of a constructor is given an unique number and
--- an info table is generated for each different constructor.
-data ConstructorNumber =
- NoNumber | Numbered Int
-
-instance Outputable ConstructorNumber where
- ppr NoNumber = empty
- ppr (Numbered n) = text "#" <> ppr n
-
-type family XLetNoEscape (pass :: StgPass)
-type instance XLetNoEscape 'Vanilla = NoExtFieldSilent
-type instance XLetNoEscape 'CodeGen = NoExtFieldSilent
-
stgRhsArity :: StgRhs -> Int
stgRhsArity (StgRhsClosure _ _ _ bndrs _)
= assert (all isId bndrs) $ length bndrs
-- The arity never includes type parameters, but they should have gone by now
-stgRhsArity (StgRhsCon _ _ _ _ _) = 0
+stgRhsArity (StgRhsCon {}) = 0
freeVarsOfRhs :: (XRhsClosure pass ~ DIdSet) => GenStgRhs pass -> DIdSet
freeVarsOfRhs (StgRhsCon _ _ _ _ args) = mkDVarSet [ id | StgVarArg id <- args ]
@@ -550,7 +477,31 @@ The Plain STG parameterisation
* *
************************************************************************
-This happens to be the only one we use at the moment.
+ Note [STG Extension points]
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ We now make use of extension points in STG for different passes which want
+ to associate information with AST nodes.
+
+ Currently the pipeline is roughly:
+
+ CoreToStg: Core -> Stg
+ StgSimpl: Stg -> Stg
+ CodeGen: Stg -> Cmm
+
+ As part of StgSimpl we run late lambda lifting (Ll).
+ Late lambda lift:
+ Stg -> FvStg -> LlStg -> Stg
+
+ CodeGen:
+ As part of CodeGen we run tag inference.
+ Tag Inference:
+ Stg -> Stg 'InferTaggedBinders` -> Stg
+
+ And at a last step we add the free Variables:
+ Stg -> CgStg
+
+ Which finally CgStg being used to generate Cmm.
+
-}
type StgTopBinding = GenStgTopBinding 'Vanilla
@@ -571,6 +522,12 @@ type CgStgExpr = GenStgExpr 'CodeGen
type CgStgRhs = GenStgRhs 'CodeGen
type CgStgAlt = GenStgAlt 'CodeGen
+type TgStgTopBinding = GenStgTopBinding 'CodeGen
+type TgStgBinding = GenStgBinding 'CodeGen
+type TgStgExpr = GenStgExpr 'CodeGen
+type TgStgRhs = GenStgRhs 'CodeGen
+type TgStgAlt = GenStgAlt 'CodeGen
+
{- Many passes apply a substitution, and it's very handy to have type
synonyms to remind us whether or not the substitution has been applied.
See GHC.Core for precedence in Core land
@@ -589,6 +546,79 @@ type OutStgExpr = StgExpr
type OutStgRhs = StgRhs
type OutStgAlt = StgAlt
+-- | When `-fdistinct-constructor-tables` is turned on then
+-- each usage of a constructor is given an unique number and
+-- an info table is generated for each different constructor.
+data ConstructorNumber =
+ NoNumber | Numbered Int
+
+instance Outputable ConstructorNumber where
+ ppr NoNumber = empty
+ ppr (Numbered n) = text "#" <> ppr n
+
+{-
+Note Stg Passes
+~~~~~~~~~~~~~~~
+Here is a short summary of the STG pipeline and where we use the different
+StgPass data type indexes:
+
+ 1. CoreToStg.Prep performs several transformations that prepare the desugared
+ and simplified core to be converted to STG. One of these transformations is
+ making it so that value lambdas only exist as the RHS of a binding.
+ See Note [CorePrep Overview].
+
+ 2. CoreToStg converts the prepared core to STG, specifically GenStg*
+ parameterised by 'Vanilla. See the GHC.CoreToStg Module.
+
+ 3. Stg.Pipeline does a number of passes on the generated STG. One of these is
+ the lambda-lifting pass, which internally uses the 'LiftLams
+ parameterisation to store information for deciding whether or not to lift
+ each binding.
+ See Note [Late lambda lifting in STG].
+
+ 4. Tag inference takes in 'Vanilla and produces 'InferTagged STG, while using
+ the InferTaggedBinders annotated AST internally.
+ See Note [Tag Inference].
+
+ 5. Stg.FVs annotates closures with their free variables. To store these
+ annotations we use the 'CodeGen parameterisation.
+ See the GHC.Stg.FVs module.
+
+ 6. The Module Stg.StgToCmm generates Cmm from the CodeGen annotated STG.
+-}
+
+
+-- | Used as a data type index for the stgSyn AST
+data StgPass
+ = Vanilla
+ | LiftLams -- ^ Use internally by the lambda lifting pass
+ | InferTaggedBinders -- ^ Tag inference information on binders.
+ -- See Note [Tag inference passes] in GHC.Stg.InferTags
+ | InferTagged -- ^ Tag inference information put on relevant StgApp nodes
+ -- See Note [Tag inference passes] in GHC.Stg.InferTags
+ | CodeGen
+
+type family BinderP (pass :: StgPass)
+type instance BinderP 'Vanilla = Id
+type instance BinderP 'CodeGen = Id
+type instance BinderP 'InferTagged = Id
+
+type family XRhsClosure (pass :: StgPass)
+type instance XRhsClosure 'Vanilla = NoExtFieldSilent
+type instance XRhsClosure 'InferTagged = NoExtFieldSilent
+-- | Code gen needs to track non-global free vars
+type instance XRhsClosure 'CodeGen = DIdSet
+
+type family XLet (pass :: StgPass)
+type instance XLet 'Vanilla = NoExtFieldSilent
+type instance XLet 'InferTagged = NoExtFieldSilent
+type instance XLet 'CodeGen = NoExtFieldSilent
+
+type family XLetNoEscape (pass :: StgPass)
+type instance XLetNoEscape 'Vanilla = NoExtFieldSilent
+type instance XLetNoEscape 'InferTagged = NoExtFieldSilent
+type instance XLetNoEscape 'CodeGen = NoExtFieldSilent
+
{-
************************************************************************
@@ -643,25 +673,6 @@ data StgOp
{-
************************************************************************
* *
-Utilities
-* *
-************************************************************************
--}
-
-bindersOf :: BinderP a ~ Id => GenStgBinding a -> [Id]
-bindersOf (StgNonRec binder _) = [binder]
-bindersOf (StgRec pairs) = [binder | (binder, _) <- pairs]
-
-bindersOfTop :: BinderP a ~ Id => GenStgTopBinding a -> [Id]
-bindersOfTop (StgTopLifted bind) = bindersOf bind
-bindersOfTop (StgTopStringLit binder _) = [binder]
-
-bindersOfTopBinds :: BinderP a ~ Id => [GenStgTopBinding a] -> [Id]
-bindersOfTopBinds = foldr ((++) . bindersOfTop) []
-
-{-
-************************************************************************
-* *
Pretty-printing
* *
************************************************************************
@@ -712,6 +723,9 @@ pprGenStgBinding opts b = case b of
= hang (hsep [pprBndr LetBind bndr, equals])
4 (pprStgRhs opts expr <> semi)
+instance OutputablePass pass => Outputable (GenStgBinding pass) where
+ ppr = pprGenStgBinding panicStgPprOpts
+
pprGenStgTopBindings :: (OutputablePass pass) => StgPprOpts -> [GenStgTopBinding pass] -> SDoc
pprGenStgTopBindings opts binds
= vcat $ intersperse blankLine (map (pprGenStgTopBinding opts) binds)
@@ -732,12 +746,19 @@ pprStgArg :: StgArg -> SDoc
pprStgArg (StgVarArg var) = ppr var
pprStgArg (StgLitArg con) = ppr con
+instance OutputablePass pass => Outputable (GenStgExpr pass) where
+ ppr = pprStgExpr panicStgPprOpts
+
pprStgExpr :: OutputablePass pass => StgPprOpts -> GenStgExpr pass -> SDoc
pprStgExpr opts e = case e of
-- special case
StgLit lit -> ppr lit
-- general case
- StgApp func args -> hang (ppr func) 4 (interppSP args)
+ StgApp func args
+ | null args
+ , Just sig <- idTagSig_maybe func
+ -> ppr func <> ppr sig
+ | otherwise -> hang (ppr func) 4 (interppSP args) -- TODO: Print taggedness
StgConApp con n args _ -> hsep [ ppr con, ppr n, brackets (interppSP args) ]
StgOpApp op args _ -> hsep [ pprStgOp op, brackets (interppSP args)]
@@ -850,4 +871,8 @@ pprStgRhs opts rhs = case rhs of
, case mid of
NoNumber -> empty
Numbered n -> hcat [ppr n, space]
+ -- The bang indicates this is an StgRhsCon instead of an StgConApp.
, ppr con, text "! ", brackets (sep (map pprStgArg args))]
+
+instance OutputablePass pass => Outputable (GenStgRhs pass) where
+ ppr = pprStgRhs panicStgPprOpts
diff --git a/compiler/GHC/Stg/Unarise.hs b/compiler/GHC/Stg/Unarise.hs
index 15635e754a..25b9c5e582 100644
--- a/compiler/GHC/Stg/Unarise.hs
+++ b/compiler/GHC/Stg/Unarise.hs
@@ -258,6 +258,7 @@ import GHC.Utils.Panic
import GHC.Utils.Panic.Plain
import GHC.Types.RepType
import GHC.Stg.Syntax
+import GHC.Stg.Utils
import GHC.Core.Type
import GHC.Builtin.Types.Prim (intPrimTy)
import GHC.Builtin.Types
@@ -802,10 +803,10 @@ unariseConArgBinder = unariseArgBinder True
--------------------------------------------------------------------------------
mkIds :: FastString -> [UnaryType] -> UniqSM [Id]
-mkIds fs tys = mapM (mkId fs) tys
+mkIds fs tys = mkUnarisedIds fs tys
mkId :: FastString -> UnaryType -> UniqSM Id
-mkId s t = mkSysLocalM s Many t
+mkId s t = mkUnarisedId s t
isMultiValBndr :: Id -> Bool
isMultiValBndr id
diff --git a/compiler/GHC/Stg/Utils.hs b/compiler/GHC/Stg/Utils.hs
new file mode 100644
index 0000000000..4561e25765
--- /dev/null
+++ b/compiler/GHC/Stg/Utils.hs
@@ -0,0 +1,124 @@
+{-# LANGUAGE CPP, ScopedTypeVariables, TypeFamilies #-}
+{-# LANGUAGE BangPatterns #-}
+{-# LANGUAGE DataKinds #-}
+
+module GHC.Stg.Utils
+ ( mkStgAltTypeFromStgAlts
+ , bindersOf, bindersOfX, bindersOfTop, bindersOfTopBinds
+
+ , stripStgTicksTop, stripStgTicksTopE
+ , idArgs
+
+ , mkUnarisedId, mkUnarisedIds
+ ) where
+
+import GHC.Prelude
+
+import GHC.Types.Id
+import GHC.Core.Type
+import GHC.Core.TyCon
+import GHC.Core.DataCon
+import GHC.Core ( AltCon(..) )
+import GHC.Types.Tickish
+import GHC.Types.Unique.Supply
+
+import GHC.Types.RepType
+import GHC.Stg.Syntax
+
+import GHC.Utils.Outputable
+
+import GHC.Utils.Panic.Plain
+import GHC.Utils.Panic
+
+import GHC.Data.FastString
+
+mkUnarisedIds :: MonadUnique m => FastString -> [UnaryType] -> m [Id]
+mkUnarisedIds fs tys = mapM (mkUnarisedId fs) tys
+
+mkUnarisedId :: MonadUnique m => FastString -> UnaryType -> m Id
+mkUnarisedId s t = mkSysLocalM s Many t
+
+-- Checks if id is a top level error application.
+-- isErrorAp_maybe :: Id ->
+
+-- | Extract the default case alternative
+-- findDefaultStg :: [Alt b] -> ([Alt b], Maybe (Expr b))
+findDefaultStg :: [GenStgAlt p] -> ([(AltCon, [BinderP p], GenStgExpr p)],
+ Maybe (GenStgExpr p))
+findDefaultStg ((DEFAULT, args, rhs) : alts) = assert( null args ) (alts, Just rhs)
+findDefaultStg alts = (alts, Nothing)
+
+mkStgAltTypeFromStgAlts :: forall p. Id -> [GenStgAlt p] -> AltType
+mkStgAltTypeFromStgAlts bndr alts
+ | isUnboxedTupleType bndr_ty || isUnboxedSumType bndr_ty
+ = MultiValAlt (length prim_reps) -- always use MultiValAlt for unboxed tuples
+
+ | otherwise
+ = case prim_reps of
+ [rep] | isGcPtrRep rep ->
+ case tyConAppTyCon_maybe (unwrapType bndr_ty) of
+ Just tc
+ | isAbstractTyCon tc -> look_for_better_tycon
+ | isAlgTyCon tc -> AlgAlt tc
+ | otherwise -> assertPpr ( _is_poly_alt_tycon tc) (ppr tc)
+ PolyAlt
+ Nothing -> PolyAlt
+ [non_gcd] -> PrimAlt non_gcd
+ not_unary -> MultiValAlt (length not_unary)
+ where
+ bndr_ty = idType bndr
+ prim_reps = typePrimRep bndr_ty
+
+ _is_poly_alt_tycon tc
+ = isFunTyCon tc
+ || isPrimTyCon tc -- "Any" is lifted but primitive
+ || isFamilyTyCon tc -- Type family; e.g. Any, or arising from strict
+ -- function application where argument has a
+ -- type-family type
+
+ -- Sometimes, the TyCon is a AbstractTyCon which may not have any
+ -- constructors inside it. Then we may get a better TyCon by
+ -- grabbing the one from a constructor alternative
+ -- if one exists.
+ look_for_better_tycon
+ | (((DataAlt con) ,_, _) : _) <- data_alts =
+ AlgAlt (dataConTyCon con)
+ | otherwise =
+ assert(null data_alts)
+ PolyAlt
+ where
+ (data_alts, _deflt) = findDefaultStg alts
+
+bindersOf :: BinderP a ~ Id => GenStgBinding a -> [Id]
+bindersOf (StgNonRec binder _) = [binder]
+bindersOf (StgRec pairs) = [binder | (binder, _) <- pairs]
+
+bindersOfX :: GenStgBinding a -> [BinderP a]
+bindersOfX (StgNonRec binder _) = [binder]
+bindersOfX (StgRec pairs) = [binder | (binder, _) <- pairs]
+
+bindersOfTop :: BinderP a ~ Id => GenStgTopBinding a -> [Id]
+bindersOfTop (StgTopLifted bind) = bindersOf bind
+bindersOfTop (StgTopStringLit binder _) = [binder]
+
+-- All ids we bind something to on the top level.
+bindersOfTopBinds :: BinderP a ~ Id => [GenStgTopBinding a] -> [Id]
+-- bindersOfTopBinds binds = mapUnionVarSet (mkVarSet . bindersOfTop) binds
+bindersOfTopBinds binds = foldr ((++) . bindersOfTop) [] binds
+
+idArgs :: [StgArg] -> [Id]
+idArgs args = [v | StgVarArg v <- args]
+
+-- | Strip ticks of a given type from an STG expression.
+stripStgTicksTop :: (StgTickish -> Bool) -> GenStgExpr p -> ([StgTickish], GenStgExpr p)
+stripStgTicksTop p = go []
+ where go ts (StgTick t e) | p t = go (t:ts) e
+ -- This special case avoid building a thunk for "reverse ts" when there are no ticks
+ go [] other = ([], other)
+ go ts other = (reverse ts, other)
+
+-- | Strip ticks of a given type from an STG expression returning only the expression.
+stripStgTicksTopE :: (StgTickish -> Bool) -> GenStgExpr p -> GenStgExpr p
+stripStgTicksTopE p = go
+ where go (StgTick t e) | p t = go e
+ go other = other
diff --git a/compiler/GHC/StgToByteCode.hs b/compiler/GHC/StgToByteCode.hs
index c574327665..885af12944 100644
--- a/compiler/GHC/StgToByteCode.hs
+++ b/compiler/GHC/StgToByteCode.hs
@@ -661,7 +661,7 @@ schemeT _d _s _p (StgOpApp StgPrimCallOp{} _args _ty)
= unsupportedCConvException
-- Case 2: Unboxed tuple
-schemeT d s p (StgConApp con _ext args _tys)
+schemeT d s p (StgConApp con _cn args _tys)
| isUnboxedTupleDataCon con || isUnboxedSumDataCon con
= returnUnboxedTuple d s p args
diff --git a/compiler/GHC/StgToCmm.hs b/compiler/GHC/StgToCmm.hs
index 9931b81e6e..eea77198aa 100644
--- a/compiler/GHC/StgToCmm.hs
+++ b/compiler/GHC/StgToCmm.hs
@@ -70,6 +70,7 @@ import GHC.Utils.Misc
import System.IO.Unsafe
import qualified Data.ByteString as BS
import Data.IORef
+import GHC.Utils.Panic (assertPpr)
codeGen :: Logger
-> TmpFs
@@ -206,7 +207,7 @@ cgTopRhs cfg _rec bndr (StgRhsCon _cc con mn _ts args)
-- see Note [Post-unarisation invariants] in GHC.Stg.Unarise
cgTopRhs cfg rec bndr (StgRhsClosure fvs cc upd_flag args body)
- = assert (isEmptyDVarSet fvs) -- There should be no free variables
+ = assertPpr (isEmptyDVarSet fvs) (text "fvs:" <> ppr fvs) $ -- There should be no free variables
cgTopRhsClosure (stgToCmmPlatform cfg) rec bndr cc upd_flag args body
diff --git a/compiler/GHC/StgToCmm/Bind.hs b/compiler/GHC/StgToCmm/Bind.hs
index 4fb3fd1fbe..25d04b323c 100644
--- a/compiler/GHC/StgToCmm/Bind.hs
+++ b/compiler/GHC/StgToCmm/Bind.hs
@@ -32,6 +32,7 @@ import GHC.StgToCmm.DataCon
import GHC.StgToCmm.Heap
import GHC.StgToCmm.Prof (ldvEnterClosure, enterCostCentreFun, enterCostCentreThunk,
initUpdFrameProf)
+import GHC.StgToCmm.TagCheck
import GHC.StgToCmm.Ticky
import GHC.StgToCmm.Layout
import GHC.StgToCmm.Utils
@@ -45,6 +46,7 @@ import GHC.Cmm.Info
import GHC.Cmm.Utils
import GHC.Cmm.CLabel
+import GHC.Stg.Utils
import GHC.Types.CostCentre
import GHC.Types.Id
import GHC.Types.Id.Info
@@ -217,14 +219,19 @@ cgRhs id (StgRhsCon cc con mn _ts args)
{- See Note [GC recovery] in "GHC.StgToCmm.Closure" -}
cgRhs id (StgRhsClosure fvs cc upd_flag args body)
- = do profile <- getProfile
- mkRhsClosure profile id cc (nonVoidIds (dVarSetElems fvs)) upd_flag args body
+ = do
+ checkFunctionArgTags (text "TagCheck Failed: Rhs of" <> ppr id) id args
+ profile <- getProfile
+ check_tags <- stgToCmmDoTagCheck <$> getStgToCmmConfig
+ mkRhsClosure profile check_tags id cc (nonVoidIds (dVarSetElems fvs)) upd_flag args body
+
------------------------------------------------------------------------
-- Non-constructor right hand sides
------------------------------------------------------------------------
-mkRhsClosure :: Profile -> Id -> CostCentreStack
+mkRhsClosure :: Profile -> Bool
+ -> Id -> CostCentreStack
-> [NonVoid Id] -- Free vars
-> UpdateFlag
-> [Id] -- Args
@@ -267,7 +274,7 @@ for semi-obvious reasons.
-}
---------- See Note [Selectors] ------------------
-mkRhsClosure profile bndr _cc
+mkRhsClosure profile _check_tags bndr _cc
[NonVoid the_fv] -- Just one free var
upd_flag -- Updatable thunk
[] -- A thunk
@@ -300,7 +307,7 @@ mkRhsClosure profile bndr _cc
in cgRhsStdThunk bndr lf_info [StgVarArg the_fv]
---------- See Note [Ap thunks] ------------------
-mkRhsClosure profile bndr _cc
+mkRhsClosure profile check_tags bndr _cc
fvs
upd_flag
[] -- No args; a thunk
@@ -321,8 +328,8 @@ mkRhsClosure profile bndr _cc
-- lose information about this particular
-- thunk (e.g. its type) (#949)
, idArity fun_id == unknownArity -- don't spoil a known call
-
-- Ha! an Ap thunk
+ , not check_tags -- See Note [Tag inference debugging]
= cgRhsStdThunk bndr lf_info payload
where
@@ -333,7 +340,7 @@ mkRhsClosure profile bndr _cc
payload = StgVarArg fun_id : args
---------- Default case ------------------
-mkRhsClosure profile bndr cc fvs upd_flag args body
+mkRhsClosure profile _check_tags bndr cc fvs upd_flag args body
= do { let lf_info = mkClosureLFInfo (profilePlatform profile) bndr NotTopLevel fvs upd_flag args
; (id_info, reg) <- rhsIdInfo bndr lf_info
; return (id_info, gen_code lf_info reg) }
@@ -517,6 +524,7 @@ closureCodeBody top_lvl bndr cl_info cc args@(arg0:_) body fv_details
-- Load free vars out of closure *after*
-- heap check, to reduce live vars over check
; when node_points $ load_fvs node lf_info fv_bindings
+ ; checkFunctionArgTags (text "TagCheck failed - Argument to local function:" <> ppr bndr) bndr (map fromNonVoid nv_args)
; void $ cgExpr body
}}}
diff --git a/compiler/GHC/StgToCmm/Closure.hs b/compiler/GHC/StgToCmm/Closure.hs
index b2f51c60fd..0d048a6be8 100644
--- a/compiler/GHC/StgToCmm/Closure.hs
+++ b/compiler/GHC/StgToCmm/Closure.hs
@@ -4,7 +4,6 @@
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE FlexibleInstances #-}
-
-----------------------------------------------------------------------------
--
-- Stg to C-- code generation:
@@ -101,6 +100,7 @@ import GHC.Utils.Misc
import Data.Coerce (coerce)
import qualified Data.ByteString.Char8 as BS8
import GHC.StgToCmm.Config
+import GHC.Stg.InferTags.TagSig (isTaggedSig)
-----------------------------------------------------------------------------
-- Data types and synonyms
@@ -478,20 +478,38 @@ When black-holing, single-entry closures could also be entered via node
(rather than directly) to catch double-entry. -}
data CallMethod
- = EnterIt -- No args, not a function
+ = EnterIt -- ^ No args, not a function
| JumpToIt BlockId [LocalReg] -- A join point or a header of a local loop
| ReturnIt -- It's a value (function, unboxed value,
-- or constructor), so just return it.
- | SlowCall -- Unknown fun, or known fun with
+ | InferedReturnIt -- A properly tagged value, as determined by tag inference.
+ -- See Note [Tag Inference] and Note [Tag inference passes] in
+ -- GHC.Stg.InferTags.
+ -- It behaves /precisely/ like `ReturnIt`, except that when debugging is
+ -- enabled we emit an extra assertion to check that the returned value is
+ -- properly tagged. We can use this as a check that tag inference is working
+ -- correctly.
+ -- TODO: SPJ suggested we could combine this with EnterIt, but for now I decided
+ -- not to do so.
+
+ | SlowCall -- Unknown fun, or known fun with
-- too few args.
| DirectEntry -- Jump directly, with args in regs
CLabel -- The code label
RepArity -- Its arity
+instance Outputable CallMethod where
+ ppr (EnterIt) = text "Enter"
+ ppr (JumpToIt {}) = text "JumpToIt"
+ ppr (ReturnIt ) = text "ReturnIt"
+ ppr (InferedReturnIt) = text "InferedReturnIt"
+ ppr (SlowCall ) = text "SlowCall"
+ ppr (DirectEntry {}) = text "DirectEntry"
+
getCallMethod :: StgToCmmConfig
-> Name -- Function being applied
-> Id -- Function Id used to chech if it can refer to
@@ -538,6 +556,12 @@ getCallMethod _ _name _ (LFCon _) n_args _v_args _cg_loc _self_loop_info
getCallMethod cfg name id (LFThunk _ _ updatable std_form_info is_fun)
n_args _v_args _cg_loc _self_loop_info
+
+ | Just sig <- idTagSig_maybe id
+ , isTaggedSig sig -- Infered to be already evaluated by Tag Inference
+ , n_args == 0 -- See Note [Tag Inference]
+ = InferedReturnIt
+
| is_fun -- it *might* be a function, so we must "call" it (which is always safe)
= SlowCall -- We cannot just enter it [in eval/apply, the entry code
-- is the fast-entry code]
@@ -568,12 +592,27 @@ getCallMethod cfg name id (LFThunk _ _ updatable std_form_info is_fun)
DirectEntry (thunkEntryLabel (stgToCmmPlatform cfg) name (idCafInfo id) std_form_info
updatable) 0
-getCallMethod _ _name _ (LFUnknown True) _n_arg _v_args _cg_locs _self_loop_info
- = SlowCall -- might be a function
-
-getCallMethod _ name _ (LFUnknown False) n_args _v_args _cg_loc _self_loop_info
- = assertPpr (n_args == 0) (ppr name <+> ppr n_args)
- EnterIt -- Not a function
+-- Imported(Unknown) Ids
+getCallMethod cfg name id (LFUnknown might_be_a_function) n_args _v_args _cg_locs _self_loop_info
+ | n_args == 0
+ , Just sig <- idTagSig_maybe id
+ , isTaggedSig sig -- Infered to be already evaluated by Tag Inference
+ -- When profiling we enter functions to update the SCC so we
+ -- can't use the infered enterInfo here.
+ -- See Note [Evaluating functions with profiling] in rts/Apply.cmm
+ , not (profileIsProfiling (stgToCmmProfile cfg) && might_be_a_function)
+ = InferedReturnIt -- See Note [Tag Inference]
+
+ | might_be_a_function = SlowCall
+
+ | otherwise =
+ assertPpr ( n_args == 0) ( ppr name <+> ppr n_args )
+ EnterIt -- Not a function
+
+-- TODO: Redundant with above match?
+-- getCallMethod _ name _ (LFUnknown False) n_args _v_args _cg_loc _self_loop_info
+-- = assertPpr (n_args == 0) (ppr name <+> ppr n_args)
+-- EnterIt -- Not a function
getCallMethod _ _name _ LFLetNoEscape _n_args _v_args (LneLoc blk_id lne_regs) _self_loop_info
= JumpToIt blk_id lne_regs
diff --git a/compiler/GHC/StgToCmm/Config.hs b/compiler/GHC/StgToCmm/Config.hs
index 8751f5ffe5..28c11e1321 100644
--- a/compiler/GHC/StgToCmm/Config.hs
+++ b/compiler/GHC/StgToCmm/Config.hs
@@ -52,6 +52,7 @@ data StgToCmmConfig = StgToCmmConfig
-- code for linking against dynamic libraries
, stgToCmmDoBoundsCheck :: !Bool -- ^ decides whether to check array bounds in StgToCmm.Prim
-- or not
+ , stgToCmmDoTagCheck :: !Bool -- ^ Verify tag inference predictions.
------------------------------ Backend Flags ----------------------------------
, stgToCmmAllowBigArith :: !Bool -- ^ Allowed to emit larger than native size arithmetic (only LLVM and C backends)
, stgToCmmAllowQuotRemInstr :: !Bool -- ^ Allowed to generate QuotRem instructions
diff --git a/compiler/GHC/StgToCmm/DataCon.hs b/compiler/GHC/StgToCmm/DataCon.hs
index bed2f164eb..2a7203e101 100644
--- a/compiler/GHC/StgToCmm/DataCon.hs
+++ b/compiler/GHC/StgToCmm/DataCon.hs
@@ -52,6 +52,8 @@ import GHC.Utils.Monad (mapMaybeM)
import Control.Monad
import Data.Char
import GHC.StgToCmm.Config (stgToCmmPlatform)
+import GHC.StgToCmm.TagCheck (checkConArgsStatic, checkConArgsDyn)
+import GHC.Utils.Outputable
---------------------------------------------------------------
-- Top-level constructors
@@ -93,7 +95,7 @@ cgTopRhsCon cfg id con mn args
-- Windows DLLs have a problem with static cross-DLL refs.
massert (not (isDllConApp platform (stgToCmmExtDynRefs cfg) this_mod con (map fromNonVoid args)))
; assert (args `lengthIs` countConRepArgs con ) return ()
-
+ ; checkConArgsStatic (text "TagCheck failed - Top level con") con (map fromNonVoid args)
-- LAY IT OUT
; let
(tot_wds, -- #ptr_wds + #nonptr_wds
@@ -210,6 +212,7 @@ buildDynCon' binder mn actually_bound ccs con args
; let ticky_name | actually_bound = Just binder
| otherwise = Nothing
+ ; checkConArgsDyn (text "TagCheck failed - con_alloc:" <> ppr binder) con (map fromNonVoid args)
; hp_plus_n <- allocDynClosure ticky_name info_tbl lf_info
use_cc blame_cc args_w_offsets
; return (mkRhsInit platform reg lf_info hp_plus_n) }
diff --git a/compiler/GHC/StgToCmm/Env.hs b/compiler/GHC/StgToCmm/Env.hs
index 8f82c02e8e..eef1420a72 100644
--- a/compiler/GHC/StgToCmm/Env.hs
+++ b/compiler/GHC/StgToCmm/Env.hs
@@ -19,7 +19,7 @@ module GHC.StgToCmm.Env (
bindArgToReg, idToReg,
getCgIdInfo,
maybeLetNoEscape,
- ) where
+ ) where
import GHC.Prelude
@@ -44,7 +44,6 @@ import GHC.Utils.Outputable
import GHC.Utils.Panic
import GHC.Utils.Panic.Plain
-
-------------------------------------
-- Manipulating CgIdInfo
-------------------------------------
@@ -116,12 +115,20 @@ addBindsC new_bindings = do
new_bindings
setBinds new_binds
+-- Inside GHC the average module creates 385 external references
+-- with noteable cgIdInfo (so not generated by mkLFArgument).
+-- On average 200 of these are covered by True/False/[]
+-- and nullary constructors make up ~80.
+-- One would think it would be worthwhile to cache these.
+-- Sadly it's not. See #16937
+
getCgIdInfo :: Id -> FCode CgIdInfo
getCgIdInfo id
= do { platform <- getPlatform
; local_binds <- getBinds -- Try local bindings first
; case lookupVarEnv local_binds id of {
- Just info -> return info ;
+ Just info -> -- pprTrace "getCgIdInfoLocal" (ppr id) $
+ return info ;
Nothing -> do {
-- Should be imported; make up a CgIdInfo for it
diff --git a/compiler/GHC/StgToCmm/Expr.hs b/compiler/GHC/StgToCmm/Expr.hs
index 3a3c1db647..5129a45b1c 100644
--- a/compiler/GHC/StgToCmm/Expr.hs
+++ b/compiler/GHC/StgToCmm/Expr.hs
@@ -1,6 +1,7 @@
-{-# LANGUAGE BangPatterns #-}
-
{-# OPTIONS_GHC -Wno-incomplete-uni-patterns #-}
+{-# LANGUAGE BangPatterns #-}
+{-# LANGUAGE CPP #-}
+{-# LANGUAGE TypeFamilies #-}
-----------------------------------------------------------------------------
--
@@ -25,6 +26,7 @@ import GHC.StgToCmm.Layout
import GHC.StgToCmm.Lit
import GHC.StgToCmm.Prim
import GHC.StgToCmm.Hpc
+import GHC.StgToCmm.TagCheck
import GHC.StgToCmm.Ticky
import GHC.StgToCmm.Utils
import GHC.StgToCmm.Closure
@@ -36,6 +38,7 @@ import GHC.Cmm.BlockId
import GHC.Cmm hiding ( succ )
import GHC.Cmm.Info
import GHC.Cmm.Utils ( zeroExpr, cmmTagMask, mkWordCLit, mAX_PTR_TAG )
+import GHC.Cmm.Ppr
import GHC.Core
import GHC.Core.DataCon
import GHC.Types.ForeignCall
@@ -56,6 +59,7 @@ import GHC.Utils.Panic.Plain
import Control.Monad ( unless, void )
import Control.Arrow ( first )
import Data.List ( partition )
+import GHC.Stg.InferTags.TagSig (isTaggedSig)
------------------------------------------------------------------------
-- cgExpr: the main function
@@ -291,7 +295,7 @@ We adopt (b) because that is more likely to put the heap check at the
entry to a function, when not many things are live. After a bunch of
single-branch cases, we may have lots of things live
-Hence: two basic plans for
+Hence: Two basic plans for
case e of r { alts }
@@ -306,6 +310,13 @@ Hence: two basic plans for
...code for alts...
...alts do their own heap checks
+ When using GcInAlts the return point for heap checks and evaluating
+ the scrutinee is shared. This does mean we might execute the actual
+ branching code twice but it's rare enough to not matter.
+ The huge advantage of this pattern is that we do not require multiple
+ info tables for returning from gc as they can be shared between all
+ cases. Reducing code size nicely.
+
------ Plan B: special case when ---------
(i) e does not allocate or call GC
(ii) either upstream code performs allocation
@@ -320,6 +331,80 @@ Hence: two basic plans for
...code for alts...
...no heap check...
+
+ There is a variant B.2 which we use if:
+
+ (i) e is already evaluated+tagged
+ (ii) We have multiple alternatives
+ (iii) and there is no upstream allocation.
+
+ Here we also place one heap check before the `case` which
+ branches on `e`. Hopefully to be absorbed by an already existing
+ heap check further up. However the big difference in this case is that
+ there is no code for e. So we are not guaranteed that the heap
+ checks of the alts will be combined with an heap check further up.
+
+ Very common example: Casing on strict fields.
+
+ ...heap check...
+ ...assign bindings...
+
+ ...code for alts...
+ ...no heap check...
+
+ -- Reasoning for Plan B.2:
+ Since the scrutinee is already evaluated there is no evaluation
+ call which would force a info table that we can use as a shared
+ return point.
+ This means currently if we were to do GcInAlts like in Plan A then
+ we would end up with one info table per alternative.
+
+ To avoid this we unconditionally do gc outside of the alts with all
+ the pros and cons described in Note [Compiling case expressions].
+ Rewriting the logic to generate a shared return point before the case
+ expression while keeping the heap checks in the alternatives would be
+ possible. But it's unclear to me that this would actually be an improvement.
+
+ This means if we have code along these lines:
+
+ g x y = case x of
+ True -> Left $ (y + 1,y,y-1)
+ False -> Right $! y - (2 :: Int)
+
+ We get these potential heap check placements:
+
+ f = ...
+ !max(L,R)!; -- Might be absorbed upstream.
+ case x of
+ True -> !L!; ...L...
+ False -> !R!; ...R...
+
+ And we place a heap check at !max(L,R)!
+
+ The downsides of using !max(L,R)! are:
+
+ * If f is recursive, and the hot loop wouldn't allocate, but the exit branch does then we do
+ a redundant heap check.
+ * We use one more instruction to de-allocate the unused heap in the branch using less heap. (Neglible)
+ * A small risk of running gc slightly more often than needed especially if one branch allocates a lot.
+
+ The upsides are:
+ * May save a heap overflow test if there is an upstream check already.
+ * If the heap check is absorbed upstream we can also eliminate its info table.
+ * We generate at most one heap check (versus one per alt otherwise).
+ * No need to save volatile vars etc across heap checks in !L!, !R!
+ * We can use relative addressing from a single Hp to get at all the closures so allocated. (seems neglible)
+ * It fits neatly in the logic we already have for handling A/B
+
+ For containers:Data/Sequence/Internal/Sorting.o the difference is
+ about 10% in terms of code size compared to using Plan A for this case.
+ The main downside is we might put heap checks into loops, even if we
+ could avoid it (See Note [Compiling case expressions]).
+
+ Potential improvement: Investigate if heap checks in alts would be an
+ improvement if we generate and use a shared return point that is placed
+ in the common path for all alts.
+
-}
@@ -461,11 +546,13 @@ cgCase scrut bndr alt_type alts
; up_hp_usg <- getVirtHp -- Upstream heap usage
; let ret_bndrs = chooseReturnBndrs bndr alt_type alts
alt_regs = map (idToReg platform) ret_bndrs
+
; simple_scrut <- isSimpleScrut scrut alt_type
; let do_gc | is_cmp_op scrut = False -- See Note [GC for conditionals]
| not simple_scrut = True
| isSingleton alts = False
| up_hp_usg > 0 = False
+ | evaluatedScrut = False
| otherwise = True
-- cf Note [Compiling case expressions]
gc_plan = if do_gc then GcInAlts alt_regs else NoGcInAlts
@@ -481,6 +568,13 @@ cgCase scrut bndr alt_type alts
where
is_cmp_op (StgOpApp (StgPrimOp op) _ _) = isComparisonPrimOp op
is_cmp_op _ = False
+ evaluatedScrut
+ | (StgApp v []) <- scrut
+ , Just sig <- idTagSig_maybe v
+ , isTaggedSig sig = True
+ | otherwise = False
+
+
{- Note [GC for conditionals]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -530,10 +624,13 @@ isSimpleScrut :: CgStgExpr -> AltType -> FCode Bool
-- heap usage from alternatives into the stuff before the case
-- NB: if you get this wrong, and claim that the expression doesn't allocate
-- when it does, you'll deeply mess up allocation
-isSimpleScrut (StgOpApp op args _) _ = isSimpleOp op args
-isSimpleScrut (StgLit _) _ = return True -- case 1# of { 0# -> ..; ... }
-isSimpleScrut (StgApp _ []) (PrimAlt _) = return True -- case x# of { 0# -> ..; ... }
-isSimpleScrut _ _ = return False
+isSimpleScrut (StgOpApp op args _) _ = isSimpleOp op args
+isSimpleScrut (StgLit _) _ = return True -- case 1# of { 0# -> ..; ... }
+isSimpleScrut (StgApp _ []) (PrimAlt _) = return True -- case x# of { 0# -> ..; ... }
+isSimpleScrut (StgApp f []) _
+ | Just sig <- idTagSig_maybe f
+ = return $! isTaggedSig sig -- case !x of { ... }
+isSimpleScrut _ _ = return False
isSimpleOp :: StgOp -> [StgArg] -> FCode Bool
-- True iff the op cannot block or allocate
@@ -890,6 +987,7 @@ cgConApp con mn stg_args
cgIdApp :: Id -> [StgArg] -> FCode ReturnKind
cgIdApp fun_id args = do
+ platform <- getPlatform
fun_info <- getCgIdInfo fun_id
cfg <- getStgToCmmConfig
self_loop <- getSelfLoop
@@ -904,8 +1002,26 @@ cgIdApp fun_id args = do
-- A value in WHNF, so we can just return it.
ReturnIt
| isZeroBitTy (idType fun_id) -> emitReturn []
- | otherwise -> emitReturn [fun]
- -- ToDo: does ReturnIt guarantee tagged?
+ | otherwise -> emitReturn [fun]
+
+ -- A value infered to be in WHNF, so we can just return it.
+ InferedReturnIt
+ | isZeroBitTy (idType fun_id) -> trace >> emitReturn []
+ | otherwise -> trace >> assertTag >>
+ emitReturn [fun]
+ where
+ trace = do
+ tickyTagged
+ use_id <- newUnique
+ _lbl <- emitTickyCounterTag use_id (NonVoid fun_id)
+ tickyTagSkip use_id fun_id
+
+ -- pprTraceM "WHNF:" (ppr fun_id <+> ppr args )
+ assertTag = whenCheckTags $ do
+ mod <- getModuleName
+ emitTagAssertion (showPprUnsafe
+ (text "TagCheck failed on entry in" <+> ppr mod <+> text "- value:" <> ppr fun_id <+> pprExpr platform fun))
+ fun
EnterIt -> assert (null args) $ -- Discarding arguments
emitEnter fun
diff --git a/compiler/GHC/StgToCmm/TagCheck.hs b/compiler/GHC/StgToCmm/TagCheck.hs
new file mode 100644
index 0000000000..4c9fb23cc5
--- /dev/null
+++ b/compiler/GHC/StgToCmm/TagCheck.hs
@@ -0,0 +1,156 @@
+{-# LANGUAGE CPP #-}
+
+-----------------------------------------------------------------------------
+--
+-- Code generator utilities; mostly monadic
+--
+-- (c) The University of Glasgow 2004-2006
+--
+-----------------------------------------------------------------------------
+
+module GHC.StgToCmm.TagCheck
+ ( emitTagAssertion, emitArgTagCheck, checkArg, whenCheckTags,
+ checkArgStatic, checkFunctionArgTags,checkConArgsStatic,checkConArgsDyn) where
+
+#include "ClosureTypes.h"
+
+import GHC.Prelude
+
+import GHC.StgToCmm.Env
+import GHC.StgToCmm.Monad
+import GHC.StgToCmm.Utils
+import GHC.Cmm
+import GHC.Cmm.BlockId
+import GHC.Cmm.Graph as CmmGraph
+
+import GHC.Core.Type
+import GHC.Types.Id
+import GHC.Utils.Misc
+import GHC.Utils.Outputable
+
+import GHC.Core.DataCon
+import Control.Monad
+import GHC.StgToCmm.Types
+import GHC.Utils.Panic (pprPanic)
+import GHC.Utils.Panic.Plain (panic)
+import GHC.Stg.Syntax
+import GHC.StgToCmm.Closure
+import GHC.Types.RepType (dataConRuntimeRepStrictness)
+import GHC.Types.Basic
+import GHC.Data.FastString (mkFastString)
+import GHC.Cmm.Info (cmmGetClosureType)
+import GHC.Cmm.Utils (mkWordCLit)
+
+-- | Check all arguments marked as already tagged for a function
+-- are tagged by inserting runtime checks.
+checkFunctionArgTags :: SDoc -> Id -> [Id] -> FCode ()
+checkFunctionArgTags msg f args = whenCheckTags $ do
+ onJust (return ()) (idCbvMarks_maybe f) $ \marks -> do
+ -- Only check args marked as strict, and only lifted ones.
+ let cbv_args = filter (isLiftedRuntimeRep . idType) $ filterByList (map isMarkedCbv marks) args
+ -- Get their (cmm) address
+ arg_infos <- mapM getCgIdInfo cbv_args
+ let arg_cmms = map idInfoToAmode arg_infos
+ mapM_ (emitTagAssertion (showPprUnsafe msg)) (arg_cmms)
+
+-- | Check all required-tagged arguments of a constructor are tagged *at compile time*.
+checkConArgsStatic :: SDoc -> DataCon -> [StgArg] -> FCode ()
+checkConArgsStatic msg con args = whenCheckTags $ do
+ let marks = dataConRuntimeRepStrictness con
+ zipWithM_ (checkArgStatic msg) marks args
+
+-- Check all required arguments of a constructor are tagged.
+-- Possible by emitting checks at runtime.
+checkConArgsDyn :: SDoc -> DataCon -> [StgArg] -> FCode ()
+checkConArgsDyn msg con args = whenCheckTags $ do
+ let marks = dataConRuntimeRepStrictness con
+ zipWithM_ (checkArg msg) (map cbvFromStrictMark marks) args
+
+whenCheckTags :: FCode () -> FCode ()
+whenCheckTags act = do
+ check_tags <- stgToCmmDoTagCheck <$> getStgToCmmConfig
+ when check_tags act
+
+-- | Call barf if we failed to predict a tag correctly.
+-- This is immensly useful when debugging issues in tag inference
+-- as it will result in a program abort when we encounter an invalid
+-- call/heap object, rather than leaving it be and segfaulting arbitrary
+-- or producing invalid results.
+-- We check if either:
+-- * A tag is present
+-- * Or the object is a PAP (for which zero is the proper tag)
+emitTagAssertion :: String -> CmmExpr -> FCode ()
+emitTagAssertion onWhat fun = do
+ { platform <- getPlatform
+ ; lret <- newBlockId
+ ; lno_tag <- newBlockId
+ ; lbarf <- newBlockId
+ -- Check for presence of any tag.
+ ; emit $ mkCbranch (cmmIsTagged platform fun)
+ lret lno_tag (Just True)
+ -- If there is no tag check if we are dealing with a PAP
+ ; emitLabel lno_tag
+ ; emitComment (mkFastString "closereTypeCheck")
+ ; align_check <- stgToCmmAlignCheck <$> getStgToCmmConfig
+ ; profile <- getProfile
+ ; let closure_ty = cmmGetClosureType profile align_check fun
+ ; ty_reg <- newTemp (bWord platform)
+ ; emitAssign (CmmLocal ty_reg) closure_ty
+ ; emit $ mkCbranch (cmmEqWord platform
+ (CmmReg $ CmmLocal ty_reg)
+ (CmmLit $ mkWordCLit platform PAP))
+ lret lbarf (Just True)
+
+ ; emitLabel lbarf
+ ; emitBarf ("Tag inference failed on:" ++ onWhat)
+ ; emitLabel lret
+ }
+
+emitArgTagCheck :: SDoc -> [CbvMark] -> [Id] -> FCode ()
+emitArgTagCheck info marks args = whenCheckTags $ do
+ mod <- getModuleName
+ let cbv_args = filter (isLiftedRuntimeRep . idType) $ filterByList (map isMarkedCbv marks) args
+ arg_infos <- mapM getCgIdInfo cbv_args
+ let arg_cmms = map idInfoToAmode arg_infos
+ mk_msg arg = showPprUnsafe (text "Untagged arg:" <> (ppr mod) <> char ':' <> info <+> ppr arg)
+ zipWithM_ emitTagAssertion (map mk_msg args) (arg_cmms)
+
+taggedCgInfo :: CgIdInfo -> Bool
+taggedCgInfo cg_info
+ = case lf of
+ LFCon {} -> True
+ LFReEntrant {} -> True
+ LFUnlifted {} -> True
+ LFThunk {} -> False
+ LFUnknown {} -> False
+ LFLetNoEscape -> panic "Let no escape binding passed to top level con"
+ where
+ lf = cg_lf cg_info
+
+-- Check that one argument is properly tagged.
+checkArg :: SDoc -> CbvMark -> StgArg -> FCode ()
+checkArg _ NotMarkedCbv _ = return ()
+checkArg msg MarkedCbv arg = whenCheckTags $
+ case arg of
+ StgLitArg _ -> return ()
+ StgVarArg v -> do
+ info <- getCgIdInfo v
+ if taggedCgInfo info
+ then return ()
+ else case (cg_loc info) of
+ CmmLoc loc -> emitTagAssertion (showPprUnsafe msg) loc
+ LneLoc {} -> panic "LNE-arg"
+
+-- Check that argument is properly tagged.
+checkArgStatic :: SDoc -> StrictnessMark -> StgArg -> FCode ()
+checkArgStatic _ NotMarkedStrict _ = return ()
+checkArgStatic msg MarkedStrict arg = whenCheckTags $
+ case arg of
+ StgLitArg _ -> return ()
+ StgVarArg v -> do
+ info <- getCgIdInfo v
+ if taggedCgInfo info
+ then return ()
+ else pprPanic "Arg not tagged as expectd" (ppr msg <+> ppr arg)
+
+
diff --git a/compiler/GHC/StgToCmm/Ticky.hs b/compiler/GHC/StgToCmm/Ticky.hs
index 118c05d920..57bf1a97b6 100644
--- a/compiler/GHC/StgToCmm/Ticky.hs
+++ b/compiler/GHC/StgToCmm/Ticky.hs
@@ -64,6 +64,15 @@ the code generator as well as the RTS because:
* someone else might know how to repair it!
+
+Note [Ticky counters are static]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Currently GHC only supports static ticky events. That is -ticky emits
+code containing labels containing counters which then get bumped at runtime.
+
+There are currently only *static* ticky counters. Either we bump one of the
+static counters included in the RTS. Or we emit StgEntCounter structures in
+the object code and bump these.
-}
module GHC.StgToCmm.Ticky (
@@ -72,6 +81,7 @@ module GHC.StgToCmm.Ticky (
withNewTickyCounterThunk,
withNewTickyCounterStdThunk,
withNewTickyCounterCon,
+ emitTickyCounterTag,
tickyDynAlloc,
tickyAllocHeap,
@@ -97,7 +107,10 @@ module GHC.StgToCmm.Ticky (
tickyUnboxedTupleReturn,
tickyReturnOldCon, tickyReturnNewCon,
- tickySlowCall
+ tickyKnownCallTooFewArgs, tickyKnownCallExact, tickyKnownCallExtraArgs,
+ tickySlowCall, tickySlowCallPat,
+
+ tickyTagged, tickyUntagged, tickyTagSkip
) where
import GHC.Prelude
@@ -260,6 +273,93 @@ emitTickyCounter cloType name args
]
}
+{- Note [TagSkip ticky counters]
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+These counters keep track how often we execute code where we
+would have performed a tag check if we hadn't run tag inference.
+
+If we have some code of the form:
+ case v[tagged] of ...
+and we want to record how often we avoid a tag check on v
+through tag inference we have to emit a new StgEntCounter for
+each such case statement in order to record how often it's executed.
+
+In theory we could emit one per *binding*. But then we
+would have to either keep track of the bindings which
+already have a StgEntCounter associated with them in the
+code gen state or preallocate such a structure for each binding
+in the code unconditionally (since ticky-code can call non-ticky code)
+
+The first makes the compiler slower, even when ticky is not
+used (a big no no). The later is fairly complex but increases code size
+unconditionally. See also Note [Ticky counters are static].
+
+So instead we emit a new StgEntCounter for each use site of a binding
+where we infered a tag to be present. And increment the counter whenever
+this use site is executed.
+
+We use the fields as follows:
+
+entry_count: Entries avoided.
+str: : Name of the id.
+
+We use emitTickyCounterTag to emit the counter.
+
+Unlike the closure counters each *use* site of v has it's own
+counter. So there is no need to keep track of the closure/case we are
+in.
+
+We also have to pass a unique for the counter. An Id might be
+scrutinized in more than one place, so the ID alone isn't enough
+to distinguish between use sites.
+-}
+
+emitTickyCounterTag :: Unique -> NonVoid Id -> FCode CLabel
+emitTickyCounterTag unique (NonVoid id) =
+ let name = idName id
+ ctr_lbl = mkTagHitLabel name unique in
+ (>> return ctr_lbl) $
+ ifTicky $ do
+ { platform <- getPlatform
+ ; parent <- getTickyCtrLabel
+ ; mod_name <- getModuleName
+
+ -- When printing the name of a thing in a ticky file, we
+ -- want to give the module name even for *local* things. We
+ -- print just "x (M)" rather that "M.x" to distinguish them
+ -- from the global kind.
+ ; let ppr_for_ticky_name :: SDoc
+ ppr_for_ticky_name =
+ let n = ppr name
+ ext = empty -- parens (text "tagged")
+ p = case hasHaskellName parent of
+ -- NB the default "top" ticky ctr does not
+ -- have a Haskell name
+ Just pname -> text "at" <+> ppr (nameSrcLoc pname) <+>
+ text "in" <+> pprNameUnqualified name
+ _ -> empty
+ in if isInternalName name
+ then n <+> parens (ppr mod_name) <+> ext <+> p
+ else n <+> ext <+> p
+ ; sdoc_context <- stgToCmmContext <$> getStgToCmmConfig
+ ; fun_descr_lit <- newStringCLit $ renderWithContext sdoc_context ppr_for_ticky_name
+ ; arg_descr_lit <- newStringCLit $ "infer"
+ ; emitDataLits ctr_lbl
+ -- Must match layout of includes/rts/Ticky.h's StgEntCounter
+ --
+ -- krc: note that all the fields are I32 now; some were I16
+ -- before, but the code generator wasn't handling that
+ -- properly and it led to chaos, panic and disorder.
+ [ mkIntCLit platform 0, -- registered?
+ mkIntCLit platform 0, -- Arity
+ mkIntCLit platform 0, -- Heap allocated for this thing
+ fun_descr_lit,
+ arg_descr_lit,
+ zeroCLit platform, -- Entries into this thing
+ zeroCLit platform, -- Heap allocated by this thing
+ zeroCLit platform -- Link to next StgEntCounter
+ ]
+ }
-- -----------------------------------------------------------------------------
-- Ticky stack frames
@@ -560,6 +660,27 @@ tickyStackCheck :: FCode ()
tickyStackCheck = ifTicky $ bumpTickyCounter (fsLit "STK_CHK_ctr")
-- -----------------------------------------------------------------------------
+-- Ticky for tag inference characterisation
+
+-- | Predicted a pointer would be tagged correctly (GHC will crash if not so no miss case)
+tickyTagged :: FCode ()
+tickyTagged = ifTicky $ bumpTickyCounter (fsLit "TAG_TAGGED_pred")
+
+-- | Pass a boolean expr indicating if tag was present.
+tickyUntagged :: CmmExpr -> FCode ()
+tickyUntagged e = do
+ ifTicky $ bumpTickyCounter (fsLit "TAG_UNTAGGED_pred")
+ ifTicky $ bumpTickyCounterByE (fsLit "TAG_UNTAGGED_miss") e
+
+-- | Called when for `case v of ...` we can avoid entering v based on
+-- tag inference information.
+tickyTagSkip :: Unique -> Id -> FCode ()
+tickyTagSkip unique id = ifTicky $ do
+ let ctr_lbl = mkTagHitLabel (idName id) unique
+ registerTickyCtr ctr_lbl
+ bumpTickyTagSkip ctr_lbl
+
+-- -----------------------------------------------------------------------------
-- Ticky utils
isEnabled :: (StgToCmmConfig -> Bool) -> FCode Bool
@@ -597,7 +718,12 @@ bumpTickyEntryCount lbl = do
bumpTickyAllocd :: CLabel -> Int -> FCode ()
bumpTickyAllocd lbl bytes = do
platform <- getPlatform
- bumpTickyLitBy (cmmLabelOffB lbl (pc_OFFSET_StgEntCounter_allocd (platformConstants platform))) bytes
+ bumpTickyLitBy (cmmLabelOffB lbl (pc_OFFSET_StgEntCounter_entry_count (platformConstants platform))) bytes
+
+bumpTickyTagSkip :: CLabel -> FCode ()
+bumpTickyTagSkip lbl = do
+ platform <- getPlatform
+ bumpTickyLitBy (cmmLabelOffB lbl (pc_OFFSET_StgEntCounter_entry_count (platformConstants platform))) 1
bumpTickyLbl :: CLabel -> FCode ()
bumpTickyLbl lhs = bumpTickyLitBy (cmmLabelOffB lhs 0) 1
diff --git a/compiler/GHC/StgToCmm/Types.hs b/compiler/GHC/StgToCmm/Types.hs
index 1d50f0ae70..1a4d06ed9b 100644
--- a/compiler/GHC/StgToCmm/Types.hs
+++ b/compiler/GHC/StgToCmm/Types.hs
@@ -19,6 +19,7 @@ import GHC.Types.ForeignStubs
import GHC.Core.DataCon
import GHC.Types.Name.Env
import GHC.Types.Name.Set
+import GHC.Stg.InferTags.TagSig
import GHC.Utils.Outputable
@@ -90,6 +91,7 @@ data CgInfos = CgInfos
-- ^ LambdaFormInfos of exported closures in the current module.
, cgIPEStub :: !CStub
-- ^ The C stub which is used for IPE information
+ , cgTagSigs :: !(NameEnv TagSig)
}
--------------------------------------------------------------------------------
diff --git a/compiler/GHC/StgToCmm/Utils.hs b/compiler/GHC/StgToCmm/Utils.hs
index 270699b9ed..ddda97ab2a 100644
--- a/compiler/GHC/StgToCmm/Utils.hs
+++ b/compiler/GHC/StgToCmm/Utils.hs
@@ -12,7 +12,8 @@ module GHC.StgToCmm.Utils (
emitDataLits, emitRODataLits,
emitDataCon,
emitRtsCall, emitRtsCallWithResult, emitRtsCallGen,
- assignTemp,
+ emitBarf,
+ assignTemp, newTemp,
newUnboxedTupleRegs,
@@ -50,7 +51,7 @@ import GHC.Prelude
import GHC.Platform
import GHC.StgToCmm.Monad
import GHC.StgToCmm.Closure
-import GHC.StgToCmm.Lit (mkSimpleLit)
+import GHC.StgToCmm.Lit (mkSimpleLit, newStringCLit)
import GHC.Cmm
import GHC.Cmm.BlockId
import GHC.Cmm.Graph as CmmGraph
@@ -157,6 +158,11 @@ tagToClosure platform tycon tag
--
-------------------------------------------------------------------------
+emitBarf :: String -> FCode ()
+emitBarf msg = do
+ strLbl <- newStringCLit msg
+ emitRtsCall rtsUnitId (fsLit "barf") [(CmmLit strLbl,AddrHint)] False
+
emitRtsCall :: UnitId -> FastString -> [(CmmExpr,ForeignHint)] -> Bool -> FCode ()
emitRtsCall pkg fun = emitRtsCallGen [] (mkCmmCodeLabel pkg fun)
@@ -299,8 +305,6 @@ newUnboxedTupleRegs res_ty
choose_regs _ (AssignTo regs _) = return regs
choose_regs platform _ = mapM (newTemp . primRepCmmType platform) reps
-
-
-------------------------------------------------------------------------
-- emitMultiAssign
-------------------------------------------------------------------------
diff --git a/compiler/GHC/Tc/Module.hs b/compiler/GHC/Tc/Module.hs
index 026686b8cc..d9b59b4fd8 100644
--- a/compiler/GHC/Tc/Module.hs
+++ b/compiler/GHC/Tc/Module.hs
@@ -2975,6 +2975,7 @@ ppr_types debug type_env
| otherwise = hasTopUserName id
&& case idDetails id of
VanillaId -> True
+ StrictWorkerId{} -> True
RecSelId {} -> True
ClassOpId {} -> True
FCallId {} -> True
diff --git a/compiler/GHC/Tc/Solver.hs b/compiler/GHC/Tc/Solver.hs
index 6a1f2d3315..8b6ac9928d 100644
--- a/compiler/GHC/Tc/Solver.hs
+++ b/compiler/GHC/Tc/Solver.hs
@@ -677,7 +677,7 @@ type-class or type defined in N.
Secondly, when should these heuristics be enforced? We enforced them when the
type-class method call site is in a module marked `-XSafe` or `-XTrustworthy`.
This allows `-XUnsafe` modules to operate without restriction, and for Safe
-Haskell inferrence to infer modules with unsafe overlaps as unsafe.
+Haskell inference to infer modules with unsafe overlaps as unsafe.
One alternative design would be to also consider if an instance was imported as
a `safe` import or not and only apply the restriction to instances imported
@@ -745,7 +745,7 @@ How is this implemented? It's complicated! So we'll step through it all:
IORefs called `tcg_safe_infer` and `tcg_safe_infer_reason`.
7) `GHC.Driver.Main.tcRnModule'` -- Reads `tcg_safe_infer` after type-checking, calling
- `GHC.Driver.Main.markUnsafeInfer` (passing the reason along) when safe-inferrence
+ `GHC.Driver.Main.markUnsafeInfer` (passing the reason along) when safe-inference
failed.
Note [No defaulting in the ambiguity check]
diff --git a/compiler/GHC/Tc/Types.hs b/compiler/GHC/Tc/Types.hs
index 97f11c8a0b..e1f0400e44 100644
--- a/compiler/GHC/Tc/Types.hs
+++ b/compiler/GHC/Tc/Types.hs
@@ -50,7 +50,7 @@ module GHC.Tc.Types(
PromotionErr(..),
IdBindingInfo(..), ClosedTypeId, RhsNames,
IsGroupClosed(..),
- SelfBootInfo(..),
+ SelfBootInfo(..), bootExports,
tcTyThingCategory, pprTcTyThingCategory,
peCategory, pprPECategory,
CompleteMatch, CompleteMatches,
@@ -696,6 +696,15 @@ data SelfBootInfo
-- What is sb_tcs used for? See Note [Extra dependencies from .hs-boot files]
-- in GHC.Rename.Module
+bootExports :: SelfBootInfo -> NameSet
+bootExports boot =
+ case boot of
+ NoSelfBoot -> emptyNameSet
+ SelfBoot { sb_mds = mds} ->
+ let exports = md_exports mds
+ in availsToNameSet exports
+
+
{- Note [Tracking unused binding and imports]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
diff --git a/compiler/GHC/Tc/Types.hs-boot b/compiler/GHC/Tc/Types.hs-boot
index c6302adb57..9f107936a5 100644
--- a/compiler/GHC/Tc/Types.hs-boot
+++ b/compiler/GHC/Tc/Types.hs-boot
@@ -6,6 +6,8 @@ import GHC.Utils.Outputable
data TcLclEnv
+data SelfBootInfo
+
data TcIdSigInfo
instance Outputable TcIdSigInfo
diff --git a/compiler/GHC/Types/Basic.hs b/compiler/GHC/Types/Basic.hs
index e7c384d0a4..2ba185c25e 100644
--- a/compiler/GHC/Types/Basic.hs
+++ b/compiler/GHC/Types/Basic.hs
@@ -45,6 +45,8 @@ module GHC.Types.Basic (
Boxity(..), isBoxed,
+ CbvMark(..), isMarkedCbv,
+
PprPrec(..), topPrec, sigPrec, opPrec, funPrec, starPrec, appPrec,
maybeParen,
@@ -517,6 +519,37 @@ instance Binary Boxity where -- implemented via isBoxed-isomorphism to Bool
{-
************************************************************************
* *
+ Call by value flag
+* *
+************************************************************************
+-}
+
+-- | Should an argument be passed evaluated *and* tagged.
+data CbvMark = MarkedCbv | NotMarkedCbv
+ deriving Eq
+
+instance Outputable CbvMark where
+ ppr MarkedCbv = text "!"
+ ppr NotMarkedCbv = text "~"
+
+instance Binary CbvMark where
+ put_ bh NotMarkedCbv = putByte bh 0
+ put_ bh MarkedCbv = putByte bh 1
+ get bh =
+ do h <- getByte bh
+ case h of
+ 0 -> return NotMarkedCbv
+ 1 -> return MarkedCbv
+ _ -> panic "Invalid binary format"
+
+isMarkedCbv :: CbvMark -> Bool
+isMarkedCbv MarkedCbv = True
+isMarkedCbv NotMarkedCbv = False
+
+
+{-
+************************************************************************
+* *
Recursive/Non-Recursive flag
* *
************************************************************************
diff --git a/compiler/GHC/Types/Id.hs b/compiler/GHC/Types/Id.hs
index 1245b372af..76dc4d0721 100644
--- a/compiler/GHC/Types/Id.hs
+++ b/compiler/GHC/Types/Id.hs
@@ -114,17 +114,22 @@ module GHC.Types.Id (
setIdDemandInfo,
setIdDmdSig,
setIdCprSig,
+ setIdCbvMarks,
+ idCbvMarks_maybe,
+ idCbvMarkArity,
idDemandInfo,
idDmdSig,
idCprSig,
+ idTagSig_maybe,
+ setIdTagSig
) where
import GHC.Prelude
import GHC.Core ( CoreRule, isStableUnfolding, evaldUnfolding,
- isCompulsoryUnfolding, Unfolding( NoUnfolding ) )
+ isCompulsoryUnfolding, Unfolding( NoUnfolding ), isEvaldUnfolding )
import GHC.Types.Id.Info
import GHC.Types.Basic
@@ -163,6 +168,7 @@ import GHC.Utils.Panic
import GHC.Utils.Panic.Plain
import GHC.Utils.GlobalVars
import GHC.Utils.Trace
+import GHC.Stg.InferTags.TagSig
-- infixl so you can say (id `set` a `set` b)
infixl 1 `setIdUnfolding`,
@@ -181,7 +187,8 @@ infixl 1 `setIdUnfolding`,
`setIdCprSig`,
`asJoinId`,
- `asJoinId_maybe`
+ `asJoinId_maybe`,
+ `setIdCbvMarks`
{-
************************************************************************
@@ -259,6 +266,11 @@ maybeModifyIdInfo :: Maybe IdInfo -> Id -> Id
maybeModifyIdInfo (Just new_info) id = lazySetIdInfo id new_info
maybeModifyIdInfo Nothing id = id
+-- maybeModifyIdInfo tries to avoid unnecessary thrashing
+maybeModifyIdDetails :: Maybe IdDetails -> Id -> Id
+maybeModifyIdDetails (Just new_details) id = setIdDetails id new_details
+maybeModifyIdDetails Nothing id = id
+
{-
************************************************************************
* *
@@ -540,11 +552,12 @@ isJoinId id
_ -> False
| otherwise = False
+-- | Doesn't return strictness marks
isJoinId_maybe :: Var -> Maybe JoinArity
isJoinId_maybe id
| isId id = assertPpr (isId id) (ppr id) $
case Var.idDetails id of
- JoinId arity -> Just arity
+ JoinId arity _marks -> Just arity
_ -> Nothing
| otherwise = Nothing
@@ -609,10 +622,12 @@ asJoinId id arity = warnPprTrace (not (isLocalId id))
warnPprTrace (not (is_vanilla_or_join id))
"asJoinId"
(ppr id <+> pprIdDetails (idDetails id)) $
- id `setIdDetails` JoinId arity
+ id `setIdDetails` JoinId arity (idCbvMarks_maybe id)
where
is_vanilla_or_join id = case Var.idDetails id of
VanillaId -> True
+ -- Can workers become join ids? Yes!
+ StrictWorkerId {} -> pprTraceDebug "asJoinId (strict worker)" (ppr id) True
JoinId {} -> True
_ -> False
@@ -689,6 +704,9 @@ isStrictId id
isStrUsedDmd (idDemandInfo id)
-- Take the best of both strictnesses - old and new
+idTagSig_maybe :: Id -> Maybe TagSig
+idTagSig_maybe = tagSig . idInfo
+
---------------------------------
-- UNFOLDING
@@ -712,6 +730,49 @@ idDemandInfo id = demandInfo (idInfo id)
setIdDemandInfo :: Id -> Demand -> Id
setIdDemandInfo id dmd = modifyIdInfo (`setDemandInfo` dmd) id
+setIdTagSig :: Id -> TagSig -> Id
+setIdTagSig id sig = modifyIdInfo (`setTagSig` sig) id
+
+-- | If all marks are NotMarkedStrict we just set nothing.
+setIdCbvMarks :: Id -> [CbvMark] -> Id
+setIdCbvMarks id marks
+ | not (any isMarkedCbv marks) = maybeModifyIdDetails (removeMarks $ idDetails id) id
+ | otherwise =
+ -- pprTrace "setMarks:" (ppr id <> text ":" <> ppr marks) $
+ case idDetails id of
+ -- good ol (likely worker) function
+ VanillaId -> id `setIdDetails` (StrictWorkerId trimmedMarks)
+ JoinId arity _ -> id `setIdDetails` (JoinId arity (Just trimmedMarks))
+ -- Updating an existing strict worker.
+ StrictWorkerId _ -> id `setIdDetails` (StrictWorkerId trimmedMarks)
+ -- Do nothing for these
+ RecSelId{} -> id
+ DFunId{} -> id
+ _ -> pprTrace "setIdCbvMarks: Unable to set cbv marks for" (ppr id $$
+ text "marks:" <> ppr marks $$
+ text "idDetails:" <> ppr (idDetails id)) id
+
+ where
+ -- (Currently) no point in passing args beyond the arity unlifted.
+ -- We would have to eta expand all call sites to (length marks).
+ -- Perhaps that's sensible but for now be conservative.
+ trimmedMarks = take (idArity id) marks
+ removeMarks details = case details of
+ JoinId arity (Just _) -> Just $ JoinId arity Nothing
+ StrictWorkerId _ -> Just VanillaId
+ _ -> Nothing
+
+idCbvMarks_maybe :: Id -> Maybe [CbvMark]
+idCbvMarks_maybe id = case idDetails id of
+ StrictWorkerId marks -> Just marks
+ JoinId _arity marks -> marks
+ _ -> Nothing
+
+-- Id must be called with at least this arity in order to allow arguments to
+-- be passed unlifted.
+idCbvMarkArity :: Id -> Arity
+idCbvMarkArity fn = maybe 0 length (idCbvMarks_maybe fn)
+
setCaseBndrEvald :: StrictnessMark -> Id -> Id
-- Used for variables bound by a case expressions, both the case-binder
-- itself, and any pattern-bound variables that are argument of a
@@ -884,6 +945,7 @@ updOneShotInfo id one_shot
-- f = \x -> e
-- If we change the one-shot-ness of x, f's type changes
+-- Replaces the id info if the zapper returns @Just idinfo@
zapInfo :: (IdInfo -> Maybe IdInfo) -> Id -> Id
zapInfo zapper id = maybeModifyIdInfo (zapper (idInfo id)) id
@@ -969,7 +1031,7 @@ transferPolyIdInfo :: Id -- Original Id
-> Id -- New Id
-> Id
transferPolyIdInfo old_id abstract_wrt new_id
- = modifyIdInfo transfer new_id
+ = modifyIdInfo transfer new_id `setIdCbvMarks` new_cbv_marks
where
arity_increase = count isId abstract_wrt -- Arity increases by the
-- number of value binders
@@ -985,6 +1047,18 @@ transferPolyIdInfo old_id abstract_wrt new_id
new_strictness = prependArgsDmdSig arity_increase old_strictness
old_cpr = cprSigInfo old_info
+ old_cbv_marks = fromMaybe (replicate old_arity NotMarkedCbv) (idCbvMarks_maybe old_id)
+ abstr_cbv_marks = mapMaybe getMark abstract_wrt
+ new_cbv_marks = abstr_cbv_marks ++ old_cbv_marks
+
+ getMark v
+ | not (isId v)
+ = Nothing
+ | isId v
+ , isEvaldUnfolding (idUnfolding v)
+ , not (isUnliftedType $ idType v)
+ = Just MarkedCbv
+ | otherwise = Just NotMarkedCbv
transfer new_info = new_info `setArityInfo` new_arity
`setInlinePragInfo` old_inline_prag
`setOccInfo` new_occ_info
diff --git a/compiler/GHC/Types/Id/Info.hs b/compiler/GHC/Types/Id/Info.hs
index ec5607d40f..73e7169181 100644
--- a/compiler/GHC/Types/Id/Info.hs
+++ b/compiler/GHC/Types/Id/Info.hs
@@ -68,7 +68,7 @@ module GHC.Types.Id.Info (
emptyRuleInfo,
isEmptyRuleInfo, ruleInfoFreeVars,
ruleInfoRules, setRuleInfoHead,
- ruleInfo, setRuleInfo,
+ ruleInfo, setRuleInfo, tagSigInfo,
-- ** The CAFInfo type
CafInfo(..),
@@ -76,8 +76,10 @@ module GHC.Types.Id.Info (
cafInfo, setCafInfo,
-- ** The LambdaFormInfo type
- LambdaFormInfo(..),
- lfInfo, setLFInfo,
+ LambdaFormInfo,
+ lfInfo, setLFInfo, setTagSig,
+
+ tagSig,
-- ** Tick-box Info
TickBoxOp(..), TickBoxId,
@@ -108,10 +110,11 @@ import GHC.Utils.Misc
import GHC.Utils.Outputable
import GHC.Utils.Panic
import GHC.Utils.Panic.Plain
+import GHC.Stg.InferTags.TagSig
import Data.Word
-import GHC.StgToCmm.Types (LambdaFormInfo (..))
+import GHC.StgToCmm.Types (LambdaFormInfo)
-- infixl so you can say (id `set` a `set` b)
infixl 1 `setRuleInfo`,
@@ -126,7 +129,6 @@ infixl 1 `setRuleInfo`,
`setDemandInfo`,
`setNeverRepPoly`,
`setLevityInfoWithType`
-
{-
************************************************************************
* *
@@ -173,8 +175,59 @@ data IdDetails
| CoVarId -- ^ A coercion variable
-- This only covers /un-lifted/ coercions, of type
-- (t1 ~# t2) or (t1 ~R# t2), not their lifted variants
- | JoinId JoinArity -- ^ An 'Id' for a join point taking n arguments
- -- Note [Join points] in "GHC.Core"
+ | JoinId JoinArity (Maybe [CbvMark])
+ -- ^ An 'Id' for a join point taking n arguments
+ -- Note [Join points] in "GHC.Core"
+ | StrictWorkerId [CbvMark]
+ -- ^ An 'Id' for a worker function, which expects some arguments to be
+ -- passed both evaluated and tagged.
+ -- See Note [Strict Worker Ids]
+ -- See Note [Tag Inference]
+
+{- Note [Strict Worker Ids]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+StrictWorkerId essentially constrains the calling convention for the given Id.
+It requires arguments marked as tagged to be passed properly evaluated+*tagged*.
+
+While we were always able to express the fact that an argument is evaluated
+via attaching a evaldUnfolding to the functions arguments there used to be
+no way to express that an lifted argument is already properly tagged once we jump
+into the RHS.
+This means when branching on a value the RHS always needed to perform
+a tag check to ensure the argument wasn't an indirection (the evaldUnfolding
+already ruling out thunks).
+
+StrictWorkerIds give us this additional expressiveness which we use to improve
+runtime. This is all part of the TagInference work. See also Note [Tag Inference].
+
+What we do is:
+* If we think a function might benefit from passing certain arguments unlifted
+ for performance reasons we attach an evaldUnfolding to these arguments.
+* Either during W/W, but at latest during Tidy VanillaIds with arguments that
+ have evaldUnfoldings are turned into StrictWorkerIds.
+* During CorePrep calls to StrictWorkerIds are eta expanded.
+* During Stg CodeGen:
+ * When we call a binding that is a StrictWorkerId:
+ * We check if all arguments marked to be passed unlifted are already tagged.
+ * If they aren't we will wrap the call in case expressions which will evaluate+tag
+ these arguments before jumping to the function.
+* During Cmm codeGen:
+ * When generating code for the RHS of a StrictWorker binding
+ we omit tag checks when using arguments marked as tagged.
+
+We primarily use this for workers where we mark strictly demanded arguments
+and arguments representing strict fields as call-by-value during W/W. But we
+also check other functions during tidy and potentially turn some of them into
+strict workers and mark some of their arguments as call-by-value by looking at
+argument unfoldings.
+
+NB: I choose to put the information into a new Id constructor since these are loaded
+at all optimization levels. This makes it trivial to ensure the additional
+calling convention demands are available at all call sites. Putting it into
+IdInfo would require us at the very least to always decode the IdInfo
+just to decide if we need to throw it away or not after.
+
+-}
-- | Recursive Selector Parent
data RecSelParent = RecSelData TyCon | RecSelPatSyn PatSyn deriving Eq
@@ -198,8 +251,8 @@ isCoVarDetails :: IdDetails -> Bool
isCoVarDetails CoVarId = True
isCoVarDetails _ = False
-isJoinIdDetails_maybe :: IdDetails -> Maybe JoinArity
-isJoinIdDetails_maybe (JoinId join_arity) = Just join_arity
+isJoinIdDetails_maybe :: IdDetails -> Maybe (JoinArity, (Maybe [CbvMark]))
+isJoinIdDetails_maybe (JoinId join_arity marks) = Just (join_arity, marks)
isJoinIdDetails_maybe _ = Nothing
instance Outputable IdDetails where
@@ -210,6 +263,7 @@ pprIdDetails VanillaId = empty
pprIdDetails other = brackets (pp other)
where
pp VanillaId = panic "pprIdDetails"
+ pp (StrictWorkerId dmds) = text "StrictWorker" <> parens (ppr dmds)
pp (DataConWorkId _) = text "DataCon"
pp (DataConWrapId _) = text "DataConWrapper"
pp (ClassOpId {}) = text "ClassOp"
@@ -221,7 +275,7 @@ pprIdDetails other = brackets (pp other)
= brackets $ text "RecSel" <>
ppWhen is_naughty (text "(naughty)")
pp CoVarId = text "CoVarId"
- pp (JoinId arity) = text "JoinId" <> parens (int arity)
+ pp (JoinId arity marks) = text "JoinId" <> parens (int arity) <> parens (ppr marks)
{-
************************************************************************
@@ -274,7 +328,10 @@ data IdInfo
-- 4% in some programs. See #17497 and associated MR.
--
-- See documentation of the getters for what these packed fields mean.
- lfInfo :: !(Maybe LambdaFormInfo)
+ lfInfo :: !(Maybe LambdaFormInfo),
+
+ -- See documentation of the getters for what these packed fields mean.
+ tagSig :: !(Maybe TagSig)
}
-- | Encodes arities, OneShotInfo, CafInfo and LevityInfo.
@@ -365,6 +422,9 @@ cafInfo = bitfieldGetCafInfo . bitfield
callArityInfo :: IdInfo -> ArityInfo
callArityInfo = bitfieldGetCallArityInfo . bitfield
+tagSigInfo :: IdInfo -> Maybe TagSig
+tagSigInfo = tagSig
+
-- Setters
setRuleInfo :: IdInfo -> RuleInfo -> IdInfo
@@ -414,6 +474,9 @@ setCafInfo info caf =
setLFInfo :: IdInfo -> LambdaFormInfo -> IdInfo
setLFInfo info lf = info { lfInfo = Just lf }
+setTagSig :: IdInfo -> TagSig -> IdInfo
+setTagSig info sig = info { tagSig = Just sig }
+
setOneShotInfo :: IdInfo -> OneShotInfo -> IdInfo
setOneShotInfo info lb =
info { bitfield = bitfieldSetOneShotInfo lb (bitfield info) }
@@ -444,7 +507,8 @@ vanillaIdInfo
bitfieldSetOneShotInfo NoOneShotInfo $
bitfieldSetLevityInfo NoLevityInfo $
emptyBitField,
- lfInfo = Nothing
+ lfInfo = Nothing,
+ tagSig = Nothing
}
-- | More informative 'IdInfo' we can use when we know the 'Id' has no CAF references
diff --git a/compiler/GHC/Types/Id/Make.hs b/compiler/GHC/Types/Id/Make.hs
index 6fe9f0dafe..67f7e405a3 100644
--- a/compiler/GHC/Types/Id/Make.hs
+++ b/compiler/GHC/Types/Id/Make.hs
@@ -491,6 +491,10 @@ mkDictSelId name clas
| otherwise
= base_info `setRuleInfo` mkRuleInfo [rule]
+ `setInlinePragInfo` neverInlinePragma
+ `setUnfoldingInfo` mkInlineUnfoldingWithArity 1
+ defaultSimpleOpts
+ (mkDictSelRhs clas val_index)
-- Add a magic BuiltinRule, but no unfolding
-- so that the rule is always available to fire.
-- See Note [ClassOp/DFun selection] in GHC.Tc.TyCl.Instance
diff --git a/compiler/GHC/Types/RepType.hs b/compiler/GHC/Types/RepType.hs
index b2e8a1c3b8..cf6517bd39 100644
--- a/compiler/GHC/Types/RepType.hs
+++ b/compiler/GHC/Types/RepType.hs
@@ -14,7 +14,8 @@ module GHC.Types.RepType
typePrimRep, typePrimRep1, typeMonoPrimRep_maybe,
runtimeRepPrimRep, typePrimRepArgs,
PrimRep(..), primRepToType,
- countFunRepArgs, countConRepArgs, tyConPrimRep, tyConPrimRep1,
+ countFunRepArgs, countConRepArgs, dataConRuntimeRepStrictness,
+ tyConPrimRep, tyConPrimRep1,
-- * Unboxed sum representation type
ubxSumRepType, layoutUbxSum, typeSlotTy, SlotTy (..),
@@ -127,8 +128,41 @@ countConRepArgs dc = go (dataConRepArity dc) (dataConRepType dc)
| otherwise
= pprPanic "countConRepArgs: arity greater than type can handle" (ppr (n, ty, typePrimRep ty))
+dataConRuntimeRepStrictness :: HasDebugCallStack => DataCon -> [StrictnessMark]
+-- ^ Give the demands on the arguments of a
+-- Core constructor application (Con dc args) at runtime.
+-- Assumes the constructor is not levity polymorphic. For example
+-- unboxed tuples won't work.
+dataConRuntimeRepStrictness dc =
+
+ -- pprTrace "dataConRuntimeRepStrictness" (ppr dc $$ ppr (dataConRepArgTys dc)) $
+
+ let repMarks = dataConRepStrictness dc
+ repTys = map irrelevantMult $ dataConRepArgTys dc
+ in -- todo: assert dc != unboxedTuple/unboxedSum
+ go repMarks repTys []
+ where
+ go (mark:marks) (ty:types) out_marks
+ -- Zero-width argument, mark is irrelevant at runtime.
+ | -- pprTrace "VoidTy" (ppr ty) $
+ (isZeroBitTy ty)
+ = go marks types out_marks
+ -- Single rep argument, e.g. Int
+ -- Keep mark as-is
+ | [_] <- reps
+ = go marks types (mark:out_marks)
+ -- Multi-rep argument, e.g. (# Int, Bool #) or (# Int | Bool #)
+ -- Make up one non-strict mark per runtime argument.
+ | otherwise -- TODO: Assert real_reps /= null
+ = go marks types ((replicate (length real_reps) NotMarkedStrict)++out_marks)
+ where
+ reps = typePrimRep ty
+ real_reps = filter (not . isVoidRep) $ reps
+ go [] [] out_marks = reverse out_marks
+ go _m _t _o = pprPanic "dataConRuntimeRepStrictness2" (ppr dc $$ ppr _m $$ ppr _t $$ ppr _o)
+
-- | True if the type has zero width.
-isZeroBitTy :: Type -> Bool
+isZeroBitTy :: HasDebugCallStack => Type -> Bool
isZeroBitTy = null . typePrimRep
diff --git a/compiler/GHC/Unit/Module/ModGuts.hs b/compiler/GHC/Unit/Module/ModGuts.hs
index 1558e5944a..4fc683d844 100644
--- a/compiler/GHC/Unit/Module/ModGuts.hs
+++ b/compiler/GHC/Unit/Module/ModGuts.hs
@@ -31,6 +31,7 @@ import GHC.Types.Fixity.Env
import GHC.Types.ForeignStubs
import GHC.Types.HpcInfo
import GHC.Types.Name.Reader
+import GHC.Types.Name.Set (NameSet)
import GHC.Types.SafeHaskell
import GHC.Types.SourceFile ( HscSource(..), hscSourceToIsBoot )
import GHC.Types.SrcLoc
@@ -88,6 +89,7 @@ data ModGuts
mg_fam_inst_env :: FamInstEnv, -- ^ Type-family instance environment for
-- /home-package/ modules (including this
-- one); c.f. 'tcg_fam_inst_env'
+ mg_boot_exports :: !NameSet, -- Things that are also export via hs-boot file
mg_safe_haskell :: SafeHaskellMode, -- ^ Safe Haskell mode
mg_trust_pkg :: Bool, -- ^ Do we need to trust our
diff --git a/compiler/GHC/Utils/Misc.hs b/compiler/GHC/Utils/Misc.hs
index 8f33944b86..3bc4e6adb8 100644
--- a/compiler/GHC/Utils/Misc.hs
+++ b/compiler/GHC/Utils/Misc.hs
@@ -30,7 +30,7 @@ module GHC.Utils.Misc (
mapAndUnzip, mapAndUnzip3,
filterOut, partitionWith,
- dropWhileEndLE, spanEnd, last2, lastMaybe,
+ dropWhileEndLE, spanEnd, last2, lastMaybe, onJust,
List.foldl1', foldl2, count, countWhile, all2,
@@ -764,6 +764,10 @@ lastMaybe :: [a] -> Maybe a
lastMaybe [] = Nothing
lastMaybe xs = Just $ last xs
+-- | @onJust x m f@ applies f to the value inside the Just or returns the default.
+onJust :: b -> Maybe a -> (a->b) -> b
+onJust dflt = flip (maybe dflt)
+
-- | Split a list into its last element and the initial part of the list.
-- @snocView xs = Just (init xs, last xs)@ for non-empty lists.
-- @snocView xs = Nothing@ otherwise.
diff --git a/compiler/ghc.cabal.in b/compiler/ghc.cabal.in
index efc4639b96..068ded4e9c 100644
--- a/compiler/ghc.cabal.in
+++ b/compiler/ghc.cabal.in
@@ -546,10 +546,15 @@ Library
GHC.Stg.Lift.Config
GHC.Stg.Lift.Monad
GHC.Stg.Lint
+ GHC.Stg.InferTags
+ GHC.Stg.InferTags.Rewrite
+ GHC.Stg.InferTags.TagSig
+ GHC.Stg.InferTags.Types
GHC.Stg.Pipeline
GHC.Stg.Stats
GHC.Stg.Subst
GHC.Stg.Syntax
+ GHC.Stg.Utils
GHC.StgToByteCode
GHC.StgToCmm
GHC.StgToCmm.ArgRep
@@ -570,6 +575,7 @@ Library
GHC.StgToCmm.Prim
GHC.StgToCmm.Prof
GHC.StgToCmm.Sequel
+ GHC.StgToCmm.TagCheck
GHC.StgToCmm.Ticky
GHC.StgToCmm.Types
GHC.StgToCmm.Utils