diff options
Diffstat (limited to 'compiler/GHC/StgToCmm')
-rw-r--r-- | compiler/GHC/StgToCmm/Bind.hs | 63 | ||||
-rw-r--r-- | compiler/GHC/StgToCmm/Closure.hs | 39 | ||||
-rw-r--r-- | compiler/GHC/StgToCmm/Config.hs | 76 | ||||
-rw-r--r-- | compiler/GHC/StgToCmm/DataCon.hs | 61 | ||||
-rw-r--r-- | compiler/GHC/StgToCmm/Env.hs | 12 | ||||
-rw-r--r-- | compiler/GHC/StgToCmm/Expr.hs | 66 | ||||
-rw-r--r-- | compiler/GHC/StgToCmm/ExtCode.hs | 13 | ||||
-rw-r--r-- | compiler/GHC/StgToCmm/Heap.hs | 12 | ||||
-rw-r--r-- | compiler/GHC/StgToCmm/Hpc.hs | 10 | ||||
-rw-r--r-- | compiler/GHC/StgToCmm/Layout.hs | 37 | ||||
-rw-r--r-- | compiler/GHC/StgToCmm/Monad.hs | 310 | ||||
-rw-r--r-- | compiler/GHC/StgToCmm/Prim.hs | 179 | ||||
-rw-r--r-- | compiler/GHC/StgToCmm/Prof.hs | 98 | ||||
-rw-r--r-- | compiler/GHC/StgToCmm/Sequel.hs | 46 | ||||
-rw-r--r-- | compiler/GHC/StgToCmm/Ticky.hs | 58 | ||||
-rw-r--r-- | compiler/GHC/StgToCmm/Types.hs | 8 | ||||
-rw-r--r-- | compiler/GHC/StgToCmm/Utils.hs | 19 |
17 files changed, 581 insertions, 526 deletions
diff --git a/compiler/GHC/StgToCmm/Bind.hs b/compiler/GHC/StgToCmm/Bind.hs index 7d89b71309..7107370698 100644 --- a/compiler/GHC/StgToCmm/Bind.hs +++ b/compiler/GHC/StgToCmm/Bind.hs @@ -26,6 +26,7 @@ import GHC.Stg.Syntax import GHC.Platform import GHC.Platform.Profile +import GHC.StgToCmm.Config import GHC.StgToCmm.Expr import GHC.StgToCmm.Monad import GHC.StgToCmm.Env @@ -106,10 +107,9 @@ cgTopRhsClosure platform rec id ccs upd_flag args body = gen_code lf_info _closure_label = do { profile <- getProfile - ; dflags <- getDynFlags ; let name = idName id ; mod_name <- getModuleName - ; let descr = closureDescription dflags mod_name name + ; let descr = closureDescription mod_name name closure_info = mkClosureInfo profile True id lf_info 0 0 descr -- We don't generate the static closure here, because we might @@ -356,9 +356,8 @@ mkRhsClosure profile bndr cc fvs upd_flag args body -- MAKE CLOSURE INFO FOR THIS CLOSURE ; mod_name <- getModuleName - ; dflags <- getDynFlags ; let name = idName bndr - descr = closureDescription dflags mod_name name + descr = closureDescription mod_name name fv_details :: [(NonVoid Id, ByteOff)] header = if isLFThunk lf_info then ThunkHeader else StdHeader (tot_wds, ptr_wds, fv_details) @@ -404,15 +403,15 @@ cgRhsStdThunk bndr lf_info payload do { -- LAY OUT THE OBJECT mod_name <- getModuleName - ; dflags <- getDynFlags - ; profile <- getProfile - ; let platform = profilePlatform profile + ; cfg <- getStgToCmmConfig + ; let profile = stgToCmmProfile cfg + ; let platform = stgToCmmPlatform cfg header = if isLFThunk lf_info then ThunkHeader else StdHeader (tot_wds, ptr_wds, payload_w_offsets) = mkVirtHeapOffsets profile header (addArgReps (nonVoidStgArgs payload)) - descr = closureDescription dflags mod_name (idName bndr) + descr = closureDescription mod_name (idName bndr) closure_info = mkClosureInfo profile False -- Not static bndr lf_info tot_wds ptr_wds descr @@ -563,16 +562,18 @@ mkSlowEntryCode :: Id -> ClosureInfo -> [LocalReg] -> FCode () -- Here, we emit the slow-entry code. mkSlowEntryCode bndr cl_info arg_regs -- function closure is already in `Node' | Just (_, ArgGen _) <- closureFunInfo cl_info - = do profile <- getProfile - platform <- getPlatform + = do cfg <- getStgToCmmConfig + upd_frame <- getUpdFrameOff let node = idToReg platform (NonVoid bndr) + profile = stgToCmmProfile cfg + platform = stgToCmmPlatform cfg slow_lbl = closureSlowEntryLabel platform cl_info fast_lbl = closureLocalEntryLabel platform cl_info -- mkDirectJump does not clobber `Node' containing function closure jump = mkJump profile NativeNodeCall (mkLblExpr fast_lbl) (map (CmmReg . CmmLocal) (node : arg_regs)) - (initUpdFrameOff platform) + upd_frame tscope <- getTickScope emitProcWithConvention Slow Nothing slow_lbl (node : arg_regs) (jump, tscope) @@ -620,9 +621,10 @@ blackHoleIt node_reg emitBlackHoleCode :: CmmExpr -> FCode () emitBlackHoleCode node = do - dflags <- getDynFlags - profile <- getProfile - let platform = profilePlatform profile + cfg <- getStgToCmmConfig + let profile = stgToCmmProfile cfg + platform = stgToCmmPlatform cfg + is_eager_bh = stgToCmmEagerBlackHole cfg -- Eager blackholing is normally disabled, but can be turned on with -- -feager-blackholing. When it is on, we replace the info pointer @@ -642,8 +644,7 @@ emitBlackHoleCode node = do -- Note the eager-blackholing check is here rather than in blackHoleOnEntry, -- because emitBlackHoleCode is called from GHC.Cmm.Parser. - let eager_blackholing = not (profileIsProfiling profile) - && gopt Opt_EagerBlackHoling dflags + let eager_blackholing = not (profileIsProfiling profile) && is_eager_bh -- Profiling needs slop filling (to support LDV -- profiling), so currently eager blackholing doesn't -- work with profiling. @@ -668,11 +669,11 @@ setupUpdate closure_info node body then do tickyUpdateFrameOmitted; body else do tickyPushUpdateFrame - dflags <- getDynFlags + cfg <- getStgToCmmConfig let - bh = blackHoleOnEntry closure_info && - not (sccProfilingEnabled dflags) && - gopt Opt_EagerBlackHoling dflags + bh = blackHoleOnEntry closure_info + && not (stgToCmmSCCProfiling cfg) + && stgToCmmEagerBlackHole cfg lbl | bh = mkBHUpdInfoLabel | otherwise = mkUpdInfoLabel @@ -730,11 +731,12 @@ link_caf :: LocalReg -- pointer to the closure -- This function returns the address of the black hole, so it can be -- updated with the new value when available. link_caf node = do - { profile <- getProfile + { cfg <- getStgToCmmConfig -- Call the RTS function newCAF, returning the newly-allocated -- blackhole indirection closure ; let newCAF_lbl = mkForeignLabel (fsLit "newCAF") Nothing ForeignLabelInExternalPackage IsFunction + ; let profile = stgToCmmProfile cfg ; let platform = profilePlatform profile ; bh <- newTemp (bWord platform) ; emitRtsCallGen [(bh,AddrHint)] newCAF_lbl @@ -744,8 +746,9 @@ link_caf node = do -- see Note [atomic CAF entry] in rts/sm/Storage.c ; updfr <- getUpdFrameOff - ; ptr_opts <- getPtrOpts - ; let target = entryCode platform (closureInfoPtr ptr_opts (CmmReg (CmmLocal node))) + ; let align_check = stgToCmmAlignCheck cfg + ; let target = entryCode platform + (closureInfoPtr platform align_check (CmmReg (CmmLocal node))) ; emit =<< mkCmmIfThen (cmmEqWord platform (CmmReg (CmmLocal bh)) (zeroExpr platform)) -- re-enter the CAF @@ -762,17 +765,11 @@ link_caf node = do -- @closureDescription@ from the let binding information. closureDescription - :: DynFlags - -> Module -- Module + :: Module -- Module -> Name -- Id of closure binding -> String -- Not called for StgRhsCon which have global info tables built in -- CgConTbls.hs with a description generated from the data constructor -closureDescription dflags mod_name name - = let ctx = initSDocContext dflags defaultDumpStyle - -- defaultDumpStyle, because we want to see the unique on the Name. - in renderWithContext ctx (char '<' <> - (if isExternalName name - then ppr name -- ppr will include the module name prefix - else pprModule mod_name <> char '.' <> ppr name) <> - char '>') +closureDescription mod_name name + = renderWithContext defaultSDocContext + (char '<' <> pprFullName mod_name name <> char '>') diff --git a/compiler/GHC/StgToCmm/Closure.hs b/compiler/GHC/StgToCmm/Closure.hs index 8b9e4f044b..b2f51c60fd 100644 --- a/compiler/GHC/StgToCmm/Closure.hs +++ b/compiler/GHC/StgToCmm/Closure.hs @@ -35,9 +35,9 @@ module GHC.StgToCmm.Closure ( isLFThunk, isLFReEntrant, lfUpdatable, -- * Used by other modules - CgLoc(..), SelfLoopInfo, CallMethod(..), + CgLoc(..), CallMethod(..), nodeMustPointToIt, isKnownFun, funTag, tagForArity, - CallOpts(..), getCallMethod, + getCallMethod, -- * ClosureInfo ClosureInfo, @@ -78,6 +78,7 @@ import GHC.Cmm import GHC.Cmm.Utils import GHC.Cmm.Ppr.Expr() -- For Outputable instances import GHC.StgToCmm.Types +import GHC.StgToCmm.Sequel import GHC.Types.CostCentre import GHC.Cmm.BlockId @@ -99,6 +100,7 @@ import GHC.Utils.Misc import Data.Coerce (coerce) import qualified Data.ByteString.Char8 as BS8 +import GHC.StgToCmm.Config ----------------------------------------------------------------------------- -- Data types and synonyms @@ -126,8 +128,6 @@ pprCgLoc platform = \case CmmLoc e -> text "cmm" <+> pdoc platform e LneLoc b rs -> text "lne" <+> ppr b <+> ppr rs -type SelfLoopInfo = (Id, BlockId, [LocalReg]) - -- used by ticky profiling isKnownFun :: LambdaFormInfo -> Bool isKnownFun LFReEntrant{} = True @@ -492,13 +492,7 @@ data CallMethod CLabel -- The code label RepArity -- Its arity -data CallOpts = CallOpts - { co_profile :: !Profile -- ^ Platform profile - , co_loopification :: !Bool -- ^ Loopification enabled (cf @-floopification@) - , co_ticky :: !Bool -- ^ Ticky profiling enabled (cf @-ticky@) - } - -getCallMethod :: CallOpts +getCallMethod :: StgToCmmConfig -> Name -- Function being applied -> Id -- Function Id used to chech if it can refer to -- CAF's and whether the function is tail-calling @@ -511,12 +505,11 @@ getCallMethod :: CallOpts -- tail calls using the same data constructor, -- JumpToIt. This saves us one case branch in -- cgIdApp - -> Maybe SelfLoopInfo -- can we perform a self-recursive tail call? + -> Maybe SelfLoopInfo -- can we perform a self-recursive tail-call -> CallMethod -getCallMethod opts _ id _ n_args v_args _cg_loc - (Just (self_loop_id, block_id, args)) - | co_loopification opts +getCallMethod cfg _ id _ n_args v_args _cg_loc (Just (self_loop_id, block_id, args)) + | stgToCmmLoopification cfg , id == self_loop_id , args `lengthIs` (n_args - v_args) -- If these patterns match then we know that: @@ -527,14 +520,13 @@ getCallMethod opts _ id _ n_args v_args _cg_loc -- self-recursive tail calls] in GHC.StgToCmm.Expr for more details = JumpToIt block_id args -getCallMethod opts name id (LFReEntrant _ arity _ _) n_args _v_args _cg_loc - _self_loop_info +getCallMethod cfg name id (LFReEntrant _ arity _ _) n_args _v_args _cg_loc _self_loop_info | n_args == 0 -- No args at all - && not (profileIsProfiling (co_profile opts)) + && not (profileIsProfiling (stgToCmmProfile cfg)) -- See Note [Evaluating functions with profiling] in rts/Apply.cmm = assert (arity /= 0) ReturnIt | n_args < arity = SlowCall -- Not enough args - | otherwise = DirectEntry (enterIdLabel (profilePlatform (co_profile opts)) name (idCafInfo id)) arity + | otherwise = DirectEntry (enterIdLabel (stgToCmmPlatform cfg) name (idCafInfo id)) arity getCallMethod _ _name _ LFUnlifted n_args _v_args _cg_loc _self_loop_info = assert (n_args == 0) ReturnIt @@ -544,14 +536,14 @@ getCallMethod _ _name _ (LFCon _) n_args _v_args _cg_loc _self_loop_info -- n_args=0 because it'd be ill-typed to apply a saturated -- constructor application to anything -getCallMethod opts name id (LFThunk _ _ updatable std_form_info is_fun) +getCallMethod cfg name id (LFThunk _ _ updatable std_form_info is_fun) n_args _v_args _cg_loc _self_loop_info | is_fun -- it *might* be a function, so we must "call" it (which is always safe) = SlowCall -- We cannot just enter it [in eval/apply, the entry code -- is the fast-entry code] -- Since is_fun is False, we are *definitely* looking at a data value - | updatable || co_ticky opts -- to catch double entry + | updatable || stgToCmmDoTicky cfg -- to catch double entry {- OLD: || opt_SMP I decided to remove this, because in SMP mode it doesn't matter if we enter the same thunk multiple times, so the optimisation @@ -573,7 +565,7 @@ getCallMethod opts name id (LFThunk _ _ updatable std_form_info is_fun) | otherwise -- Jump direct to code for single-entry thunks = assert (n_args == 0) $ - DirectEntry (thunkEntryLabel (profilePlatform (co_profile opts)) name (idCafInfo id) std_form_info + DirectEntry (thunkEntryLabel (stgToCmmPlatform cfg) name (idCafInfo id) std_form_info updatable) 0 getCallMethod _ _name _ (LFUnknown True) _n_arg _v_args _cg_locs _self_loop_info @@ -583,8 +575,7 @@ getCallMethod _ name _ (LFUnknown False) n_args _v_args _cg_loc _self_loop_info = assertPpr (n_args == 0) (ppr name <+> ppr n_args) EnterIt -- Not a function -getCallMethod _ _name _ LFLetNoEscape _n_args _v_args (LneLoc blk_id lne_regs) - _self_loop_info +getCallMethod _ _name _ LFLetNoEscape _n_args _v_args (LneLoc blk_id lne_regs) _self_loop_info = JumpToIt blk_id lne_regs getCallMethod _ _ _ _ _ _ _ _ = panic "Unknown call method" diff --git a/compiler/GHC/StgToCmm/Config.hs b/compiler/GHC/StgToCmm/Config.hs new file mode 100644 index 0000000000..8751f5ffe5 --- /dev/null +++ b/compiler/GHC/StgToCmm/Config.hs @@ -0,0 +1,76 @@ +-- | The stg to cmm code generator configuration + +module GHC.StgToCmm.Config + ( StgToCmmConfig(..) + , stgToCmmPlatform + ) where + +import GHC.Platform.Profile +import GHC.Platform +import GHC.Unit.Module +import GHC.Utils.Outputable +import GHC.Utils.TmpFs + +import GHC.Prelude + + +-- This config is static and contains information only passed *downwards* by StgToCmm.Monad +data StgToCmmConfig = StgToCmmConfig + ----------------------------- General Settings -------------------------------- + { stgToCmmProfile :: !Profile -- ^ Current profile + , stgToCmmThisModule :: Module -- ^ The module being compiled. This field kept lazy for + -- Cmm/Parser.y which preloads it with a panic + , stgToCmmTmpDir :: !TempDir -- ^ Temp Dir for files used in compilation + , stgToCmmContext :: !SDocContext -- ^ Context for StgToCmm phase + , stgToCmmDebugLevel :: !Int -- ^ The verbosity of debug messages + , stgToCmmBinBlobThresh :: !Word -- ^ Binary literals (e.g. strings) whose size is above this + -- threshold will be dumped in a binary file by the assembler + -- code generator (0 to disable) + , stgToCmmMaxInlAllocSize :: !Int -- ^ Max size, in bytes, of inline array allocations. + ------------------------------ Ticky Options ---------------------------------- + , stgToCmmDoTicky :: !Bool -- ^ Ticky profiling enabled (cf @-ticky@) + , stgToCmmTickyAllocd :: !Bool -- ^ True indicates ticky prof traces allocs of each named + -- thing in addition to allocs _by_ that thing + , stgToCmmTickyLNE :: !Bool -- ^ True indicates ticky uses name-specific counters for + -- join-points (let-no-escape) + , stgToCmmTickyDynThunk :: !Bool -- ^ True indicates ticky uses name-specific counters for + -- dynamic thunks + ---------------------------------- Flags -------------------------------------- + , stgToCmmLoopification :: !Bool -- ^ Loopification enabled (cf @-floopification@) + , stgToCmmAlignCheck :: !Bool -- ^ Insert alignment check (cf @-falignment-sanitisation@) + , stgToCmmOptHpc :: !Bool -- ^ perform code generation for code coverage + , stgToCmmFastPAPCalls :: !Bool -- ^ + , stgToCmmSCCProfiling :: !Bool -- ^ Check if cost-centre profiling is enabled + , stgToCmmEagerBlackHole :: !Bool -- ^ + , stgToCmmInfoTableMap :: !Bool -- ^ true means generate C Stub for IPE map, See note [Mapping + -- Info Tables to Source Positions] + , stgToCmmOmitYields :: !Bool -- ^ true means omit heap checks when no allocation is performed + , stgToCmmOmitIfPragmas :: !Bool -- ^ true means don't generate interface programs (implied by -O0) + , stgToCmmPIC :: !Bool -- ^ true if @-fPIC@ + , stgToCmmPIE :: !Bool -- ^ true if @-fPIE@ + , stgToCmmExtDynRefs :: !Bool -- ^ true if @-fexternal-dynamic-refs@, meaning generate + -- code for linking against dynamic libraries + , stgToCmmDoBoundsCheck :: !Bool -- ^ decides whether to check array bounds in StgToCmm.Prim + -- or not + ------------------------------ Backend Flags ---------------------------------- + , stgToCmmAllowBigArith :: !Bool -- ^ Allowed to emit larger than native size arithmetic (only LLVM and C backends) + , stgToCmmAllowQuotRemInstr :: !Bool -- ^ Allowed to generate QuotRem instructions + , stgToCmmAllowQuotRem2 :: !Bool -- ^ Allowed to generate QuotRem + , stgToCmmAllowExtendedAddSubInstrs :: !Bool -- ^ Allowed to generate AddWordC, SubWordC, Add2, etc. + , stgToCmmAllowIntMul2Instr :: !Bool -- ^ Allowed to generate IntMul2 instruction + , stgToCmmAllowFabsInstrs :: !Bool -- ^ Allowed to generate Fabs instructions + ------------------------------ SIMD flags ------------------------------------ + -- Each of these flags checks vector compatibility with the backend requested + -- during compilation. In essence, this means checking for @-fllvm@ which is + -- the only backend that currently allows SIMD instructions, see + -- Ghc.StgToCmm.Prim.checkVecCompatibility for these flags only call site. + , stgToCmmVecInstrsErr :: Maybe String -- ^ Error (if any) to raise when vector instructions are + -- used, see @StgToCmm.Prim.checkVecCompatibility@ + , stgToCmmAvx :: !Bool -- ^ check for Advanced Vector Extensions + , stgToCmmAvx2 :: !Bool -- ^ check for Advanced Vector Extensions 2 + , stgToCmmAvx512f :: !Bool -- ^ check for Advanced Vector 512-bit Extensions + } + + +stgToCmmPlatform :: StgToCmmConfig -> Platform +stgToCmmPlatform = profilePlatform . stgToCmmProfile diff --git a/compiler/GHC/StgToCmm/DataCon.hs b/compiler/GHC/StgToCmm/DataCon.hs index 2805aceb91..6a25562f6a 100644 --- a/compiler/GHC/StgToCmm/DataCon.hs +++ b/compiler/GHC/StgToCmm/DataCon.hs @@ -18,7 +18,6 @@ module GHC.StgToCmm.DataCon ( import GHC.Prelude import GHC.Platform -import GHC.Platform.Profile import GHC.Stg.Syntax import GHC.Core ( AltCon(..) ) @@ -38,7 +37,6 @@ import GHC.Runtime.Heap.Layout import GHC.Types.CostCentre import GHC.Unit import GHC.Core.DataCon -import GHC.Driver.Session import GHC.Data.FastString import GHC.Types.Id import GHC.Types.Id.Info( CafInfo( NoCafRefs ) ) @@ -53,19 +51,20 @@ import GHC.Utils.Monad (mapMaybeM) import Control.Monad import Data.Char +import GHC.StgToCmm.Config (stgToCmmPlatform) --------------------------------------------------------------- -- Top-level constructors --------------------------------------------------------------- -cgTopRhsCon :: DynFlags +cgTopRhsCon :: StgToCmmConfig -> Id -- Name of thing bound to this RHS -> DataCon -- Id -> ConstructorNumber -> [NonVoid StgArg] -- Args -> (CgIdInfo, FCode ()) -cgTopRhsCon dflags id con mn args - | Just static_info <- precomputedStaticConInfo_maybe dflags id con args +cgTopRhsCon cfg id con mn args + | Just static_info <- precomputedStaticConInfo_maybe cfg id con args , let static_code | isInternalName name = pure () | otherwise = gen_code = -- There is a pre-allocated static closure available; use it @@ -81,7 +80,7 @@ cgTopRhsCon dflags id con mn args = (id_Info, gen_code) where - platform = targetPlatform dflags + platform = stgToCmmPlatform cfg id_Info = litIdInfo platform id (mkConLFInfo con) (CmmLabel closure_label) name = idName id caffy = idCafInfo id -- any stgArgHasCafRefs args @@ -92,7 +91,7 @@ cgTopRhsCon dflags id con mn args ; this_mod <- getModuleName ; when (platformOS platform == OSMinGW32) $ -- Windows DLLs have a problem with static cross-DLL refs. - massert (not (isDllConApp dflags this_mod con (map fromNonVoid args))) + massert (not (isDllConApp platform (stgToCmmExtDynRefs cfg) this_mod con (map fromNonVoid args))) ; assert (args `lengthIs` countConRepArgs con ) return () -- LAY IT OUT @@ -166,18 +165,20 @@ buildDynCon :: Id -- Name of the thing to which this constr will -> FCode (CgIdInfo, FCode CmmAGraph) -- Return details about how to find it and initialization code buildDynCon binder mn actually_bound cc con args - = do dflags <- getDynFlags - buildDynCon' dflags binder mn actually_bound cc con args + = do cfg <- getStgToCmmConfig + -- pprTrace "noCodeLocal:" (ppr (binder,con,args,cgInfo)) True + case precomputedStaticConInfo_maybe cfg binder con args of + Just cgInfo -> return (cgInfo, return mkNop) + Nothing -> buildDynCon' binder mn actually_bound cc con args -buildDynCon' :: DynFlags - -> Id -> ConstructorNumber +buildDynCon' :: Id + -> ConstructorNumber -> Bool -> CostCentreStack -> DataCon -> [NonVoid StgArg] -> FCode (CgIdInfo, FCode CmmAGraph) - {- We used to pass a boolean indicating whether all the args were of size zero, so we could use a static constructor; but I concluded that it just isn't worth it. @@ -188,14 +189,8 @@ The reason for having a separate argument, rather than looking at the addr modes of the args is that we may be in a "knot", and premature looking at the args will cause the compiler to black-hole! -} - -buildDynCon' dflags binder _ _ _cc con args - | Just cgInfo <- precomputedStaticConInfo_maybe dflags binder con args - -- , pprTrace "noCodeLocal:" (ppr (binder,con,args,cgInfo)) True - = return (cgInfo, return mkNop) - -------- buildDynCon': the general case ----------- -buildDynCon' _ binder mn actually_bound ccs con args +buildDynCon' binder mn actually_bound ccs con args = do { (id_info, reg) <- rhsIdInfo binder lf_info ; return (id_info, gen_code reg) } @@ -204,8 +199,9 @@ buildDynCon' _ binder mn actually_bound ccs con args gen_code reg = do { modu <- getModuleName - ; profile <- getProfile - ; let platform = profilePlatform profile + ; cfg <- getStgToCmmConfig + ; let platform = stgToCmmPlatform cfg + profile = stgToCmmProfile cfg (tot_wds, ptr_wds, args_w_offsets) = mkVirtConstrOffsets profile (addArgReps args) nonptr_wds = tot_wds - ptr_wds @@ -224,6 +220,7 @@ buildDynCon' _ binder mn actually_bound ccs con args blame_cc = use_cc -- cost-centre on which to blame the alloc (same) + {- Note [Precomputed static closures] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -317,36 +314,36 @@ We don't support this optimization when compiling into Windows DLLs yet because they don't support cross package data references well. -} --- (precomputedStaticConInfo_maybe dflags id con args) +-- (precomputedStaticConInfo_maybe cfg id con args) -- returns (Just cg_id_info) -- if there is a precomputed static closure for (con args). -- In that case, cg_id_info addresses it. -- See Note [Precomputed static closures] -precomputedStaticConInfo_maybe :: DynFlags -> Id -> DataCon -> [NonVoid StgArg] -> Maybe CgIdInfo -precomputedStaticConInfo_maybe dflags binder con [] +precomputedStaticConInfo_maybe :: StgToCmmConfig -> Id -> DataCon -> [NonVoid StgArg] -> Maybe CgIdInfo +precomputedStaticConInfo_maybe cfg binder con [] -- Nullary constructors | isNullaryRepDataCon con - = Just $ litIdInfo (targetPlatform dflags) binder (mkConLFInfo con) + = Just $ litIdInfo (stgToCmmPlatform cfg) binder (mkConLFInfo con) (CmmLabel (mkClosureLabel (dataConName con) NoCafRefs)) -precomputedStaticConInfo_maybe dflags binder con [arg] +precomputedStaticConInfo_maybe cfg binder con [arg] -- Int/Char values with existing closures in the RTS | intClosure || charClosure - , platformOS platform /= OSMinGW32 || not (positionIndependent dflags) + , platformOS platform /= OSMinGW32 || not (stgToCmmPIE cfg || stgToCmmPIC cfg) , Just val <- getClosurePayload arg , inRange val = let intlike_lbl = mkCmmClosureLabel rtsUnitId (fsLit label) val_int = fromIntegral val :: Int - offsetW = (val_int - (fromIntegral min_static_range)) * (fixedHdrSizeW profile + 1) + offsetW = (val_int - fromIntegral min_static_range) * (fixedHdrSizeW profile + 1) -- INTLIKE/CHARLIKE closures consist of a header and one word payload static_amode = cmmLabelOffW platform intlike_lbl offsetW in Just $ litIdInfo platform binder (mkConLFInfo con) static_amode where - profile = targetProfile dflags - platform = profilePlatform profile - intClosure = maybeIntLikeCon con + profile = stgToCmmProfile cfg + platform = stgToCmmPlatform cfg + intClosure = maybeIntLikeCon con charClosure = maybeCharLikeCon con getClosurePayload (NonVoid (StgLitArg (LitNumber LitNumInt val))) = Just val - getClosurePayload (NonVoid (StgLitArg (LitChar val))) = Just $ (fromIntegral . ord $ val) + getClosurePayload (NonVoid (StgLitArg (LitChar val))) = Just (fromIntegral . ord $ val) getClosurePayload _ = Nothing -- Avoid over/underflow by comparisons at type Integer! inRange :: Integer -> Bool diff --git a/compiler/GHC/StgToCmm/Env.hs b/compiler/GHC/StgToCmm/Env.hs index f28f0d0ec2..8f82c02e8e 100644 --- a/compiler/GHC/StgToCmm/Env.hs +++ b/compiler/GHC/StgToCmm/Env.hs @@ -44,8 +44,6 @@ import GHC.Utils.Outputable import GHC.Utils.Panic import GHC.Utils.Panic.Plain -import GHC.Driver.Session - ------------------------------------- -- Manipulating CgIdInfo @@ -84,16 +82,16 @@ mkRhsInit platform reg lf_info expr idInfoToAmode :: CgIdInfo -> CmmExpr -- Returns a CmmExpr for the *tagged* pointer -idInfoToAmode (CgIdInfo { cg_loc = CmmLoc e }) = e +idInfoToAmode CgIdInfo { cg_loc = CmmLoc e } = e idInfoToAmode cg_info = pprPanic "idInfoToAmode" (ppr (cg_id cg_info)) -- LneLoc -- | A tag adds a byte offset to the pointer addDynTag :: Platform -> CmmExpr -> DynTag -> CmmExpr -addDynTag platform expr tag = cmmOffsetB platform expr tag +addDynTag = cmmOffsetB maybeLetNoEscape :: CgIdInfo -> Maybe (BlockId, [LocalReg]) -maybeLetNoEscape (CgIdInfo { cg_loc = LneLoc blk_id args}) = Just (blk_id, args) +maybeLetNoEscape CgIdInfo { cg_loc = LneLoc blk_id args} = Just (blk_id, args) maybeLetNoEscape _other = Nothing @@ -120,7 +118,7 @@ addBindsC new_bindings = do getCgIdInfo :: Id -> FCode CgIdInfo getCgIdInfo id - = do { platform <- targetPlatform <$> getDynFlags + = do { platform <- getPlatform ; local_binds <- getBinds -- Try local bindings first ; case lookupVarEnv local_binds id of { Just info -> return info ; @@ -179,7 +177,7 @@ bindArgToReg :: NonVoid Id -> FCode LocalReg bindArgToReg nvid@(NonVoid id) = bindToReg nvid (mkLFArgument id) bindArgsToRegs :: [NonVoid Id] -> FCode [LocalReg] -bindArgsToRegs args = mapM bindArgToReg args +bindArgsToRegs = mapM bindArgToReg idToReg :: Platform -> NonVoid Id -> LocalReg -- Make a register from an Id, typically a function argument, diff --git a/compiler/GHC/StgToCmm/Expr.hs b/compiler/GHC/StgToCmm/Expr.hs index 77476a4b7d..ff80c9eda2 100644 --- a/compiler/GHC/StgToCmm/Expr.hs +++ b/compiler/GHC/StgToCmm/Expr.hs @@ -91,9 +91,10 @@ cgExpr (StgOpApp (StgPrimOp DataToTagOp) [StgVarArg a] _res_ty) = do slow_path <- getCode $ do tmp <- newTemp (bWord platform) _ <- withSequel (AssignTo [tmp] False) (cgIdApp a []) - ptr_opts <- getPtrOpts + profile <- getProfile + align_check <- stgToCmmAlignCheck <$> getStgToCmmConfig emitAssign (CmmLocal result_reg) - $ getConstrTag ptr_opts (cmmUntag platform (CmmReg (CmmLocal tmp))) + $ getConstrTag profile align_check (cmmUntag platform (CmmReg (CmmLocal tmp))) fast_path <- getCode $ do -- Return the constructor index from the pointer tag @@ -102,9 +103,10 @@ cgExpr (StgOpApp (StgPrimOp DataToTagOp) [StgVarArg a] _res_ty) = do $ cmmSubWord platform tag (CmmLit $ mkWordCLit platform 1) -- Return the constructor index recorded in the info table return_info_tag <- getCode $ do - ptr_opts <- getPtrOpts + profile <- getProfile + align_check <- stgToCmmAlignCheck <$> getStgToCmmConfig emitAssign (CmmLocal result_reg) - $ getConstrTag ptr_opts (cmmUntag platform amode) + $ getConstrTag profile align_check (cmmUntag platform amode) emit =<< mkCmmIfThenElse' is_too_big_tag return_info_tag return_ptr_tag (Just False) @@ -540,9 +542,9 @@ isSimpleOp (StgFCallOp (CCall (CCallSpec _ _ safe)) _) _ = return $! not (playSa isSimpleOp (StgPrimOp DataToTagOp) _ = return False isSimpleOp (StgPrimOp op) stg_args = do arg_exprs <- getNonVoidArgAmodes stg_args - dflags <- getDynFlags + cfg <- getStgToCmmConfig -- See Note [Inlining out-of-line primops and heap checks] - return $! shouldInlinePrimOp dflags op arg_exprs + return $! shouldInlinePrimOp cfg op arg_exprs isSimpleOp (StgPrimCallOp _) _ = return False ----------------- @@ -615,9 +617,10 @@ cgAlts gc_plan bndr (AlgAlt tycon) alts else -- No, the get exact tag from info table when mAX_PTR_TAG -- See Note [Double switching for big families] do - ptr_opts <- getPtrOpts + profile <- getProfile + align_check <- stgToCmmAlignCheck <$> getStgToCmmConfig let !untagged_ptr = cmmUntag platform (CmmReg bndr_reg) - !itag_expr = getConstrTag ptr_opts untagged_ptr + !itag_expr = getConstrTag profile align_check untagged_ptr !info0 = first pred <$> via_info if null via_ptr then emitSwitch itag_expr info0 mb_deflt 0 (fam_sz - 1) @@ -888,16 +891,16 @@ cgConApp con mn stg_args cgIdApp :: Id -> [StgArg] -> FCode ReturnKind cgIdApp fun_id args = do fun_info <- getCgIdInfo fun_id - self_loop_info <- getSelfLoop - call_opts <- getCallOpts - profile <- getProfile - let fun_arg = StgVarArg fun_id - fun_name = idName fun_id - fun = idInfoToAmode fun_info - lf_info = cg_lf fun_info - n_args = length args - v_args = length $ filter (isZeroBitTy . stgArgType) args - case getCallMethod call_opts fun_name fun_id lf_info n_args v_args (cg_loc fun_info) self_loop_info of + cfg <- getStgToCmmConfig + self_loop <- getSelfLoop + let profile = stgToCmmProfile cfg + fun_arg = StgVarArg fun_id + fun_name = idName fun_id + fun = idInfoToAmode fun_info + lf_info = cg_lf fun_info + n_args = length args + v_args = length $ filter (isZeroBitTy . stgArgType) args + case getCallMethod cfg fun_name fun_id lf_info n_args v_args (cg_loc fun_info) self_loop of -- A value in WHNF, so we can just return it. ReturnIt | isZeroBitTy (idType fun_id) -> emitReturn [] @@ -975,7 +978,7 @@ cgIdApp fun_id args = do -- Implementation is spread across a couple of places in the code: -- -- * FCode monad stores additional information in its reader environment --- (cgd_self_loop field). This information tells us which function can +-- (stgToCmmSelfLoop field). This information tells us which function can -- tail call itself in an optimized way (it is the function currently -- being compiled), what is the label of a loop header (L1 in example above) -- and information about local registers in which we should arguments @@ -1008,7 +1011,7 @@ cgIdApp fun_id args = do -- command-line option. -- -- * Command line option to turn loopification on and off is implemented in --- DynFlags. +-- DynFlags, then passed to StgToCmmConfig for this phase. -- -- -- Note [Void arguments in self-recursive tail calls] @@ -1036,12 +1039,12 @@ cgIdApp fun_id args = do emitEnter :: CmmExpr -> FCode ReturnKind emitEnter fun = do - { ptr_opts <- getPtrOpts - ; platform <- getPlatform - ; profile <- getProfile + { platform <- getPlatform + ; profile <- getProfile ; adjustHpBackwards - ; sequel <- getSequel - ; updfr_off <- getUpdFrameOff + ; sequel <- getSequel + ; updfr_off <- getUpdFrameOff + ; align_check <- stgToCmmAlignCheck <$> getStgToCmmConfig ; case sequel of -- For a return, we have the option of generating a tag-test or -- not. If the value is tagged, we can return directly, which @@ -1052,7 +1055,9 @@ emitEnter fun = do -- Right now, we do what the old codegen did, and omit the tag -- test, just generating an enter. Return -> do - { let entry = entryCode platform $ closureInfoPtr ptr_opts $ CmmReg nodeReg + { let entry = entryCode platform + $ closureInfoPtr platform align_check + $ CmmReg nodeReg ; emit $ mkJump profile NativeNodeCall entry [cmmUntag platform fun] updfr_off ; return AssignedDirectly @@ -1084,17 +1089,18 @@ emitEnter fun = do -- code in the enclosing case expression. -- AssignTo res_regs _ -> do - { lret <- newBlockId - ; let (off, _, copyin) = copyInOflow profile NativeReturn (Young lret) res_regs [] + { lret <- newBlockId ; lcall <- newBlockId - ; updfr_off <- getUpdFrameOff + ; updfr_off <- getUpdFrameOff + ; align_check <- stgToCmmAlignCheck <$> getStgToCmmConfig + ; let (off, _, copyin) = copyInOflow profile NativeReturn (Young lret) res_regs [] ; let area = Young lret ; let (outArgs, regs, copyout) = copyOutOflow profile NativeNodeCall Call area [fun] updfr_off [] -- refer to fun via nodeReg after the copyout, to avoid having -- both live simultaneously; this sometimes enables fun to be -- inlined in the RHS of the R1 assignment. - ; let entry = entryCode platform (closureInfoPtr ptr_opts (CmmReg nodeReg)) + ; let entry = entryCode platform (closureInfoPtr platform align_check (CmmReg nodeReg)) the_call = toCall entry (Just lret) updfr_off off outArgs regs ; tscope <- getTickScope ; emit $ diff --git a/compiler/GHC/StgToCmm/ExtCode.hs b/compiler/GHC/StgToCmm/ExtCode.hs index 380e4458e2..e80cf8b8b9 100644 --- a/compiler/GHC/StgToCmm/ExtCode.hs +++ b/compiler/GHC/StgToCmm/ExtCode.hs @@ -34,7 +34,7 @@ module GHC.StgToCmm.ExtCode ( getCode, getCodeR, getCodeScoped, emitOutOfLine, withUpdFrameOff, getUpdFrameOff, - getProfile, getPlatform, getPtrOpts + getProfile, getPlatform, getContext ) where @@ -50,10 +50,8 @@ import GHC.StgToCmm.Monad (FCode, newUnique) import GHC.Cmm import GHC.Cmm.CLabel import GHC.Cmm.Graph -import GHC.Cmm.Info import GHC.Cmm.BlockId -import GHC.Driver.Session import GHC.Data.FastString import GHC.Unit.Module import GHC.Types.Unique.FM @@ -61,6 +59,7 @@ import GHC.Types.Unique import GHC.Types.Unique.Supply import Control.Monad (ap) +import GHC.Utils.Outputable (SDocContext) -- | The environment contains variable definitions or blockids. data Named @@ -103,17 +102,14 @@ instance MonadUnique CmmParse where u <- getUniqueM return (decls, u) -instance HasDynFlags CmmParse where - getDynFlags = EC (\_ _ d -> (d,) <$> getDynFlags) - getProfile :: CmmParse Profile getProfile = EC (\_ _ d -> (d,) <$> F.getProfile) getPlatform :: CmmParse Platform getPlatform = EC (\_ _ d -> (d,) <$> F.getPlatform) -getPtrOpts :: CmmParse PtrOpts -getPtrOpts = EC (\_ _ d -> (d,) <$> F.getPtrOpts) +getContext :: CmmParse SDocContext +getContext = EC (\_ _ d -> (d,) <$> F.getContext) -- | Takes the variable declarations and imports from the monad -- and makes an environment, which is looped back into the computation. @@ -128,7 +124,6 @@ loopDecls (EC fcode) = fcode c (addListToUFM e decls) globalDecls return (globalDecls, a) - -- | Get the current environment from the monad. getEnv :: CmmParse Env getEnv = EC $ \_ e s -> return (s, e) diff --git a/compiler/GHC/StgToCmm/Heap.hs b/compiler/GHC/StgToCmm/Heap.hs index 16161cb028..39a7812b70 100644 --- a/compiler/GHC/StgToCmm/Heap.hs +++ b/compiler/GHC/StgToCmm/Heap.hs @@ -429,7 +429,7 @@ altHeapCheckReturnsTo regs lret off code -- is more efficient), but cannot be optimized away in the non-allocating -- case because it may occur in a loop noEscapeHeapCheck :: [LocalReg] -> FCode a -> FCode a -noEscapeHeapCheck regs code = altOrNoEscapeHeapCheck True regs code +noEscapeHeapCheck = altOrNoEscapeHeapCheck True cannedGCReturnsTo :: Bool -> Bool -> CmmExpr -> [LocalReg] -> Label -> ByteOff -> FCode a @@ -605,9 +605,9 @@ do_checks :: Maybe CmmExpr -- Should we check the stack? -> CmmAGraph -- What to do on failure -> FCode () do_checks mb_stk_hwm checkYield mb_alloc_lit do_gc = do - dflags <- getDynFlags - platform <- getPlatform - gc_id <- newBlockId + omit_yields <- stgToCmmOmitYields <$> getStgToCmmConfig + platform <- getPlatform + gc_id <- newBlockId let Just alloc_lit = mb_alloc_lit @@ -644,13 +644,13 @@ do_checks mb_stk_hwm checkYield mb_alloc_lit do_gc = do | checkYield && isJust mb_stk_hwm -> emitLabel loop_header_id _otherwise -> return () - if (isJust mb_alloc_lit) + if isJust mb_alloc_lit then do tickyHeapCheck emitAssign hpReg bump_hp emit =<< mkCmmIfThen' hp_oflo (alloc_n <*> mkBranch gc_id) (Just False) else - when (checkYield && not (gopt Opt_OmitYields dflags)) $ do + when (checkYield && not omit_yields) $ do -- Yielding if HpLim == 0 let yielding = CmmMachOp (mo_wordEq platform) [CmmReg hpLimReg, diff --git a/compiler/GHC/StgToCmm/Hpc.hs b/compiler/GHC/StgToCmm/Hpc.hs index 707fe9725a..23e7519d80 100644 --- a/compiler/GHC/StgToCmm/Hpc.hs +++ b/compiler/GHC/StgToCmm/Hpc.hs @@ -11,8 +11,6 @@ module GHC.StgToCmm.Hpc ( initHpc, mkTickBox ) where import GHC.Prelude import GHC.Platform -import GHC.Driver.Session - import GHC.StgToCmm.Monad import GHC.StgToCmm.Utils @@ -39,13 +37,13 @@ mkTickBox platform mod n -- | Emit top-level tables for HPC and return code to initialise initHpc :: Module -> HpcInfo -> FCode () -initHpc _ (NoHpcInfo {}) +initHpc _ NoHpcInfo{} = return () initHpc this_mod (HpcInfo tickCount _hashNo) - = do dflags <- getDynFlags - when (gopt Opt_Hpc dflags) $ + = do do_hpc <- stgToCmmOptHpc <$> getStgToCmmConfig + when do_hpc $ emitDataLits (mkHpcTicksLabel this_mod) - [ (CmmInt 0 W64) + [ CmmInt 0 W64 | _ <- take tickCount [0 :: Int ..] ] diff --git a/compiler/GHC/StgToCmm/Layout.hs b/compiler/GHC/StgToCmm/Layout.hs index 6924e30483..9e14d1e766 100644 --- a/compiler/GHC/StgToCmm/Layout.hs +++ b/compiler/GHC/StgToCmm/Layout.hs @@ -33,9 +33,6 @@ module GHC.StgToCmm.Layout ( import GHC.Prelude hiding ((<*>)) -import GHC.Driver.Session -import GHC.Driver.Ppr - import GHC.StgToCmm.Closure import GHC.StgToCmm.Env import GHC.StgToCmm.ArgRep -- notably: ( slowCallPattern ) @@ -67,6 +64,8 @@ import GHC.Utils.Panic.Plain import GHC.Utils.Constants (debugIsOn) import GHC.Data.FastString import Control.Monad +import GHC.StgToCmm.Config (stgToCmmPlatform) +import GHC.StgToCmm.Types ------------------------------------------------------------------------ -- Call and return sequences @@ -196,9 +195,12 @@ directCall conv lbl arity stg_args slowCall :: CmmExpr -> [StgArg] -> FCode ReturnKind -- (slowCall fun args) applies fun to args, returning the results to Sequel slowCall fun stg_args - = do dflags <- getDynFlags - profile <- getProfile - let platform = profilePlatform profile + = do cfg <- getStgToCmmConfig + let profile = stgToCmmProfile cfg + platform = stgToCmmPlatform cfg + ctx = stgToCmmContext cfg + fast_pap = stgToCmmFastPAPCalls cfg + align_sat = stgToCmmAlignCheck cfg argsreps <- getArgRepsAmodes stg_args let (rts_fun, arity) = slowCallPattern (map fst argsreps) @@ -206,18 +208,17 @@ slowCall fun stg_args r <- direct_call "slow_call" NativeNodeCall (mkRtsApFastLabel rts_fun) arity ((P,Just fun):argsreps) emitComment $ mkFastString ("slow_call for " ++ - showSDoc dflags (pdoc platform fun) ++ + renderWithContext ctx (pdoc platform fun) ++ " with pat " ++ unpackFS rts_fun) return r -- Note [avoid intermediate PAPs] let n_args = length stg_args - if n_args > arity && gopt Opt_FastPAPCalls dflags + if n_args > arity && fast_pap then do - ptr_opts <- getPtrOpts funv <- (CmmReg . CmmLocal) `fmap` assignTemp fun fun_iptr <- (CmmReg . CmmLocal) `fmap` - assignTemp (closureInfoPtr ptr_opts (cmmUntag platform funv)) + assignTemp (closureInfoPtr platform align_sat (cmmUntag platform funv)) -- ToDo: we could do slightly better here by reusing the -- continuation from the slow call, which we have in r. @@ -303,15 +304,14 @@ direct_call caller call_conv lbl arity args = emitCall (call_conv, NativeReturn) target (nonVArgs args) | otherwise -- Note [over-saturated calls] - = do dflags <- getDynFlags + = do do_scc_prof <- stgToCmmSCCProfiling <$> getStgToCmmConfig emitCallWithExtraStack (call_conv, NativeReturn) target (nonVArgs fast_args) - (nonVArgs (stack_args dflags)) + (nonVArgs (slowArgs rest_args do_scc_prof)) where target = CmmLit (CmmLabel lbl) (fast_args, rest_args) = splitAt real_arity args - stack_args dflags = slowArgs dflags rest_args real_arity = case call_conv of NativeNodeCall -> arity+1 _ -> arity @@ -375,12 +375,11 @@ just more arguments that we are passing on the stack (cml_args). -- | 'slowArgs' takes a list of function arguments and prepares them for -- pushing on the stack for "extra" arguments to a function which requires -- fewer arguments than we currently have. -slowArgs :: DynFlags -> [(ArgRep, Maybe CmmExpr)] -> [(ArgRep, Maybe CmmExpr)] -slowArgs _ [] = [] -slowArgs dflags args -- careful: reps contains voids (V), but args does not - | sccProfilingEnabled dflags - = save_cccs ++ this_pat ++ slowArgs dflags rest_args - | otherwise = this_pat ++ slowArgs dflags rest_args +slowArgs :: [(ArgRep, Maybe CmmExpr)] -> DoSCCProfiling -> [(ArgRep, Maybe CmmExpr)] +slowArgs [] _ = mempty +slowArgs args sccProfilingEnabled -- careful: reps contains voids (V), but args does not + | sccProfilingEnabled = save_cccs ++ this_pat ++ slowArgs rest_args sccProfilingEnabled + | otherwise = this_pat ++ slowArgs rest_args sccProfilingEnabled where (arg_pat, n) = slowCallPattern (map fst args) (call_args, rest_args) = splitAt n args diff --git a/compiler/GHC/StgToCmm/Monad.hs b/compiler/GHC/StgToCmm/Monad.hs index 0eb9dc756d..d8d6600268 100644 --- a/compiler/GHC/StgToCmm/Monad.hs +++ b/compiler/GHC/StgToCmm/Monad.hs @@ -14,7 +14,7 @@ module GHC.StgToCmm.Monad ( FCode, -- type - initC, runC, fixC, + initC, initFCodeState, runC, fixC, newUnique, emitLabel, @@ -28,7 +28,7 @@ module GHC.StgToCmm.Monad ( getCmm, aGraphToGraph, getPlatform, getProfile, getCodeR, getCode, getCodeScoped, getHeapUsage, - getCallOpts, getPtrOpts, + getContext, mkCmmIfThenElse, mkCmmIfThen, mkCmmIfGoto, mkCmmIfThenElse', mkCmmIfThen', mkCmmIfGoto', @@ -45,7 +45,7 @@ module GHC.StgToCmm.Monad ( setTickyCtrLabel, getTickyCtrLabel, tickScope, getTickScope, - withUpdFrameOff, getUpdFrameOff, initUpdFrameOff, + withUpdFrameOff, getUpdFrameOff, HeapUsage(..), VirtualHpOffset, initHpUsage, getHpUsage, setHpUsage, heapHWM, @@ -54,13 +54,13 @@ module GHC.StgToCmm.Monad ( getModuleName, -- ideally we wouldn't export these, but some other modules access internal state - getState, setState, getSelfLoop, withSelfLoop, getInfoDown, getDynFlags, + getState, setState, getSelfLoop, withSelfLoop, getStgToCmmConfig, -- more localised access to monad state CgIdInfo(..), getBinds, setBinds, -- out of general friendliness, we also export ... - CgInfoDownwards(..), CgState(..) -- non-abstract + StgToCmmConfig(..), CgState(..) -- non-abstract ) where import GHC.Prelude hiding( sequence, succ ) @@ -68,13 +68,13 @@ import GHC.Prelude hiding( sequence, succ ) import GHC.Platform import GHC.Platform.Profile import GHC.Cmm +import GHC.StgToCmm.Config import GHC.StgToCmm.Closure -import GHC.Driver.Session +import GHC.StgToCmm.Sequel import GHC.Cmm.Dataflow.Collections import GHC.Cmm.Graph as CmmGraph import GHC.Cmm.BlockId import GHC.Cmm.CLabel -import GHC.Cmm.Info import GHC.Runtime.Heap.Layout import GHC.Unit import GHC.Types.Id @@ -109,24 +109,30 @@ import Data.List (mapAccumL) -- - the current heap usage -- - a UniqSupply -- --- - A reader monad, for CgInfoDownwards, containing --- - DynFlags, +-- - A reader monad, for StgToCmmConfig, containing +-- - the profile, -- - the current Module +-- - the debug level +-- - a bunch of flags see StgToCmm.Config for full details + +-- - A second reader monad with: -- - the update-frame offset -- - the ticky counter label -- - the Sequel (the continuation to return to) -- - the self-recursive tail call information +-- - The tick scope for new blocks and ticks +-- -------------------------------------------------------- -newtype FCode a = FCode' { doFCode :: CgInfoDownwards -> CgState -> (a, CgState) } +newtype FCode a = FCode' { doFCode :: StgToCmmConfig -> FCodeState -> CgState -> (a, CgState) } -- Not derived because of #18202. -- See Note [The one-shot state monad trick] in GHC.Utils.Monad instance Functor FCode where fmap f (FCode m) = - FCode $ \info_down state -> - case m info_down state of + FCode $ \cfg fst state -> + case m cfg fst state of (x, state') -> (f x, state') -- This pattern synonym makes the simplifier monad eta-expand, @@ -134,29 +140,31 @@ instance Functor FCode where -- See #18202. -- See Note [The one-shot state monad trick] in GHC.Utils.Monad {-# COMPLETE FCode #-} -pattern FCode :: (CgInfoDownwards -> CgState -> (a, CgState)) +pattern FCode :: (StgToCmmConfig -> FCodeState -> CgState -> (a, CgState)) -> FCode a pattern FCode m <- FCode' m where - FCode m = FCode' $ oneShot (\cgInfoDown -> oneShot (\state ->m cgInfoDown state)) + FCode m = FCode' $ oneShot (\cfg -> oneShot + (\fstate -> oneShot + (\state -> m cfg fstate state))) instance Applicative FCode where - pure val = FCode (\_info_down state -> (val, state)) + pure val = FCode (\_cfg _fstate state -> (val, state)) {-# INLINE pure #-} (<*>) = ap instance Monad FCode where FCode m >>= k = FCode $ - \info_down state -> - case m info_down state of + \cfg fstate state -> + case m cfg fstate state of (m_result, new_state) -> case k m_result of - FCode kcode -> kcode info_down new_state + FCode kcode -> kcode cfg fstate new_state {-# INLINE (>>=) #-} instance MonadUnique FCode where getUniqueSupplyM = cgs_uniqs <$> getState - getUniqueM = FCode $ \_ st -> + getUniqueM = FCode $ \_ _ st -> let (u, us') = takeUniqFromSupply (cgs_uniqs st) in (u, st { cgs_uniqs = us' }) @@ -164,36 +172,18 @@ initC :: IO CgState initC = do { uniqs <- mkSplitUniqSupply 'c' ; return (initCgState uniqs) } -runC :: DynFlags -> Module -> CgState -> FCode a -> (a,CgState) -runC dflags mod st fcode = doFCode fcode (initCgInfoDown dflags mod) st +runC :: StgToCmmConfig -> FCodeState -> CgState -> FCode a -> (a, CgState) +runC cfg fst st fcode = doFCode fcode cfg fst st fixC :: (a -> FCode a) -> FCode a fixC fcode = FCode $ - \info_down state -> let (v, s) = doFCode (fcode v) info_down state - in (v, s) + \cfg fstate state -> + let (v, s) = doFCode (fcode v) cfg fstate state + in (v, s) -------------------------------------------------------- -- The code generator environment -------------------------------------------------------- - --- This monadery has some information that it only passes --- *downwards*, as well as some ``state'' which is modified --- as we go along. - -data CgInfoDownwards -- information only passed *downwards* by the monad - = MkCgInfoDown { - cgd_dflags :: DynFlags, - cgd_mod :: Module, -- Module being compiled - cgd_updfr_off :: UpdFrameOffset, -- Size of current update frame - cgd_ticky :: CLabel, -- Current destination for ticky counts - cgd_sequel :: Sequel, -- What to do at end of basic block - cgd_self_loop :: Maybe SelfLoopInfo,-- Which tail calls can be compiled - -- as local jumps? See Note - -- [Self-recursive tail calls] in - -- GHC.StgToCmm.Expr - cgd_tick_scope:: CmmTickScope -- Tick scope for new blocks & ticks - } - type CgBindings = IdEnv CgIdInfo data CgIdInfo @@ -207,24 +197,6 @@ instance OutputableP Platform CgIdInfo where pdoc env (CgIdInfo { cg_id = id, cg_loc = loc }) = ppr id <+> text "-->" <+> pdoc env loc --- Sequel tells what to do with the result of this expression -data Sequel - = Return -- Return result(s) to continuation found on the stack. - - | AssignTo - [LocalReg] -- Put result(s) in these regs and fall through - -- NB: no void arguments here - -- - Bool -- Should we adjust the heap pointer back to - -- recover space that's unused on this path? - -- We need to do this only if the expression - -- may allocate (e.g. it's a foreign call or - -- allocating primOp) - -instance Outputable Sequel where - ppr Return = text "Return" - ppr (AssignTo regs b) = text "AssignTo" <+> ppr regs <+> ppr b - -- See Note [sharing continuations] below data ReturnKind = AssignedDirectly @@ -297,24 +269,6 @@ data ReturnKind -- fall back to AssignedDirectly. -- - -initCgInfoDown :: DynFlags -> Module -> CgInfoDownwards -initCgInfoDown dflags mod - = MkCgInfoDown { cgd_dflags = dflags - , cgd_mod = mod - , cgd_updfr_off = initUpdFrameOff (targetPlatform dflags) - , cgd_ticky = mkTopTickyCtrLabel - , cgd_sequel = initSequel - , cgd_self_loop = Nothing - , cgd_tick_scope= GlobalScope } - -initSequel :: Sequel -initSequel = Return - -initUpdFrameOff :: Platform -> UpdFrameOffset -initUpdFrameOff platform = platformWordSizeInBytes platform -- space for the RA - - -------------------------------------------------------- -- The code generator state -------------------------------------------------------- @@ -337,6 +291,17 @@ data CgState -- the reason is the knot-tying in 'getHeapUsage'. This problem is tracked -- in #19245 +data FCodeState = + MkFCodeState { fcs_upframeoffset :: UpdFrameOffset -- ^ Size of current update frame UpdFrameOffset must be kept lazy or + -- else the RTS will deadlock _and_ also experience a severe + -- performance degredation + , fcs_sequel :: !Sequel -- ^ What to do at end of basic block + , fcs_selfloop :: Maybe SelfLoopInfo -- ^ Which tail calls can be compiled as local jumps? + -- See Note [Self-recursive tail calls] in GHC.StgToCmm.Expr + , fcs_ticky :: !CLabel -- ^ Destination for ticky counts + , fcs_tickscope :: !CmmTickScope -- ^ Tick scope for new blocks & ticks + } + data HeapUsage -- See Note [Virtual and real heap pointers] = HeapUsage { virtHp :: VirtualHpOffset, -- Virtual offset of highest-allocated word @@ -418,14 +383,14 @@ maxHpHw :: HeapUsage -> VirtualHpOffset -> HeapUsage hp_usg `maxHpHw` hw = hp_usg { virtHp = virtHp hp_usg `max` hw } -------------------------------------------------------- --- Operators for getting and setting the state and "info_down". +-- Operators for getting and setting the state and "stgToCmmConfig". -------------------------------------------------------- getState :: FCode CgState -getState = FCode $ \_info_down state -> (state, state) +getState = FCode $ \_cfg _fstate state -> (state, state) setState :: CgState -> FCode () -setState state = FCode $ \_info_down _ -> ((), state) +setState state = FCode $ \_cfg _fstate _ -> ((), state) getHpUsage :: FCode HeapUsage getHpUsage = do @@ -462,9 +427,9 @@ setBinds new_binds = do state <- getState setState $ state {cgs_binds = new_binds} -withState :: FCode a -> CgState -> FCode (a,CgState) -withState (FCode fcode) newstate = FCode $ \info_down state -> - case fcode info_down newstate of +withCgState :: FCode a -> CgState -> FCode (a,CgState) +withCgState (FCode fcode) newstate = FCode $ \cfg fstate state -> + case fcode cfg fstate newstate of (retval, state2) -> ((retval,state2), state) newUniqSupply :: FCode UniqSupply @@ -486,68 +451,41 @@ newTemp rep = do { uniq <- getUniqueM ; return (LocalReg uniq rep) } ------------------ -getInfoDown :: FCode CgInfoDownwards -getInfoDown = FCode $ \info_down state -> (info_down,state) +initFCodeState :: Platform -> FCodeState +initFCodeState p = + MkFCodeState { fcs_upframeoffset = platformWordSizeInBytes p + , fcs_sequel = Return + , fcs_selfloop = Nothing + , fcs_ticky = mkTopTickyCtrLabel + , fcs_tickscope = GlobalScope + } + +getFCodeState :: FCode FCodeState +getFCodeState = FCode $ \_ fstate state -> (fstate,state) + +-- basically local for the reader monad +withFCodeState :: FCode a -> FCodeState -> FCode a +withFCodeState (FCode fcode) fst = FCode $ \cfg _ state -> fcode cfg fst state getSelfLoop :: FCode (Maybe SelfLoopInfo) -getSelfLoop = do - info_down <- getInfoDown - return $ cgd_self_loop info_down +getSelfLoop = fcs_selfloop <$> getFCodeState withSelfLoop :: SelfLoopInfo -> FCode a -> FCode a withSelfLoop self_loop code = do - info_down <- getInfoDown - withInfoDown code (info_down {cgd_self_loop = Just self_loop}) - -instance HasDynFlags FCode where - getDynFlags = liftM cgd_dflags getInfoDown - -getProfile :: FCode Profile -getProfile = targetProfile <$> getDynFlags - -getPlatform :: FCode Platform -getPlatform = profilePlatform <$> getProfile - -getCallOpts :: FCode CallOpts -getCallOpts = do - dflags <- getDynFlags - profile <- getProfile - pure $ CallOpts - { co_profile = profile - , co_loopification = gopt Opt_Loopification dflags - , co_ticky = gopt Opt_Ticky dflags - } - -getPtrOpts :: FCode PtrOpts -getPtrOpts = do - dflags <- getDynFlags - profile <- getProfile - pure $ PtrOpts - { po_profile = profile - , po_align_check = gopt Opt_AlignmentSanitisation dflags - } - - -withInfoDown :: FCode a -> CgInfoDownwards -> FCode a -withInfoDown (FCode fcode) info_down = FCode $ \_ state -> fcode info_down state - --- ---------------------------------------------------------------------------- --- Get the current module name - -getModuleName :: FCode Module -getModuleName = do { info <- getInfoDown; return (cgd_mod info) } + fstate <- getFCodeState + withFCodeState code (fstate {fcs_selfloop = Just self_loop}) -- ---------------------------------------------------------------------------- -- Get/set the end-of-block info withSequel :: Sequel -> FCode a -> FCode a withSequel sequel code - = do { info <- getInfoDown - ; withInfoDown code (info {cgd_sequel = sequel, cgd_self_loop = Nothing }) } + = do { fstate <- getFCodeState + ; withFCodeState code (fstate { fcs_sequel = sequel + , fcs_selfloop = Nothing }) } getSequel :: FCode Sequel -getSequel = do { info <- getInfoDown - ; return (cgd_sequel info) } +getSequel = fcs_sequel <$> getFCodeState -- ---------------------------------------------------------------------------- -- Get/set the size of the update frame @@ -561,35 +499,29 @@ getSequel = do { info <- getInfoDown withUpdFrameOff :: UpdFrameOffset -> FCode a -> FCode a withUpdFrameOff size code - = do { info <- getInfoDown - ; withInfoDown code (info {cgd_updfr_off = size }) } + = do { fstate <- getFCodeState + ; withFCodeState code (fstate {fcs_upframeoffset = size }) } getUpdFrameOff :: FCode UpdFrameOffset -getUpdFrameOff - = do { info <- getInfoDown - ; return $ cgd_updfr_off info } +getUpdFrameOff = fcs_upframeoffset <$> getFCodeState -- ---------------------------------------------------------------------------- -- Get/set the current ticky counter label getTickyCtrLabel :: FCode CLabel -getTickyCtrLabel = do - info <- getInfoDown - return (cgd_ticky info) +getTickyCtrLabel = fcs_ticky <$> getFCodeState setTickyCtrLabel :: CLabel -> FCode a -> FCode a setTickyCtrLabel ticky code = do - info <- getInfoDown - withInfoDown code (info {cgd_ticky = ticky}) + fstate <- getFCodeState + withFCodeState code (fstate {fcs_ticky = ticky}) -- ---------------------------------------------------------------------------- -- Manage tick scopes -- | The current tick scope. We will assign this to generated blocks. getTickScope :: FCode CmmTickScope -getTickScope = do - info <- getInfoDown - return (cgd_tick_scope info) +getTickScope = fcs_tickscope <$> getFCodeState -- | Places blocks generated by the given code into a fresh -- (sub-)scope. This will make sure that Cmm annotations in our scope @@ -597,11 +529,33 @@ getTickScope = do -- way around. tickScope :: FCode a -> FCode a tickScope code = do - info <- getInfoDown - if debugLevel (cgd_dflags info) == 0 then code else do + cfg <- getStgToCmmConfig + fstate <- getFCodeState + if stgToCmmDebugLevel cfg == 0 then code else do u <- newUnique - let scope' = SubScope u (cgd_tick_scope info) - withInfoDown code info{ cgd_tick_scope = scope' } + let scope' = SubScope u (fcs_tickscope fstate) + withFCodeState code fstate{ fcs_tickscope = scope' } + +-- ---------------------------------------------------------------------------- +-- Config related helpers + +getStgToCmmConfig :: FCode StgToCmmConfig +getStgToCmmConfig = FCode $ \cfg _ state -> (cfg,state) + +getProfile :: FCode Profile +getProfile = stgToCmmProfile <$> getStgToCmmConfig + +getPlatform :: FCode Platform +getPlatform = profilePlatform <$> getProfile + +getContext :: FCode SDocContext +getContext = stgToCmmContext <$> getStgToCmmConfig + +-- ---------------------------------------------------------------------------- +-- Get the current module name + +getModuleName :: FCode Module +getModuleName = stgToCmmThisModule <$> getStgToCmmConfig -------------------------------------------------------- @@ -618,14 +572,16 @@ forkClosureBody :: FCode () -> FCode () forkClosureBody body_code = do { platform <- getPlatform - ; info <- getInfoDown - ; us <- newUniqSupply - ; state <- getState - ; let body_info_down = info { cgd_sequel = initSequel - , cgd_updfr_off = initUpdFrameOff platform - , cgd_self_loop = Nothing } + ; cfg <- getStgToCmmConfig + ; fstate <- getFCodeState + ; us <- newUniqSupply + ; state <- getState + ; let fcs = fstate { fcs_sequel = Return + , fcs_upframeoffset = platformWordSizeInBytes platform + , fcs_selfloop = Nothing + } fork_state_in = (initCgState us) { cgs_binds = cgs_binds state } - ((),fork_state_out) = doFCode body_code body_info_down fork_state_in + ((),fork_state_out) = doFCode body_code cfg fcs fork_state_in ; setState $ state `addCodeBlocksFrom` fork_state_out } forkLneBody :: FCode a -> FCode a @@ -636,11 +592,12 @@ forkLneBody :: FCode a -> FCode a -- the successor. In particular, any heap usage from the enclosed -- code is discarded; it should deal with its own heap consumption. forkLneBody body_code - = do { info_down <- getInfoDown - ; us <- newUniqSupply - ; state <- getState + = do { cfg <- getStgToCmmConfig + ; us <- newUniqSupply + ; state <- getState + ; fstate <- getFCodeState ; let fork_state_in = (initCgState us) { cgs_binds = cgs_binds state } - (result, fork_state_out) = doFCode body_code info_down fork_state_in + (result, fork_state_out) = doFCode body_code cfg fstate fork_state_in ; setState $ state `addCodeBlocksFrom` fork_state_out ; return result } @@ -649,12 +606,13 @@ codeOnly :: FCode () -> FCode () -- Do not affect anything else in the outer state -- Used in almost-circular code to prevent false loop dependencies codeOnly body_code - = do { info_down <- getInfoDown - ; us <- newUniqSupply - ; state <- getState + = do { cfg <- getStgToCmmConfig + ; us <- newUniqSupply + ; state <- getState + ; fstate <- getFCodeState ; let fork_state_in = (initCgState us) { cgs_binds = cgs_binds state , cgs_hp_usg = cgs_hp_usg state } - ((), fork_state_out) = doFCode body_code info_down fork_state_in + ((), fork_state_out) = doFCode body_code cfg fstate fork_state_in ; setState $ state `addCodeBlocksFrom` fork_state_out } forkAlts :: [FCode a] -> FCode [a] @@ -664,11 +622,12 @@ forkAlts :: [FCode a] -> FCode [a] -- that the virtual Hp is moved on to the worst virtual Hp for the branches forkAlts branch_fcodes - = do { info_down <- getInfoDown - ; us <- newUniqSupply + = do { cfg <- getStgToCmmConfig + ; us <- newUniqSupply ; state <- getState + ; fstate <- getFCodeState ; let compile us branch - = (us2, doFCode branch info_down branch_state) + = (us2, doFCode branch cfg fstate branch_state) where (us1,us2) = splitUniqSupply us branch_state = (initCgState us1) { @@ -693,7 +652,7 @@ forkAltPair x y = do getCodeR :: FCode a -> FCode (a, CmmAGraph) getCodeR fcode = do { state1 <- getState - ; (a, state2) <- withState fcode (state1 { cgs_stmts = mkNop }) + ; (a, state2) <- withCgState fcode (state1 { cgs_stmts = mkNop }) ; setState $ state2 { cgs_stmts = cgs_stmts state1 } ; return (a, cgs_stmts state2) } @@ -706,7 +665,7 @@ getCodeScoped fcode = do { state1 <- getState ; ((a, tscope), state2) <- tickScope $ - flip withState state1 { cgs_stmts = mkNop } $ + flip withCgState state1 { cgs_stmts = mkNop } $ do { a <- fcode ; scp <- getTickScope ; return (a, scp) } @@ -725,10 +684,11 @@ getCodeScoped fcode getHeapUsage :: (VirtualHpOffset -> FCode a) -> FCode a getHeapUsage fcode - = do { info_down <- getInfoDown + = do { cfg <- getStgToCmmConfig ; state <- getState + ; fcstate <- getFCodeState ; let fstate_in = state { cgs_hp_usg = initHpUsage } - (r, fstate_out) = doFCode (fcode hp_hw) info_down fstate_in + (r, fstate_out) = doFCode (fcode hp_hw) cfg fcstate fstate_in hp_hw = heapHWM (cgs_hp_usg fstate_out) -- Loop here! ; setState $ fstate_out { cgs_hp_usg = cgs_hp_usg state } @@ -757,8 +717,8 @@ emitTick = emitCgStmt . CgStmt . CmmTick emitUnwind :: [(GlobalReg, Maybe CmmExpr)] -> FCode () emitUnwind regs = do - dflags <- getDynFlags - when (debugLevel dflags > 0) $ + debug_level <- stgToCmmDebugLevel <$> getStgToCmmConfig + when (debug_level > 0) $ emitCgStmt $ CgStmt $ CmmUnwind regs emitAssign :: CmmReg -> CmmExpr -> FCode () @@ -838,7 +798,7 @@ getCmm :: FCode a -> FCode (a, CmmGroup) -- object splitting (at a later stage) getCmm code = do { state1 <- getState - ; (a, state2) <- withState code (state1 { cgs_tops = nilOL }) + ; (a, state2) <- withCgState code (state1 { cgs_tops = nilOL }) ; setState $ state2 { cgs_tops = cgs_tops state1 } ; return (a, fromOL (cgs_tops state2)) } diff --git a/compiler/GHC/StgToCmm/Prim.hs b/compiler/GHC/StgToCmm/Prim.hs index 5c538c45c8..8d119a4e6c 100644 --- a/compiler/GHC/StgToCmm/Prim.hs +++ b/compiler/GHC/StgToCmm/Prim.hs @@ -20,6 +20,7 @@ import GHC.Prelude hiding ((<*>)) import GHC.Platform import GHC.Platform.Profile +import GHC.StgToCmm.Config import GHC.StgToCmm.Layout import GHC.StgToCmm.Foreign import GHC.StgToCmm.Monad @@ -29,7 +30,6 @@ import GHC.StgToCmm.Heap import GHC.StgToCmm.Prof ( costCentreFrom ) import GHC.Driver.Session -import GHC.Driver.Backend import GHC.Types.Basic import GHC.Cmm.BlockId import GHC.Cmm.Graph @@ -77,18 +77,18 @@ cgOpApp (StgFCallOp fcall ty) stg_args res_ty -- Note [Foreign call results] cgOpApp (StgPrimOp primop) args res_ty = do - dflags <- getDynFlags + cfg <- getStgToCmmConfig cmm_args <- getNonVoidArgAmodes args - cmmPrimOpApp dflags primop cmm_args (Just res_ty) + cmmPrimOpApp cfg primop cmm_args (Just res_ty) cgOpApp (StgPrimCallOp primcall) args _res_ty = do { cmm_args <- getNonVoidArgAmodes args ; let fun = CmmLit (CmmLabel (mkPrimCallLabel primcall)) ; emitCall (NativeNodeCall, NativeReturn) fun cmm_args } -cmmPrimOpApp :: DynFlags -> PrimOp -> [CmmExpr] -> Maybe Type -> FCode ReturnKind -cmmPrimOpApp dflags primop cmm_args mres_ty = - case emitPrimOp dflags primop cmm_args of +cmmPrimOpApp :: StgToCmmConfig -> PrimOp -> [CmmExpr] -> Maybe Type -> FCode ReturnKind +cmmPrimOpApp cfg primop cmm_args mres_ty = + case emitPrimOp cfg primop cmm_args of PrimopCmmEmit_Internal f -> let -- if the result type isn't explicitly given, we directly use the @@ -119,8 +119,8 @@ asUnsigned w n = n .&. (bit (widthInBits w) - 1) -- Emitting code for a primop ------------------------------------------------------------------------ -shouldInlinePrimOp :: DynFlags -> PrimOp -> [CmmExpr] -> Bool -shouldInlinePrimOp dflags op args = case emitPrimOp dflags op args of +shouldInlinePrimOp :: StgToCmmConfig -> PrimOp -> [CmmExpr] -> Bool +shouldInlinePrimOp cfg op args = case emitPrimOp cfg op args of PrimopCmmEmit_External -> False PrimopCmmEmit_Internal _ -> True @@ -143,20 +143,22 @@ shouldInlinePrimOp dflags op args = case emitPrimOp dflags op args of -- might happen e.g. if there's enough static information, such as statically -- know arguments. emitPrimOp - :: DynFlags + :: StgToCmmConfig -> PrimOp -- ^ The primop -> [CmmExpr] -- ^ The primop arguments -> PrimopCmmEmit -emitPrimOp dflags primop = case primop of +emitPrimOp cfg primop = + let max_inl_alloc_size = fromIntegral (stgToCmmMaxInlAllocSize cfg) + in case primop of NewByteArrayOp_Char -> \case [(CmmLit (CmmInt n w))] - | asUnsigned w n <= fromIntegral (maxInlineAllocSize dflags) + | asUnsigned w n <= max_inl_alloc_size -> opIntoRegs $ \ [res] -> doNewByteArrayOp res (fromInteger n) _ -> PrimopCmmEmit_External NewArrayOp -> \case [(CmmLit (CmmInt n w)), init] - | wordsToBytes platform (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) + | wordsToBytes platform (asUnsigned w n) <= max_inl_alloc_size -> opIntoRegs $ \[res] -> doNewArrayOp res (arrPtrsRep platform (fromInteger n)) mkMAP_DIRTY_infoLabel [ (mkIntExpr platform (fromInteger n), fixedHdrSize profile + pc_OFFSET_StgMutArrPtrs_ptrs (platformConstants platform)) @@ -178,31 +180,31 @@ emitPrimOp dflags primop = case primop of CloneArrayOp -> \case [src, src_off, (CmmLit (CmmInt n w))] - | wordsToBytes platform (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) + | wordsToBytes platform (asUnsigned w n) <= max_inl_alloc_size -> opIntoRegs $ \ [res] -> emitCloneArray mkMAP_FROZEN_CLEAN_infoLabel res src src_off (fromInteger n) _ -> PrimopCmmEmit_External CloneMutableArrayOp -> \case [src, src_off, (CmmLit (CmmInt n w))] - | wordsToBytes platform (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) + | wordsToBytes platform (asUnsigned w n) <= max_inl_alloc_size -> opIntoRegs $ \ [res] -> emitCloneArray mkMAP_DIRTY_infoLabel res src src_off (fromInteger n) _ -> PrimopCmmEmit_External FreezeArrayOp -> \case [src, src_off, (CmmLit (CmmInt n w))] - | wordsToBytes platform (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) + | wordsToBytes platform (asUnsigned w n) <= max_inl_alloc_size -> opIntoRegs $ \ [res] -> emitCloneArray mkMAP_FROZEN_CLEAN_infoLabel res src src_off (fromInteger n) _ -> PrimopCmmEmit_External ThawArrayOp -> \case [src, src_off, (CmmLit (CmmInt n w))] - | wordsToBytes platform (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) + | wordsToBytes platform (asUnsigned w n) <= max_inl_alloc_size -> opIntoRegs $ \ [res] -> emitCloneArray mkMAP_DIRTY_infoLabel res src src_off (fromInteger n) _ -> PrimopCmmEmit_External NewSmallArrayOp -> \case [(CmmLit (CmmInt n w)), init] - | wordsToBytes platform (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) + | wordsToBytes platform (asUnsigned w n) <= max_inl_alloc_size -> opIntoRegs $ \ [res] -> doNewArrayOp res (smallArrPtrsRep (fromInteger n)) mkSMAP_DIRTY_infoLabel [ (mkIntExpr platform (fromInteger n), @@ -223,25 +225,25 @@ emitPrimOp dflags primop = case primop of CloneSmallArrayOp -> \case [src, src_off, (CmmLit (CmmInt n w))] - | wordsToBytes platform (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) + | wordsToBytes platform (asUnsigned w n) <= max_inl_alloc_size -> opIntoRegs $ \ [res] -> emitCloneSmallArray mkSMAP_FROZEN_CLEAN_infoLabel res src src_off (fromInteger n) _ -> PrimopCmmEmit_External CloneSmallMutableArrayOp -> \case [src, src_off, (CmmLit (CmmInt n w))] - | wordsToBytes platform (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) + | wordsToBytes platform (asUnsigned w n) <= max_inl_alloc_size -> opIntoRegs $ \ [res] -> emitCloneSmallArray mkSMAP_DIRTY_infoLabel res src src_off (fromInteger n) _ -> PrimopCmmEmit_External FreezeSmallArrayOp -> \case [src, src_off, (CmmLit (CmmInt n w))] - | wordsToBytes platform (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) + | wordsToBytes platform (asUnsigned w n) <= max_inl_alloc_size -> opIntoRegs $ \ [res] -> emitCloneSmallArray mkSMAP_FROZEN_CLEAN_infoLabel res src src_off (fromInteger n) _ -> PrimopCmmEmit_External ThawSmallArrayOp -> \case [src, src_off, (CmmLit (CmmInt n w))] - | wordsToBytes platform (asUnsigned w n) <= fromIntegral (maxInlineAllocSize dflags) + | wordsToBytes platform (asUnsigned w n) <= max_inl_alloc_size -> opIntoRegs $ \ [res] -> emitCloneSmallArray mkSMAP_DIRTY_infoLabel res src src_off (fromInteger n) _ -> PrimopCmmEmit_External @@ -295,14 +297,14 @@ emitPrimOp dflags primop = case primop of emitPrimCall res MO_WriteBarrier [] emitStore (cmmOffsetW platform mutv (fixedHdrSizeW profile)) var - ptrOpts <- getPtrOpts platform <- getPlatform mkdirtyMutVarCCall <- getCode $! emitCCall [{-no results-}] (CmmLit (CmmLabel mkDirty_MUT_VAR_Label)) [(baseExpr, AddrHint), (mutv, AddrHint), (CmmReg old_val, AddrHint)] emit =<< mkCmmIfThen - (cmmEqWord platform (mkLblExpr mkMUT_VAR_CLEAN_infoLabel) (closureInfoPtr ptrOpts mutv)) + (cmmEqWord platform (mkLblExpr mkMUT_VAR_CLEAN_infoLabel) + (closureInfoPtr platform (stgToCmmAlignCheck cfg) mutv)) mkdirtyMutVarCCall -- #define sizzeofByteArrayzh(r,a) \ @@ -312,7 +314,7 @@ emitPrimOp dflags primop = case primop of -- #define sizzeofMutableByteArrayzh(r,a) \ -- r = ((StgArrBytes *)(a))->bytes - SizeofMutableByteArrayOp -> emitPrimOp dflags SizeofByteArrayOp + SizeofMutableByteArrayOp -> emitPrimOp cfg SizeofByteArrayOp -- #define getSizzeofMutableByteArrayzh(r,a) \ -- r = ((StgArrBytes *)(a))->bytes @@ -394,15 +396,15 @@ emitPrimOp dflags primop = case primop of emit $ mkAssign (CmmLocal res) (cmmLoadIndexW platform arg (fixedHdrSizeW profile + bytesToWordsRoundUp platform (pc_OFFSET_StgMutArrPtrs_ptrs (platformConstants platform))) (bWord platform)) - SizeofMutableArrayOp -> emitPrimOp dflags SizeofArrayOp + SizeofMutableArrayOp -> emitPrimOp cfg SizeofArrayOp SizeofSmallArrayOp -> \[arg] -> opIntoRegs $ \[res] -> emit $ mkAssign (CmmLocal res) (cmmLoadIndexW platform arg (fixedHdrSizeW profile + bytesToWordsRoundUp platform (pc_OFFSET_StgSmallMutArrPtrs_ptrs (platformConstants platform))) (bWord platform)) - SizeofSmallMutableArrayOp -> emitPrimOp dflags SizeofSmallArrayOp - GetSizeofSmallMutableArrayOp -> emitPrimOp dflags SizeofSmallArrayOp + SizeofSmallMutableArrayOp -> emitPrimOp cfg SizeofSmallArrayOp + GetSizeofSmallMutableArrayOp -> emitPrimOp cfg SizeofSmallArrayOp -- IndexXXXoffAddr @@ -850,7 +852,7 @@ emitPrimOp dflags primop = case primop of -- SIMD primops (VecBroadcastOp vcat n w) -> \[e] -> opIntoRegs $ \[res] -> do - checkVecCompatibility dflags vcat n w + checkVecCompatibility cfg vcat n w doVecPackOp (vecElemInjectCast platform vcat w) ty zeros (replicate n e) res where zeros :: CmmExpr @@ -866,7 +868,7 @@ emitPrimOp dflags primop = case primop of ty = vecVmmType vcat n w (VecPackOp vcat n w) -> \es -> opIntoRegs $ \[res] -> do - checkVecCompatibility dflags vcat n w + checkVecCompatibility cfg vcat n w when (es `lengthIsNot` n) $ panic "emitPrimOp: VecPackOp has wrong number of arguments" doVecPackOp (vecElemInjectCast platform vcat w) ty zeros es res @@ -884,7 +886,7 @@ emitPrimOp dflags primop = case primop of ty = vecVmmType vcat n w (VecUnpackOp vcat n w) -> \[arg] -> opIntoRegs $ \res -> do - checkVecCompatibility dflags vcat n w + checkVecCompatibility cfg vcat n w when (res `lengthIsNot` n) $ panic "emitPrimOp: VecUnpackOp has wrong number of results" doVecUnpackOp (vecElemProjectCast platform vcat w) ty arg res @@ -893,56 +895,56 @@ emitPrimOp dflags primop = case primop of ty = vecVmmType vcat n w (VecInsertOp vcat n w) -> \[v,e,i] -> opIntoRegs $ \[res] -> do - checkVecCompatibility dflags vcat n w + checkVecCompatibility cfg vcat n w doVecInsertOp (vecElemInjectCast platform vcat w) ty v e i res where ty :: CmmType ty = vecVmmType vcat n w (VecIndexByteArrayOp vcat n w) -> \args -> opIntoRegs $ \res0 -> do - checkVecCompatibility dflags vcat n w + checkVecCompatibility cfg vcat n w doIndexByteArrayOp Nothing ty res0 args where ty :: CmmType ty = vecVmmType vcat n w (VecReadByteArrayOp vcat n w) -> \args -> opIntoRegs $ \res0 -> do - checkVecCompatibility dflags vcat n w + checkVecCompatibility cfg vcat n w doIndexByteArrayOp Nothing ty res0 args where ty :: CmmType ty = vecVmmType vcat n w (VecWriteByteArrayOp vcat n w) -> \args -> opIntoRegs $ \res0 -> do - checkVecCompatibility dflags vcat n w + checkVecCompatibility cfg vcat n w doWriteByteArrayOp Nothing ty res0 args where ty :: CmmType ty = vecVmmType vcat n w (VecIndexOffAddrOp vcat n w) -> \args -> opIntoRegs $ \res0 -> do - checkVecCompatibility dflags vcat n w + checkVecCompatibility cfg vcat n w doIndexOffAddrOp Nothing ty res0 args where ty :: CmmType ty = vecVmmType vcat n w (VecReadOffAddrOp vcat n w) -> \args -> opIntoRegs $ \res0 -> do - checkVecCompatibility dflags vcat n w + checkVecCompatibility cfg vcat n w doIndexOffAddrOp Nothing ty res0 args where ty :: CmmType ty = vecVmmType vcat n w (VecWriteOffAddrOp vcat n w) -> \args -> opIntoRegs $ \res0 -> do - checkVecCompatibility dflags vcat n w + checkVecCompatibility cfg vcat n w doWriteOffAddrOp Nothing ty res0 args where ty :: CmmType ty = vecVmmType vcat n w (VecIndexScalarByteArrayOp vcat n w) -> \args -> opIntoRegs $ \res0 -> do - checkVecCompatibility dflags vcat n w + checkVecCompatibility cfg vcat n w doIndexByteArrayOpAs Nothing vecty ty res0 args where vecty :: CmmType @@ -952,7 +954,7 @@ emitPrimOp dflags primop = case primop of ty = vecCmmCat vcat w (VecReadScalarByteArrayOp vcat n w) -> \args -> opIntoRegs $ \res0 -> do - checkVecCompatibility dflags vcat n w + checkVecCompatibility cfg vcat n w doIndexByteArrayOpAs Nothing vecty ty res0 args where vecty :: CmmType @@ -962,14 +964,14 @@ emitPrimOp dflags primop = case primop of ty = vecCmmCat vcat w (VecWriteScalarByteArrayOp vcat n w) -> \args -> opIntoRegs $ \res0 -> do - checkVecCompatibility dflags vcat n w + checkVecCompatibility cfg vcat n w doWriteByteArrayOp Nothing ty res0 args where ty :: CmmType ty = vecCmmCat vcat w (VecIndexScalarOffAddrOp vcat n w) -> \args -> opIntoRegs $ \res0 -> do - checkVecCompatibility dflags vcat n w + checkVecCompatibility cfg vcat n w doIndexOffAddrOpAs Nothing vecty ty res0 args where vecty :: CmmType @@ -979,7 +981,7 @@ emitPrimOp dflags primop = case primop of ty = vecCmmCat vcat w (VecReadScalarOffAddrOp vcat n w) -> \args -> opIntoRegs $ \res0 -> do - checkVecCompatibility dflags vcat n w + checkVecCompatibility cfg vcat n w doIndexOffAddrOpAs Nothing vecty ty res0 args where vecty :: CmmType @@ -989,7 +991,7 @@ emitPrimOp dflags primop = case primop of ty = vecCmmCat vcat w (VecWriteScalarOffAddrOp vcat n w) -> \args -> opIntoRegs $ \res0 -> do - checkVecCompatibility dflags vcat n w + checkVecCompatibility cfg vcat n w doWriteOffAddrOp Nothing ty res0 args where ty :: CmmType @@ -1444,92 +1446,92 @@ emitPrimOp dflags primop = case primop of DoubleToFloatOp -> \args -> opTranslate args (MO_FF_Conv W64 W32) IntQuotRemOp -> \args -> opCallishHandledLater args $ - if ncg && (x86ish || ppc) && not (quotRemCanBeOptimized args) + if allowQuotRem && not (quotRemCanBeOptimized args) then Left (MO_S_QuotRem (wordWidth platform)) else Right (genericIntQuotRemOp (wordWidth platform)) Int8QuotRemOp -> \args -> opCallishHandledLater args $ - if ncg && (x86ish || ppc) && not (quotRemCanBeOptimized args) + if allowQuotRem && not (quotRemCanBeOptimized args) then Left (MO_S_QuotRem W8) else Right (genericIntQuotRemOp W8) Int16QuotRemOp -> \args -> opCallishHandledLater args $ - if ncg && (x86ish || ppc) && not (quotRemCanBeOptimized args) + if allowQuotRem && not (quotRemCanBeOptimized args) then Left (MO_S_QuotRem W16) else Right (genericIntQuotRemOp W16) Int32QuotRemOp -> \args -> opCallishHandledLater args $ - if ncg && (x86ish || ppc) && not (quotRemCanBeOptimized args) + if allowQuotRem && not (quotRemCanBeOptimized args) then Left (MO_S_QuotRem W32) else Right (genericIntQuotRemOp W32) WordQuotRemOp -> \args -> opCallishHandledLater args $ - if ncg && (x86ish || ppc) && not (quotRemCanBeOptimized args) + if allowQuotRem && not (quotRemCanBeOptimized args) then Left (MO_U_QuotRem (wordWidth platform)) else Right (genericWordQuotRemOp (wordWidth platform)) WordQuotRem2Op -> \args -> opCallishHandledLater args $ - if (ncg && (x86ish || ppc)) || llvm + if allowQuotRem2 then Left (MO_U_QuotRem2 (wordWidth platform)) else Right (genericWordQuotRem2Op platform) Word8QuotRemOp -> \args -> opCallishHandledLater args $ - if ncg && (x86ish || ppc) && not (quotRemCanBeOptimized args) + if allowQuotRem && not (quotRemCanBeOptimized args) then Left (MO_U_QuotRem W8) else Right (genericWordQuotRemOp W8) Word16QuotRemOp -> \args -> opCallishHandledLater args $ - if ncg && (x86ish || ppc) && not (quotRemCanBeOptimized args) + if allowQuotRem && not (quotRemCanBeOptimized args) then Left (MO_U_QuotRem W16) else Right (genericWordQuotRemOp W16) Word32QuotRemOp -> \args -> opCallishHandledLater args $ - if ncg && (x86ish || ppc) && not (quotRemCanBeOptimized args) + if allowQuotRem && not (quotRemCanBeOptimized args) then Left (MO_U_QuotRem W32) else Right (genericWordQuotRemOp W32) WordAdd2Op -> \args -> opCallishHandledLater args $ - if (ncg && (x86ish || ppc)) || llvm + if allowExtAdd then Left (MO_Add2 (wordWidth platform)) else Right genericWordAdd2Op WordAddCOp -> \args -> opCallishHandledLater args $ - if (ncg && (x86ish || ppc)) || llvm + if allowExtAdd then Left (MO_AddWordC (wordWidth platform)) else Right genericWordAddCOp WordSubCOp -> \args -> opCallishHandledLater args $ - if (ncg && (x86ish || ppc)) || llvm + if allowExtAdd then Left (MO_SubWordC (wordWidth platform)) else Right genericWordSubCOp IntAddCOp -> \args -> opCallishHandledLater args $ - if (ncg && (x86ish || ppc)) || llvm + if allowExtAdd then Left (MO_AddIntC (wordWidth platform)) else Right genericIntAddCOp IntSubCOp -> \args -> opCallishHandledLater args $ - if (ncg && (x86ish || ppc)) || llvm + if allowExtAdd then Left (MO_SubIntC (wordWidth platform)) else Right genericIntSubCOp WordMul2Op -> \args -> opCallishHandledLater args $ - if ncg && (x86ish || ppc) || llvm + if allowExtAdd then Left (MO_U_Mul2 (wordWidth platform)) else Right genericWordMul2Op IntMul2Op -> \args -> opCallishHandledLater args $ - if ncg && x86ish || llvm + if allowInt2Mul then Left (MO_S_Mul2 (wordWidth platform)) else Right genericIntMul2Op FloatFabsOp -> \args -> opCallishHandledLater args $ - if (ncg && (x86ish || ppc || aarch64)) || llvm + if allowFab then Left MO_F32_Fabs else Right $ genericFabsOp W32 DoubleFabsOp -> \args -> opCallishHandledLater args $ - if (ncg && (x86ish || ppc || aarch64)) || llvm + if allowFab then Left MO_F64_Fabs else Right $ genericFabsOp W64 @@ -1643,8 +1645,8 @@ emitPrimOp dflags primop = case primop of KeepAliveOp -> panic "keepAlive# should have been eliminated in CorePrep" where - profile = targetProfile dflags - platform = profilePlatform profile + profile = stgToCmmProfile cfg + platform = stgToCmmPlatform cfg result_info = getPrimOpResultInfo primop opNop :: [CmmExpr] -> PrimopCmmEmit @@ -1677,7 +1679,7 @@ emitPrimOp dflags primop = case primop of opTranslate64 args mkMop callish = case platformWordSize platform of -- LLVM and C `can handle larger than native size arithmetic natively. - _ | not ncg -> opTranslate args $ mkMop W64 + _ | stgToCmmAllowBigArith cfg -> opTranslate args $ mkMop W64 PW4 -> opCallish args callish PW8 -> opTranslate args $ mkMop W64 @@ -1731,17 +1733,11 @@ emitPrimOp dflags primop = case primop of [_, CmmLit (CmmInt n _) ] -> isJust (exactLog2 n) _ -> False - ncg = backend dflags == NCG - llvm = backend dflags == LLVM - x86ish = case platformArch platform of - ArchX86 -> True - ArchX86_64 -> True - _ -> False - ppc = case platformArch platform of - ArchPPC -> True - ArchPPC_64 _ -> True - _ -> False - aarch64 = platformArch platform == ArchAArch64 + allowQuotRem = stgToCmmAllowQuotRemInstr cfg + allowQuotRem2 = stgToCmmAllowQuotRem2 cfg + allowExtAdd = stgToCmmAllowExtendedAddSubInstrs cfg + allowInt2Mul = stgToCmmAllowIntMul2Instr cfg + allowFab = stgToCmmAllowFabsInstrs cfg data PrimopCmmEmit -- | Out of line fake primop that's actually just a foreign call to other @@ -2008,14 +2004,14 @@ genericWordMul2Op _ _ = panic "genericWordMul2Op" genericIntMul2Op :: GenericOp genericIntMul2Op [res_c, res_h, res_l] both_args@[arg_x, arg_y] - = do dflags <- getDynFlags - platform <- getPlatform + = do cfg <- getStgToCmmConfig -- Implement algorithm from Hacker's Delight, 2nd edition, p.174 - let t = cmmExprType platform arg_x + let t = cmmExprType platform arg_x + platform = stgToCmmPlatform cfg p <- newTemp t -- 1) compute the multiplication as if numbers were unsigned _ <- withSequel (AssignTo [p, res_l] False) $ - cmmPrimOpApp dflags WordMul2Op both_args Nothing + cmmPrimOpApp cfg WordMul2Op both_args Nothing -- 2) correct the high bits of the unsigned result let carryFill x = CmmMachOp (MO_S_Shr ww) [x, wwm1] sub x y = CmmMachOp (MO_Sub ww) [x, y] @@ -2299,14 +2295,13 @@ vecElemProjectCast _ _ _ = Nothing -- it may very well be a design perspective that helps guide improving the NCG. -checkVecCompatibility :: DynFlags -> PrimOpVecCat -> Length -> Width -> FCode () -checkVecCompatibility dflags vcat l w = do - when (backend dflags /= LLVM) $ - sorry $ unlines ["SIMD vector instructions require the LLVM back-end." - ,"Please use -fllvm."] - check vecWidth vcat l w +checkVecCompatibility :: StgToCmmConfig -> PrimOpVecCat -> Length -> Width -> FCode () +checkVecCompatibility cfg vcat l w = + case stgToCmmVecInstrsErr cfg of + Nothing -> check vecWidth vcat l w -- We are in a compatible backend + Just err -> sorry err -- incompatible backend, do panic where - platform = targetPlatform dflags + platform = stgToCmmPlatform cfg check :: Width -> PrimOpVecCat -> Length -> Width -> FCode () check W128 FloatVec 4 W32 | not (isSseEnabled platform) = sorry $ "128-bit wide single-precision floating point " ++ @@ -2314,13 +2309,13 @@ checkVecCompatibility dflags vcat l w = do check W128 _ _ _ | not (isSse2Enabled platform) = sorry $ "128-bit wide integer and double precision " ++ "SIMD vector instructions require at least -msse2." - check W256 FloatVec _ _ | not (isAvxEnabled dflags) = + check W256 FloatVec _ _ | not (stgToCmmAvx cfg) = sorry $ "256-bit wide floating point " ++ "SIMD vector instructions require at least -mavx." - check W256 _ _ _ | not (isAvx2Enabled dflags) = + check W256 _ _ _ | not (stgToCmmAvx2 cfg) = sorry $ "256-bit wide integer " ++ "SIMD vector instructions require at least -mavx2." - check W512 _ _ _ | not (isAvx512fEnabled dflags) = + check W512 _ _ _ | not (stgToCmmAvx512f cfg) = sorry $ "512-bit wide " ++ "SIMD vector instructions require -mavx512f." check _ _ _ _ = return () @@ -3238,9 +3233,9 @@ doBoundsCheck :: CmmExpr -- ^ accessed index -> CmmExpr -- ^ array size (in elements) -> FCode () doBoundsCheck idx sz = do - dflags <- getDynFlags - platform <- getPlatform - when (gopt Opt_DoBoundsChecking dflags) (doCheck platform) + do_bounds_check <- stgToCmmDoBoundsCheck <$> getStgToCmmConfig + platform <- getPlatform + when do_bounds_check (doCheck platform) where doCheck platform = do boundsCheckFailed <- getCode $ emitCCall [] (mkLblExpr mkOutOfBoundsAccessLabel) [] diff --git a/compiler/GHC/StgToCmm/Prof.hs b/compiler/GHC/StgToCmm/Prof.hs index 852b77ef2b..8af9189e9a 100644 --- a/compiler/GHC/StgToCmm/Prof.hs +++ b/compiler/GHC/StgToCmm/Prof.hs @@ -29,11 +29,11 @@ module GHC.StgToCmm.Prof ( import GHC.Prelude import GHC.Driver.Session -import GHC.Driver.Ppr import GHC.Platform import GHC.Platform.Profile import GHC.StgToCmm.Closure +import GHC.StgToCmm.Config import GHC.StgToCmm.Utils import GHC.StgToCmm.Monad import GHC.StgToCmm.Lit @@ -56,7 +56,9 @@ import GHC.Driver.CodeOutput ( ipInitCode ) import GHC.Utils.Encoding import Control.Monad -import Data.Char (ord) +import Data.Char (ord) +import Data.Bifunctor (first) +import GHC.Utils.Monad (whenM) ----------------------------------------------------------------------------- -- @@ -72,7 +74,7 @@ ccType :: Platform -> CmmType -- Type of a cost centre ccType = bWord storeCurCCS :: CmmExpr -> CmmAGraph -storeCurCCS e = mkAssign cccsReg e +storeCurCCS = mkAssign cccsReg mkCCostCentre :: CostCentre -> CmmLit mkCCostCentre cc = CmmLabel (mkCCLabel cc) @@ -139,9 +141,9 @@ We want this kind of code: saveCurrentCostCentre :: FCode (Maybe LocalReg) -- Returns Nothing if profiling is off saveCurrentCostCentre - = do dflags <- getDynFlags - platform <- getPlatform - if not (sccProfilingEnabled dflags) + = do sccProfilingEnabled <- stgToCmmSCCProfiling <$> getStgToCmmConfig + platform <- getPlatform + if not sccProfilingEnabled then return Nothing else do local_cc <- newTemp (ccType platform) emitAssign (CmmLocal local_cc) cccsExpr @@ -163,7 +165,7 @@ restoreCurrentCostCentre (Just local_cc) profDynAlloc :: SMRep -> CmmExpr -> FCode () profDynAlloc rep ccs = ifProfiling $ - do profile <- targetProfile <$> getDynFlags + do profile <- getProfile let platform = profilePlatform profile profAlloc (mkIntExpr platform (heapClosureSizeW profile rep)) ccs @@ -173,12 +175,12 @@ profDynAlloc rep ccs profAlloc :: CmmExpr -> CmmExpr -> FCode () profAlloc words ccs = ifProfiling $ - do profile <- targetProfile <$> getDynFlags + do profile <- getProfile let platform = profilePlatform profile let alloc_rep = rEP_CostCentreStack_mem_alloc platform emit $ addToMemE alloc_rep (cmmOffsetB platform ccs (pc_OFFSET_CostCentreStack_mem_alloc (platformConstants platform))) - (CmmMachOp (MO_UU_Conv (wordWidth platform) (typeWidth alloc_rep)) $ + (CmmMachOp (MO_UU_Conv (wordWidth platform) (typeWidth alloc_rep)) -- subtract the "profiling overhead", which is the -- profiling header in a closure. [CmmMachOp (mo_wordSub platform) [ words, mkIntExpr platform (profHdrSize profile)]] @@ -194,21 +196,18 @@ enterCostCentreThunk closure = emit $ storeCurCCS (costCentreFrom platform closure) enterCostCentreFun :: CostCentreStack -> CmmExpr -> FCode () -enterCostCentreFun ccs closure = - ifProfiling $ - if isCurrentCCS ccs - then do platform <- getPlatform - emitRtsCall rtsUnitId (fsLit "enterFunCCS") - [(baseExpr, AddrHint), - (costCentreFrom platform closure, AddrHint)] False - else return () -- top-level function, nothing to do +enterCostCentreFun ccs closure = ifProfiling $ + when (isCurrentCCS ccs) $ + do platform <- getPlatform + emitRtsCall + rtsUnitId + (fsLit "enterFunCCS") + [(baseExpr, AddrHint), (costCentreFrom platform closure, AddrHint)] + False + -- otherwise we have a top-level function, nothing to do ifProfiling :: FCode () -> FCode () -ifProfiling code - = do profile <- targetProfile <$> getDynFlags - if profileIsProfiling profile - then code - else return () +ifProfiling = whenM (stgToCmmSCCProfiling <$> getStgToCmmConfig) --------------------------------------------------------------- -- Initialising Cost Centres & CCSs @@ -224,7 +223,7 @@ initCostCentres (local_CCs, singleton_CCSs) emitCostCentreDecl :: CostCentre -> FCode () emitCostCentreDecl cc = do - { dflags <- getDynFlags + { ctx <- stgToCmmContext <$> getStgToCmmConfig ; platform <- getPlatform ; let is_caf | isCafCC cc = mkIntCLit platform (ord 'c') -- 'c' == is a CAF | otherwise = zero platform @@ -234,7 +233,7 @@ emitCostCentreDecl cc = do $ moduleName $ cc_mod cc) ; loc <- newByteStringCLit $ utf8EncodeString $ - showPpr dflags (costCentreSrcSpan cc) + renderWithContext ctx (ppr $! costCentreSrcSpan cc) ; let lits = [ zero platform, -- StgInt ccID, label, -- char *label, @@ -278,35 +277,39 @@ sizeof_ccs_words platform (ws,ms) = pc_SIZEOF_CostCentreStack (platformConstants platform) `divMod` platformWordSizeInBytes platform -initInfoTableProv :: [CmmInfoTable] -> InfoTableProvMap -> Module -> FCode CStub +initInfoTableProv :: [CmmInfoTable] -> InfoTableProvMap -> FCode CStub -- Emit the declarations -initInfoTableProv infos itmap this_mod +initInfoTableProv infos itmap = do - dflags <- getDynFlags - let ents = convertInfoProvMap dflags infos this_mod itmap + cfg <- getStgToCmmConfig + let ents = convertInfoProvMap infos this_mod itmap + info_table = stgToCmmInfoTableMap cfg + platform = stgToCmmPlatform cfg + this_mod = stgToCmmThisModule cfg -- Output the actual IPE data mapM_ emitInfoTableProv ents -- Create the C stub which initialises the IPE map - return (ipInitCode dflags this_mod ents) + return (ipInitCode info_table platform this_mod ents) --- Info Table Prov stuff emitInfoTableProv :: InfoProvEnt -> FCode () emitInfoTableProv ip = do - { dflags <- getDynFlags - ; let mod = infoProvModule ip - ; let (src, label) = maybe ("", "") (\(s, l) -> (showPpr dflags s, l)) (infoTableProv ip) - ; platform <- getPlatform - ; let mk_string = newByteStringCLit . utf8EncodeString + { cfg <- getStgToCmmConfig + ; let mod = infoProvModule ip + ctx = stgToCmmContext cfg + platform = stgToCmmPlatform cfg + ; let (src, label) = maybe ("", "") (first (renderWithContext ctx . ppr)) (infoTableProv ip) + mk_string = newByteStringCLit . utf8EncodeString ; label <- mk_string label ; modl <- newByteStringCLit (bytesFS $ moduleNameFS - $ moduleName - $ mod) + $ moduleName mod) ; ty_string <- mk_string (infoTableType ip) - ; loc <- mk_string src - ; table_name <- mk_string (showPpr dflags (pprCLabel platform CStyle (infoTablePtr ip))) - ; closure_type <- mk_string - (showPpr dflags (text $ show $ infoProvEntClosureType ip)) + ; loc <- mk_string src + ; table_name <- mk_string (renderWithContext ctx + (pprCLabel platform CStyle (infoTablePtr ip))) + ; closure_type <- mk_string (renderWithContext ctx + (text $ show $ infoProvEntClosureType ip)) ; let lits = [ CmmLabel (infoTablePtr ip), -- Info table pointer table_name, -- char *table_name @@ -323,15 +326,12 @@ emitInfoTableProv ip = do -- Set the current cost centre stack emitSetCCC :: CostCentre -> Bool -> Bool -> FCode () -emitSetCCC cc tick push - = do profile <- targetProfile <$> getDynFlags - let platform = profilePlatform profile - if not (profileIsProfiling profile) - then return () - else do tmp <- newTemp (ccsType platform) - pushCostCentre tmp cccsExpr cc - when tick $ emit (bumpSccCount platform (CmmReg (CmmLocal tmp))) - when push $ emit (storeCurCCS (CmmReg (CmmLocal tmp))) +emitSetCCC cc tick push = ifProfiling $ + do platform <- getPlatform + tmp <- newTemp (ccsType platform) + pushCostCentre tmp cccsExpr cc + when tick $ emit (bumpSccCount platform (CmmReg (CmmLocal tmp))) + when push $ emit (storeCurCCS (CmmReg (CmmLocal tmp))) pushCostCentre :: LocalReg -> CmmExpr -> CostCentre -> FCode () pushCostCentre result ccs cc diff --git a/compiler/GHC/StgToCmm/Sequel.hs b/compiler/GHC/StgToCmm/Sequel.hs new file mode 100644 index 0000000000..ac55c3620f --- /dev/null +++ b/compiler/GHC/StgToCmm/Sequel.hs @@ -0,0 +1,46 @@ +----------------------------------------------------------------------------- +-- +-- Sequel type for Stg to C-- code generation +-- +-- (c) The University of Glasgow 2004-2006 +-- +-- This module is just a bucket of types used in StgToCmm.Monad and +-- StgToCmm.Closure. Its sole purpose is to break a cyclic dependency between +-- StgToCmm.Monad and StgToCmm.Closure which derives from coupling around +-- the BlockId and LocalReg types +----------------------------------------------------------------------------- + +module GHC.StgToCmm.Sequel + ( Sequel(..) + , SelfLoopInfo + ) where + +import GHC.Cmm.BlockId +import GHC.Cmm +import GHC.Cmm.Ppr() + +import GHC.Types.Id +import GHC.Utils.Outputable + +import GHC.Prelude + +-------------------------------------------------------------------------------- +-- | A Sequel tells what to do with the result of this expression +data Sequel + = Return -- ^ Return result(s) to continuation found on the stack. + + | AssignTo + [LocalReg] -- ^ Put result(s) in these regs and fall through + -- NB: no void arguments here + -- + Bool -- ^ Should we adjust the heap pointer back to recover + -- space that's unused on this path? We need to do this + -- only if the expression may allocate (e.g. it's a + -- foreign call or allocating primOp) + +instance Outputable Sequel where + ppr Return = text "Return" + ppr (AssignTo regs b) = text "AssignTo" <+> ppr regs <+> ppr b + +type SelfLoopInfo = (Id, BlockId, [LocalReg]) +-------------------------------------------------------------------------------- diff --git a/compiler/GHC/StgToCmm/Ticky.hs b/compiler/GHC/StgToCmm/Ticky.hs index 3aff61ac80..2a543b6553 100644 --- a/compiler/GHC/StgToCmm/Ticky.hs +++ b/compiler/GHC/StgToCmm/Ticky.hs @@ -109,6 +109,7 @@ import GHC.Platform.Profile import GHC.StgToCmm.ArgRep ( slowCallPattern , toArgRep , argRepString ) import GHC.StgToCmm.Closure +import GHC.StgToCmm.Config import {-# SOURCE #-} GHC.StgToCmm.Foreign ( emitPrimCall ) import GHC.StgToCmm.Lit ( newStringCLit ) import GHC.StgToCmm.Monad @@ -128,6 +129,7 @@ import GHC.Data.FastString import GHC.Utils.Outputable import GHC.Utils.Panic import GHC.Utils.Misc +import GHC.Utils.Monad (whenM) -- Turgid imports for showTypeCategory import GHC.Builtin.Names @@ -138,7 +140,7 @@ import GHC.Core.Predicate import Data.Maybe import qualified Data.Char -import Control.Monad ( when ) +import Control.Monad ( when, unless ) ----------------------------------------------------------------------------- -- @@ -161,13 +163,11 @@ withNewTickyCounterFun single_entry = withNewTickyCounter (TickyFun single_entry withNewTickyCounterLNE :: Name -> [NonVoid Id] -> FCode a -> FCode a withNewTickyCounterLNE nm args code = do - b <- tickyLNEIsOn + b <- isEnabled stgToCmmTickyLNE if not b then code else withNewTickyCounter TickyLNE nm args code thunkHasCounter :: Bool -> FCode Bool -thunkHasCounter isStatic = do - b <- tickyDynThunkIsOn - pure (not isStatic && b) +thunkHasCounter isStatic = (not isStatic &&) <$> isEnabled stgToCmmTickyDynThunk withNewTickyCounterThunk :: Bool -- ^ static @@ -214,19 +214,18 @@ emitTickyCounter cloType name args = let ctr_lbl = mkRednCountsLabel name in (>> return ctr_lbl) $ ifTicky $ do - { dflags <- getDynFlags - ; platform <- getPlatform + { cfg <- getStgToCmmConfig ; parent <- getTickyCtrLabel ; mod_name <- getModuleName -- When printing the name of a thing in a ticky file, we -- want to give the module name even for *local* things. We -- print just "x (M)" rather that "M.x" to distinguish them - -- from the global kind. - ; let ppr_for_ticky_name :: SDoc + -- from the global kind by calling to @pprTickyName@ + ; let platform = stgToCmmPlatform cfg + ppr_for_ticky_name :: SDoc ppr_for_ticky_name = - let n = ppr name - ext = case cloType of + let ext = case cloType of TickyFun single_entry -> parens $ hcat $ punctuate comma $ [text "fun"] ++ [text "se"|single_entry] TickyCon datacon -> parens (text "con:" <+> ppr (dataConName datacon)) @@ -239,12 +238,9 @@ emitTickyCounter cloType name args -- have a Haskell name Just pname -> text "in" <+> ppr (nameUnique pname) _ -> empty - in if isInternalName name - then n <+> parens (ppr mod_name) <+> ext <+> p - else n <+> ext <+> p + in pprTickyName mod_name name <+> ext <+> p - ; let ctx = (initSDocContext dflags defaultDumpStyle) - { sdocPprDebug = True } + ; let ctx = defaultSDocContext {sdocPprDebug = True} ; fun_descr_lit <- newStringCLit $ renderWithContext ctx ppr_for_ticky_name ; arg_descr_lit <- newStringCLit $ map (showTypeCategory . idType . fromNonVoid) args ; emitDataLits ctr_lbl @@ -337,8 +333,8 @@ tickyEnterLNE = ifTicky $ do -- since the counter was registered already upon being alloc'd registerTickyCtrAtEntryDyn :: CLabel -> FCode () registerTickyCtrAtEntryDyn ctr_lbl = do - already_registered <- tickyAllocdIsOn - when (not already_registered) $ registerTickyCtr ctr_lbl + already_registered <- isEnabled stgToCmmTickyAllocd + unless already_registered $ registerTickyCtr ctr_lbl -- | Register a ticky counter. -- @@ -566,33 +562,29 @@ tickyStackCheck = ifTicky $ bumpTickyCounter (fsLit "STK_CHK_ctr") -- ----------------------------------------------------------------------------- -- Ticky utils -ifTicky :: FCode () -> FCode () -ifTicky code = - getDynFlags >>= \dflags -> when (gopt Opt_Ticky dflags) code - -tickyAllocdIsOn :: FCode Bool -tickyAllocdIsOn = gopt Opt_Ticky_Allocd `fmap` getDynFlags +isEnabled :: (StgToCmmConfig -> Bool) -> FCode Bool +isEnabled = flip fmap getStgToCmmConfig -tickyLNEIsOn :: FCode Bool -tickyLNEIsOn = gopt Opt_Ticky_LNE `fmap` getDynFlags +runIfFlag :: (StgToCmmConfig -> Bool) -> FCode () -> FCode () +runIfFlag f = whenM (f <$> getStgToCmmConfig) -tickyDynThunkIsOn :: FCode Bool -tickyDynThunkIsOn = gopt Opt_Ticky_Dyn_Thunk `fmap` getDynFlags +ifTicky :: FCode () -> FCode () +ifTicky = runIfFlag stgToCmmDoTicky ifTickyAllocd :: FCode () -> FCode () -ifTickyAllocd code = tickyAllocdIsOn >>= \b -> when b code +ifTickyAllocd = runIfFlag stgToCmmTickyAllocd ifTickyLNE :: FCode () -> FCode () -ifTickyLNE code = tickyLNEIsOn >>= \b -> when b code +ifTickyLNE = runIfFlag stgToCmmTickyLNE ifTickyDynThunk :: FCode () -> FCode () -ifTickyDynThunk code = tickyDynThunkIsOn >>= \b -> when b code +ifTickyDynThunk = runIfFlag stgToCmmTickyDynThunk bumpTickyCounter :: FastString -> FCode () -bumpTickyCounter lbl = bumpTickyLbl (mkRtsCmmDataLabel lbl) +bumpTickyCounter = bumpTickyLbl . mkRtsCmmDataLabel bumpTickyCounterBy :: FastString -> Int -> FCode () -bumpTickyCounterBy lbl = bumpTickyLblBy (mkRtsCmmDataLabel lbl) +bumpTickyCounterBy = bumpTickyLblBy . mkRtsCmmDataLabel bumpTickyCounterByE :: FastString -> CmmExpr -> FCode () bumpTickyCounterByE lbl = bumpTickyLblByE (mkRtsCmmDataLabel lbl) diff --git a/compiler/GHC/StgToCmm/Types.hs b/compiler/GHC/StgToCmm/Types.hs index fa6a663038..1d50f0ae70 100644 --- a/compiler/GHC/StgToCmm/Types.hs +++ b/compiler/GHC/StgToCmm/Types.hs @@ -8,6 +8,8 @@ module GHC.StgToCmm.Types , ArgDescr (..) , StandardFormInfo (..) , WordOff + , DoSCCProfiling + , DoExtDynRefs ) where import GHC.Prelude @@ -229,3 +231,9 @@ instance Outputable StandardFormInfo where ppr NonStandardThunk = text "RegThunk" ppr (SelectorThunk w) = text "SelThunk:" <> ppr w ppr (ApThunk n) = text "ApThunk:" <> ppr n + +-------------------------------------------------------------------------------- +-- Gaining sight in a sea of blindness +-------------------------------------------------------------------------------- +type DoSCCProfiling = Bool +type DoExtDynRefs = Bool diff --git a/compiler/GHC/StgToCmm/Utils.hs b/compiler/GHC/StgToCmm/Utils.hs index 3d79193de1..4e061ad168 100644 --- a/compiler/GHC/StgToCmm/Utils.hs +++ b/compiler/GHC/StgToCmm/Utils.hs @@ -70,7 +70,6 @@ import GHC.Types.Literal import GHC.Data.Graph.Directed import GHC.Utils.Misc import GHC.Types.Unique -import GHC.Driver.Session import GHC.Data.FastString import GHC.Utils.Outputable import GHC.Utils.Panic @@ -84,7 +83,6 @@ import Data.List (sortBy) import Data.Ord import GHC.Types.Unique.Map import Data.Maybe -import GHC.Driver.Ppr import qualified Data.List.NonEmpty as NE import GHC.Core.DataCon import GHC.Types.Unique.FM @@ -99,7 +97,7 @@ import qualified Data.Map.Strict as Map -------------------------------------------------------------------------- addToMemLbl :: CmmType -> CLabel -> Int -> CmmAGraph -addToMemLbl rep lbl n = addToMem rep (CmmLit (CmmLabel lbl)) n +addToMemLbl rep lbl = addToMem rep (CmmLit (CmmLabel lbl)) addToMemLblE :: CmmType -> CLabel -> CmmExpr -> CmmAGraph addToMemLblE rep lbl = addToMemE rep (CmmLit (CmmLabel lbl)) @@ -157,12 +155,11 @@ tagToClosure platform tycon tag ------------------------------------------------------------------------- emitRtsCall :: UnitId -> FastString -> [(CmmExpr,ForeignHint)] -> Bool -> FCode () -emitRtsCall pkg fun args safe = emitRtsCallGen [] (mkCmmCodeLabel pkg fun) args safe +emitRtsCall pkg fun = emitRtsCallGen [] (mkCmmCodeLabel pkg fun) emitRtsCallWithResult :: LocalReg -> ForeignHint -> UnitId -> FastString -> [(CmmExpr,ForeignHint)] -> Bool -> FCode () -emitRtsCallWithResult res hint pkg fun args safe - = emitRtsCallGen [(res,hint)] (mkCmmCodeLabel pkg fun) args safe +emitRtsCallWithResult res hint pkg = emitRtsCallGen [(res,hint)] . mkCmmCodeLabel pkg -- Make a call to an RTS C procedure emitRtsCallGen @@ -172,7 +169,7 @@ emitRtsCallGen -> Bool -- True <=> CmmSafe call -> FCode () emitRtsCallGen res lbl args safe - = do { platform <- targetPlatform <$> getDynFlags + = do { platform <- getPlatform ; updfr_off <- getUpdFrameOff ; let (caller_save, caller_load) = callerSaveVolatileRegs platform ; emit caller_save @@ -599,14 +596,14 @@ cmmInfoTableToInfoProvEnt this_mod cmit = -- | Convert source information collected about identifiers in 'GHC.STG.Debug' -- to entries suitable for placing into the info table provenenance table. -convertInfoProvMap :: DynFlags -> [CmmInfoTable] -> Module -> InfoTableProvMap -> [InfoProvEnt] -convertInfoProvMap dflags defns this_mod (InfoTableProvMap (UniqMap dcenv) denv infoTableToSourceLocationMap) = +convertInfoProvMap :: [CmmInfoTable] -> Module -> InfoTableProvMap -> [InfoProvEnt] +convertInfoProvMap defns this_mod (InfoTableProvMap (UniqMap dcenv) denv infoTableToSourceLocationMap) = map (\cmit -> let cl = cit_lbl cmit cn = rtsClosureType (cit_rep cmit) tyString :: Outputable a => a -> String - tyString t = showPpr dflags t + tyString = renderWithContext defaultSDocContext . ppr lookupClosureMap :: Maybe InfoProvEnt lookupClosureMap = case hasHaskellName cl >>= lookupUniqMap denv of @@ -616,7 +613,7 @@ convertInfoProvMap dflags defns this_mod (InfoTableProvMap (UniqMap dcenv) denv lookupDataConMap = do UsageSite _ n <- hasIdLabelInfo cl >>= getConInfoTableLocation -- This is a bit grimy, relies on the DataCon and Name having the same Unique, which they do - (dc, ns) <- (hasHaskellName cl >>= lookupUFM_Directly dcenv . getUnique) + (dc, ns) <- hasHaskellName cl >>= lookupUFM_Directly dcenv . getUnique -- Lookup is linear but lists will be small (< 100) return $ InfoProvEnt cl cn (tyString (dataConTyCon dc)) this_mod (join $ lookup n (NE.toList ns)) |