diff options
Diffstat (limited to 'compiler')
-rw-r--r-- | compiler/codeGen/StgCmmBind.hs | 16 | ||||
-rw-r--r-- | compiler/codeGen/StgCmmClosure.hs | 110 | ||||
-rw-r--r-- | compiler/codeGen/StgCmmExpr.hs | 104 | ||||
-rw-r--r-- | compiler/codeGen/StgCmmMonad.hs | 86 |
4 files changed, 222 insertions, 94 deletions
diff --git a/compiler/codeGen/StgCmmBind.hs b/compiler/codeGen/StgCmmBind.hs index ce5491dc10..dccefd0fb0 100644 --- a/compiler/codeGen/StgCmmBind.hs +++ b/compiler/codeGen/StgCmmBind.hs @@ -30,6 +30,7 @@ import StgCmmForeign (emitPrimCall) import MkGraph import CoreSyn ( AltCon(..) ) import SMRep +import BlockId import Cmm import CmmInfo import CmmUtils @@ -476,7 +477,17 @@ closureCodeBody top_lvl bndr cl_info cc args arity body fv_details ; let node_points = nodeMustPointToIt dflags lf_info node' = if node_points then Just node else Nothing ; when node_points (ldvEnterClosure cl_info) - + -- Emit new label that might potentially be a header + -- of a self-recursive tail call. See Note + -- [Self-recursive tail calls] in StgCmmExpr + ; u <- newUnique + ; let loop_header_id = mkBlockId u + ; emitLabel loop_header_id + -- Extend reader monad with information that + -- self-recursive tail calls can be optimized into local + -- jumps + ; withSelfLoop (bndr, loop_header_id, arg_regs) $ do + { -- Main payload ; entryHeapCheck cl_info node' arity arg_regs $ do { -- ticky after heap check to avoid double counting @@ -490,7 +501,8 @@ closureCodeBody top_lvl bndr cl_info cc args arity body fv_details -- heap check, to reduce live vars over check ; when node_points $ load_fvs node lf_info fv_bindings ; void $ cgExpr body - }} + }}} + } -- A function closure pointer may be tagged, so we diff --git a/compiler/codeGen/StgCmmClosure.hs b/compiler/codeGen/StgCmmClosure.hs index 3b9f38d36b..04297b4258 100644 --- a/compiler/codeGen/StgCmmClosure.hs +++ b/compiler/codeGen/StgCmmClosure.hs @@ -27,10 +27,9 @@ module StgCmmClosure ( lfDynTag, maybeIsLFCon, isLFThunk, isLFReEntrant, lfUpdatable, - nodeMustPointToIt, - CallMethod(..), getCallMethod, - - isKnownFun, funTag, tagForArity, + -- * Used by other modules + CgLoc(..), SelfLoopInfo, CallMethod(..), + nodeMustPointToIt, isKnownFun, funTag, tagForArity, getCallMethod, -- * ClosureInfo ClosureInfo, @@ -69,11 +68,14 @@ module StgCmmClosure ( import StgSyn import SMRep import Cmm +import PprCmmExpr() +import BlockId import CLabel import Id import IdInfo import DataCon +import FastString import Name import Type import TypeRep @@ -85,6 +87,37 @@ import DynFlags import Util ----------------------------------------------------------------------------- +-- Data types and synonyms +----------------------------------------------------------------------------- + +-- These data types are mostly used by other modules, especially StgCmmMonad, +-- but we define them here because some functions in this module need to +-- have access to them as well + +data CgLoc + = CmmLoc CmmExpr -- A stable CmmExpr; that is, one not mentioning + -- Hp, so that it remains valid across calls + + | LneLoc BlockId [LocalReg] -- A join point + -- A join point (= let-no-escape) should only + -- be tail-called, and in a saturated way. + -- To tail-call it, assign to these locals, + -- and branch to the block id + +instance Outputable CgLoc where + ppr (CmmLoc e) = ptext (sLit "cmm") <+> ppr e + ppr (LneLoc b rs) = ptext (sLit "lne") <+> ppr b <+> ppr rs + +type SelfLoopInfo = (Id, BlockId, [LocalReg]) + +-- used by ticky profiling +isKnownFun :: LambdaFormInfo -> Bool +isKnownFun (LFReEntrant _ _ _ _) = True +isKnownFun LFLetNoEscape = True +isKnownFun _ = False + + +----------------------------------------------------------------------------- -- Representations ----------------------------------------------------------------------------- @@ -465,49 +498,65 @@ When black-holing, single-entry closures could also be entered via node (rather than directly) to catch double-entry. -} data CallMethod - = EnterIt -- No args, not a function + = EnterIt -- No args, not a function - | JumpToIt -- A join point + | JumpToIt BlockId [LocalReg] -- A join point or a header of a local loop - | ReturnIt -- It's a value (function, unboxed value, + | ReturnIt -- It's a value (function, unboxed value, -- or constructor), so just return it. | SlowCall -- Unknown fun, or known fun with -- too few args. | DirectEntry -- Jump directly, with args in regs - CLabel -- The code label - RepArity -- Its arity + CLabel -- The code label + RepArity -- Its arity getCallMethod :: DynFlags -> Name -- Function being applied - -> CafInfo -- Can it refer to CAF's? - -> LambdaFormInfo -- Its info - -> RepArity -- Number of available arguments + -> Id -- Function Id used to chech if it can refer to + -- CAF's and whether the function is tail-calling + -- itself + -> LambdaFormInfo -- Its info + -> RepArity -- Number of available arguments + -> CgLoc -- Passed in from cgIdApp so that we can + -- handle let-no-escape bindings and self-recursive + -- tail calls using the same data constructor, + -- JumpToIt. This saves us one case branch in + -- cgIdApp + -> Maybe SelfLoopInfo -- can we perform a self-recursive tail call? -> CallMethod -getCallMethod dflags _name _ lf_info _n_args +getCallMethod _ _ id _ n_args _cg_loc (Just (self_loop_id, block_id, args)) + | id == self_loop_id, n_args == length args + -- If these patterns match then we know that: + -- * function is performing a self-recursive call in a tail position + -- * number of parameters of the function matches functions arity. + -- See Note [Self-recursive tail calls] in StgCmmExpr for more details + = JumpToIt block_id args + +getCallMethod dflags _name _ lf_info _n_args _cg_loc _self_loop_info | nodeMustPointToIt dflags lf_info && gopt Opt_Parallel dflags - = -- If we're parallel, then we must always enter via node. + = -- If we're parallel, then we must always enter via node. -- The reason is that the closure may have been -- fetched since we allocated it. EnterIt -getCallMethod dflags name caf (LFReEntrant _ arity _ _) n_args +getCallMethod dflags name id (LFReEntrant _ arity _ _) n_args _cg_loc _self_loop_info | n_args == 0 = ASSERT( arity /= 0 ) ReturnIt -- No args at all | n_args < arity = SlowCall -- Not enough args - | otherwise = DirectEntry (enterIdLabel dflags name caf) arity + | otherwise = DirectEntry (enterIdLabel dflags name (idCafInfo id)) arity -getCallMethod _ _name _ LFUnLifted n_args +getCallMethod _ _name _ LFUnLifted n_args _cg_loc _self_loop_info = ASSERT( n_args == 0 ) ReturnIt -getCallMethod _ _name _ (LFCon _) n_args +getCallMethod _ _name _ (LFCon _) n_args _cg_loc _self_loop_info = ASSERT( n_args == 0 ) ReturnIt -getCallMethod dflags name caf (LFThunk _ _ updatable std_form_info is_fun) n_args - | is_fun -- it *might* be a function, so we must "call" it (which is always safe) - = SlowCall -- We cannot just enter it [in eval/apply, the entry code +getCallMethod dflags name id (LFThunk _ _ updatable std_form_info is_fun) n_args _cg_loc _self_loop_info + | is_fun -- it *might* be a function, so we must "call" it (which is always safe) + = SlowCall -- We cannot just enter it [in eval/apply, the entry code -- is the fast-entry code] -- Since is_fun is False, we are *definitely* looking at a data value @@ -527,27 +576,24 @@ getCallMethod dflags name caf (LFThunk _ _ updatable std_form_info is_fun) n_arg | otherwise -- Jump direct to code for single-entry thunks = ASSERT( n_args == 0 ) - DirectEntry (thunkEntryLabel dflags name caf std_form_info updatable) 0 + DirectEntry (thunkEntryLabel dflags name (idCafInfo id) std_form_info updatable) 0 -getCallMethod _ _name _ (LFUnknown True) _n_args +getCallMethod _ _name _ (LFUnknown True) _n_arg _cg_locs _self_loop_info = SlowCall -- might be a function -getCallMethod _ name _ (LFUnknown False) n_args +getCallMethod _ name _ (LFUnknown False) n_args _cg_loc _self_loop_info = ASSERT2( n_args == 0, ppr name <+> ppr n_args ) EnterIt -- Not a function -getCallMethod _ _name _ LFBlackHole _n_args - = SlowCall -- Presumably the black hole has by now +getCallMethod _ _name _ LFBlackHole _n_args _cg_loc _self_loop_info + = SlowCall -- Presumably the black hole has by now -- been updated, but we don't know with -- what, so we slow call it -getCallMethod _ _name _ LFLetNoEscape _n_args - = JumpToIt +getCallMethod _ _name _ LFLetNoEscape _n_args (LneLoc blk_id lne_regs) _self_loop_info + = JumpToIt blk_id lne_regs -isKnownFun :: LambdaFormInfo -> Bool -isKnownFun (LFReEntrant _ _ _ _) = True -isKnownFun LFLetNoEscape = True -isKnownFun _ = False +getCallMethod _ _ _ _ _ _ _ = panic "Unknown call method" ----------------------------------------------------------------------------- -- staticClosureRequired diff --git a/compiler/codeGen/StgCmmExpr.hs b/compiler/codeGen/StgCmmExpr.hs index c0623690b0..331e65819f 100644 --- a/compiler/codeGen/StgCmmExpr.hs +++ b/compiler/codeGen/StgCmmExpr.hs @@ -160,7 +160,7 @@ cgLetNoEscapeClosure bndr cc_slot _unused_cc args body return ( lneIdInfo dflags bndr args , code ) where - code = forkProc $ do { + code = forkLneBody $ do { ; withNewTickyCounterLNE (idName bndr) args $ do ; restoreCurrentCostCentre cc_slot ; arg_regs <- bindArgsToRegs args @@ -632,19 +632,20 @@ cgConApp con stg_args cgIdApp :: Id -> [StgArg] -> FCode ReturnKind cgIdApp fun_id [] | isVoidId fun_id = emitReturn [] cgIdApp fun_id args = do - dflags <- getDynFlags - fun_info <- getCgIdInfo fun_id + dflags <- getDynFlags + fun_info <- getCgIdInfo fun_id + self_loop_info <- getSelfLoop let cg_fun_id = cg_id fun_info -- NB: use (cg_id fun_info) instead of fun_id, because -- the former may be externalised for -split-objs. -- See Note [Externalise when splitting] in StgCmmMonad fun_arg = StgVarArg cg_fun_id - fun_name = idName cg_fun_id - fun = idInfoToAmode fun_info - lf_info = cg_lf fun_info + fun_name = idName cg_fun_id + fun = idInfoToAmode fun_info + lf_info = cg_lf fun_info node_points dflags = nodeMustPointToIt dflags lf_info - case (getCallMethod dflags fun_name (idCafInfo cg_fun_id) lf_info (length args)) of + case (getCallMethod dflags fun_name cg_fun_id lf_info (length args) (cg_loc fun_info) self_loop_info) of -- A value in WHNF, so we can just return it. ReturnIt -> emitReturn [fun] -- ToDo: does ReturnIt guarantee tagged? @@ -664,14 +665,87 @@ cgIdApp fun_id args = do then directCall NativeNodeCall lbl arity (fun_arg:args) else directCall NativeDirectCall lbl arity args } - -- Let-no-escape call - JumpToIt -> let (LneLoc blk_id lne_regs) = cg_loc fun_info - in do - { adjustHpBackwards -- always do this before a tail-call - ; cmm_args <- getNonVoidArgAmodes args - ; emitMultiAssign lne_regs cmm_args - ; emit (mkBranch blk_id) - ; return AssignedDirectly } + -- Let-no-escape call or self-recursive tail-call + JumpToIt blk_id lne_regs -> do + { adjustHpBackwards -- always do this before a tail-call + ; cmm_args <- getNonVoidArgAmodes args + ; emitMultiAssign lne_regs cmm_args + ; emit (mkBranch blk_id) + ; return AssignedDirectly } + +-- Note [Self-recursive tail calls] +-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +-- +-- Self-recursive tail calls can be optimized into a local jump in the same +-- way as let-no-escape bindings (see Note [What is a non-escaping let] in +-- stgSyn/CoreToStg.lhs). Consider this: +-- +-- foo.info: +-- a = R1 // calling convention +-- b = R2 +-- goto L1 +-- L1: ... +-- ... +-- ... +-- L2: R1 = x +-- R2 = y +-- call foo(R1,R2) +-- +-- Instead of putting x and y into registers (or other locations required by the +-- calling convention) and performing a call we can put them into local +-- variables a and b and perform jump to L1: +-- +-- foo.info: +-- a = R1 +-- b = R2 +-- goto L1 +-- L1: ... +-- ... +-- ... +-- L2: a = x +-- b = y +-- goto L1 +-- +-- This can be done only when function is calling itself in a tail position +-- and only if the call passes number of parameters equal to function's arity. +-- Note that this cannot be performed if a function calls itself with a +-- continuation. +-- +-- This in fact implements optimization known as "loopification". It was +-- described in "Low-level code optimizations in the Glasgow Haskell Compiler" +-- by Krzysztof Woś, though we use different approach. Krzysztof performed his +-- optimization at the Cmm level, whereas we perform ours during code generation +-- (Stg-to-Cmm pass) essentially making sure that optimized Cmm code is +-- generated in the first place. +-- +-- Implementation is spread across a couple of places in the code: +-- +-- * FCode monad stores additional information in its reader environment +-- (cgd_self_loop field). This information tells us which function can +-- tail call itself in an optimized way (it is the function currently +-- being compiled), what is the label of a loop header (L1 in example above) +-- and information about local registers in which we should arguments +-- before making a call (this would be a and b in example above). +-- +-- * Whenever we are compiling a function, we set that information to reflect +-- the fact that function currently being compiled can be jumped to, instead +-- of called. We also have to emit a label to which we will be jumping. Both +-- things are done in closureCodyBody in StgCmmBind. +-- +-- * When we began compilation of another closure we remove the additional +-- information from the environment. This is done by forkClosureBody +-- in StgCmmMonad. Other functions that duplicate the environment - +-- forkLneBody, forkAlts, codeOnly - duplicate that information. In other +-- words, we only need to clean the environment of the self-loop information +-- when compiling right hand side of a closure (binding). +-- +-- * When compiling a call (cgIdApp) we use getCallMethod to decide what kind +-- of call will be generated. getCallMethod decides to generate a self +-- recursive tail call when (a) environment stores information about +-- possible self tail-call; (b) that tail call is to a function currently +-- being compiled; (c) number of passed arguments is equal to function's +-- arity. + emitEnter :: CmmExpr -> FCode ReturnKind emitEnter fun = do diff --git a/compiler/codeGen/StgCmmMonad.hs b/compiler/codeGen/StgCmmMonad.hs index ddb677a49e..27d4fd6386 100644 --- a/compiler/codeGen/StgCmmMonad.hs +++ b/compiler/codeGen/StgCmmMonad.hs @@ -26,7 +26,7 @@ module StgCmmMonad ( mkCmmIfThenElse, mkCmmIfThen, mkCmmIfGoto, mkCall, mkCmmCall, - forkClosureBody, forkAlts, forkProc, codeOnly, + forkClosureBody, forkLneBody, forkAlts, codeOnly, ConTagZ, @@ -44,10 +44,10 @@ module StgCmmMonad ( getModuleName, -- ideally we wouldn't export these, but some other modules access internal state - getState, setState, getInfoDown, getDynFlags, getThisPackage, + getState, setState, getSelfLoop, withSelfLoop, getInfoDown, getDynFlags, getThisPackage, -- more localised access to monad state - CgIdInfo(..), CgLoc(..), + CgIdInfo(..), getBinds, setBinds, -- out of general friendliness, we also export ... @@ -60,6 +60,7 @@ import Cmm import StgCmmClosure import DynFlags import Hoopl +import Maybes import MkGraph import BlockId import CLabel @@ -100,11 +101,10 @@ infixr 9 `thenFC` -- - A reader monad, for CgInfoDownwards, containing -- - DynFlags, -- - the current Module --- - the static top-level environmnet -- - the update-frame offset -- - the ticky counter label -- - the Sequel (the continuation to return to) - +-- - the self-recursive tail call information -------------------------------------------------------- @@ -169,11 +169,15 @@ fixC fcode = FCode ( data CgInfoDownwards -- information only passed *downwards* by the monad = MkCgInfoDown { - cgd_dflags :: DynFlags, - cgd_mod :: Module, -- Module being compiled - cgd_updfr_off :: UpdFrameOffset, -- Size of current update frame - cgd_ticky :: CLabel, -- Current destination for ticky counts - cgd_sequel :: Sequel -- What to do at end of basic block + cgd_dflags :: DynFlags, + cgd_mod :: Module, -- Module being compiled + cgd_updfr_off :: UpdFrameOffset, -- Size of current update frame + cgd_ticky :: CLabel, -- Current destination for ticky counts + cgd_sequel :: Sequel, -- What to do at end of basic block + cgd_self_loop :: Maybe SelfLoopInfo -- Which tail calls can be compiled + -- as local jumps? See Note + -- [Self-recursive tail calls] in + -- StgCmmExpr } type CgBindings = IdEnv CgIdInfo @@ -195,25 +199,10 @@ data CgIdInfo -- use the externalised one in any C label we use which refers to this -- name. -data CgLoc - = CmmLoc CmmExpr -- A stable CmmExpr; that is, one not mentioning - -- Hp, so that it remains valid across calls - - | LneLoc BlockId [LocalReg] -- A join point - -- A join point (= let-no-escape) should only - -- be tail-called, and in a saturated way. - -- To tail-call it, assign to these locals, - -- and branch to the block id - instance Outputable CgIdInfo where ppr (CgIdInfo { cg_id = id, cg_loc = loc }) = ppr id <+> ptext (sLit "-->") <+> ppr loc -instance Outputable CgLoc where - ppr (CmmLoc e) = ptext (sLit "cmm") <+> ppr e - ppr (LneLoc b rs) = ptext (sLit "lne") <+> ppr b <+> ppr rs - - -- Sequel tells what to do with the result of this expression data Sequel = Return Bool -- Return result(s) to continuation found on the stack. @@ -308,7 +297,8 @@ initCgInfoDown dflags mod , cgd_mod = mod , cgd_updfr_off = initUpdFrameOff dflags , cgd_ticky = mkTopTickyCtrLabel - , cgd_sequel = initSequel } + , cgd_sequel = initSequel + , cgd_self_loop = Nothing } initSequel :: Sequel initSequel = Return False @@ -455,6 +445,16 @@ newUnique = do getInfoDown :: FCode CgInfoDownwards getInfoDown = FCode $ \info_down state -> (# info_down,state #) +getSelfLoop :: FCode (Maybe SelfLoopInfo) +getSelfLoop = do + info_down <- getInfoDown + return $ cgd_self_loop info_down + +withSelfLoop :: SelfLoopInfo -> FCode a -> FCode a +withSelfLoop self_loop code = do + info_down <- getInfoDown + withInfoDown code (info_down {cgd_self_loop = Just self_loop}) + instance HasDynFlags FCode where getDynFlags = liftM cgd_dflags getInfoDown @@ -481,7 +481,7 @@ getModuleName = do { info <- getInfoDown; return (cgd_mod info) } withSequel :: Sequel -> FCode a -> FCode a withSequel sequel code = do { info <- getInfoDown - ; withInfoDown code (info {cgd_sequel = sequel }) } + ; withInfoDown code (info {cgd_sequel = sequel, cgd_self_loop = Nothing }) } getSequel :: FCode Sequel getSequel = do { info <- getInfoDown @@ -526,15 +526,12 @@ setTickyCtrLabel ticky code = do -------------------------------------------------------- forkClosureBody :: FCode () -> FCode () --- forkClosureBody takes a code, $c$, and compiles it in a --- fresh environment, except that: --- - compilation info and statics are passed in unchanged. --- - local bindings are passed in unchanged --- (it's up to the enclosed code to re-bind the --- free variables to a field of the closure) --- --- The current state is passed on completely unaltered, except that --- C-- from the fork is incorporated. +-- forkClosureBody compiles body_code in environment where: +-- - sequel, update stack frame and self loop info are +-- set to fresh values +-- - state is set to a fresh value, except for local bindings +-- that are passed in unchanged. It's up to the enclosed code to +-- re-bind the free variables to a field of the closure. forkClosureBody body_code = do { dflags <- getDynFlags @@ -542,26 +539,25 @@ forkClosureBody body_code ; us <- newUniqSupply ; state <- getState ; let body_info_down = info { cgd_sequel = initSequel - , cgd_updfr_off = initUpdFrameOff dflags } + , cgd_updfr_off = initUpdFrameOff dflags + , cgd_self_loop = Nothing } fork_state_in = (initCgState us) { cgs_binds = cgs_binds state } ((),fork_state_out) = doFCode body_code body_info_down fork_state_in ; setState $ state `addCodeBlocksFrom` fork_state_out } -forkProc :: FCode a -> FCode a --- 'forkProc' takes a code and compiles it in the *current* environment, --- returning the graph thus constructed. +forkLneBody :: FCode a -> FCode a +-- 'forkLneBody' takes a body of let-no-escape binding and compiles +-- it in the *current* environment, returning the graph thus constructed. -- -- The current environment is passed on completely unchanged to -- the successor. In particular, any heap usage from the enclosed -- code is discarded; it should deal with its own heap consumption. --- forkProc is used to compile let-no-escape bindings. -forkProc body_code +forkLneBody body_code = do { info_down <- getInfoDown ; us <- newUniqSupply ; state <- getState - ; let info_down' = info_down -- { cgd_sequel = initSequel } - fork_state_in = (initCgState us) { cgs_binds = cgs_binds state } - (result, fork_state_out) = doFCode body_code info_down' fork_state_in + ; let fork_state_in = (initCgState us) { cgs_binds = cgs_binds state } + (result, fork_state_out) = doFCode body_code info_down fork_state_in ; setState $ state `addCodeBlocksFrom` fork_state_out ; return result } |