Diffstat (limited to 'compiler/codeGen/StgCmmExpr.hs')
-rw-r--r--  compiler/codeGen/StgCmmExpr.hs  |  992
1 file changed, 0 insertions(+), 992 deletions(-)
diff --git a/compiler/codeGen/StgCmmExpr.hs b/compiler/codeGen/StgCmmExpr.hs
deleted file mode 100644
index 70a044a7ab..0000000000
--- a/compiler/codeGen/StgCmmExpr.hs
+++ /dev/null
@@ -1,992 +0,0 @@
-{-# LANGUAGE CPP #-}
-
------------------------------------------------------------------------------
---
--- Stg to C-- code generation: expressions
---
--- (c) The University of Glasgow 2004-2006
---
------------------------------------------------------------------------------
-
-module StgCmmExpr ( cgExpr ) where
-
-#include "HsVersions.h"
-
-import GhcPrelude hiding ((<*>))
-
-import {-# SOURCE #-} StgCmmBind ( cgBind )
-
-import StgCmmMonad
-import StgCmmHeap
-import StgCmmEnv
-import StgCmmCon
-import StgCmmProf (saveCurrentCostCentre, restoreCurrentCostCentre, emitSetCCC)
-import StgCmmLayout
-import StgCmmPrim
-import StgCmmHpc
-import StgCmmTicky
-import StgCmmUtils
-import StgCmmClosure
-
-import StgSyn
-
-import MkGraph
-import BlockId
-import Cmm
-import CmmInfo
-import CoreSyn
-import DataCon
-import ForeignCall
-import Id
-import PrimOp
-import TyCon
-import Type ( isUnliftedType )
-import RepType ( isVoidTy, countConRepArgs, primRepSlot )
-import CostCentre ( CostCentreStack, currentCCS )
-import Maybes
-import Util
-import FastString
-import Outputable
-
-import Control.Monad (unless,void)
-import Control.Arrow (first)
-import Data.Function ( on )
-
-------------------------------------------------------------------------
--- cgExpr: the main function
-------------------------------------------------------------------------
-
-cgExpr :: CgStgExpr -> FCode ReturnKind
-
-cgExpr (StgApp fun args) = cgIdApp fun args
-
--- seq# a s ==> a
--- See Note [seq# magic] in PrelRules
-cgExpr (StgOpApp (StgPrimOp SeqOp) [StgVarArg a, _] _res_ty) =
- cgIdApp a []
-
--- dataToTag# :: a -> Int#
--- See Note [dataToTag#] in primops.txt.pp
-cgExpr (StgOpApp (StgPrimOp DataToTagOp) [StgVarArg a] _res_ty) = do
- dflags <- getDynFlags
- emitComment (mkFastString "dataToTag#")
- tmp <- newTemp (bWord dflags)
- _ <- withSequel (AssignTo [tmp] False) (cgIdApp a [])
- -- TODO: For small types look at the tag bits instead of reading info table
- emitReturn [getConstrTag dflags (cmmUntag dflags (CmmReg (CmmLocal tmp)))]
-
-cgExpr (StgOpApp op args ty) = cgOpApp op args ty
-cgExpr (StgConApp con args _)= cgConApp con args
-cgExpr (StgTick t e) = cgTick t >> cgExpr e
-cgExpr (StgLit lit) = do cmm_lit <- cgLit lit
- emitReturn [CmmLit cmm_lit]
-
-cgExpr (StgLet _ binds expr) = do { cgBind binds; cgExpr expr }
-cgExpr (StgLetNoEscape _ binds expr) =
- do { u <- newUnique
- ; let join_id = mkBlockId u
- ; cgLneBinds join_id binds
- ; r <- cgExpr expr
- ; emitLabel join_id
- ; return r }
-
-cgExpr (StgCase expr bndr alt_type alts) =
- cgCase expr bndr alt_type alts
-
-cgExpr (StgLam {}) = panic "cgExpr: StgLam"
-
-------------------------------------------------------------------------
--- Let no escape
-------------------------------------------------------------------------
-
-{- Generating code for a let-no-escape binding (aka a join point) is very
-similar to what we do for a case expression.  The duality is
-between
-        let-no-escape x = b
-        in e
-and
-        case e of ... -> b
-
-That is, the RHS of 'x' (ie 'b') will execute *later*, just like
-the alternative of the case; it needs to be compiled in an environment
-in which all volatile bindings are forgotten, and the free vars are
-bound only to stable things like stack locations.  The 'e' part will
-execute *next*, just like the scrutinee of a case. -}
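-
--- A small illustrative Haskell sketch (not part of this module) of a binding
--- that typically becomes a let-no-escape/join point: 'k' never escapes and is
--- only tail-called from the case alternatives, so its body can be compiled as
--- a local jump target rather than as a heap-allocated closure.
---
---     f :: Int -> Int
---     f x = let k y = y * 2 + 1            -- only ever tail-called
---           in case x > 0 of
---                True  -> k x
---                False -> k (negate x)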
-
--------------------------
-cgLneBinds :: BlockId -> CgStgBinding -> FCode ()
-cgLneBinds join_id (StgNonRec bndr rhs)
- = do { local_cc <- saveCurrentCostCentre
- -- See Note [Saving the current cost centre]
- ; (info, fcode) <- cgLetNoEscapeRhs join_id local_cc bndr rhs
- ; fcode
- ; addBindC info }
-
-cgLneBinds join_id (StgRec pairs)
- = do { local_cc <- saveCurrentCostCentre
- ; r <- sequence $ unzipWith (cgLetNoEscapeRhs join_id local_cc) pairs
- ; let (infos, fcodes) = unzip r
- ; addBindsC infos
- ; sequence_ fcodes
- }
-
--------------------------
-cgLetNoEscapeRhs
- :: BlockId -- join point for successor of let-no-escape
- -> Maybe LocalReg -- Saved cost centre
- -> Id
- -> CgStgRhs
- -> FCode (CgIdInfo, FCode ())
-
-cgLetNoEscapeRhs join_id local_cc bndr rhs =
- do { (info, rhs_code) <- cgLetNoEscapeRhsBody local_cc bndr rhs
- ; let (bid, _) = expectJust "cgLetNoEscapeRhs" $ maybeLetNoEscape info
- ; let code = do { (_, body) <- getCodeScoped rhs_code
- ; emitOutOfLine bid (first (<*> mkBranch join_id) body) }
- ; return (info, code)
- }
-
-cgLetNoEscapeRhsBody
- :: Maybe LocalReg -- Saved cost centre
- -> Id
- -> CgStgRhs
- -> FCode (CgIdInfo, FCode ())
-cgLetNoEscapeRhsBody local_cc bndr (StgRhsClosure _ cc _upd args body)
- = cgLetNoEscapeClosure bndr local_cc cc (nonVoidIds args) body
-cgLetNoEscapeRhsBody local_cc bndr (StgRhsCon cc con args)
- = cgLetNoEscapeClosure bndr local_cc cc []
- (StgConApp con args (pprPanic "cgLetNoEscapeRhsBody" $
- text "StgRhsCon doesn't have type args"))
- -- For a constructor RHS we want to generate a single chunk of
- -- code which can be jumped to from many places, which will
- -- return the constructor. It's easy; just behave as if it
- -- was an StgRhsClosure with a ConApp inside!
-
--------------------------
-cgLetNoEscapeClosure
- :: Id -- binder
- -> Maybe LocalReg -- Slot for saved current cost centre
- -> CostCentreStack -- XXX: *** NOT USED *** why not?
- -> [NonVoid Id] -- Args (as in \ args -> body)
- -> CgStgExpr -- Body (as in above)
- -> FCode (CgIdInfo, FCode ())
-
-cgLetNoEscapeClosure bndr cc_slot _unused_cc args body
- = do dflags <- getDynFlags
- return ( lneIdInfo dflags bndr args
- , code )
- where
- code = forkLneBody $ do {
- ; withNewTickyCounterLNE (idName bndr) args $ do
- ; restoreCurrentCostCentre cc_slot
- ; arg_regs <- bindArgsToRegs args
- ; void $ noEscapeHeapCheck arg_regs (tickyEnterLNE >> cgExpr body) }
-
-
-------------------------------------------------------------------------
--- Case expressions
-------------------------------------------------------------------------
-
-{- Note [Compiling case expressions]
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-It is quite interesting to decide whether to put a heap-check at the
-start of each alternative. Of course we certainly have to do so if
-the case forces an evaluation, or if there is a primitive op which can
-trigger GC.
-
-A more interesting situation is this (a Plan-B situation)
-
- !P!;
- ...P...
- case x# of
- 0# -> !Q!; ...Q...
- default -> !R!; ...R...
-
-where !x! indicates a possible heap-check point. The heap checks
-in the alternatives *can* be omitted, in which case the topmost
-heapcheck will take their worst case into account.
-
-In favour of omitting !Q!, !R!:
-
- - *May* save a heap overflow test,
- if ...P... allocates anything.
-
- - We can use relative addressing from a single Hp to
- get at all the closures so allocated.
-
- - No need to save volatile vars etc across heap checks
- in !Q!, !R!
-
-Against omitting !Q!, !R!
-
- - May put a heap-check into the inner loop. Suppose
- the main loop is P -> R -> P -> R...
- Q is the loop exit, and only it does allocation.
- This only hurts us if P does no allocation. If P allocates,
- then there is a heap check in the inner loop anyway.
-
- - May do more allocation than required. This sometimes bites us
- badly. For example, nfib (ha!) allocates about 30% more space if the
- worst-casing is done, because many, many calls to nfib are leaf calls
- which don't need to allocate anything.
-
- We can un-allocate, but that costs an instruction
-
-Neither problem hurts us if there is only one alternative.
-
-Suppose the inner loop is P->R->P->R etc. Then here is
-how many heap checks we get in the *inner loop* under various
-conditions
-
- Alloc Heap check in branches (!Q!, !R!)?
- P Q R yes no (absorb to !P!)
---------------------------------------
- n n n 0 0
- n y n 0 1
- n . y 1 1
- y . y 2 1
- y . n 1 1
-
-Best choices: absorb heap checks from Q and R into !P! iff
- a) P itself does some allocation
-or
- b) P does allocation, or there is exactly one alternative
-
-We adopt (b) because that is more likely to put the heap check at the
-entry to a function, when not many things are live. After a bunch of
-single-branch cases, we may have lots of things live.
-
-Hence: two basic plans for
-
- case e of r { alts }
-
------- Plan A: the general case ---------
-
- ...save current cost centre...
-
- ...code for e,
- with sequel (SetLocals r)
-
- ...restore current cost centre...
- ...code for alts...
- ...alts do their own heap checks
-
------- Plan B: special case when ---------
- (i) e does not allocate or call GC
- (ii) either upstream code performs allocation
- or there is just one alternative
-
- Then heap allocation in the (single) case branch
- is absorbed by the upstream check.
- Very common example: primops on unboxed values
-
- ...code for e,
- with sequel (SetLocals r)...
-
- ...code for alts...
- ...no heap check...
--}
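-
--- A small illustrative example (a sketch, not taken from GHC) of a Plan-B
--- situation: the scrutinee is a primop that cannot allocate or call the GC,
--- and there is a single alternative, so no heap check is placed in the
--- alternative; any allocation the alternative does is absorbed by the
--- enclosing upstream check.
---
---     {-# LANGUAGE MagicHash #-}
---     import GHC.Exts (Int#, (+#))
---
---     bump :: Int# -> Int#
---     bump x# = case x# +# 1# of
---                 y# -> y#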
-
-
-
--------------------------------------
-data GcPlan
- = GcInAlts -- Put a GC check at the start of the case alternatives,
- [LocalReg] -- which binds these registers
- | NoGcInAlts -- The scrutinee is a primitive value, or a call to a
- -- primitive op which does no GC. Absorb the allocation
- -- of the case alternative(s) into the upstream check
-
--------------------------------------
-cgCase :: CgStgExpr -> Id -> AltType -> [CgStgAlt] -> FCode ReturnKind
-
-cgCase (StgOpApp (StgPrimOp op) args _) bndr (AlgAlt tycon) alts
- | isEnumerationTyCon tycon -- Note [case on bool]
- = do { tag_expr <- do_enum_primop op args
-
- -- If the binder is not dead, convert the tag to a constructor
- -- and assign it. See Note [Dead-binder optimisation]
- ; unless (isDeadBinder bndr) $ do
- { dflags <- getDynFlags
- ; tmp_reg <- bindArgToReg (NonVoid bndr)
- ; emitAssign (CmmLocal tmp_reg)
- (tagToClosure dflags tycon tag_expr) }
-
- ; (mb_deflt, branches) <- cgAlgAltRhss (NoGcInAlts,AssignedDirectly)
- (NonVoid bndr) alts
- -- See Note [GC for conditionals]
- ; emitSwitch tag_expr branches mb_deflt 0 (tyConFamilySize tycon - 1)
- ; return AssignedDirectly
- }
- where
- do_enum_primop :: PrimOp -> [StgArg] -> FCode CmmExpr
- do_enum_primop TagToEnumOp [arg] -- No code!
- = getArgAmode (NonVoid arg)
- do_enum_primop primop args
- = do dflags <- getDynFlags
- tmp <- newTemp (bWord dflags)
- cgPrimOp [tmp] primop args
- return (CmmReg (CmmLocal tmp))
-
-{-
-Note [case on bool]
-~~~~~~~~~~~~~~~~~~~
-This special case handles code like
-
- case a <# b of
- True ->
- False ->
-
---> case tagToEnum# (a <$# b) of
- True -> .. ; False -> ...
-
---> case (a <$# b) of r ->
- case tagToEnum# r of
- True -> .. ; False -> ...
-
-If we let the ordinary case code handle it, we'll get something like
-
- tmp1 = a < b
- tmp2 = Bool_closure_tbl[tmp1]
- if (tmp2 & 7 != 0) then ... // normal tagged case
-
-but this junk won't optimise away. What we really want is just an
-inline comparison:
-
- if (a < b) then ...
-
-So we add a special case to generate
-
- tmp1 = a < b
- if (tmp1 == 0) then ...
-
-and later optimisations will further improve this.
-
-Now that #6135 has been resolved it should be possible to remove that
-special case. The idea behind this special case and the pre-6135
-implementation of Bool-returning primops was that tagToEnum# was added
-implicitly in the codegen and then optimized away. Now the call to
-tagToEnum# is explicit in the source code, which allows it to be optimized
-away at earlier stages of compilation (i.e. at the Core level).
-
-Note [Scrutinising VoidRep]
-~~~~~~~~~~~~~~~~~~~~~~~~~~~
-Suppose we have this STG code:
- f = \[s : State# RealWorld] ->
- case s of _ -> blah
-This is very odd. Why are we scrutinising a state token? But it
-can arise with bizarre NOINLINE pragmas (#9964)
- crash :: IO ()
- crash = IO (\s -> let {-# NOINLINE s' #-}
- s' = s
- in (# s', () #))
-
-Now the trouble is that 's' has VoidRep, and we do not bind void
-arguments in the environment; they don't live anywhere. See the
-calls to nonVoidIds in various places. So we must not look up
-'s' in the environment. Instead, just evaluate the RHS! Simple.
-
-Note [Dead-binder optimisation]
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-A case-binder, or data-constructor argument, may be marked as dead,
-because we preserve occurrence-info on binders in CoreTidy (see
-CoreTidy.tidyIdBndr).
-
-If the binder is dead, we can sometimes eliminate a load. While
-CmmSink will eliminate that load, it's very easy to kill it at source
-(giving CmmSink less work to do), and in any case CmmSink only runs
-with -O. Since the majority of case binders are dead, this
-optimisation probably still has a great benefit-cost ratio and we want
-to keep it for -O0. See also Phab:D5358.
-
-This was probably also the reason the occurrence hack in Phab:D5339
-existed, perhaps because the occurrence information preserved by
-'CoreTidy.tidyIdBndr' was insufficient. But now that CmmSink does the
-job, we have deleted the hacks.
--}
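-
--- A small illustrative Haskell sketch (not taken from GHC) of the dead-binder
--- situation: in the Core/STG for this function the case binder for the
--- scrutinee 'xs' does not occur in any alternative, so the assignment to it
--- can be dropped at code-generation time instead of waiting for CmmSink.
---
---     headOrZero :: [Int] -> Int
---     headOrZero xs = case xs of      -- the case binder is dead
---                       []      -> 0
---                       (y : _) -> y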
-
-cgCase (StgApp v []) _ (PrimAlt _) alts
- | isVoidRep (idPrimRep v) -- See Note [Scrutinising VoidRep]
- , [(DEFAULT, _, rhs)] <- alts
- = cgExpr rhs
-
-{- Note [Dodgy unsafeCoerce 1]
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-Consider
- case (x :: HValue) |> co of (y :: MutVar# Int)
- DEFAULT -> ...
-We want to generate an assignment
- y := x
-We want to allow this assignment to be generated in the case when the
-types are compatible, because this allows some slightly-dodgy but
-occasionally-useful casts to be used, such as in RtClosureInspect
-where we cast an HValue to a MutVar# so we can print out the contents
-of the MutVar#. If instead we generate code that enters the HValue,
-then we'll get a runtime panic, because the HValue really is a
-MutVar#. The types are compatible though, so we can just generate an
-assignment.
--}
-cgCase (StgApp v []) bndr alt_type@(PrimAlt _) alts
- | isUnliftedType (idType v) -- Note [Dodgy unsafeCoerce 1]
- || reps_compatible
- = -- assignment suffices for unlifted types
- do { dflags <- getDynFlags
- ; unless reps_compatible $
- pprPanic "cgCase: reps do not match, perhaps a dodgy unsafeCoerce?"
- (pp_bndr v $$ pp_bndr bndr)
- ; v_info <- getCgIdInfo v
- ; emitAssign (CmmLocal (idToReg dflags (NonVoid bndr)))
- (idInfoToAmode v_info)
- -- Add bndr to the environment
- ; _ <- bindArgToReg (NonVoid bndr)
- ; cgAlts (NoGcInAlts,AssignedDirectly) (NonVoid bndr) alt_type alts }
- where
- reps_compatible = ((==) `on` (primRepSlot . idPrimRep)) v bndr
- -- Must compare SlotTys, not proper PrimReps, because with unboxed sums,
- -- the types of the binders are generated from slotPrimRep and might not
- -- match. Test case:
- -- swap :: (# Int | Int #) -> (# Int | Int #)
- -- swap (# x | #) = (# | x #)
- -- swap (# | y #) = (# y | #)
-
- pp_bndr id = ppr id <+> dcolon <+> ppr (idType id) <+> parens (ppr (idPrimRep id))
-
-{- Note [Dodgy unsafeCoerce 2, #3132]
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-In all other cases of a lifted Id being cast to an unlifted type, the
-Id should be bound to bottom, otherwise this is an unsafe use of
-unsafeCoerce. We can generate code to enter the Id and assume that
-it will never return. Hence, we emit the usual enter/return code, and
-because bottom must be untagged, it will be entered. The Sequel is a
-type-correct assignment, albeit bogus. The (dead) continuation loops;
-it would be better to invoke some kind of panic function here.
--}
-cgCase scrut@(StgApp v []) _ (PrimAlt _) _
- = do { dflags <- getDynFlags
- ; mb_cc <- maybeSaveCostCentre True
- ; _ <- withSequel
- (AssignTo [idToReg dflags (NonVoid v)] False) (cgExpr scrut)
- ; restoreCurrentCostCentre mb_cc
- ; emitComment $ mkFastString "should be unreachable code"
- ; l <- newBlockId
- ; emitLabel l
- ; emit (mkBranch l) -- an infinite loop
- ; return AssignedDirectly
- }
-
-{- Note [Handle seq#]
-~~~~~~~~~~~~~~~~~~~~~
-See Note [seq# magic] in PrelRules.
-The special case for seq# in cgCase does this:
-
- case seq# a s of v
- (# s', a' #) -> e
-==>
- case a of v
- (# s', a' #) -> e
-
-(taking advantage of the fact that the return convention for (# State#, a #)
-is the same as the return convention for just 'a')
--}
-
-cgCase (StgOpApp (StgPrimOp SeqOp) [StgVarArg a, _] _) bndr alt_type alts
- = -- Note [Handle seq#]
- -- And see Note [seq# magic] in PrelRules
- -- Use the same return convention as vanilla 'a'.
- cgCase (StgApp a []) bndr alt_type alts
-
-cgCase scrut bndr alt_type alts
- = -- the general case
- do { dflags <- getDynFlags
- ; up_hp_usg <- getVirtHp -- Upstream heap usage
- ; let ret_bndrs = chooseReturnBndrs bndr alt_type alts
- alt_regs = map (idToReg dflags) ret_bndrs
- ; simple_scrut <- isSimpleScrut scrut alt_type
- ; let do_gc | is_cmp_op scrut = False -- See Note [GC for conditionals]
- | not simple_scrut = True
- | isSingleton alts = False
- | up_hp_usg > 0 = False
- | otherwise = True
- -- cf Note [Compiling case expressions]
- gc_plan = if do_gc then GcInAlts alt_regs else NoGcInAlts
-
- ; mb_cc <- maybeSaveCostCentre simple_scrut
-
- ; let sequel = AssignTo alt_regs do_gc{- Note [scrut sequel] -}
- ; ret_kind <- withSequel sequel (cgExpr scrut)
- ; restoreCurrentCostCentre mb_cc
- ; _ <- bindArgsToRegs ret_bndrs
- ; cgAlts (gc_plan,ret_kind) (NonVoid bndr) alt_type alts
- }
- where
- is_cmp_op (StgOpApp (StgPrimOp op) _ _) = isComparisonPrimOp op
- is_cmp_op _ = False
-
-{- Note [GC for conditionals]
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-For boolean conditionals it seems that we have always done NoGcInAlts.
-That is, we have always done the GC check before the conditional.
-This is enshrined in the special case for
- case tagToEnum# (a>b) of ...
-See Note [case on bool]
-
-It's odd, and it's flagrantly inconsistent with the rules described in
-Note [Compiling case expressions]. However, after eliminating the
-tagToEnum# (#13397) we will have:
-    case (a>b) of ...
-Rather than make it behave quite differently, I am testing for a
-comparison operator here in the general case as well.
-
-ToDo: figure out what the Right Rule should be.
-
-Note [scrut sequel]
-~~~~~~~~~~~~~~~~~~~
-The job of the scrutinee is to assign its value(s) to alt_regs.
-Additionally, if we plan to do a heap-check in the alternatives (see
-Note [Compiling case expressions]), then we *must* retreat Hp to
-recover any unused heap before passing control to the sequel. If we
-don't do this, then any unused heap will become slop because the heap
-check will reset the heap usage. Slop in the heap breaks LDV profiling
-(+RTS -hb) which needs to do a linear sweep through the nursery.
-
-
-Note [Inlining out-of-line primops and heap checks]
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-If shouldInlinePrimOp returns True when called from StgCmmExpr for the
-purpose of heap check placement, we *must* inline the primop later in
-StgCmmPrim. If we don't, things will go wrong.
--}
-
------------------
-maybeSaveCostCentre :: Bool -> FCode (Maybe LocalReg)
-maybeSaveCostCentre simple_scrut
- | simple_scrut = return Nothing
- | otherwise = saveCurrentCostCentre
-
-
------------------
-isSimpleScrut :: CgStgExpr -> AltType -> FCode Bool
--- Simple scrutinee, does not block or allocate; hence safe to amalgamate
--- heap usage from alternatives into the stuff before the case
--- NB: if you get this wrong, and claim that the expression doesn't allocate
--- when it does, you'll deeply mess up allocation
-isSimpleScrut (StgOpApp op args _) _ = isSimpleOp op args
-isSimpleScrut (StgLit _) _ = return True -- case 1# of { 0# -> ..; ... }
-isSimpleScrut (StgApp _ []) (PrimAlt _) = return True -- case x# of { 0# -> ..; ... }
-isSimpleScrut _ _ = return False
-
-isSimpleOp :: StgOp -> [StgArg] -> FCode Bool
--- True iff the op cannot block or allocate
-isSimpleOp (StgFCallOp (CCall (CCallSpec _ _ safe)) _) _ = return $! not (playSafe safe)
--- dataToTag# evaluates its argument, see Note [dataToTag#] in primops.txt.pp
-isSimpleOp (StgPrimOp DataToTagOp) _ = return False
-isSimpleOp (StgPrimOp op) stg_args = do
- arg_exprs <- getNonVoidArgAmodes stg_args
- dflags <- getDynFlags
- -- See Note [Inlining out-of-line primops and heap checks]
- return $! isJust $ shouldInlinePrimOp dflags op arg_exprs
-isSimpleOp (StgPrimCallOp _) _ = return False
-
------------------
-chooseReturnBndrs :: Id -> AltType -> [CgStgAlt] -> [NonVoid Id]
--- These are the binders of a case that are assigned by the evaluation of the
--- scrutinee.
--- They're non-void, see Note [Post-unarisation invariants] in UnariseStg.
-chooseReturnBndrs bndr (PrimAlt _) _alts
- = assertNonVoidIds [bndr]
-
-chooseReturnBndrs _bndr (MultiValAlt n) [(_, ids, _)]
- = ASSERT2(ids `lengthIs` n, ppr n $$ ppr ids $$ ppr _bndr)
- assertNonVoidIds ids -- 'bndr' is not assigned!
-
-chooseReturnBndrs bndr (AlgAlt _) _alts
- = assertNonVoidIds [bndr] -- Only 'bndr' is assigned
-
-chooseReturnBndrs bndr PolyAlt _alts
- = assertNonVoidIds [bndr] -- Only 'bndr' is assigned
-
-chooseReturnBndrs _ _ _ = panic "chooseReturnBndrs"
- -- MultiValAlt has only one alternative
-
--------------------------------------
-cgAlts :: (GcPlan,ReturnKind) -> NonVoid Id -> AltType -> [CgStgAlt]
- -> FCode ReturnKind
--- At this point the results of the case are in the binders
-cgAlts gc_plan _bndr PolyAlt [(_, _, rhs)]
- = maybeAltHeapCheck gc_plan (cgExpr rhs)
-
-cgAlts gc_plan _bndr (MultiValAlt _) [(_, _, rhs)]
- = maybeAltHeapCheck gc_plan (cgExpr rhs)
- -- Here bndrs are *already* in scope, so don't rebind them
-
-cgAlts gc_plan bndr (PrimAlt _) alts
- = do { dflags <- getDynFlags
-
- ; tagged_cmms <- cgAltRhss gc_plan bndr alts
-
- ; let bndr_reg = CmmLocal (idToReg dflags bndr)
- (DEFAULT,deflt) = head tagged_cmms
- -- PrimAlts always have a DEFAULT case
- -- and it always comes first
-
- tagged_cmms' = [(lit,code)
- | (LitAlt lit, code) <- tagged_cmms]
- ; emitCmmLitSwitch (CmmReg bndr_reg) tagged_cmms' deflt
- ; return AssignedDirectly }
-
-cgAlts gc_plan bndr (AlgAlt tycon) alts
- = do { dflags <- getDynFlags
-
- ; (mb_deflt, branches) <- cgAlgAltRhss gc_plan bndr alts
-
- ; let fam_sz = tyConFamilySize tycon
- bndr_reg = CmmLocal (idToReg dflags bndr)
-
- -- Is the constructor tag in the node reg?
- ; if isSmallFamily dflags fam_sz
- then do
- let -- Yes, bndr_reg has constr. tag in ls bits
- tag_expr = cmmConstrTag1 dflags (CmmReg bndr_reg)
- branches' = [(tag+1,branch) | (tag,branch) <- branches]
- emitSwitch tag_expr branches' mb_deflt 1 fam_sz
-
- else -- No, get tag from info table
- let -- Note that ptr _always_ has tag 1
- -- when the family size is big enough
- untagged_ptr = cmmRegOffB bndr_reg (-1)
- tag_expr = getConstrTag dflags (untagged_ptr)
- in emitSwitch tag_expr branches mb_deflt 0 (fam_sz - 1)
-
- ; return AssignedDirectly }
-
-cgAlts _ _ _ _ = panic "cgAlts"
- -- UbxTupAlt and PolyAlt have only one alternative
-
-
--- Note [alg-alt heap check]
---
--- In an algebraic case with more than one alternative, we will have
--- code like
---
--- L0:
--- x = R1
--- goto L1
--- L1:
--- if (x & 7 >= 2) then goto L2 else goto L3
--- L2:
--- Hp = Hp + 16
--- if (Hp > HpLim) then goto L4
--- ...
--- L4:
--- call gc() returns to L5
--- L5:
--- x = R1
--- goto L1
-
--------------------
-cgAlgAltRhss :: (GcPlan,ReturnKind) -> NonVoid Id -> [CgStgAlt]
- -> FCode ( Maybe CmmAGraphScoped
- , [(ConTagZ, CmmAGraphScoped)] )
-cgAlgAltRhss gc_plan bndr alts
- = do { tagged_cmms <- cgAltRhss gc_plan bndr alts
-
- ; let { mb_deflt = case tagged_cmms of
- ((DEFAULT,rhs) : _) -> Just rhs
- _other -> Nothing
- -- DEFAULT is always first, if present
-
- ; branches = [ (dataConTagZ con, cmm)
- | (DataAlt con, cmm) <- tagged_cmms ]
- }
-
- ; return (mb_deflt, branches)
- }
-
-
--------------------
-cgAltRhss :: (GcPlan,ReturnKind) -> NonVoid Id -> [CgStgAlt]
- -> FCode [(AltCon, CmmAGraphScoped)]
-cgAltRhss gc_plan bndr alts = do
- dflags <- getDynFlags
- let
- base_reg = idToReg dflags bndr
- cg_alt :: CgStgAlt -> FCode (AltCon, CmmAGraphScoped)
- cg_alt (con, bndrs, rhs)
- = getCodeScoped $
- maybeAltHeapCheck gc_plan $
- do { _ <- bindConArgs con base_reg (assertNonVoidIds bndrs)
- -- alt binders are always non-void,
- -- see Note [Post-unarisation invariants] in UnariseStg
- ; _ <- cgExpr rhs
- ; return con }
- forkAlts (map cg_alt alts)
-
-maybeAltHeapCheck :: (GcPlan,ReturnKind) -> FCode a -> FCode a
-maybeAltHeapCheck (NoGcInAlts,_) code = code
-maybeAltHeapCheck (GcInAlts regs, AssignedDirectly) code =
- altHeapCheck regs code
-maybeAltHeapCheck (GcInAlts regs, ReturnedTo lret off) code =
- altHeapCheckReturnsTo regs lret off code
-
------------------------------------------------------------------------------
--- Tail calls
------------------------------------------------------------------------------
-
-cgConApp :: DataCon -> [StgArg] -> FCode ReturnKind
-cgConApp con stg_args
- | isUnboxedTupleCon con -- Unboxed tuple: assign and return
- = do { arg_exprs <- getNonVoidArgAmodes stg_args
- ; tickyUnboxedTupleReturn (length arg_exprs)
- ; emitReturn arg_exprs }
-
- | otherwise -- Boxed constructors; allocate and return
- = ASSERT2( stg_args `lengthIs` countConRepArgs con, ppr con <> parens (ppr (countConRepArgs con)) <+> ppr stg_args )
- do { (idinfo, fcode_init) <- buildDynCon (dataConWorkId con) False
- currentCCS con (assertNonVoidStgArgs stg_args)
- -- con args are always non-void,
- -- see Note [Post-unarisation invariants] in UnariseStg
- -- The first "con" says that the name bound to this
- -- closure is "con", which is a bit of a fudge, but
- -- it only affects profiling (hence the False)
-
- ; emit =<< fcode_init
- ; tickyReturnNewCon (length stg_args)
- ; emitReturn [idInfoToAmode idinfo] }
-
-cgIdApp :: Id -> [StgArg] -> FCode ReturnKind
-cgIdApp fun_id args = do
- dflags <- getDynFlags
- fun_info <- getCgIdInfo fun_id
- self_loop_info <- getSelfLoop
- let fun_arg = StgVarArg fun_id
- fun_name = idName fun_id
- fun = idInfoToAmode fun_info
- lf_info = cg_lf fun_info
- n_args = length args
- v_args = length $ filter (isVoidTy . stgArgType) args
- node_points dflags = nodeMustPointToIt dflags lf_info
- case getCallMethod dflags fun_name fun_id lf_info n_args v_args (cg_loc fun_info) self_loop_info of
- -- A value in WHNF, so we can just return it.
- ReturnIt
- | isVoidTy (idType fun_id) -> emitReturn []
- | otherwise -> emitReturn [fun]
- -- ToDo: does ReturnIt guarantee tagged?
-
- EnterIt -> ASSERT( null args ) -- Discarding arguments
- emitEnter fun
-
- SlowCall -> do -- A slow function call via the RTS apply routines
- { tickySlowCall lf_info args
- ; emitComment $ mkFastString "slowCall"
- ; slowCall fun args }
-
- -- A direct function call (possibly with some left-over arguments)
- DirectEntry lbl arity -> do
- { tickyDirectCall arity args
- ; if node_points dflags
- then directCall NativeNodeCall lbl arity (fun_arg:args)
- else directCall NativeDirectCall lbl arity args }
-
- -- Let-no-escape call or self-recursive tail-call
- JumpToIt blk_id lne_regs -> do
- { adjustHpBackwards -- always do this before a tail-call
- ; cmm_args <- getNonVoidArgAmodes args
- ; emitMultiAssign lne_regs cmm_args
- ; emit (mkBranch blk_id)
- ; return AssignedDirectly }
-
--- Note [Self-recursive tail calls]
--- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
---
--- Self-recursive tail calls can be optimized into a local jump in the same
--- way as let-no-escape bindings (see Note [What is a non-escaping let] in
--- stgSyn/CoreToStg.hs). Consider this:
---
--- foo.info:
--- a = R1 // calling convention
--- b = R2
--- goto L1
--- L1: ...
--- ...
--- ...
--- L2: R1 = x
--- R2 = y
--- call foo(R1,R2)
---
--- Instead of putting x and y into registers (or other locations required by
--- the calling convention) and performing a call, we can put them into local
--- variables a and b and perform a jump to L1:
---
--- foo.info:
--- a = R1
--- b = R2
--- goto L1
--- L1: ...
--- ...
--- ...
--- L2: a = x
--- b = y
--- goto L1
---
--- This can be done only when the function is calling itself in a tail position
--- and only if the call passes a number of parameters equal to the function's
--- arity. Note that this cannot be performed if a function calls itself with a
--- continuation.
---
--- This in fact implements the optimization known as "loopification". It was
--- described in "Low-level code optimizations in the Glasgow Haskell Compiler"
--- by Krzysztof Woś, though we use a different approach. Krzysztof performed his
--- optimization at the Cmm level, whereas we perform ours during code generation
--- (the Stg-to-Cmm pass), essentially making sure that optimized Cmm code is
--- generated in the first place.
---
--- The implementation is spread across a couple of places in the code:
---
--- * The FCode monad stores additional information in its reader environment
--- (the cgd_self_loop field). This information tells us which function can
--- tail call itself in an optimized way (it is the function currently
--- being compiled), what the label of the loop header is (L1 in the example
--- above), and which local registers we should put the arguments in before
--- making the call (a and b in the example above).
---
--- * Whenever we are compiling a function, we set that information to reflect
--- the fact that the function currently being compiled can be jumped to,
--- instead of called. This is done in closureCodeBody in StgCmmBind.
---
--- * We also have to emit a label to which we will be jumping. We make sure
--- that the label is placed after a stack check but before the heap
--- check. The reason is that making a recursive tail-call does not increase
--- the stack so we only need to check once. But it may grow the heap, so we
--- have to repeat the heap check in every self-call. This is done in
--- do_checks in StgCmmHeap.
---
--- * When we begin compilation of another closure we remove the additional
--- information from the environment. This is done by forkClosureBody
--- in StgCmmMonad. Other functions that duplicate the environment -
--- forkLneBody, forkAlts, codeOnly - duplicate that information. In other
--- words, we only need to clean the environment of the self-loop information
--- when compiling the right-hand side of a closure (binding).
---
--- * When compiling a call (cgIdApp) we use getCallMethod to decide what kind
--- of call will be generated. getCallMethod decides to generate a self
--- recursive tail call when (a) the environment stores information about a
--- possible self tail-call; (b) that tail call is to the function currently
--- being compiled; (c) the number of non-void arguments passed is equal to
--- the function's arity; and (d) loopification is turned on via the
--- -floopification command-line option.
---
--- * The command-line option to turn loopification on and off is implemented
--- in DynFlags.
---
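--- An illustrative Haskell example (a sketch, not from GHC's test suite) of a
--- call that loopification can turn into a local jump: 'go' tail-calls itself
--- with exactly as many non-void arguments as its arity, so with
--- -floopification the recursive call compiles to a jump back to the loop
--- header instead of a function call.
---
---     sumTo :: Int -> Int
---     sumTo n = go 0 1
---       where
---         go acc i
---           | i > n     = acc
---           | otherwise = go (acc + i) (i + 1)   -- self tail-call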
---
--- Note [Void arguments in self-recursive tail calls]
--- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
---
--- State# tokens can get in the way of the loopification optimization as seen in
--- #11372. Consider this:
---
--- foo :: [a]
--- -> (a -> State# s -> (# State# s, Bool #))
--- -> State# s
--- -> (# State# s, Maybe a #)
--- foo [] f s = (# s, Nothing #)
--- foo (x:xs) f s = case f x s of
--- (# s', b #) -> case b of
--- True -> (# s', Just x #)
--- False -> foo xs f s'
---
--- We would like to compile the call to foo as a local jump instead of a call
--- (see Note [Self-recursive tail calls]). However, the generated function has
--- an arity of 2 while we apply it to 3 arguments, one of them being of void
--- type. Thus, we mustn't count arguments of void type when checking whether
--- we can turn a call into a self-recursive jump.
---
-
-emitEnter :: CmmExpr -> FCode ReturnKind
-emitEnter fun = do
- { dflags <- getDynFlags
- ; adjustHpBackwards
- ; sequel <- getSequel
- ; updfr_off <- getUpdFrameOff
- ; case sequel of
- -- For a return, we have the option of generating a tag-test or
- -- not. If the value is tagged, we can return directly, which
- -- is quicker than entering the value. This is a code
- -- size/speed trade-off: when optimising for speed rather than
- -- size we could generate the tag test.
- --
- -- Right now, we do what the old codegen did, and omit the tag
- -- test, just generating an enter.
- Return -> do
- { let entry = entryCode dflags $ closureInfoPtr dflags $ CmmReg nodeReg
- ; emit $ mkJump dflags NativeNodeCall entry
- [cmmUntag dflags fun] updfr_off
- ; return AssignedDirectly
- }
-
- -- The result will be scrutinised in the sequel. This is where
- -- we generate a tag-test to avoid entering the closure if
- -- possible.
- --
- -- The generated code will be something like this:
- --
- -- R1 = fun -- copyout
- -- if (fun & 7 != 0) goto Lret else goto Lcall
- -- Lcall:
- -- call [fun] returns to Lret
- -- Lret:
- -- fun' = R1 -- copyin
- -- ...
- --
- -- Note in particular that the label Lret is used as a
- -- destination by both the tag-test and the call. This is
- -- because Lret will necessarily be a proc-point, and we want to
- -- ensure that we generate only one proc-point for this
- -- sequence.
- --
- -- Furthermore, we tell the caller that we generated a native
- -- return continuation by returning (ReturnedTo Lret off), so
- -- that the continuation can be reused by the heap-check failure
- -- code in the enclosing case expression.
- --
- AssignTo res_regs _ -> do
- { lret <- newBlockId
- ; let (off, _, copyin) = copyInOflow dflags NativeReturn (Young lret) res_regs []
- ; lcall <- newBlockId
- ; updfr_off <- getUpdFrameOff
- ; let area = Young lret
- ; let (outArgs, regs, copyout) = copyOutOflow dflags NativeNodeCall Call area
- [fun] updfr_off []
- -- refer to fun via nodeReg after the copyout, to avoid having
- -- both live simultaneously; this sometimes enables fun to be
- -- inlined in the RHS of the R1 assignment.
- ; let entry = entryCode dflags (closureInfoPtr dflags (CmmReg nodeReg))
- the_call = toCall entry (Just lret) updfr_off off outArgs regs
- ; tscope <- getTickScope
- ; emit $
- copyout <*>
- mkCbranch (cmmIsTagged dflags (CmmReg nodeReg))
- lret lcall Nothing <*>
- outOfLine lcall (the_call,tscope) <*>
- mkLabel lret tscope <*>
- copyin
- ; return (ReturnedTo lret off)
- }
- }
-
-------------------------------------------------------------------------
--- Ticks
-------------------------------------------------------------------------
-
--- | Generate Cmm code for a tick. Depending on the type of Tickish,
--- this will either generate actual Cmm instrumentation code, or
--- simply pass on the annotation as a @CmmTickish@.
-cgTick :: Tickish Id -> FCode ()
-cgTick tick
- = do { dflags <- getDynFlags
- ; case tick of
- ProfNote cc t p -> emitSetCCC cc t p
- HpcTick m n -> emit (mkTickBox dflags m n)
- SourceNote s n -> emitTick $ SourceNote s n
- _other -> return () -- ignore
- }