author | Luite Stegeman <stegeman@gmail.com> | 2021-01-22 00:09:17 +0100
committer | Marge Bot <ben+marge-bot@smart-cactus.org> | 2021-03-20 07:49:15 -0400
commit | 1f94e0f7601f8e22fdd81a47f130650265a44196 (patch)
tree | d06d02317049b56763b2f1da27f71f3663efa5a0 /compiler/GHC/StgToByteCode.hs
parent | 7de3532f0317032f75b76150c5d3a6f76178be04 (diff)
download | haskell-1f94e0f7601f8e22fdd81a47f130650265a44196.tar.gz
Generate GHCi bytecode from STG instead of Core and support unboxed tuples and sums.
fixes #1257
Diffstat (limited to 'compiler/GHC/StgToByteCode.hs')
-rw-r--r-- | compiler/GHC/StgToByteCode.hs | 2277
1 file changed, 2277 insertions(+), 0 deletions(-)
diff --git a/compiler/GHC/StgToByteCode.hs b/compiler/GHC/StgToByteCode.hs new file mode 100644 index 0000000000..e14de72eb5 --- /dev/null +++ b/compiler/GHC/StgToByteCode.hs @@ -0,0 +1,2277 @@ +{-# LANGUAGE CPP #-} +{-# LANGUAGE DeriveFunctor #-} +{-# LANGUAGE GeneralizedNewtypeDeriving #-} +{-# LANGUAGE RecordWildCards #-} + +{-# OPTIONS_GHC -fprof-auto-top #-} +{-# OPTIONS_GHC -Wno-incomplete-uni-patterns #-} + +-- +-- (c) The University of Glasgow 2002-2006 +-- + +-- | GHC.StgToByteCode: Generate bytecode from STG +module GHC.StgToByteCode ( UnlinkedBCO, byteCodeGen, stgExprToBCOs ) where + +#include "HsVersions.h" + +import GHC.Prelude + +import GHC.Driver.Session +import GHC.Driver.Env + +import GHC.ByteCode.Instr +import GHC.ByteCode.Asm +import GHC.ByteCode.Types + +import GHC.Cmm.CallConv +import GHC.Cmm.Expr +import GHC.Cmm.Node +import GHC.Cmm.Utils + +import GHC.Platform +import GHC.Platform.Profile + +import GHC.Runtime.Interpreter +import GHCi.FFI +import GHCi.RemoteTypes +import GHC.Types.Basic +import GHC.Utils.Outputable +import GHC.Types.Name +import GHC.Types.Id.Make +import GHC.Types.Id +import GHC.Types.ForeignCall +import GHC.Core +import GHC.Types.Literal +import GHC.Builtin.PrimOps +import GHC.Core.Type +import GHC.Types.RepType +import GHC.Core.DataCon +import GHC.Core.TyCon +import GHC.Utils.Misc +import GHC.Utils.Logger +import GHC.Types.Var.Set +import GHC.Builtin.Types ( unboxedUnitTy ) +import GHC.Builtin.Types.Prim +import GHC.Core.TyCo.Ppr ( pprType ) +import GHC.Utils.Error +import GHC.Types.Unique +import GHC.Builtin.Uniques +import GHC.Builtin.Utils ( primOpId ) +import GHC.Data.FastString +import GHC.Utils.Panic +import GHC.StgToCmm.Closure ( NonVoid(..), fromNonVoid, nonVoidIds ) +import GHC.StgToCmm.Layout +import GHC.Runtime.Heap.Layout hiding (WordOff, ByteOff, wordsToBytes) +import GHC.Data.Bitmap +import GHC.Data.OrdList +import GHC.Data.Maybe +import GHC.Types.Var.Env +import GHC.Types.Tickish + +import Data.List ( genericReplicate, genericLength, intersperse + , partition, scanl', sort, sortBy, zip4, zip6, nub ) +import Foreign +import Control.Monad +import Data.Char + +import GHC.Types.Unique.Supply +import GHC.Unit.Module + +import Control.Exception +import Data.Array +import Data.Coerce (coerce) +import Data.ByteString (ByteString) +import Data.Map (Map) +import Data.IntMap (IntMap) +import qualified Data.Map as Map +import qualified Data.IntMap as IntMap +import qualified GHC.Data.FiniteMap as Map +import Data.Ord +import GHC.Stack.CCS +import Data.Either ( partitionEithers ) + +import qualified GHC.Types.CostCentre as CC +import GHC.Stg.Syntax +import GHC.Stg.FVs + +-- ----------------------------------------------------------------------------- +-- Generating byte code for a complete module + +byteCodeGen :: HscEnv + -> Module + -> [StgTopBinding] + -> [TyCon] + -> Maybe ModBreaks + -> IO CompiledByteCode +byteCodeGen hsc_env this_mod binds tycs mb_modBreaks + = withTiming logger dflags + (text "GHC.StgToByteCode"<+>brackets (ppr this_mod)) + (const ()) $ do + -- Split top-level binds into strings and others. + -- See Note [generating code for top-level string literal bindings]. 
+ let (strings, lifted_binds) = partitionEithers $ do -- list monad + bnd <- binds + case bnd of + StgTopLifted bnd -> [Right bnd] + StgTopStringLit b str -> [Left (b, str)] + flattenBind (StgNonRec b e) = [(b,e)] + flattenBind (StgRec bs) = bs + stringPtrs <- allocateTopStrings hsc_env strings + + us <- mkSplitUniqSupply 'y' + (BcM_State{..}, proto_bcos) <- + runBc hsc_env us this_mod mb_modBreaks (mkVarEnv stringPtrs) $ do + prepd_binds <- mapM bcPrepBind lifted_binds + let flattened_binds = + concatMap (flattenBind . annBindingFreeVars) (reverse prepd_binds) + mapM schemeTopBind flattened_binds + + when (notNull ffis) + (panic "GHC.StgToByteCode.byteCodeGen: missing final emitBc?") + + dumpIfSet_dyn logger dflags Opt_D_dump_BCOs + "Proto-BCOs" FormatByteCode + (vcat (intersperse (char ' ') (map ppr proto_bcos))) + + cbc <- assembleBCOs hsc_env proto_bcos tycs (map snd stringPtrs) + (case modBreaks of + Nothing -> Nothing + Just mb -> Just mb{ modBreaks_breakInfo = breakInfo }) + + -- Squash space leaks in the CompiledByteCode. This is really + -- important, because when loading a set of modules into GHCi + -- we don't touch the CompiledByteCode until the end when we + -- do linking. Forcing out the thunks here reduces space + -- usage by more than 50% when loading a large number of + -- modules. + evaluate (seqCompiledByteCode cbc) + + return cbc + + where dflags = hsc_dflags hsc_env + logger = hsc_logger hsc_env + +allocateTopStrings + :: HscEnv + -> [(Id, ByteString)] + -> IO [(Var, RemotePtr ())] +allocateTopStrings hsc_env topStrings = do + let !(bndrs, strings) = unzip topStrings + ptrs <- iservCmd hsc_env $ MallocStrings strings + return $ zip bndrs ptrs + +{- +Note [generating code for top-level string literal bindings] + +Here is a summary on how the byte code generator deals with top-level string +literals: + +1. Top-level string literal bindings are separated from the rest of the module. + +2. The strings are allocated via iservCmd, in allocateTopStrings + +3. The mapping from binders to allocated strings (topStrings) are maintained in + BcM and used when generating code for variable references. 
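+
+As an illustrative example (the binding name here is hypothetical), a
+module containing
+
+  fooBar :: Addr#
+  fooBar = "foo"#
+
+arrives here as an StgTopStringLit binding: allocateTopStrings sends its
+bytes to the interpreter with MallocStrings, and each occurrence of
+fooBar then compiles to a push of the returned RemotePtr (see the
+global-variable case of pushAtom, which consults getTopStrings).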
+-} + +-- ----------------------------------------------------------------------------- +-- Generating byte code for an expression + +-- Returns: the root BCO for this expression +stgExprToBCOs :: HscEnv + -> Module + -> Type + -> StgRhs + -> IO UnlinkedBCO +stgExprToBCOs hsc_env this_mod expr_ty expr + = withTiming logger dflags + (text "GHC.StgToByteCode"<+>brackets (ppr this_mod)) + (const ()) $ do + + -- the uniques are needed to generate fresh variables when we introduce new + -- let bindings for ticked expressions + us <- mkSplitUniqSupply 'y' + (BcM_State _dflags _us _this_mod _final_ctr mallocd _ _ _, proto_bco) + <- runBc hsc_env us this_mod Nothing emptyVarEnv $ do + prepd_expr <- annBindingFreeVars <$> + bcPrepBind (StgNonRec dummy_id expr) + case prepd_expr of + (StgNonRec _ cg_expr) -> schemeR [] (idName dummy_id, cg_expr) + _ -> + panic "GHC.StgByteCode.stgExprToBCOs" + + when (notNull mallocd) + (panic "GHC.StgToByteCode.stgExprToBCOs: missing final emitBc?") + + dumpIfSet_dyn logger dflags Opt_D_dump_BCOs "Proto-BCOs" FormatByteCode + (ppr proto_bco) + + assembleOneBCO hsc_env proto_bco + where dflags = hsc_dflags hsc_env + logger = hsc_logger hsc_env + -- we need an otherwise unused Id for bytecode generation + dummy_id = mkSysLocal (fsLit "BCO_toplevel") + (mkPseudoUniqueE 0) + Many + expr_ty +{- + Prepare the STG for bytecode generation: + + - Ensure that all breakpoints are directly under + a let-binding, introducing a new binding for + those that aren't already. + + - Protect Not-necessarily lifted join points, see + Note [Not-necessarily-lifted join points] + + -} + +bcPrepRHS :: StgRhs -> BcM StgRhs +-- explicitly match all constructors so we get a warning if we miss any +bcPrepRHS (StgRhsClosure fvs cc upd args (StgTick bp@Breakpoint{} expr)) = do + {- If we have a breakpoint directly under an StgRhsClosure we don't + need to introduce a new binding for it. + -} + expr' <- bcPrepExpr expr + pure (StgRhsClosure fvs cc upd args (StgTick bp expr')) +bcPrepRHS (StgRhsClosure fvs cc upd args expr) = + StgRhsClosure fvs cc upd args <$> bcPrepExpr expr +bcPrepRHS con@StgRhsCon{} = pure con + +bcPrepExpr :: StgExpr -> BcM StgExpr +-- explicitly match all constructors so we get a warning if we miss any +bcPrepExpr (StgTick bp@(Breakpoint tick_ty _ _) rhs) + | isLiftedTypeKind (typeKind tick_ty) = do + id <- newId tick_ty + rhs' <- bcPrepExpr rhs + let expr' = StgTick bp rhs' + bnd = StgNonRec id (StgRhsClosure noExtFieldSilent + CC.dontCareCCS + ReEntrant + [] + expr' + ) + letExp = StgLet noExtFieldSilent bnd (StgApp id []) + pure letExp + | otherwise = do + id <- newId (mkVisFunTyMany realWorldStatePrimTy tick_ty) + st <- newId realWorldStatePrimTy + rhs' <- bcPrepExpr rhs + let expr' = StgTick bp rhs' + bnd = StgNonRec id (StgRhsClosure noExtFieldSilent + CC.dontCareCCS + ReEntrant + [voidArgId] + expr' + ) + pure $ StgLet noExtFieldSilent bnd (StgApp id [StgVarArg st]) +bcPrepExpr (StgTick tick rhs) = + StgTick tick <$> bcPrepExpr rhs +bcPrepExpr (StgLet xlet bnds expr) = + StgLet xlet <$> bcPrepBind bnds + <*> bcPrepExpr expr +bcPrepExpr (StgLetNoEscape xlne bnds expr) = + StgLet xlne <$> bcPrepBind bnds + <*> bcPrepExpr expr +bcPrepExpr (StgCase expr bndr alt_type alts) = + StgCase <$> bcPrepExpr expr + <*> pure bndr + <*> pure alt_type + <*> mapM bcPrepAlt alts +bcPrepExpr lit@StgLit{} = pure lit +-- See Note [Not-necessarily-lifted join points], step 3. 
+bcPrepExpr (StgApp x []) + | isNNLJoinPoint x = pure $ + StgApp (protectNNLJoinPointId x) [StgVarArg voidPrimId] +bcPrepExpr app@StgApp{} = pure app +bcPrepExpr app@StgConApp{} = pure app +bcPrepExpr app@StgOpApp{} = pure app + +bcPrepAlt :: StgAlt -> BcM StgAlt +bcPrepAlt (ac, bndrs, expr) = (,,) ac bndrs <$> bcPrepExpr expr + +bcPrepBind :: StgBinding -> BcM StgBinding +-- explicitly match all constructors so we get a warning if we miss any +bcPrepBind (StgNonRec bndr rhs) = + let (bndr', rhs') = bcPrepSingleBind (bndr, rhs) + in StgNonRec bndr' <$> bcPrepRHS rhs' +bcPrepBind (StgRec bnds) = + StgRec <$> mapM ((\(b,r) -> (,) b <$> bcPrepRHS r) . bcPrepSingleBind) + bnds + +bcPrepSingleBind :: (Id, StgRhs) -> (Id, StgRhs) +-- If necessary, modify this Id and body to protect not-necessarily-lifted join points. +-- See Note [Not-necessarily-lifted join points], step 2. +bcPrepSingleBind (x, StgRhsClosure ext cc upd_flag args body) + | isNNLJoinPoint x + = ( protectNNLJoinPointId x + , StgRhsClosure ext cc upd_flag (args ++ [voidArgId]) body) +bcPrepSingleBind bnd = bnd + +-- ----------------------------------------------------------------------------- +-- Compilation schema for the bytecode generator + +type BCInstrList = OrdList BCInstr + +wordsToBytes :: Platform -> WordOff -> ByteOff +wordsToBytes platform = fromIntegral . (* platformWordSizeInBytes platform) . fromIntegral + +-- Used when we know we have a whole number of words +bytesToWords :: Platform -> ByteOff -> WordOff +bytesToWords platform (ByteOff bytes) = + let (q, r) = bytes `quotRem` (platformWordSizeInBytes platform) + in if r == 0 + then fromIntegral q + else panic $ "GHC.StgToByteCode.bytesToWords: bytes=" ++ show bytes + +wordSize :: Platform -> ByteOff +wordSize platform = ByteOff (platformWordSizeInBytes platform) + +type Sequel = ByteOff -- back off to this depth before ENTER + +type StackDepth = ByteOff + +-- | Maps Ids to their stack depth. This allows us to avoid having to mess with +-- it after each push/pop. +type BCEnv = Map Id StackDepth -- To find vars on the stack + +{- +ppBCEnv :: BCEnv -> SDoc +ppBCEnv p + = text "begin-env" + $$ nest 4 (vcat (map pp_one (sortBy cmp_snd (Map.toList p)))) + $$ text "end-env" + where + pp_one (var, ByteOff offset) = int offset <> colon <+> ppr var <+> ppr (bcIdArgReps var) + cmp_snd x y = compare (snd x) (snd y) +-} + +-- Create a BCO and do a spot of peephole optimisation on the insns +-- at the same time. +mkProtoBCO + :: Platform + -> name + -> BCInstrList + -> Either [CgStgAlt] (CgStgRhs) + -- ^ original expression; for debugging only + -> Int + -> Word16 + -> [StgWord] + -> Bool -- True <=> is a return point, rather than a function + -> [FFIInfo] + -> ProtoBCO name +mkProtoBCO platform nm instrs_ordlist origin arity bitmap_size bitmap is_ret ffis + = ProtoBCO { + protoBCOName = nm, + protoBCOInstrs = maybe_with_stack_check, + protoBCOBitmap = bitmap, + protoBCOBitmapSize = bitmap_size, + protoBCOArity = arity, + protoBCOExpr = origin, + protoBCOFFIs = ffis + } + where + -- Overestimate the stack usage (in words) of this BCO, + -- and if >= iNTERP_STACK_CHECK_THRESH, add an explicit + -- stack check. (The interpreter always does a stack check + -- for iNTERP_STACK_CHECK_THRESH words at the start of each + -- BCO anyway, so we only need to add an explicit one in the + -- (hopefully rare) cases when the (overestimated) stack use + -- exceeds iNTERP_STACK_CHECK_THRESH. 
+ maybe_with_stack_check + | is_ret && stack_usage < fromIntegral (pc_AP_STACK_SPLIM (platformConstants platform)) = peep_d + -- don't do stack checks at return points, + -- everything is aggregated up to the top BCO + -- (which must be a function). + -- That is, unless the stack usage is >= AP_STACK_SPLIM, + -- see bug #1466. + | stack_usage >= fromIntegral iNTERP_STACK_CHECK_THRESH + = STKCHECK stack_usage : peep_d + | otherwise + = peep_d -- the supposedly common case + + -- We assume that this sum doesn't wrap + stack_usage = sum (map bciStackUse peep_d) + + -- Merge local pushes + peep_d = peep (fromOL instrs_ordlist) + + peep (PUSH_L off1 : PUSH_L off2 : PUSH_L off3 : rest) + = PUSH_LLL off1 (off2-1) (off3-2) : peep rest + peep (PUSH_L off1 : PUSH_L off2 : rest) + = PUSH_LL off1 (off2-1) : peep rest + peep (i:rest) + = i : peep rest + peep [] + = [] + +argBits :: Platform -> [ArgRep] -> [Bool] +argBits _ [] = [] +argBits platform (rep : args) + | isFollowableArg rep = False : argBits platform args + | otherwise = take (argRepSizeW platform rep) (repeat True) ++ argBits platform args + +non_void :: [ArgRep] -> [ArgRep] +non_void = filter nv + where nv V = False + nv _ = True + +-- ----------------------------------------------------------------------------- +-- schemeTopBind + +-- Compile code for the right-hand side of a top-level binding + +schemeTopBind :: (Id, CgStgRhs) -> BcM (ProtoBCO Name) +schemeTopBind (id, rhs) + | Just data_con <- isDataConWorkId_maybe id, + isNullaryRepDataCon data_con = do + platform <- profilePlatform <$> getProfile + -- Special case for the worker of a nullary data con. + -- It'll look like this: Nil = /\a -> Nil a + -- If we feed it into schemeR, we'll get + -- Nil = Nil + -- because mkConAppCode treats nullary constructor applications + -- by just re-using the single top-level definition. So + -- for the worker itself, we must allocate it directly. + -- ioToBc (putStrLn $ "top level BCO") + emitBc (mkProtoBCO platform (getName id) (toOL [PACK data_con 0, ENTER]) + (Right rhs) 0 0 [{-no bitmap-}] False{-not alts-}) + + | otherwise + = schemeR [{- No free variables -}] (getName id, rhs) + + +-- ----------------------------------------------------------------------------- +-- schemeR + +-- Compile code for a right-hand side, to give a BCO that, +-- when executed with the free variables and arguments on top of the stack, +-- will return with a pointer to the result on top of the stack, after +-- removing the free variables and arguments. +-- +-- Park the resulting BCO in the monad. Also requires the +-- name of the variable to which this value was bound, +-- so as to give the resulting BCO a name. +schemeR :: [Id] -- Free vars of the RHS, ordered as they + -- will appear in the thunk. Empty for + -- top-level things, which have no free vars. 
+ -> (Name, CgStgRhs) + -> BcM (ProtoBCO Name) +schemeR fvs (nm, rhs) + = schemeR_wrk fvs nm rhs (collect rhs) + +-- If an expression is a lambda (after apply bcView), return the +-- list of arguments to the lambda (in R-to-L order) and the +-- underlying expression + +collect :: CgStgRhs -> ([Var], CgStgExpr) +collect (StgRhsClosure _ _ _ args body) = (args, body) +collect (StgRhsCon _cc dc cnum _ticks args) = ([], StgConApp dc cnum args []) + +schemeR_wrk + :: [Id] + -> Name + -> CgStgRhs -- expression e, for debugging only + -> ([Var], CgStgExpr) -- result of collect on e + -> BcM (ProtoBCO Name) +schemeR_wrk fvs nm original_body (args, body) + = do + profile <- getProfile + let + platform = profilePlatform profile + all_args = reverse args ++ fvs + arity = length all_args + -- all_args are the args in reverse order. We're compiling a function + -- \fv1..fvn x1..xn -> e + -- i.e. the fvs come first + + -- Stack arguments always take a whole number of words, we never pack + -- them unlike constructor fields. + szsb_args = map (wordsToBytes platform . idSizeW platform) all_args + sum_szsb_args = sum szsb_args + p_init = Map.fromList (zip all_args (mkStackOffsets 0 szsb_args)) + + -- make the arg bitmap + bits = argBits platform (reverse (map (bcIdArgRep platform) all_args)) + bitmap_size = genericLength bits + bitmap = mkBitmap platform bits + body_code <- schemeER_wrk sum_szsb_args p_init body + + emitBc (mkProtoBCO platform nm body_code (Right original_body) + arity bitmap_size bitmap False{-not alts-}) + +-- introduce break instructions for ticked expressions +schemeER_wrk :: StackDepth -> BCEnv -> CgStgExpr -> BcM BCInstrList +schemeER_wrk d p (StgTick (Breakpoint tick_ty tick_no fvs) rhs) + = do code <- schemeE d 0 p rhs + cc_arr <- getCCArray + this_mod <- moduleName <$> getCurrentModule + platform <- profilePlatform <$> getProfile + let idOffSets = getVarOffSets platform d p fvs + let breakInfo = CgBreakInfo + { cgb_vars = idOffSets + , cgb_resty = tick_ty + } + newBreakInfo tick_no breakInfo + hsc_env <- getHscEnv + let cc | Just interp <- hsc_interp hsc_env + , interpreterProfiled interp + = cc_arr ! tick_no + | otherwise = toRemotePtr nullPtr + let breakInstr = BRK_FUN (fromIntegral tick_no) (getUnique this_mod) cc + return $ breakInstr `consOL` code +schemeER_wrk d p rhs = schemeE d 0 p rhs + +getVarOffSets :: Platform -> StackDepth -> BCEnv -> [Id] -> [Maybe (Id, Word16)] +getVarOffSets platform depth env = map getOffSet + where + getOffSet id = case lookupBCEnv_maybe id env of + Nothing -> Nothing + Just offset -> + -- michalt: I'm not entirely sure why we need the stack + -- adjustment by 2 here. I initially thought that there's + -- something off with getIdValFromApStack (the only user of this + -- value), but it looks ok to me. My current hypothesis is that + -- this "adjustment" is needed due to stack manipulation for + -- BRK_FUN in Interpreter.c In any case, this is used only when + -- we trigger a breakpoint. 
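+ -- A worked example of the adjustment: with depth = 24 bytes and
+ -- offset = 8 bytes on a 64-bit platform, the variable is reported
+ -- at word offset (24 - 8) / 8 + 2 = 4.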
+ let !var_depth_ws = + trunc16W $ bytesToWords platform (depth - offset) + 2 + in Just (id, var_depth_ws) + +truncIntegral16 :: Integral a => a -> Word16 +truncIntegral16 w + | w > fromIntegral (maxBound :: Word16) + = panic "stack depth overflow" + | otherwise + = fromIntegral w + +trunc16B :: ByteOff -> Word16 +trunc16B = truncIntegral16 + +trunc16W :: WordOff -> Word16 +trunc16W = truncIntegral16 + +fvsToEnv :: BCEnv -> CgStgRhs -> [Id] +-- Takes the free variables of a right-hand side, and +-- delivers an ordered list of the local variables that will +-- be captured in the thunk for the RHS +-- The BCEnv argument tells which variables are in the local +-- environment: these are the ones that should be captured +-- +-- The code that constructs the thunk, and the code that executes +-- it, have to agree about this layout + +fvsToEnv p (StgRhsClosure fvs _ _ _ _) = + [v | v <- dVarSetElems fvs, + v `Map.member` p] +fvsToEnv _ _ = [] + +-- ----------------------------------------------------------------------------- +-- schemeE + +-- Returning an unlifted value. +-- Heave it on the stack, SLIDE, and RETURN. +returnUnboxedAtom + :: StackDepth + -> Sequel + -> BCEnv + -> StgArg + -> BcM BCInstrList +returnUnboxedAtom d s p e = do + let reps = case e of + StgLitArg lit -> typePrimRepArgs (literalType lit) + StgVarArg i -> bcIdPrimReps i + (push, szb) <- pushAtom d p e + ret <- returnUnboxedReps d s szb reps + return (push `appOL` ret) + +-- return an unboxed value from the top of the stack +returnUnboxedReps + :: StackDepth + -> Sequel + -> ByteOff -- size of the thing we're returning + -> [PrimRep] -- representations + -> BcM BCInstrList +returnUnboxedReps d s szb reps = do + profile <- getProfile + let platform = profilePlatform profile + non_void VoidRep = False + non_void _ = True + ret <- case filter non_void reps of + -- use RETURN_UBX for unary representations + [] -> return (unitOL $ RETURN_UBX V) + [rep] -> return (unitOL $ RETURN_UBX (toArgRep platform rep)) + -- otherwise use RETURN_TUPLE with a tuple descriptor + nv_reps -> do + let (tuple_info, args_offsets) = layoutTuple profile 0 (primRepCmmType platform) nv_reps + args_ptrs = map (\(rep, off) -> (isFollowableArg (toArgRep platform rep), off)) args_offsets + tuple_bco <- emitBc (tupleBCO platform tuple_info args_ptrs) + return $ PUSH_UBX (mkTupleInfoLit platform tuple_info) 1 `consOL` + PUSH_BCO tuple_bco `consOL` + unitOL RETURN_TUPLE + return ( mkSlideB platform szb (d - s) -- clear to sequel + `appOL` ret) -- go + +-- construct and return an unboxed tuple +returnUnboxedTuple + :: StackDepth + -> Sequel + -> BCEnv + -> [StgArg] + -> BcM BCInstrList +returnUnboxedTuple d s p es = do + profile <- getProfile + let platform = profilePlatform profile + arg_ty e = primRepCmmType platform (atomPrimRep e) + (tuple_info, tuple_components) = layoutTuple profile d arg_ty es + go _ pushes [] = return (reverse pushes) + go !dd pushes ((a, off):cs) = do (push, szb) <- pushAtom dd p a + MASSERT(off == dd + szb) + go (dd + szb) (push:pushes) cs + pushes <- go d [] tuple_components + ret <- returnUnboxedReps d + s + (wordsToBytes platform $ tupleSize tuple_info) + (map atomPrimRep es) + return (mconcat pushes `appOL` ret) + +-- Compile code to apply the given expression to the remaining args +-- on the stack, returning a HNF. 
+schemeE + :: StackDepth -> Sequel -> BCEnv -> CgStgExpr -> BcM BCInstrList +schemeE d s p (StgLit lit) = returnUnboxedAtom d s p (StgLitArg lit) +schemeE d s p (StgApp x []) + | isUnliftedType (idType x) = returnUnboxedAtom d s p (StgVarArg x) +-- Delegate tail-calls to schemeT. +schemeE d s p e@(StgApp {}) = schemeT d s p e +schemeE d s p e@(StgConApp {}) = schemeT d s p e +schemeE d s p e@(StgOpApp {}) = schemeT d s p e +schemeE d s p (StgLetNoEscape xlet bnd body) + = schemeE d s p (StgLet xlet bnd body) +schemeE d s p (StgLet _xlet + (StgNonRec x (StgRhsCon _cc data_con _cnum _ticks args)) + body) + = do -- Special case for a non-recursive let whose RHS is a + -- saturated constructor application. + -- Just allocate the constructor and carry on + alloc_code <- mkConAppCode d s p data_con args + platform <- targetPlatform <$> getDynFlags + let !d2 = d + wordSize platform + body_code <- schemeE d2 s (Map.insert x d2 p) body + return (alloc_code `appOL` body_code) +-- General case for let. Generates correct, if inefficient, code in +-- all situations. +schemeE d s p (StgLet _ext binds body) = do + platform <- targetPlatform <$> getDynFlags + let (xs,rhss) = case binds of StgNonRec x rhs -> ([x],[rhs]) + StgRec xs_n_rhss -> unzip xs_n_rhss + n_binds = genericLength xs + + fvss = map (fvsToEnv p') rhss + + -- Sizes of free vars + size_w = trunc16W . idSizeW platform + sizes = map (\rhs_fvs -> sum (map size_w rhs_fvs)) fvss + + -- the arity of each rhs + arities = map (genericLength . fst . collect) rhss + + -- This p', d' defn is safe because all the items being pushed + -- are ptrs, so all have size 1 word. d' and p' reflect the stack + -- after the closures have been allocated in the heap (but not + -- filled in), and pointers to them parked on the stack. + offsets = mkStackOffsets d (genericReplicate n_binds (wordSize platform)) + p' = Map.insertList (zipE xs offsets) p + d' = d + wordsToBytes platform n_binds + zipE = zipEqual "schemeE" + + -- ToDo: don't build thunks for things with no free variables + build_thunk + :: StackDepth + -> [Id] + -> Word16 + -> ProtoBCO Name + -> Word16 + -> Word16 + -> BcM BCInstrList + build_thunk _ [] size bco off arity + = return (PUSH_BCO bco `consOL` unitOL (mkap (off+size) size)) + where + mkap | arity == 0 = MKAP + | otherwise = MKPAP + build_thunk dd (fv:fvs) size bco off arity = do + (push_code, pushed_szb) <- pushAtom dd p' (StgVarArg fv) + more_push_code <- + build_thunk (dd + pushed_szb) fvs size bco off arity + return (push_code `appOL` more_push_code) + + alloc_code = toOL (zipWith mkAlloc sizes arities) + where mkAlloc sz 0 + | is_tick = ALLOC_AP_NOUPD sz + | otherwise = ALLOC_AP sz + mkAlloc sz arity = ALLOC_PAP arity sz + + is_tick = case binds of + StgNonRec id _ -> occNameFS (getOccName id) == tickFS + _other -> False + + compile_bind d' fvs x (rhs::CgStgRhs) size arity off = do + bco <- schemeR fvs (getName x,rhs) + build_thunk d' fvs size bco off arity + + compile_binds = + [ compile_bind d' fvs x rhs size arity (trunc16W n) + | (fvs, x, rhs, size, arity, n) <- + zip6 fvss xs rhss sizes arities [n_binds, n_binds-1 .. 
1] + ] + body_code <- schemeE d' s p' body + thunk_codes <- sequence compile_binds + return (alloc_code `appOL` concatOL thunk_codes `appOL` body_code) + +schemeE _d _s _p (StgTick (Breakpoint _ bp_id _) _rhs) + = panic ("schemeE: Breakpoint without let binding: " ++ + show bp_id ++ + " forgot to run bcPrep?") + +-- ignore other kinds of tick +schemeE d s p (StgTick _ rhs) = schemeE d s p rhs + +-- no alts: scrut is guaranteed to diverge +schemeE d s p (StgCase scrut _ _ []) = schemeE d s p scrut + +schemeE d s p (StgCase scrut bndr _ alts) + = doCase d s p scrut bndr alts + +-- Is this Id a not-necessarily-lifted join point? +-- See Note [Not-necessarily-lifted join points], step 1 +isNNLJoinPoint :: Id -> Bool +isNNLJoinPoint x = isJoinId x && + Just True /= isLiftedType_maybe (idType x) + +-- Update an Id's type to take a Void# argument. +-- Precondition: the Id is a not-necessarily-lifted join point. +-- See Note [Not-necessarily-lifted join points] +protectNNLJoinPointId :: Id -> Id +protectNNLJoinPointId x + = ASSERT( isNNLJoinPoint x ) + updateIdTypeButNotMult (unboxedUnitTy `mkVisFunTyMany`) x + +{- + Ticked Expressions + ------------------ + + The idea is that the "breakpoint<n,fvs> E" is really just an annotation on + the code. When we find such a thing, we pull out the useful information, + and then compile the code as if it was just the expression E. + +Note [Not-necessarily-lifted join points] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +A join point variable is essentially a goto-label: it is, for example, +never used as an argument to another function, and it is called only +in tail position. See Note [Join points] and Note [Invariants on join points], +both in GHC.Core. Because join points do not compile to true, red-blooded +variables (with, e.g., registers allocated to them), they are allowed +to be levity-polymorphic. (See invariant #6 in Note [Invariants on join points] +in GHC.Core.) + +However, in this byte-code generator, join points *are* treated just as +ordinary variables. There is no check whether a binding is for a join point +or not; they are all treated uniformly. (Perhaps there is a missed optimization +opportunity here, but that is beyond the scope of my (Richard E's) Thursday.) + +We thus must have *some* strategy for dealing with levity-polymorphic and +unlifted join points. Levity-polymorphic variables are generally not allowed +(though levity-polymorphic join points *are*; see Note [Invariants on join points] +in GHC.Core, point 6), and we don't wish to evaluate unlifted join points eagerly. +The questionable join points are *not-necessarily-lifted join points* +(NNLJPs). (Not having such a strategy led to #16509, which panicked in the +isUnliftedType check in the AnnVar case of schemeE.) Here is the strategy: + +1. Detect NNLJPs. This is done in isNNLJoinPoint. + +2. When binding an NNLJP, add a `\ (_ :: (# #)) ->` to its RHS, and modify the + type to tack on a `(# #) ->`. + Note that functions are never levity-polymorphic, so this transformation + changes an NNLJP to a non-levity-polymorphic join point. This is done + in bcPrepSingleBind. + +3. At an occurrence of an NNLJP, add an application to void# (called voidPrimId), + being careful to note the new type of the NNLJP. This is done in the AnnVar + case of schemeE, with help from protectNNLJoinPointId. + +Here is an example. Suppose we have + + f = \(r :: RuntimeRep) (a :: TYPE r) (x :: T). 
+ join j :: a + j = error @r @a "bloop" + in case x of + A -> j + B -> j + C -> error @r @a "blurp" + +Our plan is to behave is if the code was + + f = \(r :: RuntimeRep) (a :: TYPE r) (x :: T). + let j :: (Void# -> a) + j = \ _ -> error @r @a "bloop" + in case x of + A -> j void# + B -> j void# + C -> error @r @a "blurp" + +It's a bit hacky, but it works well in practice and is local. I suspect the +Right Fix is to take advantage of join points as goto-labels. + +-} + +-- Compile code to do a tail call. Specifically, push the fn, +-- slide the on-stack app back down to the sequel depth, +-- and enter. Four cases: +-- +-- 0. (Nasty hack). +-- An application "GHC.Prim.tagToEnum# <type> unboxed-int". +-- The int will be on the stack. Generate a code sequence +-- to convert it to the relevant constructor, SLIDE and ENTER. +-- +-- 1. The fn denotes a ccall. Defer to generateCCall. +-- +-- 2. An unboxed tuple: push the components on the top of +-- the stack and return. +-- +-- 3. Application of a constructor, by defn saturated. +-- Split the args into ptrs and non-ptrs, and push the nonptrs, +-- then the ptrs, and then do PACK and RETURN. +-- +-- 4. Otherwise, it must be a function call. Push the args +-- right to left, SLIDE and ENTER. + +schemeT :: StackDepth -- Stack depth + -> Sequel -- Sequel depth + -> BCEnv -- stack env + -> CgStgExpr + -> BcM BCInstrList + + -- Case 0 +schemeT d s p app + | Just (arg, constr_names) <- maybe_is_tagToEnum_call app + = implement_tagToId d s p arg constr_names + + -- Case 1 +schemeT d s p (StgOpApp (StgFCallOp (CCall ccall_spec) _ty) args result_ty) + = if isSupportedCConv ccall_spec + then generateCCall d s p ccall_spec result_ty (reverse args) + else unsupportedCConvException + +schemeT d s p (StgOpApp (StgPrimOp op) args _ty) + = doTailCall d s p (primOpId op) (reverse args) + +schemeT _d _s _p (StgOpApp StgPrimCallOp{} _args _ty) + = unsupportedCConvException + + -- Case 2: Unboxed tuple +schemeT d s p (StgConApp con _ext args _tys) + | isUnboxedTupleDataCon con || isUnboxedSumDataCon con + = returnUnboxedTuple d s p args + + -- Case 3: Ordinary data constructor + | otherwise + = do alloc_con <- mkConAppCode d s p con args + platform <- profilePlatform <$> getProfile + return (alloc_con `appOL` + mkSlideW 1 (bytesToWords platform $ d - s) `snocOL` + ENTER) + + -- Case 4: Tail call of function +schemeT d s p (StgApp fn args) + = doTailCall d s p fn (reverse args) + +schemeT _ _ _ e = pprPanic "GHC.StgToByteCode.schemeT" + (pprStgExpr shortStgPprOpts e) + +-- ----------------------------------------------------------------------------- +-- Generate code to build a constructor application, +-- leaving it on top of the stack + +mkConAppCode + :: StackDepth + -> Sequel + -> BCEnv + -> DataCon -- The data constructor + -> [StgArg] -- Args, in *reverse* order + -> BcM BCInstrList +mkConAppCode orig_d _ p con args = app_code + where + app_code = do + profile <- getProfile + let platform = profilePlatform profile + + non_voids = + [ NonVoid (prim_rep, arg) + | arg <- args + , let prim_rep = atomPrimRep arg + , not (isVoidRep prim_rep) + ] + (_, _, args_offsets) = + mkVirtHeapOffsetsWithPadding profile StdHeader non_voids + + do_pushery !d (arg : args) = do + (push, arg_bytes) <- case arg of + (Padding l _) -> return $! 
pushPadding l + (FieldOff a _) -> pushConstrAtom d p (fromNonVoid a) + more_push_code <- do_pushery (d + arg_bytes) args + return (push `appOL` more_push_code) + do_pushery !d [] = do + let !n_arg_words = trunc16W $ bytesToWords platform (d - orig_d) + return (unitOL (PACK con n_arg_words)) + + -- Push on the stack in the reverse order. + do_pushery orig_d (reverse args_offsets) + +-- ----------------------------------------------------------------------------- +-- Generate code for a tail-call + +doTailCall + :: StackDepth + -> Sequel + -> BCEnv + -> Id + -> [StgArg] + -> BcM BCInstrList +doTailCall init_d s p fn args = do + platform <- profilePlatform <$> getProfile + do_pushes init_d args (map (atomRep platform) args) + where + do_pushes !d [] reps = do + ASSERT( null reps ) return () + (push_fn, sz) <- pushAtom d p (StgVarArg fn) + platform <- profilePlatform <$> getProfile + ASSERT( sz == wordSize platform ) return () + let slide = mkSlideB platform (d - init_d + wordSize platform) (init_d - s) + return (push_fn `appOL` (slide `appOL` unitOL ENTER)) + do_pushes !d args reps = do + let (push_apply, n, rest_of_reps) = findPushSeq reps + (these_args, rest_of_args) = splitAt n args + (next_d, push_code) <- push_seq d these_args + platform <- profilePlatform <$> getProfile + instrs <- do_pushes (next_d + wordSize platform) rest_of_args rest_of_reps + -- ^^^ for the PUSH_APPLY_ instruction + return (push_code `appOL` (push_apply `consOL` instrs)) + + push_seq d [] = return (d, nilOL) + push_seq d (arg:args) = do + (push_code, sz) <- pushAtom d p arg + (final_d, more_push_code) <- push_seq (d + sz) args + return (final_d, push_code `appOL` more_push_code) + +-- v. similar to CgStackery.findMatch, ToDo: merge +findPushSeq :: [ArgRep] -> (BCInstr, Int, [ArgRep]) +findPushSeq (P: P: P: P: P: P: rest) + = (PUSH_APPLY_PPPPPP, 6, rest) +findPushSeq (P: P: P: P: P: rest) + = (PUSH_APPLY_PPPPP, 5, rest) +findPushSeq (P: P: P: P: rest) + = (PUSH_APPLY_PPPP, 4, rest) +findPushSeq (P: P: P: rest) + = (PUSH_APPLY_PPP, 3, rest) +findPushSeq (P: P: rest) + = (PUSH_APPLY_PP, 2, rest) +findPushSeq (P: rest) + = (PUSH_APPLY_P, 1, rest) +findPushSeq (V: rest) + = (PUSH_APPLY_V, 1, rest) +findPushSeq (N: rest) + = (PUSH_APPLY_N, 1, rest) +findPushSeq (F: rest) + = (PUSH_APPLY_F, 1, rest) +findPushSeq (D: rest) + = (PUSH_APPLY_D, 1, rest) +findPushSeq (L: rest) + = (PUSH_APPLY_L, 1, rest) +findPushSeq _ + = panic "GHC.StgToByteCode.findPushSeq" + +-- ----------------------------------------------------------------------------- +-- Case expressions + +doCase + :: StackDepth + -> Sequel + -> BCEnv + -> CgStgExpr + -> Id + -> [CgStgAlt] + -> BcM BCInstrList +doCase d s p scrut bndr alts + = do + profile <- getProfile + hsc_env <- getHscEnv + let + platform = profilePlatform profile + + -- Are we dealing with an unboxed tuple with a tuple return frame? + -- + -- 'Simple' tuples with at most one non-void component, + -- like (# Word# #) or (# Int#, State# RealWorld# #) do not have a + -- tuple return frame. This is because (# foo #) and (# foo, Void# #) + -- have the same runtime rep. We have more efficient specialized + -- return frames for the situations with one non-void element. + + ubx_tuple_frame = + (isUnboxedTupleType bndr_ty || isUnboxedSumType bndr_ty) && + length non_void_arg_reps > 1 + + non_void_arg_reps = non_void (typeArgReps platform bndr_ty) + + profiling + | Just interp <- hsc_interp hsc_env + = interpreterProfiled interp + | otherwise = False + + -- Top of stack is the return itbl, as usual. 
+ -- underneath it is the pointer to the alt_code BCO. + -- When an alt is entered, it assumes the returned value is + -- on top of the itbl. + ret_frame_size_b :: StackDepth + ret_frame_size_b | ubx_tuple_frame = + (if profiling then 5 else 4) * wordSize platform + | otherwise = 2 * wordSize platform + + -- The stack space used to save/restore the CCCS when profiling + save_ccs_size_b | profiling && + not ubx_tuple_frame = 2 * wordSize platform + | otherwise = 0 + + -- An unlifted value gets an extra info table pushed on top + -- when it is returned. + unlifted_itbl_size_b :: StackDepth + unlifted_itbl_size_b | isAlgCase = 0 + | ubx_tuple_frame = 3 * wordSize platform + | otherwise = wordSize platform + + (bndr_size, tuple_info, args_offsets) + | ubx_tuple_frame = + let bndr_ty = primRepCmmType platform + bndr_reps = filter (not.isVoidRep) (bcIdPrimReps bndr) + (tuple_info, args_offsets) = + layoutTuple profile 0 bndr_ty bndr_reps + in ( wordsToBytes platform (tupleSize tuple_info) + , tuple_info + , args_offsets + ) + | otherwise = ( wordsToBytes platform (idSizeW platform bndr) + , voidTupleInfo + , [] + ) + + -- depth of stack after the return value has been pushed + d_bndr = + d + ret_frame_size_b + bndr_size + + -- depth of stack after the extra info table for an unboxed return + -- has been pushed, if any. This is the stack depth at the + -- continuation. + d_alts = d + ret_frame_size_b + bndr_size + unlifted_itbl_size_b + + -- Env in which to compile the alts, not including + -- any vars bound by the alts themselves + p_alts = Map.insert bndr d_bndr p + + bndr_ty = idType bndr + isAlgCase = not (isUnliftedType bndr_ty) + + -- given an alt, return a discr and code for it. + codeAlt (DEFAULT, _, rhs) + = do rhs_code <- schemeE d_alts s p_alts rhs + return (NoDiscr, rhs_code) + + codeAlt alt@(_, bndrs, rhs) + -- primitive or nullary constructor alt: no need to UNPACK + | null real_bndrs = do + rhs_code <- schemeE d_alts s p_alts rhs + return (my_discr alt, rhs_code) + | isUnboxedTupleType bndr_ty || isUnboxedSumType bndr_ty = + let bndr_ty = primRepCmmType platform . 
bcIdPrimRep + tuple_start = d_bndr + (tuple_info, args_offsets) = + layoutTuple profile + 0 + bndr_ty + bndrs + + stack_bot = d_alts + + p' = Map.insertList + [ (arg, tuple_start - + wordsToBytes platform (tupleSize tuple_info) + + offset) + | (arg, offset) <- args_offsets + , not (isVoidRep $ bcIdPrimRep arg)] + p_alts + in do + rhs_code <- schemeE stack_bot s p' rhs + return (NoDiscr, rhs_code) + -- algebraic alt with some binders + | otherwise = + let (tot_wds, _ptrs_wds, args_offsets) = + mkVirtHeapOffsets profile NoHeader + [ NonVoid (bcIdPrimRep id, id) + | NonVoid id <- nonVoidIds real_bndrs + ] + size = WordOff tot_wds + + stack_bot = d_alts + wordsToBytes platform size + + -- convert offsets from Sp into offsets into the virtual stack + p' = Map.insertList + [ (arg, stack_bot - ByteOff offset) + | (NonVoid arg, offset) <- args_offsets ] + p_alts + in do + MASSERT(isAlgCase) + rhs_code <- schemeE stack_bot s p' rhs + return (my_discr alt, + unitOL (UNPACK (trunc16W size)) `appOL` rhs_code) + where + real_bndrs = filterOut isTyVar bndrs + + my_discr (DEFAULT, _, _) = NoDiscr {-shouldn't really happen-} + my_discr (DataAlt dc, _, _) + | isUnboxedTupleDataCon dc || isUnboxedSumDataCon dc + = NoDiscr + | otherwise + = DiscrP (fromIntegral (dataConTag dc - fIRST_TAG)) + my_discr (LitAlt l, _, _) + = case l of LitNumber LitNumInt i -> DiscrI (fromInteger i) + LitNumber LitNumWord w -> DiscrW (fromInteger w) + LitFloat r -> DiscrF (fromRational r) + LitDouble r -> DiscrD (fromRational r) + LitChar i -> DiscrI (ord i) + _ -> pprPanic "schemeE(StgCase).my_discr" (ppr l) + + maybe_ncons + | not isAlgCase = Nothing + | otherwise + = case [dc | (DataAlt dc, _, _) <- alts] of + [] -> Nothing + (dc:_) -> Just (tyConFamilySize (dataConTyCon dc)) + + -- the bitmap is relative to stack depth d, i.e. before the + -- BCO, info table and return value are pushed on. + -- This bit of code is v. similar to buildLivenessMask in CgBindery, + -- except that here we build the bitmap from the known bindings of + -- things that are pointers, whereas in CgBindery the code builds the + -- bitmap from the free slots and unboxed bindings. + -- (ToDo: merge?) + -- + -- NOTE [7/12/2006] bug #1013, testcase ghci/should_run/ghci002. + -- The bitmap must cover the portion of the stack up to the sequel only. + -- Previously we were building a bitmap for the whole depth (d), but we + -- really want a bitmap up to depth (d-s). This affects compilation of + -- case-of-case expressions, which is the only time we can be compiling a + -- case expression with s /= 0. 
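+ -- For example (illustrative), in a case-of-case
+ --
+ --     case (case x of ...) of ...
+ --
+ -- the scrutinee's inner case is compiled with the outer return
+ -- frame already on the stack, so it sees a sequel s /= 0 and its
+ -- bitmap must cover only the slots between d and s.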
+ + -- unboxed tuples get two more words, the second is a pointer (tuple_bco) + (extra_pointers, extra_slots) + | ubx_tuple_frame && profiling = ([1], 3) -- tuple_info, tuple_BCO, CCCS + | ubx_tuple_frame = ([1], 2) -- tuple_info, tuple_BCO + | otherwise = ([], 0) + + bitmap_size = trunc16W $ fromIntegral extra_slots + + bytesToWords platform (d - s) + + bitmap_size' :: Int + bitmap_size' = fromIntegral bitmap_size + + + pointers = + extra_pointers ++ + sort (filter (< bitmap_size') (map (+extra_slots) rel_slots)) + where + binds = Map.toList p + -- NB: unboxed tuple cases bind the scrut binder to the same offset + -- as one of the alt binders, so we have to remove any duplicates here: + rel_slots = nub $ map fromIntegral $ concatMap spread binds + spread (id, offset) | isUnboxedTupleType (idType id) || + isUnboxedSumType (idType id) = [] + | isFollowableArg (bcIdArgRep platform id) = [ rel_offset ] + | otherwise = [] + where rel_offset = trunc16W $ bytesToWords platform (d - offset) + + bitmap = intsToReverseBitmap platform bitmap_size'{-size-} pointers + + alt_stuff <- mapM codeAlt alts + alt_final <- mkMultiBranch maybe_ncons alt_stuff + + let + alt_bco_name = getName bndr + alt_bco = mkProtoBCO platform alt_bco_name alt_final (Left alts) + 0{-no arity-} bitmap_size bitmap True{-is alts-} + scrut_code <- schemeE (d + ret_frame_size_b + save_ccs_size_b) + (d + ret_frame_size_b + save_ccs_size_b) + p scrut + alt_bco' <- emitBc alt_bco + if ubx_tuple_frame + then do + let args_ptrs = + map (\(rep, off) -> (isFollowableArg (toArgRep platform rep), off)) + args_offsets + tuple_bco <- emitBc (tupleBCO platform tuple_info args_ptrs) + return (PUSH_ALTS_TUPLE alt_bco' tuple_info tuple_bco + `consOL` scrut_code) + else let push_alts + | isAlgCase + = PUSH_ALTS alt_bco' + | otherwise + = let unlifted_rep = + case non_void_arg_reps of + [] -> V + [rep] -> rep + _ -> panic "schemeE(StgCase).push_alts" + in PUSH_ALTS_UNLIFTED alt_bco' unlifted_rep + in return (push_alts `consOL` scrut_code) + + +-- ----------------------------------------------------------------------------- +-- Deal with tuples + +-- The native calling convention uses registers for tuples, but in the +-- bytecode interpreter, all values live on the stack. + +layoutTuple :: Profile + -> ByteOff + -> (a -> CmmType) + -> [a] + -> ( TupleInfo -- See Note [GHCi TupleInfo] + , [(a, ByteOff)] -- argument, offset on stack + ) +layoutTuple profile start_off arg_ty reps = + let platform = profilePlatform profile + (orig_stk_bytes, pos) = assignArgumentsPos profile + 0 + NativeReturn + arg_ty + reps + + -- keep the stack parameters in the same place + orig_stk_params = [(x, fromIntegral off) | (x, StackParam off) <- pos] + + -- sort the register parameters by register and add them to the stack + (regs, reg_params) + = unzip $ sortBy (comparing fst) + [(reg, x) | (x, RegisterParam reg) <- pos] + + (new_stk_bytes, new_stk_params) = assignStack platform + orig_stk_bytes + arg_ty + reg_params + + -- make live register bitmaps + bmp_reg r ~(v, f, d, l) + = case r of VanillaReg n _ -> (a v n, f, d, l ) + FloatReg n -> (v, a f n, d, l ) + DoubleReg n -> (v, f, a d n, l ) + LongReg n -> (v, f, d, a l n) + _ -> + pprPanic "GHC.StgToByteCode.layoutTuple unsupported register type" + (ppr r) + where a bmp n = bmp .|. 
(1 `shiftL` (n-1)) + + (vanilla_regs, float_regs, double_regs, long_regs) + = foldr bmp_reg (0, 0, 0, 0) regs + + get_byte_off (x, StackParam y) = (x, fromIntegral y) + get_byte_off _ = + panic "GHC.StgToByteCode.layoutTuple get_byte_off" + + in ( TupleInfo + { tupleSize = bytesToWords platform (ByteOff new_stk_bytes) + , tupleVanillaRegs = vanilla_regs + , tupleLongRegs = long_regs + , tupleFloatRegs = float_regs + , tupleDoubleRegs = double_regs + , tupleNativeStackSize = bytesToWords platform + (ByteOff orig_stk_bytes) + } + , sortBy (comparing snd) $ + map (\(x, o) -> (x, o + start_off)) + (orig_stk_params ++ map get_byte_off new_stk_params) + ) + +{- Note [unboxed tuple bytecodes and tuple_BCO] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + We have the bytecode instructions RETURN_TUPLE and PUSH_ALTS_TUPLE to + return and receive arbitrary unboxed tuples, respectively. These + instructions use the helper data tuple_BCO and tuple_info. + + The helper data is used to convert tuples between GHCs native calling + convention (object code), which uses stack and registers, and the bytecode + calling convention, which only uses the stack. See Note [GHCi TupleInfo] + for more details. + + + Returning a tuple + ================= + + Bytecode that returns a tuple first pushes all the tuple fields followed + by the appropriate tuple_info and tuple_BCO onto the stack. It then + executes the RETURN_TUPLE instruction, which causes the interpreter + to push stg_ret_t_info to the top of the stack. The stack (growing down) + then looks as follows: + + ... + next_frame + tuple_field_1 + tuple_field_2 + ... + tuple_field_n + tuple_info + tuple_BCO + stg_ret_t_info <- Sp + + If next_frame is bytecode, the interpreter will start executing it. If + it's object code, the interpreter jumps back to the scheduler, which in + turn jumps to stg_ret_t. stg_ret_t converts the tuple to the native + calling convention using the description in tuple_info, and then jumps + to next_frame. + + + Receiving a tuple + ================= + + Bytecode that receives a tuple uses the PUSH_ALTS_TUPLE instruction to + push a continuation, followed by jumping to the code that produces the + tuple. The PUSH_ALTS_TUPLE instuction contains three pieces of data: + + * cont_BCO: the continuation that receives the tuple + * tuple_info: see below + * tuple_BCO: see below + + The interpreter pushes these onto the stack when the PUSH_ALTS_TUPLE + instruction is executed, followed by stg_ctoi_tN_info, with N depending + on the number of stack words used by the tuple in the GHC native calling + convention. N is derived from tuple_info. + + For example if we expect a tuple with three words on the stack, the stack + looks as follows after PUSH_ALTS_TUPLE: + + ... + next_frame + cont_free_var_1 + cont_free_var_2 + ... + cont_free_var_n + tuple_info + tuple_BCO + cont_BCO + stg_ctoi_t3_info <- Sp + + If the tuple is returned by object code, stg_ctoi_t3 will deal with + adjusting the stack pointer and converting the tuple to the bytecode + calling convention. See Note [GHCi unboxed tuples stack spills] for more + details. + + + The tuple_BCO + ============= + + The tuple_BCO is a helper bytecode object. Its main purpose is describing + the contents of the stack frame containing the tuple for the storage + manager. It contains only instructions to immediately return the tuple + that is already on the stack. + + + The tuple_info word + =================== + + The tuple_info word describes the stack and STG register (e.g. 
R1..R6, + D1..D6) usage for the tuple. tuple_info contains enough information to + convert the tuple between the stack-only bytecode and stack+registers + GHC native calling conventions. + + See Note [GHCi tuple layout] for more details of how the data is packed + in a single word. + + -} + +tupleBCO :: Platform -> TupleInfo -> [(Bool, ByteOff)] -> [FFIInfo] -> ProtoBCO Name +tupleBCO platform info pointers = + mkProtoBCO platform invented_name body_code (Left []) + 0{-no arity-} bitmap_size bitmap False{-is alts-} + + where + {- + The tuple BCO is never referred to by name, so we can get away + with using a fake name here. We will need to change this if we want + to save some memory by sharing the BCO between places that have + the same tuple shape + -} + invented_name = mkSystemVarName (mkPseudoUniqueE 0) (fsLit "tuple") + + -- the first word in the frame is the tuple_info word, + -- which is not a pointer + bitmap_size = trunc16W $ 1 + tupleSize info + bitmap = intsToReverseBitmap platform (fromIntegral bitmap_size) $ + map ((+1) . fromIntegral . bytesToWords platform . snd) + (filter fst pointers) + body_code = mkSlideW 0 1 -- pop frame header + `snocOL` RETURN_TUPLE -- and add it again + +-- ----------------------------------------------------------------------------- +-- Deal with a CCall. + +-- Taggedly push the args onto the stack R->L, +-- deferencing ForeignObj#s and adjusting addrs to point to +-- payloads in Ptr/Byte arrays. Then, generate the marshalling +-- (machine) code for the ccall, and create bytecodes to call that and +-- then return in the right way. + +generateCCall + :: StackDepth + -> Sequel + -> BCEnv + -> CCallSpec -- where to call + -> Type + -> [StgArg] -- args (atoms) + -> BcM BCInstrList +generateCCall d0 s p (CCallSpec target cconv safety) result_ty args_r_to_l + = do + profile <- getProfile + + let + platform = profilePlatform profile + -- useful constants + addr_size_b :: ByteOff + addr_size_b = wordSize platform + + arrayish_rep_hdr_size :: TyCon -> Maybe Int + arrayish_rep_hdr_size t + | t == arrayPrimTyCon || t == mutableArrayPrimTyCon + = Just (arrPtrsHdrSize profile) + | t == smallArrayPrimTyCon || t == smallMutableArrayPrimTyCon + = Just (smallArrPtrsHdrSize profile) + | t == byteArrayPrimTyCon || t == mutableByteArrayPrimTyCon + = Just (arrWordsHdrSize profile) + | otherwise + = Nothing + + -- Get the args on the stack, with tags and suitably + -- dereferenced for the CCall. For each arg, return the + -- depth to the first word of the bits for that arg, and the + -- ArgRep of what was actually pushed. + + pargs + :: ByteOff -> [StgArg] -> BcM [(BCInstrList, PrimRep)] + pargs _ [] = return [] + pargs d (aa@(StgVarArg a):az) + | Just t <- tyConAppTyCon_maybe (idType a) + , Just hdr_sz <- arrayish_rep_hdr_size t + -- Do magic for Ptr/Byte arrays. Push a ptr to the array on + -- the stack but then advance it over the headers, so as to + -- point to the payload. + = do rest <- pargs (d + addr_size_b) az + (push_fo, _) <- pushAtom d p aa + -- The ptr points at the header. Advance it over the + -- header and then pretend this is an Addr#. 
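+ -- (Illustrative figures: on a 64-bit platform without profiling,
+ -- arrWordsHdrSize is two words, the info pointer plus the byte-size
+ -- field, so for a ByteArray# the SWIZZLE below advances the pointer
+ -- by 16 bytes to reach the payload.)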
+ let code = push_fo `snocOL` SWIZZLE 0 (fromIntegral hdr_sz) + return ((code, AddrRep) : rest) + pargs d (aa:az) = do (code_a, sz_a) <- pushAtom d p aa + rest <- pargs (d + sz_a) az + return ((code_a, atomPrimRep aa) : rest) + + code_n_reps <- pargs d0 args_r_to_l + let + (pushs_arg, a_reps_pushed_r_to_l) = unzip code_n_reps + a_reps_sizeW = sum (map (repSizeWords platform) a_reps_pushed_r_to_l) + + push_args = concatOL pushs_arg + !d_after_args = d0 + wordsToBytes platform a_reps_sizeW + a_reps_pushed_RAW + | null a_reps_pushed_r_to_l || not (isVoidRep (head a_reps_pushed_r_to_l)) + = panic "GHC.StgToByteCode.generateCCall: missing or invalid World token?" + | otherwise + = reverse (tail a_reps_pushed_r_to_l) + + -- Now: a_reps_pushed_RAW are the reps which are actually on the stack. + -- push_args is the code to do that. + -- d_after_args is the stack depth once the args are on. + + -- Get the result rep. + (returns_void, r_rep) + = case maybe_getCCallReturnRep result_ty of + Nothing -> (True, VoidRep) + Just rr -> (False, rr) + {- + Because the Haskell stack grows down, the a_reps refer to + lowest to highest addresses in that order. The args for the call + are on the stack. Now push an unboxed Addr# indicating + the C function to call. Then push a dummy placeholder for the + result. Finally, emit a CCALL insn with an offset pointing to the + Addr# just pushed, and a literal field holding the mallocville + address of the piece of marshalling code we generate. + So, just prior to the CCALL insn, the stack looks like this + (growing down, as usual): + + <arg_n> + ... + <arg_1> + Addr# address_of_C_fn + <placeholder-for-result#> (must be an unboxed type) + + The interpreter then calls the marshall code mentioned + in the CCALL insn, passing it (& <placeholder-for-result#>), + that is, the addr of the topmost word in the stack. + When this returns, the placeholder will have been + filled in. The placeholder is slid down to the sequel + depth, and we RETURN. + + This arrangement makes it simple to do f-i-dynamic since the Addr# + value is the first arg anyway. + + The marshalling code is generated specifically for this + call site, and so knows exactly the (Haskell) stack + offsets of the args, fn address and placeholder. It + copies the args to the C stack, calls the stacked addr, + and parks the result back in the placeholder. The interpreter + calls it as a normal C call, assuming it has a signature + void marshall_code ( StgWord* ptr_to_top_of_stack ) + -} + -- resolve static address + maybe_static_target :: Maybe Literal + maybe_static_target = + case target of + DynamicTarget -> Nothing + StaticTarget _ _ _ False -> + panic "generateCCall: unexpected FFI value import" + StaticTarget _ target _ True -> + Just (LitLabel target mb_size IsFunction) + where + mb_size + | OSMinGW32 <- platformOS platform + , StdCallConv <- cconv + = Just (fromIntegral a_reps_sizeW * platformWordSizeInBytes platform) + | otherwise + = Nothing + + let + is_static = isJust maybe_static_target + + -- Get the arg reps, zapping the leading Addr# in the dynamic case + a_reps -- | trace (showSDoc (ppr a_reps_pushed_RAW)) False = error "???" 
+ | is_static = a_reps_pushed_RAW + | otherwise = if null a_reps_pushed_RAW + then panic "GHC.StgToByteCode.generateCCall: dyn with no args" + else tail a_reps_pushed_RAW + + -- push the Addr# + (push_Addr, d_after_Addr) + | Just machlabel <- maybe_static_target + = (toOL [PUSH_UBX machlabel 1], d_after_args + addr_size_b) + | otherwise -- is already on the stack + = (nilOL, d_after_args) + + -- Push the return placeholder. For a call returning nothing, + -- this is a V (tag). + r_sizeW = repSizeWords platform r_rep + d_after_r = d_after_Addr + wordsToBytes platform r_sizeW + push_r = + if returns_void + then nilOL + else unitOL (PUSH_UBX (mkDummyLiteral platform r_rep) (trunc16W r_sizeW)) + + -- generate the marshalling code we're going to call + + -- Offset of the next stack frame down the stack. The CCALL + -- instruction needs to describe the chunk of stack containing + -- the ccall args to the GC, so it needs to know how large it + -- is. See comment in Interpreter.c with the CCALL instruction. + stk_offset = trunc16W $ bytesToWords platform (d_after_r - s) + + conv = case cconv of + CCallConv -> FFICCall + StdCallConv -> FFIStdCall + _ -> panic "GHC.StgToByteCode: unexpected calling convention" + + -- the only difference in libffi mode is that we prepare a cif + -- describing the call type by calling libffi, and we attach the + -- address of this to the CCALL instruction. + + + let ffires = primRepToFFIType platform r_rep + ffiargs = map (primRepToFFIType platform) a_reps + hsc_env <- getHscEnv + token <- ioToBc $ iservCmd hsc_env (PrepFFI conv ffiargs ffires) + recordFFIBc token + + let + -- do the call + do_call = unitOL (CCALL stk_offset token flags) + where flags = case safety of + PlaySafe -> 0x0 + PlayInterruptible -> 0x1 + PlayRisky -> 0x2 + + -- slide and return + d_after_r_min_s = bytesToWords platform (d_after_r - s) + wrapup = mkSlideW (trunc16W r_sizeW) (d_after_r_min_s - r_sizeW) + `snocOL` RETURN_UBX (toArgRep platform r_rep) + --trace (show (arg1_offW, args_offW , (map argRepSizeW a_reps) )) $ + return ( + push_args `appOL` + push_Addr `appOL` push_r `appOL` do_call `appOL` wrapup + ) + +primRepToFFIType :: Platform -> PrimRep -> FFIType +primRepToFFIType platform r + = case r of + VoidRep -> FFIVoid + IntRep -> signed_word + WordRep -> unsigned_word + Int8Rep -> FFISInt8 + Word8Rep -> FFIUInt8 + Int16Rep -> FFISInt16 + Word16Rep -> FFIUInt16 + Int32Rep -> FFISInt32 + Word32Rep -> FFIUInt32 + Int64Rep -> FFISInt64 + Word64Rep -> FFIUInt64 + AddrRep -> FFIPointer + FloatRep -> FFIFloat + DoubleRep -> FFIDouble + _ -> panic "primRepToFFIType" + where + (signed_word, unsigned_word) = case platformWordSize platform of + PW4 -> (FFISInt32, FFIUInt32) + PW8 -> (FFISInt64, FFIUInt64) + +-- Make a dummy literal, to be used as a placeholder for FFI return +-- values on the stack. 
+mkDummyLiteral :: Platform -> PrimRep -> Literal +mkDummyLiteral platform pr + = case pr of + IntRep -> mkLitInt platform 0 + WordRep -> mkLitWord platform 0 + Int8Rep -> mkLitInt8 0 + Word8Rep -> mkLitWord8 0 + Int16Rep -> mkLitInt16 0 + Word16Rep -> mkLitWord16 0 + Int32Rep -> mkLitInt32 0 + Word32Rep -> mkLitWord32 0 + Int64Rep -> mkLitInt64 0 + Word64Rep -> mkLitWord64 0 + AddrRep -> LitNullAddr + DoubleRep -> LitDouble 0 + FloatRep -> LitFloat 0 + _ -> pprPanic "mkDummyLiteral" (ppr pr) + + +-- Convert (eg) +-- GHC.Prim.Char# -> GHC.Prim.State# GHC.Prim.RealWorld +-- -> (# GHC.Prim.State# GHC.Prim.RealWorld, GHC.Prim.Int# #) +-- +-- to Just IntRep +-- and check that an unboxed pair is returned wherein the first arg is V'd. +-- +-- Alternatively, for call-targets returning nothing, convert +-- +-- GHC.Prim.Char# -> GHC.Prim.State# GHC.Prim.RealWorld +-- -> (# GHC.Prim.State# GHC.Prim.RealWorld #) +-- +-- to Nothing + +maybe_getCCallReturnRep :: Type -> Maybe PrimRep +maybe_getCCallReturnRep fn_ty + = let + (_a_tys, r_ty) = splitFunTys (dropForAlls fn_ty) + r_reps = typePrimRepArgs r_ty + + blargh :: a -- Used at more than one type + blargh = pprPanic "maybe_getCCallReturn: can't handle:" + (pprType fn_ty) + in + case r_reps of + [] -> panic "empty typePrimRepArgs" + [VoidRep] -> Nothing + [rep] + | isGcPtrRep rep -> blargh + | otherwise -> Just rep + + -- if it was, it would be impossible to create a + -- valid return value placeholder on the stack + _ -> blargh + +maybe_is_tagToEnum_call :: CgStgExpr -> Maybe (Id, [Name]) +-- Detect and extract relevant info for the tagToEnum kludge. +maybe_is_tagToEnum_call (StgOpApp (StgPrimOp TagToEnumOp) [StgVarArg v] t) + = Just (v, extract_constr_Names t) + where + extract_constr_Names ty + | rep_ty <- unwrapType ty + , Just tyc <- tyConAppTyCon_maybe rep_ty + , isDataTyCon tyc + = map (getName . dataConWorkId) (tyConDataCons tyc) + -- NOTE: use the worker name, not the source name of + -- the DataCon. See "GHC.Core.DataCon" for details. + | otherwise + = pprPanic "maybe_is_tagToEnum_call.extract_constr_Ids" (ppr ty) +maybe_is_tagToEnum_call _ = Nothing + +{- ----------------------------------------------------------------------------- +Note [Implementing tagToEnum#] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +(implement_tagToId arg names) compiles code which takes an argument +'arg', (call it i), and enters the i'th closure in the supplied list +as a consequence. The [Name] is a list of the constructors of this +(enumeration) type. + +The code we generate is this: + push arg + push bogus-word + + TESTEQ_I 0 L1 + PUSH_G <lbl for first data con> + JMP L_Exit + + L1: TESTEQ_I 1 L2 + PUSH_G <lbl for second data con> + JMP L_Exit + ...etc... + Ln: TESTEQ_I n L_fail + PUSH_G <lbl for last data con> + JMP L_Exit + + L_fail: CASEFAIL + + L_exit: SLIDE 1 n + ENTER + +The 'bogus-word' push is because TESTEQ_I expects the top of the stack +to have an info-table, and the next word to have the value to be +tested. This is very weird, but it's the way it is right now. See +Interpreter.c. We don't actually need an info-table here; we just +need to have the argument to be one-from-top on the stack, hence pushing +a 1-word null. See #8383. 
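+
+A worked example (illustrative): for
+
+    data Ordering = LT | EQ | GT
+
+the code generated for "tagToEnum# 1#" fails TESTEQ_I 0, jumps to L1,
+matches TESTEQ_I 1, and so pushes the closure for EQ before the final
+SLIDE and ENTER.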
+-}
+
+
+implement_tagToId
+    :: StackDepth
+    -> Sequel
+    -> BCEnv
+    -> Id
+    -> [Name]
+    -> BcM BCInstrList
+-- See Note [Implementing tagToEnum#]
+implement_tagToId d s p arg names
+  = ASSERT( notNull names )
+    do (push_arg, arg_bytes) <- pushAtom d p (StgVarArg arg)
+       labels <- getLabelsBc (genericLength names)
+       label_fail <- getLabelBc
+       label_exit <- getLabelBc
+       dflags <- getDynFlags
+       let infos = zip4 labels (tail labels ++ [label_fail])
+                        [0 ..] names
+           platform = targetPlatform dflags
+           steps = map (mkStep label_exit) infos
+           slide_ws = bytesToWords platform (d - s + arg_bytes)
+
+       return (push_arg
+               `appOL` unitOL (PUSH_UBX LitNullAddr 1)
+                   -- Push bogus word (see Note [Implementing tagToEnum#])
+               `appOL` concatOL steps
+               `appOL` toOL [ LABEL label_fail, CASEFAIL,
+                              LABEL label_exit ]
+               `appOL` mkSlideW 1 (slide_ws + 1)
+                   -- "+1" to account for bogus word
+                   --      (see Note [Implementing tagToEnum#])
+               `appOL` unitOL ENTER)
+  where
+    mkStep l_exit (my_label, next_label, n, name_for_n)
+       = toOL [LABEL my_label,
+               TESTEQ_I n next_label,
+               PUSH_G name_for_n,
+               JMP l_exit]
+
+
+-- -----------------------------------------------------------------------------
+-- pushAtom
+
+-- Push an atom onto the stack, returning suitable code & the number of
+-- stack bytes used.
+--
+-- The env p must map each variable to the highest-numbered stack
+-- slot for it. For example, if the stack has depth 4 and we
+-- tagged-ly push (v :: Int#) on it, the value will be in stack[4],
+-- the tag in stack[5], the stack will have depth 6, and p must map v
+-- to 5 and not to 4. Stack locations are numbered from zero, so a
+-- depth 6 stack has valid words 0 .. 5.
+
+pushAtom
+    :: StackDepth -> BCEnv -> StgArg -> BcM (BCInstrList, ByteOff)
+
+-- See Note [Empty case alternatives] in GHC.Core
+-- and Note [Bottoming expressions] in GHC.Core.Utils:
+-- The scrutinee of an empty case evaluates to bottom
+pushAtom d p (StgVarArg var)
+   | [] <- typePrimRep (idType var)
+   = return (nilOL, 0)
+
+   | isFCallId var
+   = pprPanic "pushAtom: shouldn't get an FCallId here" (ppr var)
+
+   | Just primop <- isPrimOpId_maybe var
+   = do
+       platform <- targetPlatform <$> getDynFlags
+       return (unitOL (PUSH_PRIMOP primop), wordSize platform)
+
+   | Just d_v <- lookupBCEnv_maybe var p  -- var is a local variable
+   = do platform <- targetPlatform <$> getDynFlags
+
+        let !szb = idSizeCon platform var
+            with_instr instr = do
+                let !off_b = trunc16B $ d - d_v
+                return (unitOL (instr off_b), wordSize platform)
+
+        case szb of
+            1 -> with_instr PUSH8_W
+            2 -> with_instr PUSH16_W
+            4 -> with_instr PUSH32_W
+            _ -> do
+                let !szw = bytesToWords platform szb
+                    !off_w = trunc16W $ bytesToWords platform (d - d_v) + szw - 1
+                return (toOL (genericReplicate szw (PUSH_L off_w)), szb)
+        -- d - d_v           offset from TOS to the first slot of the object
+        --
+        -- d - d_v + sz - 1  offset from TOS to the last slot of the object
+        --
+        -- Having found the last slot, we proceed to copy the right number of
+        -- slots on to the top of the stack.
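+        --
+        -- Worked example (a sketch, on a 64-bit platform): a two-word
+        -- object whose first slot is at d_v = 8 bytes while the current
+        -- depth is d = 24 bytes gives off_w = 2 + 2 - 1 = 3, so we emit
+        -- PUSH_L 3, PUSH_L 3.  The offset is the same for every copy
+        -- because each PUSH_L deepens the stack by one word, moving the
+        -- next slot to copy one word further from the TOS.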
+
+   | otherwise  -- var must be a global variable
+   = do topStrings <- getTopStrings
+        platform <- targetPlatform <$> getDynFlags
+        case lookupVarEnv topStrings var of
+            Just ptr -> pushAtom d p $ StgLitArg $ mkLitWord platform $
+                          fromIntegral $ ptrToWordPtr $ fromRemotePtr ptr
+            Nothing -> do
+                let sz = idSizeCon platform var
+                MASSERT( sz == wordSize platform )
+                return (unitOL (PUSH_G (getName var)), sz)
+
+pushAtom _ _ (StgLitArg lit) = do
+    platform <- targetPlatform <$> getDynFlags
+    let code :: PrimRep -> BcM (BCInstrList, ByteOff)
+        code rep =
+            return (unitOL instr, size_bytes)
+          where
+            size_bytes = ByteOff $ primRepSizeB platform rep
+            -- Here we handle the non-word-width cases specifically since we
+            -- must emit different bytecode for them.
+            instr =
+                case size_bytes of
+                    1 -> PUSH_UBX8 lit
+                    2 -> PUSH_UBX16 lit
+                    4 -> PUSH_UBX32 lit
+                    _ -> PUSH_UBX lit (trunc16W $ bytesToWords platform size_bytes)
+
+    case lit of
+        LitLabel {}    -> code AddrRep
+        LitFloat {}    -> code FloatRep
+        LitDouble {}   -> code DoubleRep
+        LitChar {}     -> code WordRep
+        LitNullAddr    -> code AddrRep
+        LitString {}   -> code AddrRep
+        LitRubbish {}  -> code WordRep
+        LitNumber nt _ -> case nt of
+            LitNumInt    -> code IntRep
+            LitNumWord   -> code WordRep
+            LitNumInt8   -> code Int8Rep
+            LitNumWord8  -> code Word8Rep
+            LitNumInt16  -> code Int16Rep
+            LitNumWord16 -> code Word16Rep
+            LitNumInt32  -> code Int32Rep
+            LitNumWord32 -> code Word32Rep
+            LitNumInt64  -> code Int64Rep
+            LitNumWord64 -> code Word64Rep
+            -- No LitIntegers or LitNaturals should be left by the time this
+            -- is called. CorePrep should have converted them all to a real
+            -- core representation.
+            LitNumInteger -> panic "pushAtom: LitInteger"
+            LitNumNatural -> panic "pushAtom: LitNatural"
+
+-- | Push an atom for a constructor (i.e., for a PACK instruction) onto the
+-- stack. This is slightly different from @pushAtom@ because constructor
+-- fields may be packed, i.e. stored at sub-word offsets. See also
+-- @mkConAppCode@ and @pushPadding@.
+pushConstrAtom
+    :: StackDepth -> BCEnv -> StgArg -> BcM (BCInstrList, ByteOff)
+pushConstrAtom _ _ (StgLitArg lit@(LitFloat _)) =
+    return (unitOL (PUSH_UBX32 lit), 4)
+
+pushConstrAtom d p va@(StgVarArg v)
+    | Just d_v <- lookupBCEnv_maybe v p = do  -- v is a local variable
+        platform <- targetPlatform <$> getDynFlags
+        let !szb = idSizeCon platform v
+            done instr = do
+                let !off = trunc16B $ d - d_v
+                return (unitOL (instr off), szb)
+        case szb of
+            1 -> done PUSH8
+            2 -> done PUSH16
+            4 -> done PUSH32
+            _ -> pushAtom d p va
+
+pushConstrAtom d p expr = pushAtom d p expr
+
+-- Decompose an arbitrary amount of padding into the 1-, 2- and 4-byte
+-- pad instructions the interpreter provides.
+pushPadding :: Int -> (BCInstrList, ByteOff)
+pushPadding !n = go n (nilOL, 0)
+  where
+    go n acc@(!instrs, !off) = case n of
+        0 -> acc
+        1 -> (instrs `mappend` unitOL PUSH_PAD8,  off + 1)
+        2 -> (instrs `mappend` unitOL PUSH_PAD16, off + 2)
+        3 -> go 1 (go 2 acc)
+        4 -> (instrs `mappend` unitOL PUSH_PAD32, off + 4)
+        _ -> go (n - 4) (go 4 acc)
+
+-- -----------------------------------------------------------------------------
+-- Given a bunch of alts code and their discrs, do the donkey work
+-- of making a multiway branch using a switch tree.
+-- What a load of hassle!
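+--
+-- For example (a sketch): integer alternatives with discriminators
+-- 1, 3 and 5 plus a default become a small binary search, roughly:
+--
+--              TESTLT_I 3 Lge
+--              TESTEQ_I 1 Ldef
+--              <code for 1>
+--      Lge:    TESTLT_I 5 Lge'
+--              TESTEQ_I 3 Ldef
+--              <code for 3>
+--      Lge':   TESTEQ_I 5 Ldef
+--              <code for 5>
+--      Ldef:   <default code>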
+ +mkMultiBranch :: Maybe Int -- # datacons in tycon, if alg alt + -- a hint; generates better code + -- Nothing is always safe + -> [(Discr, BCInstrList)] + -> BcM BCInstrList +mkMultiBranch maybe_ncons raw_ways = do + lbl_default <- getLabelBc + + let + mkTree :: [(Discr, BCInstrList)] -> Discr -> Discr -> BcM BCInstrList + mkTree [] _range_lo _range_hi = return (unitOL (JMP lbl_default)) + -- shouldn't happen? + + mkTree [val] range_lo range_hi + | range_lo == range_hi + = return (snd val) + | null defaults -- Note [CASEFAIL] + = do lbl <- getLabelBc + return (testEQ (fst val) lbl + `consOL` (snd val + `appOL` (LABEL lbl `consOL` unitOL CASEFAIL))) + | otherwise + = return (testEQ (fst val) lbl_default `consOL` snd val) + + -- Note [CASEFAIL] It may be that this case has no default + -- branch, but the alternatives are not exhaustive - this + -- happens for GADT cases for example, where the types + -- prove that certain branches are impossible. We could + -- just assume that the other cases won't occur, but if + -- this assumption was wrong (because of a bug in GHC) + -- then the result would be a segfault. So instead we + -- emit an explicit test and a CASEFAIL instruction that + -- causes the interpreter to barf() if it is ever + -- executed. + + mkTree vals range_lo range_hi + = let n = length vals `div` 2 + vals_lo = take n vals + vals_hi = drop n vals + v_mid = fst (head vals_hi) + in do + label_geq <- getLabelBc + code_lo <- mkTree vals_lo range_lo (dec v_mid) + code_hi <- mkTree vals_hi v_mid range_hi + return (testLT v_mid label_geq + `consOL` (code_lo + `appOL` unitOL (LABEL label_geq) + `appOL` code_hi)) + + the_default + = case defaults of + [] -> nilOL + [(_, def)] -> LABEL lbl_default `consOL` def + _ -> panic "mkMultiBranch/the_default" + instrs <- mkTree notd_ways init_lo init_hi + return (instrs `appOL` the_default) + where + (defaults, not_defaults) = partition (isNoDiscr.fst) raw_ways + notd_ways = sortBy (comparing fst) not_defaults + + testLT (DiscrI i) fail_label = TESTLT_I i fail_label + testLT (DiscrW i) fail_label = TESTLT_W i fail_label + testLT (DiscrF i) fail_label = TESTLT_F i fail_label + testLT (DiscrD i) fail_label = TESTLT_D i fail_label + testLT (DiscrP i) fail_label = TESTLT_P i fail_label + testLT NoDiscr _ = panic "mkMultiBranch NoDiscr" + + testEQ (DiscrI i) fail_label = TESTEQ_I i fail_label + testEQ (DiscrW i) fail_label = TESTEQ_W i fail_label + testEQ (DiscrF i) fail_label = TESTEQ_F i fail_label + testEQ (DiscrD i) fail_label = TESTEQ_D i fail_label + testEQ (DiscrP i) fail_label = TESTEQ_P i fail_label + testEQ NoDiscr _ = panic "mkMultiBranch NoDiscr" + + -- None of these will be needed if there are no non-default alts + (init_lo, init_hi) + | null notd_ways + = panic "mkMultiBranch: awesome foursome" + | otherwise + = case fst (head notd_ways) of + DiscrI _ -> ( DiscrI minBound, DiscrI maxBound ) + DiscrW _ -> ( DiscrW minBound, DiscrW maxBound ) + DiscrF _ -> ( DiscrF minF, DiscrF maxF ) + DiscrD _ -> ( DiscrD minD, DiscrD maxD ) + DiscrP _ -> ( DiscrP algMinBound, DiscrP algMaxBound ) + NoDiscr -> panic "mkMultiBranch NoDiscr" + + (algMinBound, algMaxBound) + = case maybe_ncons of + -- XXX What happens when n == 0? 
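+              -- (If n were 0, the subtraction would wrap around in
+              -- Word16; presumably that can't arise, since a type with
+              -- no constructors gives us no DiscrP alternatives to
+              -- branch on in the first place.)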
+ Just n -> (0, fromIntegral n - 1) + Nothing -> (minBound, maxBound) + + isNoDiscr NoDiscr = True + isNoDiscr _ = False + + dec (DiscrI i) = DiscrI (i-1) + dec (DiscrW w) = DiscrW (w-1) + dec (DiscrP i) = DiscrP (i-1) + dec other = other -- not really right, but if you + -- do cases on floating values, you'll get what you deserve + + -- same snotty comment applies to the following + minF, maxF :: Float + minD, maxD :: Double + minF = -1.0e37 + maxF = 1.0e37 + minD = -1.0e308 + maxD = 1.0e308 + + +-- ----------------------------------------------------------------------------- +-- Supporting junk for the compilation schemes + +-- Describes case alts +data Discr + = DiscrI Int + | DiscrW Word + | DiscrF Float + | DiscrD Double + | DiscrP Word16 + | NoDiscr + deriving (Eq, Ord) + +instance Outputable Discr where + ppr (DiscrI i) = int i + ppr (DiscrW w) = text (show w) + ppr (DiscrF f) = text (show f) + ppr (DiscrD d) = text (show d) + ppr (DiscrP i) = ppr i + ppr NoDiscr = text "DEF" + + +lookupBCEnv_maybe :: Id -> BCEnv -> Maybe ByteOff +lookupBCEnv_maybe = Map.lookup + +idSizeW :: Platform -> Id -> WordOff +idSizeW platform = WordOff . argRepSizeW platform . bcIdArgRep platform + +idSizeCon :: Platform -> Id -> ByteOff +idSizeCon platform var + -- unboxed tuple components are padded to word size + | isUnboxedTupleType (idType var) || + isUnboxedSumType (idType var) = + wordsToBytes platform . + WordOff . sum . map (argRepSizeW platform . toArgRep platform) . + bcIdPrimReps $ var + | otherwise = ByteOff (primRepSizeB platform (bcIdPrimRep var)) + +bcIdArgRep :: Platform -> Id -> ArgRep +bcIdArgRep platform = toArgRep platform . bcIdPrimRep + +bcIdPrimRep :: Id -> PrimRep +bcIdPrimRep id + | [rep] <- typePrimRepArgs (idType id) + = rep + | otherwise + = pprPanic "bcIdPrimRep" (ppr id <+> dcolon <+> ppr (idType id)) + + +bcIdPrimReps :: Id -> [PrimRep] +bcIdPrimReps id = typePrimRepArgs (idType id) + +repSizeWords :: Platform -> PrimRep -> WordOff +repSizeWords platform rep = WordOff $ argRepSizeW platform (toArgRep platform rep) + +isFollowableArg :: ArgRep -> Bool +isFollowableArg P = True +isFollowableArg _ = False + +-- | Indicate if the calling convention is supported +isSupportedCConv :: CCallSpec -> Bool +isSupportedCConv (CCallSpec _ cconv _) = case cconv of + CCallConv -> True -- we explicitly pattern match on every + StdCallConv -> True -- convention to ensure that a warning + PrimCallConv -> False -- is triggered when a new one is added + JavaScriptCallConv -> False + CApiConv -> False + +-- See bug #10462 +unsupportedCConvException :: a +unsupportedCConvException = throwGhcException (ProgramError + ("Error: bytecode compiler can't handle some foreign calling conventions\n"++ + " Workaround: use -fobject-code, or compile this module to .o separately.")) + +mkSlideB :: Platform -> ByteOff -> ByteOff -> OrdList BCInstr +mkSlideB platform !nb !db = mkSlideW n d + where + !n = trunc16W $ bytesToWords platform nb + !d = bytesToWords platform db + +mkSlideW :: Word16 -> WordOff -> OrdList BCInstr +mkSlideW !n !ws + | ws > fromIntegral limit + -- If the amount to slide doesn't fit in a Word16, generate multiple slide + -- instructions + = SLIDE n limit `consOL` mkSlideW n (ws - fromIntegral limit) + | ws == 0 + = nilOL + | otherwise + = unitOL (SLIDE n $ fromIntegral ws) + where + limit :: Word16 + limit = maxBound + +atomPrimRep :: StgArg -> PrimRep +atomPrimRep (StgVarArg v) = bcIdPrimRep v +atomPrimRep (StgLitArg l) = typePrimRep1 (literalType l) + +atomRep :: Platform -> StgArg -> 
ArgRep
+atomRep platform e = toArgRep platform (atomPrimRep e)
+
+-- | Let szsb be the sizes in bytes of some items pushed onto the stack, which
+-- has initial depth @original_depth@. Return the values which the stack
+-- environment should map these items to.
+mkStackOffsets :: ByteOff -> [ByteOff] -> [ByteOff]
+mkStackOffsets original_depth szsb = tail (scanl' (+) original_depth szsb)
+
+typeArgReps :: Platform -> Type -> [ArgRep]
+typeArgReps platform = map (toArgRep platform) . typePrimRepArgs
+
+-- -----------------------------------------------------------------------------
+-- The bytecode generator's monad
+
+data BcM_State
+   = BcM_State
+        { bcm_hsc_env :: HscEnv
+        , uniqSupply  :: UniqSupply      -- for generating fresh variable names
+        , thisModule  :: Module          -- current module (for breakpoints)
+        , nextlabel   :: Word32          -- for generating local labels
+        , ffis        :: [FFIInfo]       -- ffi info blocks, to be free()d
+                                         -- when the BCO is GCd
+        , modBreaks   :: Maybe ModBreaks -- info about breakpoints
+        , breakInfo   :: IntMap CgBreakInfo
+        , topStrings  :: IdEnv (RemotePtr ()) -- top-level string literals
+          -- See Note [generating code for top-level string literal bindings].
+        }
+
+newtype BcM r = BcM (BcM_State -> IO (BcM_State, r)) deriving (Functor)
+
+ioToBc :: IO a -> BcM a
+ioToBc io = BcM $ \st -> do
+  x <- io
+  return (st, x)
+
+runBc :: HscEnv -> UniqSupply -> Module -> Maybe ModBreaks
+      -> IdEnv (RemotePtr ())
+      -> BcM r
+      -> IO (BcM_State, r)
+runBc hsc_env us this_mod modBreaks topStrings (BcM m)
+   = m (BcM_State hsc_env us this_mod 0 [] modBreaks IntMap.empty topStrings)
+
+thenBc :: BcM a -> (a -> BcM b) -> BcM b
+thenBc (BcM expr) cont = BcM $ \st0 -> do
+  (st1, q) <- expr st0
+  let BcM k = cont q
+  (st2, r) <- k st1
+  return (st2, r)
+
+thenBc_ :: BcM a -> BcM b -> BcM b
+thenBc_ (BcM expr) (BcM cont) = BcM $ \st0 -> do
+  (st1, _) <- expr st0
+  (st2, r) <- cont st1
+  return (st2, r)
+
+returnBc :: a -> BcM a
+returnBc result = BcM $ \st -> return (st, result)
+
+instance Applicative BcM where
+    pure = returnBc
+    (<*>) = ap
+    (*>) = thenBc_
+
+instance Monad BcM where
+  (>>=) = thenBc
+  (>>)  = (*>)
+
+instance HasDynFlags BcM where
+    getDynFlags = BcM $ \st -> return (st, hsc_dflags (bcm_hsc_env st))
+
+getHscEnv :: BcM HscEnv
+getHscEnv = BcM $ \st -> return (st, bcm_hsc_env st)
+
+getProfile :: BcM Profile
+getProfile = targetProfile <$> getDynFlags
+
+emitBc :: ([FFIInfo] -> ProtoBCO Name) -> BcM (ProtoBCO Name)
+emitBc bco
+  = BcM $ \st -> return (st{ffis=[]}, bco (ffis st))
+
+recordFFIBc :: RemotePtr C_ffi_cif -> BcM ()
+recordFFIBc a
+  = BcM $ \st -> return (st{ffis = FFIInfo a : ffis st}, ())
+
+getLabelBc :: BcM LocalLabel
+getLabelBc
+  = BcM $ \st -> do let nl = nextlabel st
+                    when (nl == maxBound) $
+                        panic "getLabelBc: Ran out of labels"
+                    return (st{nextlabel = nl + 1}, LocalLabel nl)
+
+getLabelsBc :: Word32 -> BcM [LocalLabel]
+getLabelsBc n
+  = BcM $ \st -> let ctr = nextlabel st
+                 in return (st{nextlabel = ctr+n}, coerce [ctr .. 
ctr+n-1])
+
+getCCArray :: BcM (Array BreakIndex (RemotePtr CostCentre))
+getCCArray = BcM $ \st ->
+  let breaks = expectJust "GHC.StgToByteCode.getCCArray" $ modBreaks st in
+  return (st, modBreaks_ccs breaks)
+
+
+newBreakInfo :: BreakIndex -> CgBreakInfo -> BcM ()
+newBreakInfo ix info = BcM $ \st ->
+  return (st{breakInfo = IntMap.insert ix info (breakInfo st)}, ())
+
+newUnique :: BcM Unique
+newUnique = BcM $
+   \st -> case takeUniqFromSupply (uniqSupply st) of
+             (uniq, us) -> let newState = st { uniqSupply = us }
+                           in  return (newState, uniq)
+
+getCurrentModule :: BcM Module
+getCurrentModule = BcM $ \st -> return (st, thisModule st)
+
+getTopStrings :: BcM (IdEnv (RemotePtr ()))
+getTopStrings = BcM $ \st -> return (st, topStrings st)
+
+newId :: Type -> BcM Id
+newId ty = do
+    uniq <- newUnique
+    return $ mkSysLocal tickFS uniq Many ty
+
+tickFS :: FastString
+tickFS = fsLit "ticked"