diff options
author | Takano Akio <tak@anoak.io> | 2017-01-18 18:26:47 -0500 |
---|---|---|
committer | Ben Gamari <ben@smart-cactus.org> | 2017-01-20 14:36:29 -0500 |
commit | d49b2bb21691892ca6ac8f2403e31f2a5e53feb3 (patch) | |
tree | cc8488acf59467899e4d3279a340577eec95310f /compiler | |
parent | a2a67b77c3048713541d1ed96ec0b95fb2542f9a (diff) | |
download | haskell-d49b2bb21691892ca6ac8f2403e31f2a5e53feb3.tar.gz |
Allow top-level string literals in Core (#8472)
This commits relaxes the invariants of the Core syntax so that a
top-level variable can be bound to a primitive string literal of type
Addr#.
This commit:
* Relaxes the invatiants of the Core, and allows top-level bindings whose
type is Addr# as long as their RHS is either a primitive string literal or
another variable.
* Allows the simplifier and the full-laziness transformer to float out
primitive string literals to the top leve.
* Introduces the new StgGenTopBinding type to accomodate top-level Addr#
bindings.
* Introduces a new type of labels in the object code, with the suffix "_bytes",
for exported top-level Addr# bindings.
* Makes some built-in rules more robust. This was necessary to keep them
functional after the above changes.
This is a continuation of D2554.
Rebasing notes:
This had two slightly suspicious performance regressions:
* T12425: bytes allocated regressed by roughly 5%
* T4029: bytes allocated regressed by a bit over 1%
* T13035: bytes allocated regressed by a bit over 5%
These deserve additional investigation.
Rebased by: bgamari.
Test Plan: ./validate --slow
Reviewers: goldfire, trofi, simonmar, simonpj, austin, hvr, bgamari
Reviewed By: trofi, simonpj, bgamari
Subscribers: trofi, simonpj, gridaphobe, thomie
Differential Revision: https://phabricator.haskell.org/D2605
GHC Trac Issues: #8472
Diffstat (limited to 'compiler')
28 files changed, 388 insertions, 142 deletions
diff --git a/compiler/cmm/CLabel.hs b/compiler/cmm/CLabel.hs index 0f3410a66e..ee87ef1b37 100644 --- a/compiler/cmm/CLabel.hs +++ b/compiler/cmm/CLabel.hs @@ -26,6 +26,7 @@ module CLabel ( mkApEntryLabel, mkApInfoTableLabel, mkClosureTableLabel, + mkBytesLabel, mkLocalClosureLabel, mkLocalInfoTableLabel, @@ -389,6 +390,9 @@ data IdLabelInfo | ClosureTable -- ^ Table of closures for Enum tycons + | Bytes -- ^ Content of a string literal. See + -- Note [Bytes label]. + deriving (Eq, Ord) @@ -474,6 +478,7 @@ mkClosureTableLabel :: Name -> CafInfo -> CLabel mkLocalConInfoTableLabel :: CafInfo -> Name -> CLabel mkLocalConEntryLabel :: CafInfo -> Name -> CLabel mkConInfoTableLabel :: Name -> CafInfo -> CLabel +mkBytesLabel :: Name -> CLabel mkClosureLabel name c = IdLabel name c Closure mkInfoTableLabel name c = IdLabel name c InfoTable mkEntryLabel name c = IdLabel name c Entry @@ -481,6 +486,7 @@ mkClosureTableLabel name c = IdLabel name c ClosureTable mkLocalConInfoTableLabel c con = IdLabel con c ConInfoTable mkLocalConEntryLabel c con = IdLabel con c ConEntry mkConInfoTableLabel name c = IdLabel name c ConInfoTable +mkBytesLabel name = IdLabel name NoCafRefs Bytes mkConEntryLabel :: Name -> CafInfo -> CLabel mkConEntryLabel name c = IdLabel name c ConEntry @@ -935,6 +941,7 @@ idInfoLabelType info = ConInfoTable -> DataLabel ClosureTable -> DataLabel RednCounts -> DataLabel + Bytes -> DataLabel _ -> CodeLabel @@ -1056,6 +1063,11 @@ export this because in other modules we either have * A saturated call 'Just x'; allocate using Just_con_info Not exporting these Just_info labels reduces the number of symbols somewhat. + +Note [Bytes label] +~~~~~~~~~~~~~~~~~~ +For a top-level string literal 'foo', we have just one symbol 'foo_bytes', which +points to a static data block containing the content of the literal. -} instance Outputable CLabel where @@ -1234,6 +1246,7 @@ ppIdFlavor x = pp_cSEP <> ConEntry -> text "con_entry" ConInfoTable -> text "con_info" ClosureTable -> text "closure_tbl" + Bytes -> text "bytes" ) diff --git a/compiler/cmm/CmmInfo.hs b/compiler/cmm/CmmInfo.hs index b9981f247b..b5e800a977 100644 --- a/compiler/cmm/CmmInfo.hs +++ b/compiler/cmm/CmmInfo.hs @@ -400,7 +400,7 @@ mkProfLits _ (ProfilingInfo td cd) newStringLit :: [Word8] -> UniqSM (CmmLit, GenCmmDecl CmmStatics info stmt) newStringLit bytes = do { uniq <- getUniqueM - ; return (mkByteStringCLit uniq bytes) } + ; return (mkByteStringCLit (mkStringLitLabel uniq) bytes) } -- Misc utils diff --git a/compiler/cmm/CmmUtils.hs b/compiler/cmm/CmmUtils.hs index 3260cbab2f..1dab6eeff5 100644 --- a/compiler/cmm/CmmUtils.hs +++ b/compiler/cmm/CmmUtils.hs @@ -72,7 +72,6 @@ import Cmm import BlockId import CLabel import Outputable -import Unique import DynFlags import Util import CodeGen.Platform @@ -169,13 +168,13 @@ zeroExpr dflags = CmmLit (zeroCLit dflags) mkWordCLit :: DynFlags -> Integer -> CmmLit mkWordCLit dflags wd = CmmInt wd (wordWidth dflags) -mkByteStringCLit :: Unique -> [Word8] -> (CmmLit, GenCmmDecl CmmStatics info stmt) +mkByteStringCLit + :: CLabel -> [Word8] -> (CmmLit, GenCmmDecl CmmStatics info stmt) -- We have to make a top-level decl for the string, -- and return a literal pointing to it -mkByteStringCLit uniq bytes - = (CmmLabel lbl, CmmData (Section sec lbl) $ Statics lbl [CmmString bytes]) +mkByteStringCLit lbl bytes + = (CmmLabel lbl, CmmData (Section sec lbl) $ Statics lbl [CmmString bytes]) where - lbl = mkStringLitLabel uniq -- This can not happen for String literals (as there \NUL is replaced by -- C0 80). However, it can happen with Addr# literals. sec = if 0 `elem` bytes then ReadOnlyData else CString diff --git a/compiler/codeGen/StgCmm.hs b/compiler/codeGen/StgCmm.hs index bb82da265e..a420677625 100644 --- a/compiler/codeGen/StgCmm.hs +++ b/compiler/codeGen/StgCmm.hs @@ -24,6 +24,7 @@ import StgCmmHpc import StgCmmTicky import Cmm +import CmmUtils import CLabel import StgSyn @@ -45,6 +46,7 @@ import BasicTypes import OrdList import MkGraph +import qualified Data.ByteString as BS import Data.IORef import Control.Monad (when,void) import Util @@ -53,7 +55,7 @@ codeGen :: DynFlags -> Module -> [TyCon] -> CollectedCCs -- (Local/global) cost-centres needing declaring/registering. - -> [StgBinding] -- Bindings to convert + -> [StgTopBinding] -- Bindings to convert -> HpcInfo -> Stream IO CmmGroup () -- Output as a stream, so codegen can -- be interleaved with output @@ -113,8 +115,8 @@ This is so that we can write the top level processing in a compositional style, with the increasing static environment being plumbed as a state variable. -} -cgTopBinding :: DynFlags -> StgBinding -> FCode () -cgTopBinding dflags (StgNonRec id rhs) +cgTopBinding :: DynFlags -> StgTopBinding -> FCode () +cgTopBinding dflags (StgTopLifted (StgNonRec id rhs)) = do { id' <- maybeExternaliseId dflags id ; let (info, fcode) = cgTopRhs dflags NonRecursive id' rhs ; fcode @@ -122,7 +124,7 @@ cgTopBinding dflags (StgNonRec id rhs) -- so we find it when we look up occurrences } -cgTopBinding dflags (StgRec pairs) +cgTopBinding dflags (StgTopLifted (StgRec pairs)) = do { let (bndrs, rhss) = unzip pairs ; bndrs' <- Prelude.mapM (maybeExternaliseId dflags) bndrs ; let pairs' = zip bndrs' rhss @@ -132,6 +134,13 @@ cgTopBinding dflags (StgRec pairs) ; sequence_ fcodes } +cgTopBinding dflags (StgTopStringLit id str) + = do { id' <- maybeExternaliseId dflags id + ; let label = mkBytesLabel (idName id') + ; let (lit, decl) = mkByteStringCLit label (BS.unpack str) + ; emitDecl decl + ; addBindC (litIdInfo dflags id' mkLFStringLit lit) + } cgTopRhs :: DynFlags -> RecFlag -> Id -> StgRhs -> (CgIdInfo, FCode ()) -- The Id is passed along for setting up a binding... diff --git a/compiler/codeGen/StgCmmClosure.hs b/compiler/codeGen/StgCmmClosure.hs index 3cc0af0669..e799ea6639 100644 --- a/compiler/codeGen/StgCmmClosure.hs +++ b/compiler/codeGen/StgCmmClosure.hs @@ -26,6 +26,7 @@ module StgCmmClosure ( StandardFormInfo, -- ...ditto... mkLFThunk, mkLFReEntrant, mkConLFInfo, mkSelectorLFInfo, mkApLFInfo, mkLFImported, mkLFArgument, mkLFLetNoEscape, + mkLFStringLit, lfDynTag, maybeIsLFCon, isLFThunk, isLFReEntrant, lfUpdatable, @@ -332,6 +333,10 @@ mkLFImported id where arity = idFunRepArity id +------------- +mkLFStringLit :: LambdaFormInfo +mkLFStringLit = LFUnlifted + ----------------------------------------------------- -- Dynamic pointer tagging ----------------------------------------------------- diff --git a/compiler/codeGen/StgCmmEnv.hs b/compiler/codeGen/StgCmmEnv.hs index ba093fee88..3061fb351b 100644 --- a/compiler/codeGen/StgCmmEnv.hs +++ b/compiler/codeGen/StgCmmEnv.hs @@ -40,7 +40,10 @@ import MkGraph import Name import Outputable import StgSyn +import Type +import TysPrim import UniqFM +import Util import VarEnv ------------------------------------- @@ -125,8 +128,15 @@ getCgIdInfo id -- Should be imported; make up a CgIdInfo for it let name = idName id ; if isExternalName name then - let ext_lbl = CmmLabel (mkClosureLabel name $ idCafInfo id) - in return (litIdInfo dflags id (mkLFImported id) ext_lbl) + let ext_lbl + | isUnliftedType (idType id) = + -- An unlifted external Id must refer to a top-level + -- string literal. See Note [Bytes label] in CLabel. + ASSERT( idType id `eqType` addrPrimTy ) + mkBytesLabel name + | otherwise = mkClosureLabel name $ idCafInfo id + in return $ + litIdInfo dflags id (mkLFImported id) (CmmLabel ext_lbl) else cgLookupPanic id -- Bug }}} diff --git a/compiler/codeGen/StgCmmUtils.hs b/compiler/codeGen/StgCmmUtils.hs index 4a976e68af..295ac15a85 100644 --- a/compiler/codeGen/StgCmmUtils.hs +++ b/compiler/codeGen/StgCmmUtils.hs @@ -322,7 +322,7 @@ newStringCLit str = newByteStringCLit (map (fromIntegral . ord) str) newByteStringCLit :: [Word8] -> FCode CmmLit newByteStringCLit bytes = do { uniq <- newUnique - ; let (lit, decl) = mkByteStringCLit uniq bytes + ; let (lit, decl) = mkByteStringCLit (mkStringLitLabel uniq) bytes ; emitDecl decl ; return lit } diff --git a/compiler/coreSyn/CoreLint.hs b/compiler/coreSyn/CoreLint.hs index f9e7f863c4..c09b4a0288 100644 --- a/compiler/coreSyn/CoreLint.hs +++ b/compiler/coreSyn/CoreLint.hs @@ -30,6 +30,7 @@ import Bag import Literal import DataCon import TysWiredIn +import TysPrim import TcType ( isFloatingTy ) import Var import VarEnv @@ -480,14 +481,25 @@ lintSingleBinding top_lvl_flag rec_flag (binder,rhs) -- Check the let/app invariant -- See Note [CoreSyn let/app invariant] in CoreSyn ; checkL (not (isUnliftedType binder_ty) - || (isNonRec rec_flag && exprOkForSpeculation rhs)) + || (isNonRec rec_flag && exprOkForSpeculation rhs) + || exprIsLiteralString rhs) (mkRhsPrimMsg binder rhs) - -- Check that if the binder is top-level or recursive, it's not demanded + -- Check that if the binder is top-level or recursive, it's not + -- demanded. Primitive string literals are exempt as there is no + -- computation to perform, see Note [CoreSyn top-level string literals]. ; checkL (not (isStrictId binder) - || (isNonRec rec_flag && not (isTopLevel top_lvl_flag))) + || (isNonRec rec_flag && not (isTopLevel top_lvl_flag)) + || exprIsLiteralString rhs) (mkStrictMsg binder) + -- Check that if the binder is at the top level and has type Addr#, + -- that it is a string literal, see + -- Note [CoreSyn top-level string literals]. + ; checkL (not (isTopLevel top_lvl_flag && binder_ty `eqType` addrPrimTy) + || exprIsLiteralString rhs) + (mkTopNonLitStrMsg binder) + ; flags <- getLintFlags ; when (lf_check_inline_loop_breakers flags && isStrongLoopBreaker (idOccInfo binder) @@ -2033,6 +2045,10 @@ mkNonTopExternalNameMsg :: Id -> MsgDoc mkNonTopExternalNameMsg binder = hsep [text "Non-top-level binder has an external name:", ppr binder] +mkTopNonLitStrMsg :: Id -> MsgDoc +mkTopNonLitStrMsg binder + = hsep [text "Top-level Addr# binder has a non-literal rhs:", ppr binder] + mkKindErrMsg :: TyVar -> Type -> MsgDoc mkKindErrMsg tyvar arg_ty = vcat [text "Kinds don't match in type application:", diff --git a/compiler/coreSyn/CorePrep.hs b/compiler/coreSyn/CorePrep.hs index c93a121c23..fb650f61be 100644 --- a/compiler/coreSyn/CorePrep.hs +++ b/compiler/coreSyn/CorePrep.hs @@ -1168,7 +1168,9 @@ deFloatTop (Floats _ floats) = foldrOL get [] floats where get (FloatLet b) bs = occurAnalyseRHSs b : bs - get b _ = pprPanic "corePrepPgm" (ppr b) + get (FloatCase var body _) bs = + occurAnalyseRHSs (NonRec var body) : bs + get b _ = pprPanic "corePrepPgm" (ppr b) -- See Note [Dead code in CorePrep] occurAnalyseRHSs (NonRec x e) = NonRec x (occurAnalyseExpr_NoBinderSwap e) diff --git a/compiler/coreSyn/CoreSubst.hs b/compiler/coreSyn/CoreSubst.hs index d98536caec..758a17b34d 100644 --- a/compiler/coreSyn/CoreSubst.hs +++ b/compiler/coreSyn/CoreSubst.hs @@ -1339,7 +1339,7 @@ than the ordinary arity of the dfun: see Note [DFun unfoldings] in CoreSyn exprIsLiteral_maybe :: InScopeEnv -> CoreExpr -> Maybe Literal -- Same deal as exprIsConApp_maybe, but much simpler -- Nevertheless we do need to look through unfoldings for --- Integer literals, which are vigorously hoisted to top level +-- Integer and string literals, which are vigorously hoisted to top level -- and not subsequently inlined exprIsLiteral_maybe env@(_, id_unf) e = case e of diff --git a/compiler/coreSyn/CoreSyn.hs b/compiler/coreSyn/CoreSyn.hs index fd0cf3ed26..4dfd9c3dae 100644 --- a/compiler/coreSyn/CoreSyn.hs +++ b/compiler/coreSyn/CoreSyn.hs @@ -191,7 +191,9 @@ These data types are the heart of the compiler -- -- The right hand sides of all top-level and recursive @let@s -- /must/ be of lifted type (see "Type#type_classification" for --- the meaning of /lifted/ vs. /unlifted/). +-- the meaning of /lifted/ vs. /unlifted/). There is one exception +-- to this rule, top-level @let@s are allowed to bind primitive +-- string literals, see Note [CoreSyn top-level string literals]. -- -- See Note [CoreSyn let/app invariant] -- See Note [Levity polymorphism invariants] @@ -361,6 +363,46 @@ Note [CoreSyn letrec invariant] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ See #letrec_invariant# +Note [CoreSyn top-level string literals] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +As an exception to the usual rule that top-level binders must be lifted, +we allow binding primitive string literals (of type Addr#) of type Addr# at the +top level. This allows us to share string literals earlier in the pipeline and +crucially allows other optimizations in the Core2Core pipeline to fire. +Consider, + + f n = let a::Addr# = "foo"# + in \x -> blah + +In order to be able to inline `f`, we would like to float `a` to the top. +Another option would be to inline `a`, but that would lead to duplicating string +literals, which we want to avoid. See Trac #8472. + +The solution is simply to allow top-level unlifted binders. We can't allow +arbitrary unlifted expression at the top-level though, unlifted binders cannot +be thunks, so we just allow string literals. + +Also see Note [Compilation plan for top-level string literals]. + +Note [Compilation plan for top-level string literals] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Here is a summary on how top-level string literals are handled by various +parts of the compilation pipeline. + +* In the source language, there is no way to bind a primitive string literal + at the top leve. + +* In Core, we have a special rule that permits top-level Addr# bindings. See + Note [CoreSyn top-level string literals]. Core-to-core passes may introduce + new top-level string literals. + +* In STG, top-level string literals are explicitly represented in the syntax + tree. + +* A top-level string literal may end up exported from a module. In this case, + in the object file, the content of the exported literal is given a label with + the _bytes suffix. + Note [CoreSyn let/app invariant] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The let/app invariant diff --git a/compiler/coreSyn/CoreUtils.hs b/compiler/coreSyn/CoreUtils.hs index 2505fcfff4..b5d248e579 100644 --- a/compiler/coreSyn/CoreUtils.hs +++ b/compiler/coreSyn/CoreUtils.hs @@ -29,6 +29,7 @@ module CoreUtils ( exprIsHNF, exprOkForSpeculation, exprOkForSideEffects, exprIsWorkFree, exprIsBig, exprIsConLike, rhsIsStatic, isCheapApp, isExpandableApp, + exprIsLiteralString, exprIsTopLevelBindable, -- * Equality cheapEqExpr, cheapEqExpr', eqExpr, @@ -1581,6 +1582,17 @@ tick is there to tell us that the expression was evaluated, so we don't want to discard a seq on it. -} +-- | Can we bind this 'CoreExpr' at the top level? +exprIsTopLevelBindable :: CoreExpr -> Bool +-- See Note [CoreSyn top-level string literals] +exprIsTopLevelBindable expr + = exprIsLiteralString expr + || not (isUnliftedType (exprType expr)) + +exprIsLiteralString :: CoreExpr -> Bool +exprIsLiteralString (Lit (MachStr _)) = True +exprIsLiteralString _ = False + {- ************************************************************************ * * diff --git a/compiler/ghci/ByteCodeAsm.hs b/compiler/ghci/ByteCodeAsm.hs index 817e379003..9eb730ff1a 100644 --- a/compiler/ghci/ByteCodeAsm.hs +++ b/compiler/ghci/ByteCodeAsm.hs @@ -89,9 +89,10 @@ bcoFreeNames bco -- Top level assembler fn. assembleBCOs - :: HscEnv -> [ProtoBCO Name] -> [TyCon] -> Maybe ModBreaks + :: HscEnv -> [ProtoBCO Name] -> [TyCon] -> [RemotePtr ()] + -> Maybe ModBreaks -> IO CompiledByteCode -assembleBCOs hsc_env proto_bcos tycons modbreaks = do +assembleBCOs hsc_env proto_bcos tycons top_strs modbreaks = do itblenv <- mkITbls hsc_env tycons bcos <- mapM (assembleBCO (hsc_dflags hsc_env)) proto_bcos (bcos',ptrs) <- mallocStrings hsc_env bcos @@ -99,7 +100,7 @@ assembleBCOs hsc_env proto_bcos tycons modbreaks = do { bc_bcos = bcos' , bc_itbls = itblenv , bc_ffis = concat (map protoBCOFFIs proto_bcos) - , bc_strs = ptrs + , bc_strs = top_strs ++ ptrs , bc_breaks = modbreaks } diff --git a/compiler/ghci/ByteCodeGen.hs b/compiler/ghci/ByteCodeGen.hs index a4373b459f..f4b224d2a5 100644 --- a/compiler/ghci/ByteCodeGen.hs +++ b/compiler/ghci/ByteCodeGen.hs @@ -1,4 +1,4 @@ -{-# LANGUAGE CPP, MagicHash, RecordWildCards #-} +{-# LANGUAGE CPP, MagicHash, RecordWildCards, BangPatterns #-} {-# OPTIONS_GHC -fprof-auto-top #-} -- -- (c) The University of Glasgow 2002-2006 @@ -48,6 +48,7 @@ import SMRep import Bitmap import OrdList import Maybes +import VarEnv import Data.List import Foreign @@ -60,6 +61,7 @@ import Control.Arrow ( second ) import Control.Exception import Data.Array +import Data.ByteString (ByteString) import Data.Map (Map) import Data.IntMap (IntMap) import qualified Data.Map as Map @@ -85,12 +87,18 @@ byteCodeGen hsc_env this_mod binds tycs mb_modBreaks = withTiming (pure dflags) (text "ByteCodeGen"<+>brackets (ppr this_mod)) (const ()) $ do - let flatBinds = [ (bndr, simpleFreeVars rhs) - | (bndr, rhs) <- flattenBinds binds] + -- Split top-level binds into strings and others. + -- See Note [generating code for top-level string literal bindings]. + let (strings, flatBinds) = splitEithers $ do + (bndr, rhs) <- flattenBinds binds + return $ case rhs of + Lit (MachStr str) -> Left (bndr, str) + _ -> Right (bndr, simpleFreeVars rhs) + stringPtrs <- allocateTopStrings hsc_env strings us <- mkSplitUniqSupply 'y' (BcM_State{..}, proto_bcos) <- - runBc hsc_env us this_mod mb_modBreaks $ + runBc hsc_env us this_mod mb_modBreaks (mkVarEnv stringPtrs) $ mapM schemeTopBind flatBinds when (notNull ffis) @@ -99,7 +107,7 @@ byteCodeGen hsc_env this_mod binds tycs mb_modBreaks dumpIfSet_dyn dflags Opt_D_dump_BCOs "Proto-BCOs" (vcat (intersperse (char ' ') (map ppr proto_bcos))) - cbc <- assembleBCOs hsc_env proto_bcos tycs + cbc <- assembleBCOs hsc_env proto_bcos tycs (map snd stringPtrs) (case modBreaks of Nothing -> Nothing Just mb -> Just mb{ modBreaks_breakInfo = breakInfo }) @@ -116,6 +124,29 @@ byteCodeGen hsc_env this_mod binds tycs mb_modBreaks where dflags = hsc_dflags hsc_env +allocateTopStrings + :: HscEnv + -> [(Id, ByteString)] + -> IO [(Var, RemotePtr ())] +allocateTopStrings hsc_env topStrings = do + let !(bndrs, strings) = unzip topStrings + ptrs <- iservCmd hsc_env $ MallocStrings strings + return $ zip bndrs ptrs + +{- +Note [generating code for top-level string literal bindings] + +Here is a summary on how the byte code generator deals with top-level string +literals: + +1. Top-level string literal bindings are spearted from the rest of the module. + +2. The strings are allocated via iservCmd, in allocateTopStrings + +3. The mapping from binders to allocated strings (topStrings) are maintained in + BcM and used when generating code for variable references. +-} + -- ----------------------------------------------------------------------------- -- Generating byte code for an expression @@ -136,8 +167,8 @@ coreExprToBCOs hsc_env this_mod expr -- the uniques are needed to generate fresh variables when we introduce new -- let bindings for ticked expressions us <- mkSplitUniqSupply 'y' - (BcM_State _dflags _us _this_mod _final_ctr mallocd _ _ , proto_bco) - <- runBc hsc_env us this_mod Nothing $ + (BcM_State _dflags _us _this_mod _final_ctr mallocd _ _ _, proto_bco) + <- runBc hsc_env us this_mod Nothing emptyVarEnv $ schemeTopBind (invented_id, simpleFreeVars expr) when (notNull mallocd) @@ -1356,11 +1387,16 @@ pushAtom d p (AnnVar v) -- slots on to the top of the stack. | otherwise -- v must be a global variable - = do dflags <- getDynFlags - let sz :: Word16 - sz = fromIntegral (idSizeW dflags v) - MASSERT(sz == 1) - return (unitOL (PUSH_G (getName v)), sz) + = do topStrings <- getTopStrings + case lookupVarEnv topStrings v of + Just ptr -> pushAtom d p $ AnnLit $ MachWord $ fromIntegral $ + ptrToWordPtr $ fromRemotePtr ptr + Nothing -> do + dflags <- getDynFlags + let sz :: Word16 + sz = fromIntegral (idSizeW dflags v) + MASSERT(sz == 1) + return (unitOL (PUSH_G (getName v)), sz) pushAtom _ _ (AnnLit lit) = do @@ -1659,6 +1695,8 @@ data BcM_State -- Should be free()d when it is GCd , modBreaks :: Maybe ModBreaks -- info about breakpoints , breakInfo :: IntMap CgBreakInfo + , topStrings :: IdEnv (RemotePtr ()) -- top-level string literals + -- See Note [generating code for top-level string literal bindings]. } newtype BcM r = BcM (BcM_State -> IO (BcM_State, r)) @@ -1668,10 +1706,12 @@ ioToBc io = BcM $ \st -> do x <- io return (st, x) -runBc :: HscEnv -> UniqSupply -> Module -> Maybe ModBreaks -> BcM r +runBc :: HscEnv -> UniqSupply -> Module -> Maybe ModBreaks + -> IdEnv (RemotePtr ()) + -> BcM r -> IO (BcM_State, r) -runBc hsc_env us this_mod modBreaks (BcM m) - = m (BcM_State hsc_env us this_mod 0 [] modBreaks IntMap.empty) +runBc hsc_env us this_mod modBreaks topStrings (BcM m) + = m (BcM_State hsc_env us this_mod 0 [] modBreaks IntMap.empty topStrings) thenBc :: BcM a -> (a -> BcM b) -> BcM b thenBc (BcM expr) cont = BcM $ \st0 -> do @@ -1746,6 +1786,9 @@ newUnique = BcM $ getCurrentModule :: BcM Module getCurrentModule = BcM $ \st -> return (st, thisModule st) +getTopStrings :: BcM (IdEnv (RemotePtr ())) +getTopStrings = BcM $ \st -> return (st, topStrings st) + newId :: Type -> BcM Id newId ty = do uniq <- newUnique diff --git a/compiler/main/HscMain.hs b/compiler/main/HscMain.hs index b163cbbe21..092f04c1aa 100644 --- a/compiler/main/HscMain.hs +++ b/compiler/main/HscMain.hs @@ -1363,7 +1363,7 @@ hscCompileCmmFile hsc_env filename output_filename = runHsc hsc_env $ do doCodeGen :: HscEnv -> Module -> [TyCon] -> CollectedCCs - -> [StgBinding] + -> [StgTopBinding] -> HpcInfo -> IO (Stream IO CmmGroup ()) -- Note we produce a 'Stream' of CmmGroups, so that the @@ -1429,7 +1429,7 @@ doCodeGen hsc_env this_mod data_tycons myCoreToStg :: DynFlags -> Module -> CoreProgram - -> IO ( [StgBinding] -- output program + -> IO ( [StgTopBinding] -- output program , CollectedCCs) -- cost centre info (declared and used) myCoreToStg dflags this_mod prepd_binds = do let stg_binds diff --git a/compiler/prelude/PrelRules.hs b/compiler/prelude/PrelRules.hs index e98fd9f6a3..c2938c7dfd 100644 --- a/compiler/prelude/PrelRules.hs +++ b/compiler/prelude/PrelRules.hs @@ -987,9 +987,9 @@ builtinRules :: [CoreRule] builtinRules = [BuiltinRule { ru_name = fsLit "AppendLitString", ru_fn = unpackCStringFoldrName, - ru_nargs = 4, ru_try = \_ _ _ -> match_append_lit }, + ru_nargs = 4, ru_try = match_append_lit }, BuiltinRule { ru_name = fsLit "EqString", ru_fn = eqStringName, - ru_nargs = 2, ru_try = \dflags _ _ -> match_eq_string dflags }, + ru_nargs = 2, ru_try = match_eq_string }, BuiltinRule { ru_name = fsLit "Inline", ru_fn = inlineIdName, ru_nargs = 2, ru_try = \_ _ _ -> match_inline }, BuiltinRule { ru_name = fsLit "MagicDict", ru_fn = idName magicDictId, @@ -1133,37 +1133,42 @@ builtinIntegerRules = -- unpackFoldrCString# "foo" c (unpackFoldrCString# "baz" c n) -- = unpackFoldrCString# "foobaz" c n -match_append_lit :: [Expr CoreBndr] -> Maybe (Expr CoreBndr) -match_append_lit [Type ty1, - Lit (MachStr s1), - c1, - Var unpk `App` Type ty2 - `App` Lit (MachStr s2) - `App` c2 - `App` n - ] +match_append_lit :: RuleFun +match_append_lit _ id_unf _ + [ Type ty1 + , lit1 + , c1 + , Var unpk `App` Type ty2 + `App` lit2 + `App` c2 + `App` n + ] | unpk `hasKey` unpackCStringFoldrIdKey && c1 `cheapEqExpr` c2 + , Just (MachStr s1) <- exprIsLiteral_maybe id_unf lit1 + , Just (MachStr s2) <- exprIsLiteral_maybe id_unf lit2 = ASSERT( ty1 `eqType` ty2 ) Just (Var unpk `App` Type ty1 `App` Lit (MachStr (s1 `BS.append` s2)) `App` c1 `App` n) -match_append_lit _ = Nothing +match_append_lit _ _ _ _ = Nothing --------------------------------------------------- -- The rule is this: -- eqString (unpackCString# (Lit s1)) (unpackCString# (Lit s2) = s1==s2 -match_eq_string :: DynFlags -> [Expr CoreBndr] -> Maybe (Expr CoreBndr) -match_eq_string _ [Var unpk1 `App` Lit (MachStr s1), - Var unpk2 `App` Lit (MachStr s2)] - | unpk1 `hasKey` unpackCStringIdKey, - unpk2 `hasKey` unpackCStringIdKey +match_eq_string :: RuleFun +match_eq_string _ id_unf _ + [Var unpk1 `App` lit1, Var unpk2 `App` lit2] + | unpk1 `hasKey` unpackCStringIdKey + , unpk2 `hasKey` unpackCStringIdKey + , Just (MachStr s1) <- exprIsLiteral_maybe id_unf lit1 + , Just (MachStr s2) <- exprIsLiteral_maybe id_unf lit2 = Just (if s1 == s2 then trueValBool else falseValBool) -match_eq_string _ _ = Nothing +match_eq_string _ _ _ _ = Nothing --------------------------------------------------- diff --git a/compiler/profiling/SCCfinal.hs b/compiler/profiling/SCCfinal.hs index ee37ab14b6..9704e0b132 100644 --- a/compiler/profiling/SCCfinal.hs +++ b/compiler/profiling/SCCfinal.hs @@ -42,8 +42,8 @@ stgMassageForProfiling :: DynFlags -> Module -- module name -> UniqSupply -- unique supply - -> [StgBinding] -- input - -> (CollectedCCs, [StgBinding]) + -> [StgTopBinding] -- input + -> (CollectedCCs, [StgTopBinding]) stgMassageForProfiling dflags mod_name _us stg_binds = let @@ -69,24 +69,28 @@ stgMassageForProfiling dflags mod_name _us stg_binds all_cafs_ccs = mkSingletonCCS all_cafs_cc ---------- - do_top_bindings :: [StgBinding] -> MassageM [StgBinding] + do_top_bindings :: [StgTopBinding] -> MassageM [StgTopBinding] do_top_bindings [] = return [] - do_top_bindings (StgNonRec b rhs : bs) = do + do_top_bindings (StgTopLifted (StgNonRec b rhs) : bs) = do rhs' <- do_top_rhs b rhs bs' <- do_top_bindings bs - return (StgNonRec b rhs' : bs') + return (StgTopLifted (StgNonRec b rhs') : bs') - do_top_bindings (StgRec pairs : bs) = do + do_top_bindings (StgTopLifted (StgRec pairs) : bs) = do pairs2 <- mapM do_pair pairs bs' <- do_top_bindings bs - return (StgRec pairs2 : bs') + return (StgTopLifted (StgRec pairs2) : bs') where do_pair (b, rhs) = do rhs2 <- do_top_rhs b rhs return (b, rhs2) + do_top_bindings (b@StgTopStringLit{} : bs) = do + bs' <- do_top_bindings bs + return (b : bs') + ---------- do_top_rhs :: Id -> StgRhs -> MassageM StgRhs diff --git a/compiler/simplCore/CSE.hs b/compiler/simplCore/CSE.hs index 54fbc5008c..e364c31cdc 100644 --- a/compiler/simplCore/CSE.hs +++ b/compiler/simplCore/CSE.hs @@ -15,6 +15,7 @@ import Var ( Var ) import Id ( Id, idType, idUnfolding, idInlineActivation , zapIdOccInfo, zapIdUsageInfo ) import CoreUtils ( mkAltExpr + , exprIsLiteralString , stripTicksE, stripTicksT, mkTicks ) import Literal ( litIsTrivial ) import Type ( tyConAppArgs ) @@ -253,22 +254,22 @@ had -} cseProgram :: CoreProgram -> CoreProgram -cseProgram binds = snd (mapAccumL cseBind emptyCSEnv binds) +cseProgram binds = snd (mapAccumL (cseBind True) emptyCSEnv binds) -cseBind :: CSEnv -> CoreBind -> (CSEnv, CoreBind) -cseBind env (NonRec b e) +cseBind :: Bool -> CSEnv -> CoreBind -> (CSEnv, CoreBind) +cseBind toplevel env (NonRec b e) = (env2, NonRec b2 e1) where - e1 = tryForCSE env e + e1 = tryForCSE toplevel env e (env1, b1) = addBinder env b (env2, b2) = addBinding env1 b b1 e1 -cseBind env (Rec pairs) +cseBind toplevel env (Rec pairs) = (env2, Rec pairs') where (bndrs, rhss) = unzip pairs (env1, bndrs1) = addRecBinders env bndrs - rhss1 = map (tryForCSE env1) rhss + rhss1 = map (tryForCSE toplevel env1) rhss -- Process rhss in extended env1 (env2, pairs') = foldl do_one (env1, []) (zip3 bndrs bndrs1 rhss1) do_one (env, pairs) (b, b1, e1) @@ -311,8 +312,38 @@ addBinding env in_id out_id rhs' Lit l -> litIsTrivial l _ -> False -tryForCSE :: CSEnv -> InExpr -> OutExpr -tryForCSE env expr +{- +Note [Take care with literal strings] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Consider this example: + + x = "foo"# + y = "foo"# + ...x...y...x...y.... + +We would normally turn this into: + + x = "foo"# + y = x + ...x...x...x...x.... + +But this breaks an invariant of Core, namely that the RHS of a top-level binding +of type Addr# must be a string literal, not another variable. See Note +[CoreSyn top-level string literals] in CoreSyn. + +For this reason, we special case top-level bindings to literal strings and leave +the original RHS unmodified. This produces: + + x = "foo"# + y = "foo"# + ...x...x...x...x.... +-} + +tryForCSE :: Bool -> CSEnv -> InExpr -> OutExpr +tryForCSE toplevel env expr + | toplevel && exprIsLiteralString expr = expr + -- See Note [Take care with literal strings] | Just e <- lookupCSEnv env expr'' = mkTicks ticks e | otherwise = expr' -- The varToCoreExpr is needed if we have @@ -333,12 +364,12 @@ cseExpr env (Type t) = Type (substTy (csEnvSubst env) t) cseExpr env (Coercion c) = Coercion (substCo (csEnvSubst env) c) cseExpr _ (Lit lit) = Lit lit cseExpr env (Var v) = lookupSubst env v -cseExpr env (App f a) = App (cseExpr env f) (tryForCSE env a) +cseExpr env (App f a) = App (cseExpr env f) (tryForCSE False env a) cseExpr env (Tick t e) = Tick t (cseExpr env e) cseExpr env (Cast e co) = Cast (cseExpr env e) (substCo (csEnvSubst env) co) cseExpr env (Lam b e) = let (env', b') = addBinder env b in Lam b' (cseExpr env' e) -cseExpr env (Let bind e) = let (env', bind') = cseBind env bind +cseExpr env (Let bind e) = let (env', bind') = cseBind False env bind in Let bind' (cseExpr env' e) cseExpr env (Case e bndr ty alts) = cseCase env e bndr ty alts @@ -346,7 +377,7 @@ cseCase :: CSEnv -> InExpr -> InId -> InType -> [InAlt] -> OutExpr cseCase env scrut bndr ty alts = Case scrut1 bndr3 ty (map cse_alt alts) where - scrut1 = tryForCSE env scrut + scrut1 = tryForCSE False env scrut bndr1 = zapIdOccInfo bndr -- Zapping the OccInfo is needed because the extendCSEnv @@ -369,14 +400,14 @@ cseCase env scrut bndr ty alts -- case x of { True -> ....True.... } -- Don't replace True by x! -- Hence the 'null args', which also deal with literals and DEFAULT - = (DataAlt con, args', tryForCSE new_env rhs) + = (DataAlt con, args', tryForCSE False new_env rhs) where (env', args') = addBinders alt_env args new_env = extendCSEnv env' con_expr con_target con_expr = mkAltExpr (DataAlt con) args' arg_tys cse_alt (con, args, rhs) - = (con, args', tryForCSE env' rhs) + = (con, args', tryForCSE False env' rhs) where (env', args') = addBinders alt_env args diff --git a/compiler/simplCore/SetLevels.hs b/compiler/simplCore/SetLevels.hs index 0b81f29a7d..955d3ba89d 100644 --- a/compiler/simplCore/SetLevels.hs +++ b/compiler/simplCore/SetLevels.hs @@ -67,6 +67,7 @@ import CoreMonad ( FloatOutSwitches(..) ) import CoreUtils ( exprType , isExprLevPoly , exprOkForSpeculation + , exprIsTopLevelBindable , collectMakeStaticArgs ) import CoreArity ( exprBotStrictness_maybe ) @@ -494,7 +495,7 @@ lvlMFE strict_ctxt env ann_expr lvlExpr env ann_expr | Just (wrap_float, wrap_use) - <- canFloat_maybe rhs_env strict_ctxt float_is_lam expr_ty + <- canFloat_maybe rhs_env strict_ctxt float_is_lam expr = do { expr1 <- lvlExpr rhs_env ann_expr ; let abs_expr = mkLams abs_vars_w_lvls (wrap_float expr1) ; var <- newLvlVar abs_expr @@ -507,7 +508,6 @@ lvlMFE strict_ctxt env ann_expr where expr = deAnnotate ann_expr - expr_ty = exprType expr fvs = freeVarsOf ann_expr is_bot = isJust mb_bot_str mb_bot_str = exprBotStrictness_maybe expr @@ -544,12 +544,12 @@ lvlMFE strict_ctxt env ann_expr canFloat_maybe :: LevelEnv -> Bool -- Strict context -> Bool -- The float has a value lambda - -> Type + -> CoreExpr -> Maybe ( LevelledExpr -> LevelledExpr -- Wrep the flaot , LevelledExpr -> LevelledExpr) -- Wrap the use -- See Note [Floating MFEs of unlifted type] -canFloat_maybe env strict_ctxt float_is_lam expr_ty - | float_is_lam || not (isUnliftedType expr_ty) +canFloat_maybe env strict_ctxt float_is_lam expr + | float_is_lam || exprIsTopLevelBindable expr = Just (id, id) -- No wrapping needed if the type is lifted, or -- if we are wrapping it in one or more value lambdas @@ -568,6 +568,7 @@ canFloat_maybe env strict_ctxt float_is_lam expr_ty | otherwise -- e.g. do not float unboxed tuples = Nothing + where expr_ty = exprType expr {- Note [Floating MFEs of unlifted type] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/compiler/simplCore/SimplEnv.hs b/compiler/simplCore/SimplEnv.hs index 8a26220029..99d8291491 100644 --- a/compiler/simplCore/SimplEnv.hs +++ b/compiler/simplCore/SimplEnv.hs @@ -332,7 +332,8 @@ data Floats = Floats (OrdList OutBind) FloatFlag -- See Note [Simplifier floats] data FloatFlag - = FltLifted -- All bindings are lifted and lazy + = FltLifted -- All bindings are lifted and lazy *or* + -- consist of a single primitive string literal -- Hence ok to float to top level, or recursive | FltOkSpec -- All bindings are FltLifted *or* @@ -395,6 +396,9 @@ unitFloat bind = Floats (unitOL bind) (flag bind) flag (Rec {}) = FltLifted flag (NonRec bndr rhs) | not (isStrictId bndr) = FltLifted + | exprIsLiteralString rhs = FltLifted + -- String literals can be floated freely. + -- See Note [CoreSyn top-level string ltierals] in CoreSyn. | exprOkForSpeculation rhs = FltOkSpec -- Unlifted, and lifted but ok-for-spec (eg HNF) | otherwise = ASSERT2( not (isUnliftedType (idType bndr)), ppr bndr ) FltCareful diff --git a/compiler/simplCore/Simplify.hs b/compiler/simplCore/Simplify.hs index 2c8ff5e941..9e5c00d284 100644 --- a/compiler/simplCore/Simplify.hs +++ b/compiler/simplCore/Simplify.hs @@ -581,7 +581,7 @@ makeTrivialWithInfo :: TopLevelFlag -> SimplEnv -- Returned SimplEnv has same substitution as incoming one makeTrivialWithInfo top_lvl env context info expr | exprIsTrivial expr -- Already trivial - || not (bindingOk top_lvl expr expr_ty) -- Cannot trivialise + || not (bindingOk top_lvl expr) -- Cannot trivialise -- See Note [Cannot trivialise] = return (env, expr) | otherwise -- See Note [Take care] below @@ -603,11 +603,11 @@ makeTrivialWithInfo top_lvl env context info expr where expr_ty = exprType expr -bindingOk :: TopLevelFlag -> CoreExpr -> Type -> Bool +bindingOk :: TopLevelFlag -> CoreExpr -> Bool -- True iff we can have a binding of this expression at this level -- Precondition: the type is the type of the expression -bindingOk top_lvl _ expr_ty - | isTopLevel top_lvl = not (isUnliftedType expr_ty) +bindingOk top_lvl expr + | isTopLevel top_lvl = exprIsTopLevelBindable expr | otherwise = True {- @@ -626,12 +626,16 @@ so we don't want to turn it into because we'll just end up inlining x back, and that makes the simplifier loop. Better not to ANF-ise it at all. -A case in point is literal strings (a MachStr is not regarded as -trivial): +Literal strings are an exception. foo = Ptr "blob"# -We don't want to ANF-ise this. +We want to turn this into: + + foo1 = "blob"# + foo = Ptr foo1 + +See Note [CoreSyn top-level string literals] in CoreSyn. ************************************************************************ * * diff --git a/compiler/simplStg/SimplStg.hs b/compiler/simplStg/SimplStg.hs index 406e415287..08f9d79782 100644 --- a/compiler/simplStg/SimplStg.hs +++ b/compiler/simplStg/SimplStg.hs @@ -14,7 +14,7 @@ import StgSyn import CostCentre ( CollectedCCs ) import SCCfinal ( stgMassageForProfiling ) -import StgLint ( lintStgBindings ) +import StgLint ( lintStgTopBindings ) import StgStats ( showStgStats ) import UnariseStg ( unarise ) import StgCse ( stgCse ) @@ -29,8 +29,8 @@ import Control.Monad stg2stg :: DynFlags -- includes spec of what stg-to-stg passes to do -> Module -- module name (profiling only) - -> [StgBinding] -- input... - -> IO ( [StgBinding] -- output program... + -> [StgTopBinding] -- input... + -> IO ( [StgTopBinding] -- output program... , CollectedCCs) -- cost centre information (declared and used) stg2stg dflags module_name binds @@ -48,19 +48,19 @@ stg2stg dflags module_name binds <- foldM do_stg_pass (binds', us0, ccs) (getStgToDo dflags) ; dumpIfSet_dyn dflags Opt_D_dump_stg "Pre unarise:" - (pprStgBindings processed_binds) + (pprStgTopBindings processed_binds) ; let un_binds = unarise us1 processed_binds ; dumpIfSet_dyn dflags Opt_D_dump_stg "STG syntax:" - (pprStgBindings un_binds) + (pprStgTopBindings un_binds) ; return (un_binds, cost_centres) } where stg_linter = if gopt Opt_DoStgLinting dflags - then lintStgBindings + then lintStgTopBindings else ( \ _whodunnit binds -> binds ) ------------------------------------------- diff --git a/compiler/simplStg/StgCse.hs b/compiler/simplStg/StgCse.hs index 7454d24a2c..3e141439ed 100644 --- a/compiler/simplStg/StgCse.hs +++ b/compiler/simplStg/StgCse.hs @@ -240,7 +240,7 @@ substPairs env bndrs = mapAccumL go env bndrs -- Main entry point -stgCse :: [InStgBinding] -> [OutStgBinding] +stgCse :: [InStgTopBinding] -> [OutStgTopBinding] stgCse binds = snd $ mapAccumL stgCseTopLvl emptyInScopeSet binds -- Top level bindings. @@ -250,15 +250,16 @@ stgCse binds = snd $ mapAccumL stgCseTopLvl emptyInScopeSet binds -- But we still have to collect the set of in-scope variables, otherwise -- uniqAway might shadow a top-level closure. -stgCseTopLvl :: InScopeSet -> InStgBinding -> (InScopeSet, OutStgBinding) -stgCseTopLvl in_scope (StgNonRec bndr rhs) +stgCseTopLvl :: InScopeSet -> InStgTopBinding -> (InScopeSet, OutStgTopBinding) +stgCseTopLvl in_scope t@(StgTopStringLit _ _) = (in_scope, t) +stgCseTopLvl in_scope (StgTopLifted (StgNonRec bndr rhs)) = (in_scope' - , StgNonRec bndr (stgCseTopLvlRhs in_scope rhs)) + , StgTopLifted (StgNonRec bndr (stgCseTopLvlRhs in_scope rhs))) where in_scope' = in_scope `extendInScopeSet` bndr -stgCseTopLvl in_scope (StgRec eqs) +stgCseTopLvl in_scope (StgTopLifted (StgRec eqs)) = ( in_scope' - , StgRec [ (bndr, stgCseTopLvlRhs in_scope' rhs) | (bndr, rhs) <- eqs ]) + , StgTopLifted (StgRec [ (bndr, stgCseTopLvlRhs in_scope' rhs) | (bndr, rhs) <- eqs ])) where in_scope' = in_scope `extendInScopeSetList` [ bndr | (bndr, _) <- eqs ] stgCseTopLvlRhs :: InScopeSet -> InStgRhs -> OutStgRhs diff --git a/compiler/simplStg/StgStats.hs b/compiler/simplStg/StgStats.hs index 38544822d2..3f75ae23fa 100644 --- a/compiler/simplStg/StgStats.hs +++ b/compiler/simplStg/StgStats.hs @@ -75,7 +75,7 @@ countN = Map.singleton ************************************************************************ -} -showStgStats :: [StgBinding] -> String +showStgStats :: [StgTopBinding] -> String showStgStats prog = "STG Statistics:\n\n" @@ -99,10 +99,8 @@ showStgStats prog s (SingleEntryBinds _) = "SingleEntryBinds_Nested " s (UpdatableBinds _) = "UpdatableBinds_Nested " -gatherStgStats :: [StgBinding] -> StatEnv - -gatherStgStats binds - = combineSEs (map (statBinding True{-top-level-}) binds) +gatherStgStats :: [StgTopBinding] -> StatEnv +gatherStgStats binds = combineSEs (map statTopBinding binds) {- ************************************************************************ @@ -112,6 +110,10 @@ gatherStgStats binds ************************************************************************ -} +statTopBinding :: StgTopBinding -> StatEnv +statTopBinding (StgTopStringLit _ _) = countOne Literals +statTopBinding (StgTopLifted bind) = statBinding True bind + statBinding :: Bool -- True <=> top-level; False <=> nested -> StgBinding -> StatEnv diff --git a/compiler/simplStg/UnariseStg.hs b/compiler/simplStg/UnariseStg.hs index aa42586cd1..3f67bc278f 100644 --- a/compiler/simplStg/UnariseStg.hs +++ b/compiler/simplStg/UnariseStg.hs @@ -264,8 +264,13 @@ extendRho rho x (UnaryVal val) -------------------------------------------------------------------------------- -unarise :: UniqSupply -> [StgBinding] -> [StgBinding] -unarise us binds = initUs_ us (mapM (unariseBinding emptyVarEnv) binds) +unarise :: UniqSupply -> [StgTopBinding] -> [StgTopBinding] +unarise us binds = initUs_ us (mapM (unariseTopBinding emptyVarEnv) binds) + +unariseTopBinding :: UnariseEnv -> StgTopBinding -> UniqSM StgTopBinding +unariseTopBinding rho (StgTopLifted bind) + = StgTopLifted <$> unariseBinding rho bind +unariseTopBinding _ bind@StgTopStringLit{} = return bind unariseBinding :: UnariseEnv -> StgBinding -> UniqSM StgBinding unariseBinding rho (StgNonRec x rhs) diff --git a/compiler/stgSyn/CoreToStg.hs b/compiler/stgSyn/CoreToStg.hs index dcb923afea..37df9e2146 100644 --- a/compiler/stgSyn/CoreToStg.hs +++ b/compiler/stgSyn/CoreToStg.hs @@ -196,7 +196,7 @@ import Control.Monad (liftM, ap) -- Setting variable info: top-level, binds, RHSs -- -------------------------------------------------------------- -coreToStg :: DynFlags -> Module -> CoreProgram -> [StgBinding] +coreToStg :: DynFlags -> Module -> CoreProgram -> [StgTopBinding] coreToStg dflags this_mod pgm = pgm' where (_, _, pgm') = coreTopBindsToStg dflags this_mod emptyVarEnv pgm @@ -211,7 +211,7 @@ coreTopBindsToStg -> Module -> IdEnv HowBound -- environment for the bindings -> CoreProgram - -> (IdEnv HowBound, FreeVarsInfo, [StgBinding]) + -> (IdEnv HowBound, FreeVarsInfo, [StgTopBinding]) coreTopBindsToStg _ _ env [] = (env, emptyFVInfo, []) coreTopBindsToStg dflags this_mod env (b:bs) @@ -229,7 +229,14 @@ coreTopBindToStg -> IdEnv HowBound -> FreeVarsInfo -- Info about the body -> CoreBind - -> (IdEnv HowBound, FreeVarsInfo, StgBinding) + -> (IdEnv HowBound, FreeVarsInfo, StgTopBinding) + +coreTopBindToStg _ _ env body_fvs (NonRec id (Lit (MachStr str))) + -- top-level string literal + = let + env' = extendVarEnv env id how_bound + how_bound = LetBound TopLet 0 + in (env', body_fvs, StgTopStringLit id str) coreTopBindToStg dflags this_mod env body_fvs (NonRec id rhs) = let @@ -241,7 +248,7 @@ coreTopBindToStg dflags this_mod env body_fvs (NonRec id rhs) (stg_rhs, fvs') <- coreToTopStgRhs dflags this_mod body_fvs (id,rhs) return (stg_rhs, fvs') - bind = StgNonRec id stg_rhs + bind = StgTopLifted $ StgNonRec id stg_rhs in ASSERT2(consistentCafInfo id bind, ppr id ) -- NB: previously the assertion printed 'rhs' and 'bind' @@ -265,7 +272,7 @@ coreTopBindToStg dflags this_mod env body_fvs (Rec pairs) let fvs' = unionFVInfos fvss' return (stg_rhss, fvs') - bind = StgRec (zip binders stg_rhss) + bind = StgTopLifted $ StgRec (zip binders stg_rhss) in ASSERT2(consistentCafInfo (head binders) bind, ppr binders) (env', fvs' `unionFVInfo` body_fvs, bind) @@ -275,7 +282,7 @@ coreTopBindToStg dflags this_mod env body_fvs (Rec pairs) -- what CoreToStg has figured out about the binding's SRT. The -- CafInfo will be exact in all cases except when CorePrep has -- floated out a binding, in which case it will be approximate. -consistentCafInfo :: Id -> GenStgBinding Var Id -> Bool +consistentCafInfo :: Id -> GenStgTopBinding Var Id -> Bool consistentCafInfo id bind = WARN( not (exact || is_sat_thing) , ppr id <+> ppr id_marked_caffy <+> ppr binding_is_caffy ) safe diff --git a/compiler/stgSyn/StgLint.hs b/compiler/stgSyn/StgLint.hs index e31e7ae015..02d989cec0 100644 --- a/compiler/stgSyn/StgLint.hs +++ b/compiler/stgSyn/StgLint.hs @@ -6,7 +6,7 @@ {-# LANGUAGE CPP #-} -module StgLint ( lintStgBindings ) where +module StgLint ( lintStgTopBindings ) where import StgSyn @@ -54,12 +54,12 @@ generation. Solution: don't use it! (KSW 2000-05). * * ************************************************************************ -@lintStgBindings@ is the top-level interface function. +@lintStgTopBindings@ is the top-level interface function. -} -lintStgBindings :: String -> [StgBinding] -> [StgBinding] +lintStgTopBindings :: String -> [StgTopBinding] -> [StgTopBinding] -lintStgBindings whodunnit binds +lintStgTopBindings whodunnit binds = {-# SCC "StgLint" #-} case (initL (lint_binds binds)) of Nothing -> binds @@ -68,17 +68,20 @@ lintStgBindings whodunnit binds text whodunnit <+> text "***", msg, text "*** Offending Program ***", - pprStgBindings binds, + pprStgTopBindings binds, text "*** End of Offense ***"]) where - lint_binds :: [StgBinding] -> LintM () + lint_binds :: [StgTopBinding] -> LintM () lint_binds [] = return () lint_binds (bind:binds) = do - binders <- lintStgBinds bind + binders <- lint_bind bind addInScopeVars binders $ lint_binds binds + lint_bind (StgTopLifted bind) = lintStgBinds bind + lint_bind (StgTopStringLit v _) = return [v] + lintStgArg :: StgArg -> LintM (Maybe Type) lintStgArg (StgLitArg lit) = return (Just (literalType lit)) lintStgArg (StgVarArg v) = lintStgVar v diff --git a/compiler/stgSyn/StgSyn.hs b/compiler/stgSyn/StgSyn.hs index 48e836cc56..56978f868c 100644 --- a/compiler/stgSyn/StgSyn.hs +++ b/compiler/stgSyn/StgSyn.hs @@ -14,7 +14,7 @@ generation. module StgSyn ( GenStgArg(..), - GenStgBinding(..), GenStgExpr(..), GenStgRhs(..), + GenStgTopBinding(..), GenStgBinding(..), GenStgExpr(..), GenStgRhs(..), GenStgAlt, AltType(..), UpdateFlag(..), isUpdatable, @@ -24,11 +24,12 @@ module StgSyn ( combineStgBinderInfo, -- a set of synonyms for the most common (only :-) parameterisation - StgArg, StgBinding, StgExpr, StgRhs, StgAlt, + StgArg, + StgTopBinding, StgBinding, StgExpr, StgRhs, StgAlt, -- a set of synonyms to distinguish in- and out variants - InStgArg, InStgBinding, InStgExpr, InStgRhs, InStgAlt, - OutStgArg, OutStgBinding, OutStgExpr, OutStgRhs, OutStgAlt, + InStgArg, InStgTopBinding, InStgBinding, InStgExpr, InStgRhs, InStgAlt, + OutStgArg, OutStgTopBinding, OutStgBinding, OutStgExpr, OutStgRhs, OutStgAlt, -- StgOp StgOp(..), @@ -39,13 +40,14 @@ module StgSyn ( stgArgType, stripStgTicksTop, - pprStgBinding, pprStgBindings + pprStgBinding, pprStgTopBindings ) where #include "HsVersions.h" import CoreSyn ( AltCon, Tickish ) import CostCentre ( CostCentreStack ) +import Data.ByteString ( ByteString ) import Data.List ( intersperse ) import DataCon import DynFlags @@ -79,6 +81,12 @@ with respect to binder and occurrence information (just as in @CoreSyn@): -} +-- | A top-level binding. +data GenStgTopBinding bndr occ +-- See Note [CoreSyn top-level string literals] + = StgTopLifted (GenStgBinding bndr occ) + | StgTopStringLit bndr ByteString + data GenStgBinding bndr occ = StgNonRec bndr (GenStgRhs bndr occ) | StgRec [(bndr, GenStgRhs bndr occ)] @@ -421,11 +429,13 @@ stgRhsArity (StgRhsCon _ _ _) = 0 -- is that `TidyPgm` computed the CAF info on the `Id` but some transformations -- have taken place since then. -topStgBindHasCafRefs :: GenStgBinding bndr Id -> Bool -topStgBindHasCafRefs (StgNonRec _ rhs) +topStgBindHasCafRefs :: GenStgTopBinding bndr Id -> Bool +topStgBindHasCafRefs (StgTopLifted (StgNonRec _ rhs)) = topRhsHasCafRefs rhs -topStgBindHasCafRefs (StgRec binds) +topStgBindHasCafRefs (StgTopLifted (StgRec binds)) = any topRhsHasCafRefs (map snd binds) +topStgBindHasCafRefs StgTopStringLit{} + = False topRhsHasCafRefs :: GenStgRhs bndr Id -> Bool topRhsHasCafRefs (StgRhsClosure _ _ _ upd _ body) @@ -550,6 +560,7 @@ data AltType This happens to be the only one we use at the moment. -} +type StgTopBinding = GenStgTopBinding Id Id type StgBinding = GenStgBinding Id Id type StgArg = GenStgArg Id type StgExpr = GenStgExpr Id Id @@ -561,16 +572,18 @@ type StgAlt = GenStgAlt Id Id See CoreSyn for precedence in Core land -} -type InStgBinding = StgBinding -type InStgArg = StgArg -type InStgExpr = StgExpr -type InStgRhs = StgRhs -type InStgAlt = StgAlt -type OutStgBinding = StgBinding -type OutStgArg = StgArg -type OutStgExpr = StgExpr -type OutStgRhs = StgRhs -type OutStgAlt = StgAlt +type InStgTopBinding = StgTopBinding +type InStgBinding = StgBinding +type InStgArg = StgArg +type InStgExpr = StgExpr +type InStgRhs = StgRhs +type InStgAlt = StgAlt +type OutStgTopBinding = StgTopBinding +type OutStgBinding = StgBinding +type OutStgArg = StgArg +type OutStgExpr = StgExpr +type OutStgRhs = StgRhs +type OutStgAlt = StgAlt {- @@ -635,6 +648,15 @@ Robin Popplestone asked for semi-colon separators on STG binds; here's hoping he likes terminators instead... Ditto for case alternatives. -} +pprGenStgTopBinding :: (OutputableBndr bndr, Outputable bdee, Ord bdee) + => GenStgTopBinding bndr bdee -> SDoc + +pprGenStgTopBinding (StgTopStringLit bndr str) + = hang (hsep [pprBndr LetBind bndr, equals]) + 4 (pprHsBytes str <> semi) +pprGenStgTopBinding (StgTopLifted bind) + = pprGenStgBinding bind + pprGenStgBinding :: (OutputableBndr bndr, Outputable bdee, Ord bdee) => GenStgBinding bndr bdee -> SDoc @@ -653,13 +675,18 @@ pprGenStgBinding (StgRec pairs) pprStgBinding :: StgBinding -> SDoc pprStgBinding bind = pprGenStgBinding bind -pprStgBindings :: [StgBinding] -> SDoc -pprStgBindings binds = vcat $ intersperse blankLine (map pprGenStgBinding binds) +pprStgTopBindings :: [StgTopBinding] -> SDoc +pprStgTopBindings binds + = vcat $ intersperse blankLine (map pprGenStgTopBinding binds) instance (Outputable bdee) => Outputable (GenStgArg bdee) where ppr = pprStgArg instance (OutputableBndr bndr, Outputable bdee, Ord bdee) + => Outputable (GenStgTopBinding bndr bdee) where + ppr = pprGenStgTopBinding + +instance (OutputableBndr bndr, Outputable bdee, Ord bdee) => Outputable (GenStgBinding bndr bdee) where ppr = pprGenStgBinding |