summaryrefslogtreecommitdiff
path: root/compiler
diff options
context:
space:
mode:
authorTakano Akio <tak@anoak.io>2017-01-18 18:26:47 -0500
committerBen Gamari <ben@smart-cactus.org>2017-01-20 14:36:29 -0500
commitd49b2bb21691892ca6ac8f2403e31f2a5e53feb3 (patch)
treecc8488acf59467899e4d3279a340577eec95310f /compiler
parenta2a67b77c3048713541d1ed96ec0b95fb2542f9a (diff)
downloadhaskell-d49b2bb21691892ca6ac8f2403e31f2a5e53feb3.tar.gz
Allow top-level string literals in Core (#8472)
This commits relaxes the invariants of the Core syntax so that a top-level variable can be bound to a primitive string literal of type Addr#. This commit: * Relaxes the invatiants of the Core, and allows top-level bindings whose type is Addr# as long as their RHS is either a primitive string literal or another variable. * Allows the simplifier and the full-laziness transformer to float out primitive string literals to the top leve. * Introduces the new StgGenTopBinding type to accomodate top-level Addr# bindings. * Introduces a new type of labels in the object code, with the suffix "_bytes", for exported top-level Addr# bindings. * Makes some built-in rules more robust. This was necessary to keep them functional after the above changes. This is a continuation of D2554. Rebasing notes: This had two slightly suspicious performance regressions: * T12425: bytes allocated regressed by roughly 5% * T4029: bytes allocated regressed by a bit over 1% * T13035: bytes allocated regressed by a bit over 5% These deserve additional investigation. Rebased by: bgamari. Test Plan: ./validate --slow Reviewers: goldfire, trofi, simonmar, simonpj, austin, hvr, bgamari Reviewed By: trofi, simonpj, bgamari Subscribers: trofi, simonpj, gridaphobe, thomie Differential Revision: https://phabricator.haskell.org/D2605 GHC Trac Issues: #8472
Diffstat (limited to 'compiler')
-rw-r--r--compiler/cmm/CLabel.hs13
-rw-r--r--compiler/cmm/CmmInfo.hs2
-rw-r--r--compiler/cmm/CmmUtils.hs9
-rw-r--r--compiler/codeGen/StgCmm.hs17
-rw-r--r--compiler/codeGen/StgCmmClosure.hs5
-rw-r--r--compiler/codeGen/StgCmmEnv.hs14
-rw-r--r--compiler/codeGen/StgCmmUtils.hs2
-rw-r--r--compiler/coreSyn/CoreLint.hs22
-rw-r--r--compiler/coreSyn/CorePrep.hs4
-rw-r--r--compiler/coreSyn/CoreSubst.hs2
-rw-r--r--compiler/coreSyn/CoreSyn.hs44
-rw-r--r--compiler/coreSyn/CoreUtils.hs12
-rw-r--r--compiler/ghci/ByteCodeAsm.hs7
-rw-r--r--compiler/ghci/ByteCodeGen.hs73
-rw-r--r--compiler/main/HscMain.hs4
-rw-r--r--compiler/prelude/PrelRules.hs41
-rw-r--r--compiler/profiling/SCCfinal.hs18
-rw-r--r--compiler/simplCore/CSE.hs57
-rw-r--r--compiler/simplCore/SetLevels.hs11
-rw-r--r--compiler/simplCore/SimplEnv.hs6
-rw-r--r--compiler/simplCore/Simplify.hs18
-rw-r--r--compiler/simplStg/SimplStg.hs12
-rw-r--r--compiler/simplStg/StgCse.hs13
-rw-r--r--compiler/simplStg/StgStats.hs12
-rw-r--r--compiler/simplStg/UnariseStg.hs9
-rw-r--r--compiler/stgSyn/CoreToStg.hs19
-rw-r--r--compiler/stgSyn/StgLint.hs17
-rw-r--r--compiler/stgSyn/StgSyn.hs67
28 files changed, 388 insertions, 142 deletions
diff --git a/compiler/cmm/CLabel.hs b/compiler/cmm/CLabel.hs
index 0f3410a66e..ee87ef1b37 100644
--- a/compiler/cmm/CLabel.hs
+++ b/compiler/cmm/CLabel.hs
@@ -26,6 +26,7 @@ module CLabel (
mkApEntryLabel,
mkApInfoTableLabel,
mkClosureTableLabel,
+ mkBytesLabel,
mkLocalClosureLabel,
mkLocalInfoTableLabel,
@@ -389,6 +390,9 @@ data IdLabelInfo
| ClosureTable -- ^ Table of closures for Enum tycons
+ | Bytes -- ^ Content of a string literal. See
+ -- Note [Bytes label].
+
deriving (Eq, Ord)
@@ -474,6 +478,7 @@ mkClosureTableLabel :: Name -> CafInfo -> CLabel
mkLocalConInfoTableLabel :: CafInfo -> Name -> CLabel
mkLocalConEntryLabel :: CafInfo -> Name -> CLabel
mkConInfoTableLabel :: Name -> CafInfo -> CLabel
+mkBytesLabel :: Name -> CLabel
mkClosureLabel name c = IdLabel name c Closure
mkInfoTableLabel name c = IdLabel name c InfoTable
mkEntryLabel name c = IdLabel name c Entry
@@ -481,6 +486,7 @@ mkClosureTableLabel name c = IdLabel name c ClosureTable
mkLocalConInfoTableLabel c con = IdLabel con c ConInfoTable
mkLocalConEntryLabel c con = IdLabel con c ConEntry
mkConInfoTableLabel name c = IdLabel name c ConInfoTable
+mkBytesLabel name = IdLabel name NoCafRefs Bytes
mkConEntryLabel :: Name -> CafInfo -> CLabel
mkConEntryLabel name c = IdLabel name c ConEntry
@@ -935,6 +941,7 @@ idInfoLabelType info =
ConInfoTable -> DataLabel
ClosureTable -> DataLabel
RednCounts -> DataLabel
+ Bytes -> DataLabel
_ -> CodeLabel
@@ -1056,6 +1063,11 @@ export this because in other modules we either have
* A saturated call 'Just x'; allocate using Just_con_info
Not exporting these Just_info labels reduces the number of symbols
somewhat.
+
+Note [Bytes label]
+~~~~~~~~~~~~~~~~~~
+For a top-level string literal 'foo', we have just one symbol 'foo_bytes', which
+points to a static data block containing the content of the literal.
-}
instance Outputable CLabel where
@@ -1234,6 +1246,7 @@ ppIdFlavor x = pp_cSEP <>
ConEntry -> text "con_entry"
ConInfoTable -> text "con_info"
ClosureTable -> text "closure_tbl"
+ Bytes -> text "bytes"
)
diff --git a/compiler/cmm/CmmInfo.hs b/compiler/cmm/CmmInfo.hs
index b9981f247b..b5e800a977 100644
--- a/compiler/cmm/CmmInfo.hs
+++ b/compiler/cmm/CmmInfo.hs
@@ -400,7 +400,7 @@ mkProfLits _ (ProfilingInfo td cd)
newStringLit :: [Word8] -> UniqSM (CmmLit, GenCmmDecl CmmStatics info stmt)
newStringLit bytes
= do { uniq <- getUniqueM
- ; return (mkByteStringCLit uniq bytes) }
+ ; return (mkByteStringCLit (mkStringLitLabel uniq) bytes) }
-- Misc utils
diff --git a/compiler/cmm/CmmUtils.hs b/compiler/cmm/CmmUtils.hs
index 3260cbab2f..1dab6eeff5 100644
--- a/compiler/cmm/CmmUtils.hs
+++ b/compiler/cmm/CmmUtils.hs
@@ -72,7 +72,6 @@ import Cmm
import BlockId
import CLabel
import Outputable
-import Unique
import DynFlags
import Util
import CodeGen.Platform
@@ -169,13 +168,13 @@ zeroExpr dflags = CmmLit (zeroCLit dflags)
mkWordCLit :: DynFlags -> Integer -> CmmLit
mkWordCLit dflags wd = CmmInt wd (wordWidth dflags)
-mkByteStringCLit :: Unique -> [Word8] -> (CmmLit, GenCmmDecl CmmStatics info stmt)
+mkByteStringCLit
+ :: CLabel -> [Word8] -> (CmmLit, GenCmmDecl CmmStatics info stmt)
-- We have to make a top-level decl for the string,
-- and return a literal pointing to it
-mkByteStringCLit uniq bytes
- = (CmmLabel lbl, CmmData (Section sec lbl) $ Statics lbl [CmmString bytes])
+mkByteStringCLit lbl bytes
+ = (CmmLabel lbl, CmmData (Section sec lbl) $ Statics lbl [CmmString bytes])
where
- lbl = mkStringLitLabel uniq
-- This can not happen for String literals (as there \NUL is replaced by
-- C0 80). However, it can happen with Addr# literals.
sec = if 0 `elem` bytes then ReadOnlyData else CString
diff --git a/compiler/codeGen/StgCmm.hs b/compiler/codeGen/StgCmm.hs
index bb82da265e..a420677625 100644
--- a/compiler/codeGen/StgCmm.hs
+++ b/compiler/codeGen/StgCmm.hs
@@ -24,6 +24,7 @@ import StgCmmHpc
import StgCmmTicky
import Cmm
+import CmmUtils
import CLabel
import StgSyn
@@ -45,6 +46,7 @@ import BasicTypes
import OrdList
import MkGraph
+import qualified Data.ByteString as BS
import Data.IORef
import Control.Monad (when,void)
import Util
@@ -53,7 +55,7 @@ codeGen :: DynFlags
-> Module
-> [TyCon]
-> CollectedCCs -- (Local/global) cost-centres needing declaring/registering.
- -> [StgBinding] -- Bindings to convert
+ -> [StgTopBinding] -- Bindings to convert
-> HpcInfo
-> Stream IO CmmGroup () -- Output as a stream, so codegen can
-- be interleaved with output
@@ -113,8 +115,8 @@ This is so that we can write the top level processing in a compositional
style, with the increasing static environment being plumbed as a state
variable. -}
-cgTopBinding :: DynFlags -> StgBinding -> FCode ()
-cgTopBinding dflags (StgNonRec id rhs)
+cgTopBinding :: DynFlags -> StgTopBinding -> FCode ()
+cgTopBinding dflags (StgTopLifted (StgNonRec id rhs))
= do { id' <- maybeExternaliseId dflags id
; let (info, fcode) = cgTopRhs dflags NonRecursive id' rhs
; fcode
@@ -122,7 +124,7 @@ cgTopBinding dflags (StgNonRec id rhs)
-- so we find it when we look up occurrences
}
-cgTopBinding dflags (StgRec pairs)
+cgTopBinding dflags (StgTopLifted (StgRec pairs))
= do { let (bndrs, rhss) = unzip pairs
; bndrs' <- Prelude.mapM (maybeExternaliseId dflags) bndrs
; let pairs' = zip bndrs' rhss
@@ -132,6 +134,13 @@ cgTopBinding dflags (StgRec pairs)
; sequence_ fcodes
}
+cgTopBinding dflags (StgTopStringLit id str)
+ = do { id' <- maybeExternaliseId dflags id
+ ; let label = mkBytesLabel (idName id')
+ ; let (lit, decl) = mkByteStringCLit label (BS.unpack str)
+ ; emitDecl decl
+ ; addBindC (litIdInfo dflags id' mkLFStringLit lit)
+ }
cgTopRhs :: DynFlags -> RecFlag -> Id -> StgRhs -> (CgIdInfo, FCode ())
-- The Id is passed along for setting up a binding...
diff --git a/compiler/codeGen/StgCmmClosure.hs b/compiler/codeGen/StgCmmClosure.hs
index 3cc0af0669..e799ea6639 100644
--- a/compiler/codeGen/StgCmmClosure.hs
+++ b/compiler/codeGen/StgCmmClosure.hs
@@ -26,6 +26,7 @@ module StgCmmClosure (
StandardFormInfo, -- ...ditto...
mkLFThunk, mkLFReEntrant, mkConLFInfo, mkSelectorLFInfo,
mkApLFInfo, mkLFImported, mkLFArgument, mkLFLetNoEscape,
+ mkLFStringLit,
lfDynTag,
maybeIsLFCon, isLFThunk, isLFReEntrant, lfUpdatable,
@@ -332,6 +333,10 @@ mkLFImported id
where
arity = idFunRepArity id
+-------------
+mkLFStringLit :: LambdaFormInfo
+mkLFStringLit = LFUnlifted
+
-----------------------------------------------------
-- Dynamic pointer tagging
-----------------------------------------------------
diff --git a/compiler/codeGen/StgCmmEnv.hs b/compiler/codeGen/StgCmmEnv.hs
index ba093fee88..3061fb351b 100644
--- a/compiler/codeGen/StgCmmEnv.hs
+++ b/compiler/codeGen/StgCmmEnv.hs
@@ -40,7 +40,10 @@ import MkGraph
import Name
import Outputable
import StgSyn
+import Type
+import TysPrim
import UniqFM
+import Util
import VarEnv
-------------------------------------
@@ -125,8 +128,15 @@ getCgIdInfo id
-- Should be imported; make up a CgIdInfo for it
let name = idName id
; if isExternalName name then
- let ext_lbl = CmmLabel (mkClosureLabel name $ idCafInfo id)
- in return (litIdInfo dflags id (mkLFImported id) ext_lbl)
+ let ext_lbl
+ | isUnliftedType (idType id) =
+ -- An unlifted external Id must refer to a top-level
+ -- string literal. See Note [Bytes label] in CLabel.
+ ASSERT( idType id `eqType` addrPrimTy )
+ mkBytesLabel name
+ | otherwise = mkClosureLabel name $ idCafInfo id
+ in return $
+ litIdInfo dflags id (mkLFImported id) (CmmLabel ext_lbl)
else
cgLookupPanic id -- Bug
}}}
diff --git a/compiler/codeGen/StgCmmUtils.hs b/compiler/codeGen/StgCmmUtils.hs
index 4a976e68af..295ac15a85 100644
--- a/compiler/codeGen/StgCmmUtils.hs
+++ b/compiler/codeGen/StgCmmUtils.hs
@@ -322,7 +322,7 @@ newStringCLit str = newByteStringCLit (map (fromIntegral . ord) str)
newByteStringCLit :: [Word8] -> FCode CmmLit
newByteStringCLit bytes
= do { uniq <- newUnique
- ; let (lit, decl) = mkByteStringCLit uniq bytes
+ ; let (lit, decl) = mkByteStringCLit (mkStringLitLabel uniq) bytes
; emitDecl decl
; return lit }
diff --git a/compiler/coreSyn/CoreLint.hs b/compiler/coreSyn/CoreLint.hs
index f9e7f863c4..c09b4a0288 100644
--- a/compiler/coreSyn/CoreLint.hs
+++ b/compiler/coreSyn/CoreLint.hs
@@ -30,6 +30,7 @@ import Bag
import Literal
import DataCon
import TysWiredIn
+import TysPrim
import TcType ( isFloatingTy )
import Var
import VarEnv
@@ -480,14 +481,25 @@ lintSingleBinding top_lvl_flag rec_flag (binder,rhs)
-- Check the let/app invariant
-- See Note [CoreSyn let/app invariant] in CoreSyn
; checkL (not (isUnliftedType binder_ty)
- || (isNonRec rec_flag && exprOkForSpeculation rhs))
+ || (isNonRec rec_flag && exprOkForSpeculation rhs)
+ || exprIsLiteralString rhs)
(mkRhsPrimMsg binder rhs)
- -- Check that if the binder is top-level or recursive, it's not demanded
+ -- Check that if the binder is top-level or recursive, it's not
+ -- demanded. Primitive string literals are exempt as there is no
+ -- computation to perform, see Note [CoreSyn top-level string literals].
; checkL (not (isStrictId binder)
- || (isNonRec rec_flag && not (isTopLevel top_lvl_flag)))
+ || (isNonRec rec_flag && not (isTopLevel top_lvl_flag))
+ || exprIsLiteralString rhs)
(mkStrictMsg binder)
+ -- Check that if the binder is at the top level and has type Addr#,
+ -- that it is a string literal, see
+ -- Note [CoreSyn top-level string literals].
+ ; checkL (not (isTopLevel top_lvl_flag && binder_ty `eqType` addrPrimTy)
+ || exprIsLiteralString rhs)
+ (mkTopNonLitStrMsg binder)
+
; flags <- getLintFlags
; when (lf_check_inline_loop_breakers flags
&& isStrongLoopBreaker (idOccInfo binder)
@@ -2033,6 +2045,10 @@ mkNonTopExternalNameMsg :: Id -> MsgDoc
mkNonTopExternalNameMsg binder
= hsep [text "Non-top-level binder has an external name:", ppr binder]
+mkTopNonLitStrMsg :: Id -> MsgDoc
+mkTopNonLitStrMsg binder
+ = hsep [text "Top-level Addr# binder has a non-literal rhs:", ppr binder]
+
mkKindErrMsg :: TyVar -> Type -> MsgDoc
mkKindErrMsg tyvar arg_ty
= vcat [text "Kinds don't match in type application:",
diff --git a/compiler/coreSyn/CorePrep.hs b/compiler/coreSyn/CorePrep.hs
index c93a121c23..fb650f61be 100644
--- a/compiler/coreSyn/CorePrep.hs
+++ b/compiler/coreSyn/CorePrep.hs
@@ -1168,7 +1168,9 @@ deFloatTop (Floats _ floats)
= foldrOL get [] floats
where
get (FloatLet b) bs = occurAnalyseRHSs b : bs
- get b _ = pprPanic "corePrepPgm" (ppr b)
+ get (FloatCase var body _) bs =
+ occurAnalyseRHSs (NonRec var body) : bs
+ get b _ = pprPanic "corePrepPgm" (ppr b)
-- See Note [Dead code in CorePrep]
occurAnalyseRHSs (NonRec x e) = NonRec x (occurAnalyseExpr_NoBinderSwap e)
diff --git a/compiler/coreSyn/CoreSubst.hs b/compiler/coreSyn/CoreSubst.hs
index d98536caec..758a17b34d 100644
--- a/compiler/coreSyn/CoreSubst.hs
+++ b/compiler/coreSyn/CoreSubst.hs
@@ -1339,7 +1339,7 @@ than the ordinary arity of the dfun: see Note [DFun unfoldings] in CoreSyn
exprIsLiteral_maybe :: InScopeEnv -> CoreExpr -> Maybe Literal
-- Same deal as exprIsConApp_maybe, but much simpler
-- Nevertheless we do need to look through unfoldings for
--- Integer literals, which are vigorously hoisted to top level
+-- Integer and string literals, which are vigorously hoisted to top level
-- and not subsequently inlined
exprIsLiteral_maybe env@(_, id_unf) e
= case e of
diff --git a/compiler/coreSyn/CoreSyn.hs b/compiler/coreSyn/CoreSyn.hs
index fd0cf3ed26..4dfd9c3dae 100644
--- a/compiler/coreSyn/CoreSyn.hs
+++ b/compiler/coreSyn/CoreSyn.hs
@@ -191,7 +191,9 @@ These data types are the heart of the compiler
--
-- The right hand sides of all top-level and recursive @let@s
-- /must/ be of lifted type (see "Type#type_classification" for
--- the meaning of /lifted/ vs. /unlifted/).
+-- the meaning of /lifted/ vs. /unlifted/). There is one exception
+-- to this rule, top-level @let@s are allowed to bind primitive
+-- string literals, see Note [CoreSyn top-level string literals].
--
-- See Note [CoreSyn let/app invariant]
-- See Note [Levity polymorphism invariants]
@@ -361,6 +363,46 @@ Note [CoreSyn letrec invariant]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
See #letrec_invariant#
+Note [CoreSyn top-level string literals]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+As an exception to the usual rule that top-level binders must be lifted,
+we allow binding primitive string literals (of type Addr#) of type Addr# at the
+top level. This allows us to share string literals earlier in the pipeline and
+crucially allows other optimizations in the Core2Core pipeline to fire.
+Consider,
+
+ f n = let a::Addr# = "foo"#
+ in \x -> blah
+
+In order to be able to inline `f`, we would like to float `a` to the top.
+Another option would be to inline `a`, but that would lead to duplicating string
+literals, which we want to avoid. See Trac #8472.
+
+The solution is simply to allow top-level unlifted binders. We can't allow
+arbitrary unlifted expression at the top-level though, unlifted binders cannot
+be thunks, so we just allow string literals.
+
+Also see Note [Compilation plan for top-level string literals].
+
+Note [Compilation plan for top-level string literals]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Here is a summary on how top-level string literals are handled by various
+parts of the compilation pipeline.
+
+* In the source language, there is no way to bind a primitive string literal
+ at the top leve.
+
+* In Core, we have a special rule that permits top-level Addr# bindings. See
+ Note [CoreSyn top-level string literals]. Core-to-core passes may introduce
+ new top-level string literals.
+
+* In STG, top-level string literals are explicitly represented in the syntax
+ tree.
+
+* A top-level string literal may end up exported from a module. In this case,
+ in the object file, the content of the exported literal is given a label with
+ the _bytes suffix.
+
Note [CoreSyn let/app invariant]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
The let/app invariant
diff --git a/compiler/coreSyn/CoreUtils.hs b/compiler/coreSyn/CoreUtils.hs
index 2505fcfff4..b5d248e579 100644
--- a/compiler/coreSyn/CoreUtils.hs
+++ b/compiler/coreSyn/CoreUtils.hs
@@ -29,6 +29,7 @@ module CoreUtils (
exprIsHNF, exprOkForSpeculation, exprOkForSideEffects, exprIsWorkFree,
exprIsBig, exprIsConLike,
rhsIsStatic, isCheapApp, isExpandableApp,
+ exprIsLiteralString, exprIsTopLevelBindable,
-- * Equality
cheapEqExpr, cheapEqExpr', eqExpr,
@@ -1581,6 +1582,17 @@ tick is there to tell us that the expression was evaluated, so we
don't want to discard a seq on it.
-}
+-- | Can we bind this 'CoreExpr' at the top level?
+exprIsTopLevelBindable :: CoreExpr -> Bool
+-- See Note [CoreSyn top-level string literals]
+exprIsTopLevelBindable expr
+ = exprIsLiteralString expr
+ || not (isUnliftedType (exprType expr))
+
+exprIsLiteralString :: CoreExpr -> Bool
+exprIsLiteralString (Lit (MachStr _)) = True
+exprIsLiteralString _ = False
+
{-
************************************************************************
* *
diff --git a/compiler/ghci/ByteCodeAsm.hs b/compiler/ghci/ByteCodeAsm.hs
index 817e379003..9eb730ff1a 100644
--- a/compiler/ghci/ByteCodeAsm.hs
+++ b/compiler/ghci/ByteCodeAsm.hs
@@ -89,9 +89,10 @@ bcoFreeNames bco
-- Top level assembler fn.
assembleBCOs
- :: HscEnv -> [ProtoBCO Name] -> [TyCon] -> Maybe ModBreaks
+ :: HscEnv -> [ProtoBCO Name] -> [TyCon] -> [RemotePtr ()]
+ -> Maybe ModBreaks
-> IO CompiledByteCode
-assembleBCOs hsc_env proto_bcos tycons modbreaks = do
+assembleBCOs hsc_env proto_bcos tycons top_strs modbreaks = do
itblenv <- mkITbls hsc_env tycons
bcos <- mapM (assembleBCO (hsc_dflags hsc_env)) proto_bcos
(bcos',ptrs) <- mallocStrings hsc_env bcos
@@ -99,7 +100,7 @@ assembleBCOs hsc_env proto_bcos tycons modbreaks = do
{ bc_bcos = bcos'
, bc_itbls = itblenv
, bc_ffis = concat (map protoBCOFFIs proto_bcos)
- , bc_strs = ptrs
+ , bc_strs = top_strs ++ ptrs
, bc_breaks = modbreaks
}
diff --git a/compiler/ghci/ByteCodeGen.hs b/compiler/ghci/ByteCodeGen.hs
index a4373b459f..f4b224d2a5 100644
--- a/compiler/ghci/ByteCodeGen.hs
+++ b/compiler/ghci/ByteCodeGen.hs
@@ -1,4 +1,4 @@
-{-# LANGUAGE CPP, MagicHash, RecordWildCards #-}
+{-# LANGUAGE CPP, MagicHash, RecordWildCards, BangPatterns #-}
{-# OPTIONS_GHC -fprof-auto-top #-}
--
-- (c) The University of Glasgow 2002-2006
@@ -48,6 +48,7 @@ import SMRep
import Bitmap
import OrdList
import Maybes
+import VarEnv
import Data.List
import Foreign
@@ -60,6 +61,7 @@ import Control.Arrow ( second )
import Control.Exception
import Data.Array
+import Data.ByteString (ByteString)
import Data.Map (Map)
import Data.IntMap (IntMap)
import qualified Data.Map as Map
@@ -85,12 +87,18 @@ byteCodeGen hsc_env this_mod binds tycs mb_modBreaks
= withTiming (pure dflags)
(text "ByteCodeGen"<+>brackets (ppr this_mod))
(const ()) $ do
- let flatBinds = [ (bndr, simpleFreeVars rhs)
- | (bndr, rhs) <- flattenBinds binds]
+ -- Split top-level binds into strings and others.
+ -- See Note [generating code for top-level string literal bindings].
+ let (strings, flatBinds) = splitEithers $ do
+ (bndr, rhs) <- flattenBinds binds
+ return $ case rhs of
+ Lit (MachStr str) -> Left (bndr, str)
+ _ -> Right (bndr, simpleFreeVars rhs)
+ stringPtrs <- allocateTopStrings hsc_env strings
us <- mkSplitUniqSupply 'y'
(BcM_State{..}, proto_bcos) <-
- runBc hsc_env us this_mod mb_modBreaks $
+ runBc hsc_env us this_mod mb_modBreaks (mkVarEnv stringPtrs) $
mapM schemeTopBind flatBinds
when (notNull ffis)
@@ -99,7 +107,7 @@ byteCodeGen hsc_env this_mod binds tycs mb_modBreaks
dumpIfSet_dyn dflags Opt_D_dump_BCOs
"Proto-BCOs" (vcat (intersperse (char ' ') (map ppr proto_bcos)))
- cbc <- assembleBCOs hsc_env proto_bcos tycs
+ cbc <- assembleBCOs hsc_env proto_bcos tycs (map snd stringPtrs)
(case modBreaks of
Nothing -> Nothing
Just mb -> Just mb{ modBreaks_breakInfo = breakInfo })
@@ -116,6 +124,29 @@ byteCodeGen hsc_env this_mod binds tycs mb_modBreaks
where dflags = hsc_dflags hsc_env
+allocateTopStrings
+ :: HscEnv
+ -> [(Id, ByteString)]
+ -> IO [(Var, RemotePtr ())]
+allocateTopStrings hsc_env topStrings = do
+ let !(bndrs, strings) = unzip topStrings
+ ptrs <- iservCmd hsc_env $ MallocStrings strings
+ return $ zip bndrs ptrs
+
+{-
+Note [generating code for top-level string literal bindings]
+
+Here is a summary on how the byte code generator deals with top-level string
+literals:
+
+1. Top-level string literal bindings are spearted from the rest of the module.
+
+2. The strings are allocated via iservCmd, in allocateTopStrings
+
+3. The mapping from binders to allocated strings (topStrings) are maintained in
+ BcM and used when generating code for variable references.
+-}
+
-- -----------------------------------------------------------------------------
-- Generating byte code for an expression
@@ -136,8 +167,8 @@ coreExprToBCOs hsc_env this_mod expr
-- the uniques are needed to generate fresh variables when we introduce new
-- let bindings for ticked expressions
us <- mkSplitUniqSupply 'y'
- (BcM_State _dflags _us _this_mod _final_ctr mallocd _ _ , proto_bco)
- <- runBc hsc_env us this_mod Nothing $
+ (BcM_State _dflags _us _this_mod _final_ctr mallocd _ _ _, proto_bco)
+ <- runBc hsc_env us this_mod Nothing emptyVarEnv $
schemeTopBind (invented_id, simpleFreeVars expr)
when (notNull mallocd)
@@ -1356,11 +1387,16 @@ pushAtom d p (AnnVar v)
-- slots on to the top of the stack.
| otherwise -- v must be a global variable
- = do dflags <- getDynFlags
- let sz :: Word16
- sz = fromIntegral (idSizeW dflags v)
- MASSERT(sz == 1)
- return (unitOL (PUSH_G (getName v)), sz)
+ = do topStrings <- getTopStrings
+ case lookupVarEnv topStrings v of
+ Just ptr -> pushAtom d p $ AnnLit $ MachWord $ fromIntegral $
+ ptrToWordPtr $ fromRemotePtr ptr
+ Nothing -> do
+ dflags <- getDynFlags
+ let sz :: Word16
+ sz = fromIntegral (idSizeW dflags v)
+ MASSERT(sz == 1)
+ return (unitOL (PUSH_G (getName v)), sz)
pushAtom _ _ (AnnLit lit) = do
@@ -1659,6 +1695,8 @@ data BcM_State
-- Should be free()d when it is GCd
, modBreaks :: Maybe ModBreaks -- info about breakpoints
, breakInfo :: IntMap CgBreakInfo
+ , topStrings :: IdEnv (RemotePtr ()) -- top-level string literals
+ -- See Note [generating code for top-level string literal bindings].
}
newtype BcM r = BcM (BcM_State -> IO (BcM_State, r))
@@ -1668,10 +1706,12 @@ ioToBc io = BcM $ \st -> do
x <- io
return (st, x)
-runBc :: HscEnv -> UniqSupply -> Module -> Maybe ModBreaks -> BcM r
+runBc :: HscEnv -> UniqSupply -> Module -> Maybe ModBreaks
+ -> IdEnv (RemotePtr ())
+ -> BcM r
-> IO (BcM_State, r)
-runBc hsc_env us this_mod modBreaks (BcM m)
- = m (BcM_State hsc_env us this_mod 0 [] modBreaks IntMap.empty)
+runBc hsc_env us this_mod modBreaks topStrings (BcM m)
+ = m (BcM_State hsc_env us this_mod 0 [] modBreaks IntMap.empty topStrings)
thenBc :: BcM a -> (a -> BcM b) -> BcM b
thenBc (BcM expr) cont = BcM $ \st0 -> do
@@ -1746,6 +1786,9 @@ newUnique = BcM $
getCurrentModule :: BcM Module
getCurrentModule = BcM $ \st -> return (st, thisModule st)
+getTopStrings :: BcM (IdEnv (RemotePtr ()))
+getTopStrings = BcM $ \st -> return (st, topStrings st)
+
newId :: Type -> BcM Id
newId ty = do
uniq <- newUnique
diff --git a/compiler/main/HscMain.hs b/compiler/main/HscMain.hs
index b163cbbe21..092f04c1aa 100644
--- a/compiler/main/HscMain.hs
+++ b/compiler/main/HscMain.hs
@@ -1363,7 +1363,7 @@ hscCompileCmmFile hsc_env filename output_filename = runHsc hsc_env $ do
doCodeGen :: HscEnv -> Module -> [TyCon]
-> CollectedCCs
- -> [StgBinding]
+ -> [StgTopBinding]
-> HpcInfo
-> IO (Stream IO CmmGroup ())
-- Note we produce a 'Stream' of CmmGroups, so that the
@@ -1429,7 +1429,7 @@ doCodeGen hsc_env this_mod data_tycons
myCoreToStg :: DynFlags -> Module -> CoreProgram
- -> IO ( [StgBinding] -- output program
+ -> IO ( [StgTopBinding] -- output program
, CollectedCCs) -- cost centre info (declared and used)
myCoreToStg dflags this_mod prepd_binds = do
let stg_binds
diff --git a/compiler/prelude/PrelRules.hs b/compiler/prelude/PrelRules.hs
index e98fd9f6a3..c2938c7dfd 100644
--- a/compiler/prelude/PrelRules.hs
+++ b/compiler/prelude/PrelRules.hs
@@ -987,9 +987,9 @@ builtinRules :: [CoreRule]
builtinRules
= [BuiltinRule { ru_name = fsLit "AppendLitString",
ru_fn = unpackCStringFoldrName,
- ru_nargs = 4, ru_try = \_ _ _ -> match_append_lit },
+ ru_nargs = 4, ru_try = match_append_lit },
BuiltinRule { ru_name = fsLit "EqString", ru_fn = eqStringName,
- ru_nargs = 2, ru_try = \dflags _ _ -> match_eq_string dflags },
+ ru_nargs = 2, ru_try = match_eq_string },
BuiltinRule { ru_name = fsLit "Inline", ru_fn = inlineIdName,
ru_nargs = 2, ru_try = \_ _ _ -> match_inline },
BuiltinRule { ru_name = fsLit "MagicDict", ru_fn = idName magicDictId,
@@ -1133,37 +1133,42 @@ builtinIntegerRules =
-- unpackFoldrCString# "foo" c (unpackFoldrCString# "baz" c n)
-- = unpackFoldrCString# "foobaz" c n
-match_append_lit :: [Expr CoreBndr] -> Maybe (Expr CoreBndr)
-match_append_lit [Type ty1,
- Lit (MachStr s1),
- c1,
- Var unpk `App` Type ty2
- `App` Lit (MachStr s2)
- `App` c2
- `App` n
- ]
+match_append_lit :: RuleFun
+match_append_lit _ id_unf _
+ [ Type ty1
+ , lit1
+ , c1
+ , Var unpk `App` Type ty2
+ `App` lit2
+ `App` c2
+ `App` n
+ ]
| unpk `hasKey` unpackCStringFoldrIdKey &&
c1 `cheapEqExpr` c2
+ , Just (MachStr s1) <- exprIsLiteral_maybe id_unf lit1
+ , Just (MachStr s2) <- exprIsLiteral_maybe id_unf lit2
= ASSERT( ty1 `eqType` ty2 )
Just (Var unpk `App` Type ty1
`App` Lit (MachStr (s1 `BS.append` s2))
`App` c1
`App` n)
-match_append_lit _ = Nothing
+match_append_lit _ _ _ _ = Nothing
---------------------------------------------------
-- The rule is this:
-- eqString (unpackCString# (Lit s1)) (unpackCString# (Lit s2) = s1==s2
-match_eq_string :: DynFlags -> [Expr CoreBndr] -> Maybe (Expr CoreBndr)
-match_eq_string _ [Var unpk1 `App` Lit (MachStr s1),
- Var unpk2 `App` Lit (MachStr s2)]
- | unpk1 `hasKey` unpackCStringIdKey,
- unpk2 `hasKey` unpackCStringIdKey
+match_eq_string :: RuleFun
+match_eq_string _ id_unf _
+ [Var unpk1 `App` lit1, Var unpk2 `App` lit2]
+ | unpk1 `hasKey` unpackCStringIdKey
+ , unpk2 `hasKey` unpackCStringIdKey
+ , Just (MachStr s1) <- exprIsLiteral_maybe id_unf lit1
+ , Just (MachStr s2) <- exprIsLiteral_maybe id_unf lit2
= Just (if s1 == s2 then trueValBool else falseValBool)
-match_eq_string _ _ = Nothing
+match_eq_string _ _ _ _ = Nothing
---------------------------------------------------
diff --git a/compiler/profiling/SCCfinal.hs b/compiler/profiling/SCCfinal.hs
index ee37ab14b6..9704e0b132 100644
--- a/compiler/profiling/SCCfinal.hs
+++ b/compiler/profiling/SCCfinal.hs
@@ -42,8 +42,8 @@ stgMassageForProfiling
:: DynFlags
-> Module -- module name
-> UniqSupply -- unique supply
- -> [StgBinding] -- input
- -> (CollectedCCs, [StgBinding])
+ -> [StgTopBinding] -- input
+ -> (CollectedCCs, [StgTopBinding])
stgMassageForProfiling dflags mod_name _us stg_binds
= let
@@ -69,24 +69,28 @@ stgMassageForProfiling dflags mod_name _us stg_binds
all_cafs_ccs = mkSingletonCCS all_cafs_cc
----------
- do_top_bindings :: [StgBinding] -> MassageM [StgBinding]
+ do_top_bindings :: [StgTopBinding] -> MassageM [StgTopBinding]
do_top_bindings [] = return []
- do_top_bindings (StgNonRec b rhs : bs) = do
+ do_top_bindings (StgTopLifted (StgNonRec b rhs) : bs) = do
rhs' <- do_top_rhs b rhs
bs' <- do_top_bindings bs
- return (StgNonRec b rhs' : bs')
+ return (StgTopLifted (StgNonRec b rhs') : bs')
- do_top_bindings (StgRec pairs : bs) = do
+ do_top_bindings (StgTopLifted (StgRec pairs) : bs) = do
pairs2 <- mapM do_pair pairs
bs' <- do_top_bindings bs
- return (StgRec pairs2 : bs')
+ return (StgTopLifted (StgRec pairs2) : bs')
where
do_pair (b, rhs) = do
rhs2 <- do_top_rhs b rhs
return (b, rhs2)
+ do_top_bindings (b@StgTopStringLit{} : bs) = do
+ bs' <- do_top_bindings bs
+ return (b : bs')
+
----------
do_top_rhs :: Id -> StgRhs -> MassageM StgRhs
diff --git a/compiler/simplCore/CSE.hs b/compiler/simplCore/CSE.hs
index 54fbc5008c..e364c31cdc 100644
--- a/compiler/simplCore/CSE.hs
+++ b/compiler/simplCore/CSE.hs
@@ -15,6 +15,7 @@ import Var ( Var )
import Id ( Id, idType, idUnfolding, idInlineActivation
, zapIdOccInfo, zapIdUsageInfo )
import CoreUtils ( mkAltExpr
+ , exprIsLiteralString
, stripTicksE, stripTicksT, mkTicks )
import Literal ( litIsTrivial )
import Type ( tyConAppArgs )
@@ -253,22 +254,22 @@ had
-}
cseProgram :: CoreProgram -> CoreProgram
-cseProgram binds = snd (mapAccumL cseBind emptyCSEnv binds)
+cseProgram binds = snd (mapAccumL (cseBind True) emptyCSEnv binds)
-cseBind :: CSEnv -> CoreBind -> (CSEnv, CoreBind)
-cseBind env (NonRec b e)
+cseBind :: Bool -> CSEnv -> CoreBind -> (CSEnv, CoreBind)
+cseBind toplevel env (NonRec b e)
= (env2, NonRec b2 e1)
where
- e1 = tryForCSE env e
+ e1 = tryForCSE toplevel env e
(env1, b1) = addBinder env b
(env2, b2) = addBinding env1 b b1 e1
-cseBind env (Rec pairs)
+cseBind toplevel env (Rec pairs)
= (env2, Rec pairs')
where
(bndrs, rhss) = unzip pairs
(env1, bndrs1) = addRecBinders env bndrs
- rhss1 = map (tryForCSE env1) rhss
+ rhss1 = map (tryForCSE toplevel env1) rhss
-- Process rhss in extended env1
(env2, pairs') = foldl do_one (env1, []) (zip3 bndrs bndrs1 rhss1)
do_one (env, pairs) (b, b1, e1)
@@ -311,8 +312,38 @@ addBinding env in_id out_id rhs'
Lit l -> litIsTrivial l
_ -> False
-tryForCSE :: CSEnv -> InExpr -> OutExpr
-tryForCSE env expr
+{-
+Note [Take care with literal strings]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Consider this example:
+
+ x = "foo"#
+ y = "foo"#
+ ...x...y...x...y....
+
+We would normally turn this into:
+
+ x = "foo"#
+ y = x
+ ...x...x...x...x....
+
+But this breaks an invariant of Core, namely that the RHS of a top-level binding
+of type Addr# must be a string literal, not another variable. See Note
+[CoreSyn top-level string literals] in CoreSyn.
+
+For this reason, we special case top-level bindings to literal strings and leave
+the original RHS unmodified. This produces:
+
+ x = "foo"#
+ y = "foo"#
+ ...x...x...x...x....
+-}
+
+tryForCSE :: Bool -> CSEnv -> InExpr -> OutExpr
+tryForCSE toplevel env expr
+ | toplevel && exprIsLiteralString expr = expr
+ -- See Note [Take care with literal strings]
| Just e <- lookupCSEnv env expr'' = mkTicks ticks e
| otherwise = expr'
-- The varToCoreExpr is needed if we have
@@ -333,12 +364,12 @@ cseExpr env (Type t) = Type (substTy (csEnvSubst env) t)
cseExpr env (Coercion c) = Coercion (substCo (csEnvSubst env) c)
cseExpr _ (Lit lit) = Lit lit
cseExpr env (Var v) = lookupSubst env v
-cseExpr env (App f a) = App (cseExpr env f) (tryForCSE env a)
+cseExpr env (App f a) = App (cseExpr env f) (tryForCSE False env a)
cseExpr env (Tick t e) = Tick t (cseExpr env e)
cseExpr env (Cast e co) = Cast (cseExpr env e) (substCo (csEnvSubst env) co)
cseExpr env (Lam b e) = let (env', b') = addBinder env b
in Lam b' (cseExpr env' e)
-cseExpr env (Let bind e) = let (env', bind') = cseBind env bind
+cseExpr env (Let bind e) = let (env', bind') = cseBind False env bind
in Let bind' (cseExpr env' e)
cseExpr env (Case e bndr ty alts) = cseCase env e bndr ty alts
@@ -346,7 +377,7 @@ cseCase :: CSEnv -> InExpr -> InId -> InType -> [InAlt] -> OutExpr
cseCase env scrut bndr ty alts
= Case scrut1 bndr3 ty (map cse_alt alts)
where
- scrut1 = tryForCSE env scrut
+ scrut1 = tryForCSE False env scrut
bndr1 = zapIdOccInfo bndr
-- Zapping the OccInfo is needed because the extendCSEnv
@@ -369,14 +400,14 @@ cseCase env scrut bndr ty alts
-- case x of { True -> ....True.... }
-- Don't replace True by x!
-- Hence the 'null args', which also deal with literals and DEFAULT
- = (DataAlt con, args', tryForCSE new_env rhs)
+ = (DataAlt con, args', tryForCSE False new_env rhs)
where
(env', args') = addBinders alt_env args
new_env = extendCSEnv env' con_expr con_target
con_expr = mkAltExpr (DataAlt con) args' arg_tys
cse_alt (con, args, rhs)
- = (con, args', tryForCSE env' rhs)
+ = (con, args', tryForCSE False env' rhs)
where
(env', args') = addBinders alt_env args
diff --git a/compiler/simplCore/SetLevels.hs b/compiler/simplCore/SetLevels.hs
index 0b81f29a7d..955d3ba89d 100644
--- a/compiler/simplCore/SetLevels.hs
+++ b/compiler/simplCore/SetLevels.hs
@@ -67,6 +67,7 @@ import CoreMonad ( FloatOutSwitches(..) )
import CoreUtils ( exprType
, isExprLevPoly
, exprOkForSpeculation
+ , exprIsTopLevelBindable
, collectMakeStaticArgs
)
import CoreArity ( exprBotStrictness_maybe )
@@ -494,7 +495,7 @@ lvlMFE strict_ctxt env ann_expr
lvlExpr env ann_expr
| Just (wrap_float, wrap_use)
- <- canFloat_maybe rhs_env strict_ctxt float_is_lam expr_ty
+ <- canFloat_maybe rhs_env strict_ctxt float_is_lam expr
= do { expr1 <- lvlExpr rhs_env ann_expr
; let abs_expr = mkLams abs_vars_w_lvls (wrap_float expr1)
; var <- newLvlVar abs_expr
@@ -507,7 +508,6 @@ lvlMFE strict_ctxt env ann_expr
where
expr = deAnnotate ann_expr
- expr_ty = exprType expr
fvs = freeVarsOf ann_expr
is_bot = isJust mb_bot_str
mb_bot_str = exprBotStrictness_maybe expr
@@ -544,12 +544,12 @@ lvlMFE strict_ctxt env ann_expr
canFloat_maybe :: LevelEnv
-> Bool -- Strict context
-> Bool -- The float has a value lambda
- -> Type
+ -> CoreExpr
-> Maybe ( LevelledExpr -> LevelledExpr -- Wrep the flaot
, LevelledExpr -> LevelledExpr) -- Wrap the use
-- See Note [Floating MFEs of unlifted type]
-canFloat_maybe env strict_ctxt float_is_lam expr_ty
- | float_is_lam || not (isUnliftedType expr_ty)
+canFloat_maybe env strict_ctxt float_is_lam expr
+ | float_is_lam || exprIsTopLevelBindable expr
= Just (id, id) -- No wrapping needed if the type is lifted, or
-- if we are wrapping it in one or more value lambdas
@@ -568,6 +568,7 @@ canFloat_maybe env strict_ctxt float_is_lam expr_ty
| otherwise -- e.g. do not float unboxed tuples
= Nothing
+ where expr_ty = exprType expr
{- Note [Floating MFEs of unlifted type]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
diff --git a/compiler/simplCore/SimplEnv.hs b/compiler/simplCore/SimplEnv.hs
index 8a26220029..99d8291491 100644
--- a/compiler/simplCore/SimplEnv.hs
+++ b/compiler/simplCore/SimplEnv.hs
@@ -332,7 +332,8 @@ data Floats = Floats (OrdList OutBind) FloatFlag
-- See Note [Simplifier floats]
data FloatFlag
- = FltLifted -- All bindings are lifted and lazy
+ = FltLifted -- All bindings are lifted and lazy *or*
+ -- consist of a single primitive string literal
-- Hence ok to float to top level, or recursive
| FltOkSpec -- All bindings are FltLifted *or*
@@ -395,6 +396,9 @@ unitFloat bind = Floats (unitOL bind) (flag bind)
flag (Rec {}) = FltLifted
flag (NonRec bndr rhs)
| not (isStrictId bndr) = FltLifted
+ | exprIsLiteralString rhs = FltLifted
+ -- String literals can be floated freely.
+ -- See Note [CoreSyn top-level string ltierals] in CoreSyn.
| exprOkForSpeculation rhs = FltOkSpec -- Unlifted, and lifted but ok-for-spec (eg HNF)
| otherwise = ASSERT2( not (isUnliftedType (idType bndr)), ppr bndr )
FltCareful
diff --git a/compiler/simplCore/Simplify.hs b/compiler/simplCore/Simplify.hs
index 2c8ff5e941..9e5c00d284 100644
--- a/compiler/simplCore/Simplify.hs
+++ b/compiler/simplCore/Simplify.hs
@@ -581,7 +581,7 @@ makeTrivialWithInfo :: TopLevelFlag -> SimplEnv
-- Returned SimplEnv has same substitution as incoming one
makeTrivialWithInfo top_lvl env context info expr
| exprIsTrivial expr -- Already trivial
- || not (bindingOk top_lvl expr expr_ty) -- Cannot trivialise
+ || not (bindingOk top_lvl expr) -- Cannot trivialise
-- See Note [Cannot trivialise]
= return (env, expr)
| otherwise -- See Note [Take care] below
@@ -603,11 +603,11 @@ makeTrivialWithInfo top_lvl env context info expr
where
expr_ty = exprType expr
-bindingOk :: TopLevelFlag -> CoreExpr -> Type -> Bool
+bindingOk :: TopLevelFlag -> CoreExpr -> Bool
-- True iff we can have a binding of this expression at this level
-- Precondition: the type is the type of the expression
-bindingOk top_lvl _ expr_ty
- | isTopLevel top_lvl = not (isUnliftedType expr_ty)
+bindingOk top_lvl expr
+ | isTopLevel top_lvl = exprIsTopLevelBindable expr
| otherwise = True
{-
@@ -626,12 +626,16 @@ so we don't want to turn it into
because we'll just end up inlining x back, and that makes the
simplifier loop. Better not to ANF-ise it at all.
-A case in point is literal strings (a MachStr is not regarded as
-trivial):
+Literal strings are an exception.
foo = Ptr "blob"#
-We don't want to ANF-ise this.
+We want to turn this into:
+
+ foo1 = "blob"#
+ foo = Ptr foo1
+
+See Note [CoreSyn top-level string literals] in CoreSyn.
************************************************************************
* *
diff --git a/compiler/simplStg/SimplStg.hs b/compiler/simplStg/SimplStg.hs
index 406e415287..08f9d79782 100644
--- a/compiler/simplStg/SimplStg.hs
+++ b/compiler/simplStg/SimplStg.hs
@@ -14,7 +14,7 @@ import StgSyn
import CostCentre ( CollectedCCs )
import SCCfinal ( stgMassageForProfiling )
-import StgLint ( lintStgBindings )
+import StgLint ( lintStgTopBindings )
import StgStats ( showStgStats )
import UnariseStg ( unarise )
import StgCse ( stgCse )
@@ -29,8 +29,8 @@ import Control.Monad
stg2stg :: DynFlags -- includes spec of what stg-to-stg passes to do
-> Module -- module name (profiling only)
- -> [StgBinding] -- input...
- -> IO ( [StgBinding] -- output program...
+ -> [StgTopBinding] -- input...
+ -> IO ( [StgTopBinding] -- output program...
, CollectedCCs) -- cost centre information (declared and used)
stg2stg dflags module_name binds
@@ -48,19 +48,19 @@ stg2stg dflags module_name binds
<- foldM do_stg_pass (binds', us0, ccs) (getStgToDo dflags)
; dumpIfSet_dyn dflags Opt_D_dump_stg "Pre unarise:"
- (pprStgBindings processed_binds)
+ (pprStgTopBindings processed_binds)
; let un_binds = unarise us1 processed_binds
; dumpIfSet_dyn dflags Opt_D_dump_stg "STG syntax:"
- (pprStgBindings un_binds)
+ (pprStgTopBindings un_binds)
; return (un_binds, cost_centres)
}
where
stg_linter = if gopt Opt_DoStgLinting dflags
- then lintStgBindings
+ then lintStgTopBindings
else ( \ _whodunnit binds -> binds )
-------------------------------------------
diff --git a/compiler/simplStg/StgCse.hs b/compiler/simplStg/StgCse.hs
index 7454d24a2c..3e141439ed 100644
--- a/compiler/simplStg/StgCse.hs
+++ b/compiler/simplStg/StgCse.hs
@@ -240,7 +240,7 @@ substPairs env bndrs = mapAccumL go env bndrs
-- Main entry point
-stgCse :: [InStgBinding] -> [OutStgBinding]
+stgCse :: [InStgTopBinding] -> [OutStgTopBinding]
stgCse binds = snd $ mapAccumL stgCseTopLvl emptyInScopeSet binds
-- Top level bindings.
@@ -250,15 +250,16 @@ stgCse binds = snd $ mapAccumL stgCseTopLvl emptyInScopeSet binds
-- But we still have to collect the set of in-scope variables, otherwise
-- uniqAway might shadow a top-level closure.
-stgCseTopLvl :: InScopeSet -> InStgBinding -> (InScopeSet, OutStgBinding)
-stgCseTopLvl in_scope (StgNonRec bndr rhs)
+stgCseTopLvl :: InScopeSet -> InStgTopBinding -> (InScopeSet, OutStgTopBinding)
+stgCseTopLvl in_scope t@(StgTopStringLit _ _) = (in_scope, t)
+stgCseTopLvl in_scope (StgTopLifted (StgNonRec bndr rhs))
= (in_scope'
- , StgNonRec bndr (stgCseTopLvlRhs in_scope rhs))
+ , StgTopLifted (StgNonRec bndr (stgCseTopLvlRhs in_scope rhs)))
where in_scope' = in_scope `extendInScopeSet` bndr
-stgCseTopLvl in_scope (StgRec eqs)
+stgCseTopLvl in_scope (StgTopLifted (StgRec eqs))
= ( in_scope'
- , StgRec [ (bndr, stgCseTopLvlRhs in_scope' rhs) | (bndr, rhs) <- eqs ])
+ , StgTopLifted (StgRec [ (bndr, stgCseTopLvlRhs in_scope' rhs) | (bndr, rhs) <- eqs ]))
where in_scope' = in_scope `extendInScopeSetList` [ bndr | (bndr, _) <- eqs ]
stgCseTopLvlRhs :: InScopeSet -> InStgRhs -> OutStgRhs
diff --git a/compiler/simplStg/StgStats.hs b/compiler/simplStg/StgStats.hs
index 38544822d2..3f75ae23fa 100644
--- a/compiler/simplStg/StgStats.hs
+++ b/compiler/simplStg/StgStats.hs
@@ -75,7 +75,7 @@ countN = Map.singleton
************************************************************************
-}
-showStgStats :: [StgBinding] -> String
+showStgStats :: [StgTopBinding] -> String
showStgStats prog
= "STG Statistics:\n\n"
@@ -99,10 +99,8 @@ showStgStats prog
s (SingleEntryBinds _) = "SingleEntryBinds_Nested "
s (UpdatableBinds _) = "UpdatableBinds_Nested "
-gatherStgStats :: [StgBinding] -> StatEnv
-
-gatherStgStats binds
- = combineSEs (map (statBinding True{-top-level-}) binds)
+gatherStgStats :: [StgTopBinding] -> StatEnv
+gatherStgStats binds = combineSEs (map statTopBinding binds)
{-
************************************************************************
@@ -112,6 +110,10 @@ gatherStgStats binds
************************************************************************
-}
+statTopBinding :: StgTopBinding -> StatEnv
+statTopBinding (StgTopStringLit _ _) = countOne Literals
+statTopBinding (StgTopLifted bind) = statBinding True bind
+
statBinding :: Bool -- True <=> top-level; False <=> nested
-> StgBinding
-> StatEnv
diff --git a/compiler/simplStg/UnariseStg.hs b/compiler/simplStg/UnariseStg.hs
index aa42586cd1..3f67bc278f 100644
--- a/compiler/simplStg/UnariseStg.hs
+++ b/compiler/simplStg/UnariseStg.hs
@@ -264,8 +264,13 @@ extendRho rho x (UnaryVal val)
--------------------------------------------------------------------------------
-unarise :: UniqSupply -> [StgBinding] -> [StgBinding]
-unarise us binds = initUs_ us (mapM (unariseBinding emptyVarEnv) binds)
+unarise :: UniqSupply -> [StgTopBinding] -> [StgTopBinding]
+unarise us binds = initUs_ us (mapM (unariseTopBinding emptyVarEnv) binds)
+
+unariseTopBinding :: UnariseEnv -> StgTopBinding -> UniqSM StgTopBinding
+unariseTopBinding rho (StgTopLifted bind)
+ = StgTopLifted <$> unariseBinding rho bind
+unariseTopBinding _ bind@StgTopStringLit{} = return bind
unariseBinding :: UnariseEnv -> StgBinding -> UniqSM StgBinding
unariseBinding rho (StgNonRec x rhs)
diff --git a/compiler/stgSyn/CoreToStg.hs b/compiler/stgSyn/CoreToStg.hs
index dcb923afea..37df9e2146 100644
--- a/compiler/stgSyn/CoreToStg.hs
+++ b/compiler/stgSyn/CoreToStg.hs
@@ -196,7 +196,7 @@ import Control.Monad (liftM, ap)
-- Setting variable info: top-level, binds, RHSs
-- --------------------------------------------------------------
-coreToStg :: DynFlags -> Module -> CoreProgram -> [StgBinding]
+coreToStg :: DynFlags -> Module -> CoreProgram -> [StgTopBinding]
coreToStg dflags this_mod pgm
= pgm'
where (_, _, pgm') = coreTopBindsToStg dflags this_mod emptyVarEnv pgm
@@ -211,7 +211,7 @@ coreTopBindsToStg
-> Module
-> IdEnv HowBound -- environment for the bindings
-> CoreProgram
- -> (IdEnv HowBound, FreeVarsInfo, [StgBinding])
+ -> (IdEnv HowBound, FreeVarsInfo, [StgTopBinding])
coreTopBindsToStg _ _ env [] = (env, emptyFVInfo, [])
coreTopBindsToStg dflags this_mod env (b:bs)
@@ -229,7 +229,14 @@ coreTopBindToStg
-> IdEnv HowBound
-> FreeVarsInfo -- Info about the body
-> CoreBind
- -> (IdEnv HowBound, FreeVarsInfo, StgBinding)
+ -> (IdEnv HowBound, FreeVarsInfo, StgTopBinding)
+
+coreTopBindToStg _ _ env body_fvs (NonRec id (Lit (MachStr str)))
+ -- top-level string literal
+ = let
+ env' = extendVarEnv env id how_bound
+ how_bound = LetBound TopLet 0
+ in (env', body_fvs, StgTopStringLit id str)
coreTopBindToStg dflags this_mod env body_fvs (NonRec id rhs)
= let
@@ -241,7 +248,7 @@ coreTopBindToStg dflags this_mod env body_fvs (NonRec id rhs)
(stg_rhs, fvs') <- coreToTopStgRhs dflags this_mod body_fvs (id,rhs)
return (stg_rhs, fvs')
- bind = StgNonRec id stg_rhs
+ bind = StgTopLifted $ StgNonRec id stg_rhs
in
ASSERT2(consistentCafInfo id bind, ppr id )
-- NB: previously the assertion printed 'rhs' and 'bind'
@@ -265,7 +272,7 @@ coreTopBindToStg dflags this_mod env body_fvs (Rec pairs)
let fvs' = unionFVInfos fvss'
return (stg_rhss, fvs')
- bind = StgRec (zip binders stg_rhss)
+ bind = StgTopLifted $ StgRec (zip binders stg_rhss)
in
ASSERT2(consistentCafInfo (head binders) bind, ppr binders)
(env', fvs' `unionFVInfo` body_fvs, bind)
@@ -275,7 +282,7 @@ coreTopBindToStg dflags this_mod env body_fvs (Rec pairs)
-- what CoreToStg has figured out about the binding's SRT. The
-- CafInfo will be exact in all cases except when CorePrep has
-- floated out a binding, in which case it will be approximate.
-consistentCafInfo :: Id -> GenStgBinding Var Id -> Bool
+consistentCafInfo :: Id -> GenStgTopBinding Var Id -> Bool
consistentCafInfo id bind
= WARN( not (exact || is_sat_thing) , ppr id <+> ppr id_marked_caffy <+> ppr binding_is_caffy )
safe
diff --git a/compiler/stgSyn/StgLint.hs b/compiler/stgSyn/StgLint.hs
index e31e7ae015..02d989cec0 100644
--- a/compiler/stgSyn/StgLint.hs
+++ b/compiler/stgSyn/StgLint.hs
@@ -6,7 +6,7 @@
{-# LANGUAGE CPP #-}
-module StgLint ( lintStgBindings ) where
+module StgLint ( lintStgTopBindings ) where
import StgSyn
@@ -54,12 +54,12 @@ generation. Solution: don't use it! (KSW 2000-05).
* *
************************************************************************
-@lintStgBindings@ is the top-level interface function.
+@lintStgTopBindings@ is the top-level interface function.
-}
-lintStgBindings :: String -> [StgBinding] -> [StgBinding]
+lintStgTopBindings :: String -> [StgTopBinding] -> [StgTopBinding]
-lintStgBindings whodunnit binds
+lintStgTopBindings whodunnit binds
= {-# SCC "StgLint" #-}
case (initL (lint_binds binds)) of
Nothing -> binds
@@ -68,17 +68,20 @@ lintStgBindings whodunnit binds
text whodunnit <+> text "***",
msg,
text "*** Offending Program ***",
- pprStgBindings binds,
+ pprStgTopBindings binds,
text "*** End of Offense ***"])
where
- lint_binds :: [StgBinding] -> LintM ()
+ lint_binds :: [StgTopBinding] -> LintM ()
lint_binds [] = return ()
lint_binds (bind:binds) = do
- binders <- lintStgBinds bind
+ binders <- lint_bind bind
addInScopeVars binders $
lint_binds binds
+ lint_bind (StgTopLifted bind) = lintStgBinds bind
+ lint_bind (StgTopStringLit v _) = return [v]
+
lintStgArg :: StgArg -> LintM (Maybe Type)
lintStgArg (StgLitArg lit) = return (Just (literalType lit))
lintStgArg (StgVarArg v) = lintStgVar v
diff --git a/compiler/stgSyn/StgSyn.hs b/compiler/stgSyn/StgSyn.hs
index 48e836cc56..56978f868c 100644
--- a/compiler/stgSyn/StgSyn.hs
+++ b/compiler/stgSyn/StgSyn.hs
@@ -14,7 +14,7 @@ generation.
module StgSyn (
GenStgArg(..),
- GenStgBinding(..), GenStgExpr(..), GenStgRhs(..),
+ GenStgTopBinding(..), GenStgBinding(..), GenStgExpr(..), GenStgRhs(..),
GenStgAlt, AltType(..),
UpdateFlag(..), isUpdatable,
@@ -24,11 +24,12 @@ module StgSyn (
combineStgBinderInfo,
-- a set of synonyms for the most common (only :-) parameterisation
- StgArg, StgBinding, StgExpr, StgRhs, StgAlt,
+ StgArg,
+ StgTopBinding, StgBinding, StgExpr, StgRhs, StgAlt,
-- a set of synonyms to distinguish in- and out variants
- InStgArg, InStgBinding, InStgExpr, InStgRhs, InStgAlt,
- OutStgArg, OutStgBinding, OutStgExpr, OutStgRhs, OutStgAlt,
+ InStgArg, InStgTopBinding, InStgBinding, InStgExpr, InStgRhs, InStgAlt,
+ OutStgArg, OutStgTopBinding, OutStgBinding, OutStgExpr, OutStgRhs, OutStgAlt,
-- StgOp
StgOp(..),
@@ -39,13 +40,14 @@ module StgSyn (
stgArgType,
stripStgTicksTop,
- pprStgBinding, pprStgBindings
+ pprStgBinding, pprStgTopBindings
) where
#include "HsVersions.h"
import CoreSyn ( AltCon, Tickish )
import CostCentre ( CostCentreStack )
+import Data.ByteString ( ByteString )
import Data.List ( intersperse )
import DataCon
import DynFlags
@@ -79,6 +81,12 @@ with respect to binder and occurrence information (just as in
@CoreSyn@):
-}
+-- | A top-level binding.
+data GenStgTopBinding bndr occ
+-- See Note [CoreSyn top-level string literals]
+ = StgTopLifted (GenStgBinding bndr occ)
+ | StgTopStringLit bndr ByteString
+
data GenStgBinding bndr occ
= StgNonRec bndr (GenStgRhs bndr occ)
| StgRec [(bndr, GenStgRhs bndr occ)]
@@ -421,11 +429,13 @@ stgRhsArity (StgRhsCon _ _ _) = 0
-- is that `TidyPgm` computed the CAF info on the `Id` but some transformations
-- have taken place since then.
-topStgBindHasCafRefs :: GenStgBinding bndr Id -> Bool
-topStgBindHasCafRefs (StgNonRec _ rhs)
+topStgBindHasCafRefs :: GenStgTopBinding bndr Id -> Bool
+topStgBindHasCafRefs (StgTopLifted (StgNonRec _ rhs))
= topRhsHasCafRefs rhs
-topStgBindHasCafRefs (StgRec binds)
+topStgBindHasCafRefs (StgTopLifted (StgRec binds))
= any topRhsHasCafRefs (map snd binds)
+topStgBindHasCafRefs StgTopStringLit{}
+ = False
topRhsHasCafRefs :: GenStgRhs bndr Id -> Bool
topRhsHasCafRefs (StgRhsClosure _ _ _ upd _ body)
@@ -550,6 +560,7 @@ data AltType
This happens to be the only one we use at the moment.
-}
+type StgTopBinding = GenStgTopBinding Id Id
type StgBinding = GenStgBinding Id Id
type StgArg = GenStgArg Id
type StgExpr = GenStgExpr Id Id
@@ -561,16 +572,18 @@ type StgAlt = GenStgAlt Id Id
See CoreSyn for precedence in Core land
-}
-type InStgBinding = StgBinding
-type InStgArg = StgArg
-type InStgExpr = StgExpr
-type InStgRhs = StgRhs
-type InStgAlt = StgAlt
-type OutStgBinding = StgBinding
-type OutStgArg = StgArg
-type OutStgExpr = StgExpr
-type OutStgRhs = StgRhs
-type OutStgAlt = StgAlt
+type InStgTopBinding = StgTopBinding
+type InStgBinding = StgBinding
+type InStgArg = StgArg
+type InStgExpr = StgExpr
+type InStgRhs = StgRhs
+type InStgAlt = StgAlt
+type OutStgTopBinding = StgTopBinding
+type OutStgBinding = StgBinding
+type OutStgArg = StgArg
+type OutStgExpr = StgExpr
+type OutStgRhs = StgRhs
+type OutStgAlt = StgAlt
{-
@@ -635,6 +648,15 @@ Robin Popplestone asked for semi-colon separators on STG binds; here's
hoping he likes terminators instead... Ditto for case alternatives.
-}
+pprGenStgTopBinding :: (OutputableBndr bndr, Outputable bdee, Ord bdee)
+ => GenStgTopBinding bndr bdee -> SDoc
+
+pprGenStgTopBinding (StgTopStringLit bndr str)
+ = hang (hsep [pprBndr LetBind bndr, equals])
+ 4 (pprHsBytes str <> semi)
+pprGenStgTopBinding (StgTopLifted bind)
+ = pprGenStgBinding bind
+
pprGenStgBinding :: (OutputableBndr bndr, Outputable bdee, Ord bdee)
=> GenStgBinding bndr bdee -> SDoc
@@ -653,13 +675,18 @@ pprGenStgBinding (StgRec pairs)
pprStgBinding :: StgBinding -> SDoc
pprStgBinding bind = pprGenStgBinding bind
-pprStgBindings :: [StgBinding] -> SDoc
-pprStgBindings binds = vcat $ intersperse blankLine (map pprGenStgBinding binds)
+pprStgTopBindings :: [StgTopBinding] -> SDoc
+pprStgTopBindings binds
+ = vcat $ intersperse blankLine (map pprGenStgTopBinding binds)
instance (Outputable bdee) => Outputable (GenStgArg bdee) where
ppr = pprStgArg
instance (OutputableBndr bndr, Outputable bdee, Ord bdee)
+ => Outputable (GenStgTopBinding bndr bdee) where
+ ppr = pprGenStgTopBinding
+
+instance (OutputableBndr bndr, Outputable bdee, Ord bdee)
=> Outputable (GenStgBinding bndr bdee) where
ppr = pprGenStgBinding