diff options
author | Takano Akio <tak@anoak.io> | 2017-01-18 18:26:47 -0500 |
---|---|---|
committer | Ben Gamari <ben@smart-cactus.org> | 2017-01-20 14:36:29 -0500 |
commit | d49b2bb21691892ca6ac8f2403e31f2a5e53feb3 (patch) | |
tree | cc8488acf59467899e4d3279a340577eec95310f /compiler/cmm | |
parent | a2a67b77c3048713541d1ed96ec0b95fb2542f9a (diff) | |
download | haskell-d49b2bb21691892ca6ac8f2403e31f2a5e53feb3.tar.gz |
Allow top-level string literals in Core (#8472)
This commit relaxes the invariants of the Core syntax so that a
top-level variable can be bound to a primitive string literal of type
Addr#.
This commit:
* Relaxes the invariants of Core, and allows top-level bindings whose
type is Addr# as long as their RHS is either a primitive string literal or
another variable.
* Allows the simplifier and the full-laziness transformer to float out
primitive string literals to the top level.
* Introduces the new StgGenTopBinding type to accommodate top-level Addr#
bindings.
* Introduces a new type of labels in the object code, with the suffix "_bytes",
for exported top-level Addr# bindings.
* Makes some built-in rules more robust. This was necessary to keep them
functional after the above changes.
This is a continuation of D2554.
Rebasing notes:
This had three slightly suspicious performance regressions:
* T12425: bytes allocated regressed by roughly 5%
* T4029: bytes allocated regressed by a bit over 1%
* T13035: bytes allocated regressed by a bit over 5%
These deserve additional investigation.
Rebased by: bgamari.
Test Plan: ./validate --slow
Reviewers: goldfire, trofi, simonmar, simonpj, austin, hvr, bgamari
Reviewed By: trofi, simonpj, bgamari
Subscribers: trofi, simonpj, gridaphobe, thomie
Differential Revision: https://phabricator.haskell.org/D2605
GHC Trac Issues: #8472
Diffstat (limited to 'compiler/cmm')
-rw-r--r-- | compiler/cmm/CLabel.hs | 13 | ||||
-rw-r--r-- | compiler/cmm/CmmInfo.hs | 2 | ||||
-rw-r--r-- | compiler/cmm/CmmUtils.hs | 9 |
3 files changed, 18 insertions, 6 deletions
diff --git a/compiler/cmm/CLabel.hs b/compiler/cmm/CLabel.hs index 0f3410a66e..ee87ef1b37 100644 --- a/compiler/cmm/CLabel.hs +++ b/compiler/cmm/CLabel.hs @@ -26,6 +26,7 @@ module CLabel ( mkApEntryLabel, mkApInfoTableLabel, mkClosureTableLabel, + mkBytesLabel, mkLocalClosureLabel, mkLocalInfoTableLabel, @@ -389,6 +390,9 @@ data IdLabelInfo | ClosureTable -- ^ Table of closures for Enum tycons + | Bytes -- ^ Content of a string literal. See + -- Note [Bytes label]. + deriving (Eq, Ord) @@ -474,6 +478,7 @@ mkClosureTableLabel :: Name -> CafInfo -> CLabel mkLocalConInfoTableLabel :: CafInfo -> Name -> CLabel mkLocalConEntryLabel :: CafInfo -> Name -> CLabel mkConInfoTableLabel :: Name -> CafInfo -> CLabel +mkBytesLabel :: Name -> CLabel mkClosureLabel name c = IdLabel name c Closure mkInfoTableLabel name c = IdLabel name c InfoTable mkEntryLabel name c = IdLabel name c Entry @@ -481,6 +486,7 @@ mkClosureTableLabel name c = IdLabel name c ClosureTable mkLocalConInfoTableLabel c con = IdLabel con c ConInfoTable mkLocalConEntryLabel c con = IdLabel con c ConEntry mkConInfoTableLabel name c = IdLabel name c ConInfoTable +mkBytesLabel name = IdLabel name NoCafRefs Bytes mkConEntryLabel :: Name -> CafInfo -> CLabel mkConEntryLabel name c = IdLabel name c ConEntry @@ -935,6 +941,7 @@ idInfoLabelType info = ConInfoTable -> DataLabel ClosureTable -> DataLabel RednCounts -> DataLabel + Bytes -> DataLabel _ -> CodeLabel @@ -1056,6 +1063,11 @@ export this because in other modules we either have * A saturated call 'Just x'; allocate using Just_con_info Not exporting these Just_info labels reduces the number of symbols somewhat. + +Note [Bytes label] +~~~~~~~~~~~~~~~~~~ +For a top-level string literal 'foo', we have just one symbol 'foo_bytes', which +points to a static data block containing the content of the literal. 
-} instance Outputable CLabel where @@ -1234,6 +1246,7 @@ ppIdFlavor x = pp_cSEP <> ConEntry -> text "con_entry" ConInfoTable -> text "con_info" ClosureTable -> text "closure_tbl" + Bytes -> text "bytes" ) diff --git a/compiler/cmm/CmmInfo.hs b/compiler/cmm/CmmInfo.hs index b9981f247b..b5e800a977 100644 --- a/compiler/cmm/CmmInfo.hs +++ b/compiler/cmm/CmmInfo.hs @@ -400,7 +400,7 @@ mkProfLits _ (ProfilingInfo td cd) newStringLit :: [Word8] -> UniqSM (CmmLit, GenCmmDecl CmmStatics info stmt) newStringLit bytes = do { uniq <- getUniqueM - ; return (mkByteStringCLit uniq bytes) } + ; return (mkByteStringCLit (mkStringLitLabel uniq) bytes) } -- Misc utils diff --git a/compiler/cmm/CmmUtils.hs b/compiler/cmm/CmmUtils.hs index 3260cbab2f..1dab6eeff5 100644 --- a/compiler/cmm/CmmUtils.hs +++ b/compiler/cmm/CmmUtils.hs @@ -72,7 +72,6 @@ import Cmm import BlockId import CLabel import Outputable -import Unique import DynFlags import Util import CodeGen.Platform @@ -169,13 +168,13 @@ zeroExpr dflags = CmmLit (zeroCLit dflags) mkWordCLit :: DynFlags -> Integer -> CmmLit mkWordCLit dflags wd = CmmInt wd (wordWidth dflags) -mkByteStringCLit :: Unique -> [Word8] -> (CmmLit, GenCmmDecl CmmStatics info stmt) +mkByteStringCLit + :: CLabel -> [Word8] -> (CmmLit, GenCmmDecl CmmStatics info stmt) -- We have to make a top-level decl for the string, -- and return a literal pointing to it -mkByteStringCLit uniq bytes - = (CmmLabel lbl, CmmData (Section sec lbl) $ Statics lbl [CmmString bytes]) +mkByteStringCLit lbl bytes + = (CmmLabel lbl, CmmData (Section sec lbl) $ Statics lbl [CmmString bytes]) where - lbl = mkStringLitLabel uniq -- This can not happen for String literals (as there \NUL is replaced by -- C0 80). However, it can happen with Addr# literals. sec = if 0 `elem` bytes then ReadOnlyData else CString |