From d49b2bb21691892ca6ac8f2403e31f2a5e53feb3 Mon Sep 17 00:00:00 2001 From: Takano Akio Date: Wed, 18 Jan 2017 18:26:47 -0500 Subject: Allow top-level string literals in Core (#8472) This commits relaxes the invariants of the Core syntax so that a top-level variable can be bound to a primitive string literal of type Addr#. This commit: * Relaxes the invatiants of the Core, and allows top-level bindings whose type is Addr# as long as their RHS is either a primitive string literal or another variable. * Allows the simplifier and the full-laziness transformer to float out primitive string literals to the top leve. * Introduces the new StgGenTopBinding type to accomodate top-level Addr# bindings. * Introduces a new type of labels in the object code, with the suffix "_bytes", for exported top-level Addr# bindings. * Makes some built-in rules more robust. This was necessary to keep them functional after the above changes. This is a continuation of D2554. Rebasing notes: This had two slightly suspicious performance regressions: * T12425: bytes allocated regressed by roughly 5% * T4029: bytes allocated regressed by a bit over 1% * T13035: bytes allocated regressed by a bit over 5% These deserve additional investigation. Rebased by: bgamari. Test Plan: ./validate --slow Reviewers: goldfire, trofi, simonmar, simonpj, austin, hvr, bgamari Reviewed By: trofi, simonpj, bgamari Subscribers: trofi, simonpj, gridaphobe, thomie Differential Revision: https://phabricator.haskell.org/D2605 GHC Trac Issues: #8472 --- compiler/stgSyn/CoreToStg.hs | 19 +++++++++---- compiler/stgSyn/StgLint.hs | 17 ++++++----- compiler/stgSyn/StgSyn.hs | 67 +++++++++++++++++++++++++++++++------------- 3 files changed, 70 insertions(+), 33 deletions(-) (limited to 'compiler/stgSyn') diff --git a/compiler/stgSyn/CoreToStg.hs b/compiler/stgSyn/CoreToStg.hs index dcb923afea..37df9e2146 100644 --- a/compiler/stgSyn/CoreToStg.hs +++ b/compiler/stgSyn/CoreToStg.hs @@ -196,7 +196,7 @@ import Control.Monad (liftM, ap) -- Setting variable info: top-level, binds, RHSs -- -------------------------------------------------------------- -coreToStg :: DynFlags -> Module -> CoreProgram -> [StgBinding] +coreToStg :: DynFlags -> Module -> CoreProgram -> [StgTopBinding] coreToStg dflags this_mod pgm = pgm' where (_, _, pgm') = coreTopBindsToStg dflags this_mod emptyVarEnv pgm @@ -211,7 +211,7 @@ coreTopBindsToStg -> Module -> IdEnv HowBound -- environment for the bindings -> CoreProgram - -> (IdEnv HowBound, FreeVarsInfo, [StgBinding]) + -> (IdEnv HowBound, FreeVarsInfo, [StgTopBinding]) coreTopBindsToStg _ _ env [] = (env, emptyFVInfo, []) coreTopBindsToStg dflags this_mod env (b:bs) @@ -229,7 +229,14 @@ coreTopBindToStg -> IdEnv HowBound -> FreeVarsInfo -- Info about the body -> CoreBind - -> (IdEnv HowBound, FreeVarsInfo, StgBinding) + -> (IdEnv HowBound, FreeVarsInfo, StgTopBinding) + +coreTopBindToStg _ _ env body_fvs (NonRec id (Lit (MachStr str))) + -- top-level string literal + = let + env' = extendVarEnv env id how_bound + how_bound = LetBound TopLet 0 + in (env', body_fvs, StgTopStringLit id str) coreTopBindToStg dflags this_mod env body_fvs (NonRec id rhs) = let @@ -241,7 +248,7 @@ coreTopBindToStg dflags this_mod env body_fvs (NonRec id rhs) (stg_rhs, fvs') <- coreToTopStgRhs dflags this_mod body_fvs (id,rhs) return (stg_rhs, fvs') - bind = StgNonRec id stg_rhs + bind = StgTopLifted $ StgNonRec id stg_rhs in ASSERT2(consistentCafInfo id bind, ppr id ) -- NB: previously the assertion printed 'rhs' and 'bind' @@ -265,7 +272,7 @@ coreTopBindToStg dflags this_mod env body_fvs (Rec pairs) let fvs' = unionFVInfos fvss' return (stg_rhss, fvs') - bind = StgRec (zip binders stg_rhss) + bind = StgTopLifted $ StgRec (zip binders stg_rhss) in ASSERT2(consistentCafInfo (head binders) bind, ppr binders) (env', fvs' `unionFVInfo` body_fvs, bind) @@ -275,7 +282,7 @@ coreTopBindToStg dflags this_mod env body_fvs (Rec pairs) -- what CoreToStg has figured out about the binding's SRT. The -- CafInfo will be exact in all cases except when CorePrep has -- floated out a binding, in which case it will be approximate. -consistentCafInfo :: Id -> GenStgBinding Var Id -> Bool +consistentCafInfo :: Id -> GenStgTopBinding Var Id -> Bool consistentCafInfo id bind = WARN( not (exact || is_sat_thing) , ppr id <+> ppr id_marked_caffy <+> ppr binding_is_caffy ) safe diff --git a/compiler/stgSyn/StgLint.hs b/compiler/stgSyn/StgLint.hs index e31e7ae015..02d989cec0 100644 --- a/compiler/stgSyn/StgLint.hs +++ b/compiler/stgSyn/StgLint.hs @@ -6,7 +6,7 @@ {-# LANGUAGE CPP #-} -module StgLint ( lintStgBindings ) where +module StgLint ( lintStgTopBindings ) where import StgSyn @@ -54,12 +54,12 @@ generation. Solution: don't use it! (KSW 2000-05). * * ************************************************************************ -@lintStgBindings@ is the top-level interface function. +@lintStgTopBindings@ is the top-level interface function. -} -lintStgBindings :: String -> [StgBinding] -> [StgBinding] +lintStgTopBindings :: String -> [StgTopBinding] -> [StgTopBinding] -lintStgBindings whodunnit binds +lintStgTopBindings whodunnit binds = {-# SCC "StgLint" #-} case (initL (lint_binds binds)) of Nothing -> binds @@ -68,17 +68,20 @@ lintStgBindings whodunnit binds text whodunnit <+> text "***", msg, text "*** Offending Program ***", - pprStgBindings binds, + pprStgTopBindings binds, text "*** End of Offense ***"]) where - lint_binds :: [StgBinding] -> LintM () + lint_binds :: [StgTopBinding] -> LintM () lint_binds [] = return () lint_binds (bind:binds) = do - binders <- lintStgBinds bind + binders <- lint_bind bind addInScopeVars binders $ lint_binds binds + lint_bind (StgTopLifted bind) = lintStgBinds bind + lint_bind (StgTopStringLit v _) = return [v] + lintStgArg :: StgArg -> LintM (Maybe Type) lintStgArg (StgLitArg lit) = return (Just (literalType lit)) lintStgArg (StgVarArg v) = lintStgVar v diff --git a/compiler/stgSyn/StgSyn.hs b/compiler/stgSyn/StgSyn.hs index 48e836cc56..56978f868c 100644 --- a/compiler/stgSyn/StgSyn.hs +++ b/compiler/stgSyn/StgSyn.hs @@ -14,7 +14,7 @@ generation. module StgSyn ( GenStgArg(..), - GenStgBinding(..), GenStgExpr(..), GenStgRhs(..), + GenStgTopBinding(..), GenStgBinding(..), GenStgExpr(..), GenStgRhs(..), GenStgAlt, AltType(..), UpdateFlag(..), isUpdatable, @@ -24,11 +24,12 @@ module StgSyn ( combineStgBinderInfo, -- a set of synonyms for the most common (only :-) parameterisation - StgArg, StgBinding, StgExpr, StgRhs, StgAlt, + StgArg, + StgTopBinding, StgBinding, StgExpr, StgRhs, StgAlt, -- a set of synonyms to distinguish in- and out variants - InStgArg, InStgBinding, InStgExpr, InStgRhs, InStgAlt, - OutStgArg, OutStgBinding, OutStgExpr, OutStgRhs, OutStgAlt, + InStgArg, InStgTopBinding, InStgBinding, InStgExpr, InStgRhs, InStgAlt, + OutStgArg, OutStgTopBinding, OutStgBinding, OutStgExpr, OutStgRhs, OutStgAlt, -- StgOp StgOp(..), @@ -39,13 +40,14 @@ module StgSyn ( stgArgType, stripStgTicksTop, - pprStgBinding, pprStgBindings + pprStgBinding, pprStgTopBindings ) where #include "HsVersions.h" import CoreSyn ( AltCon, Tickish ) import CostCentre ( CostCentreStack ) +import Data.ByteString ( ByteString ) import Data.List ( intersperse ) import DataCon import DynFlags @@ -79,6 +81,12 @@ with respect to binder and occurrence information (just as in @CoreSyn@): -} +-- | A top-level binding. +data GenStgTopBinding bndr occ +-- See Note [CoreSyn top-level string literals] + = StgTopLifted (GenStgBinding bndr occ) + | StgTopStringLit bndr ByteString + data GenStgBinding bndr occ = StgNonRec bndr (GenStgRhs bndr occ) | StgRec [(bndr, GenStgRhs bndr occ)] @@ -421,11 +429,13 @@ stgRhsArity (StgRhsCon _ _ _) = 0 -- is that `TidyPgm` computed the CAF info on the `Id` but some transformations -- have taken place since then. -topStgBindHasCafRefs :: GenStgBinding bndr Id -> Bool -topStgBindHasCafRefs (StgNonRec _ rhs) +topStgBindHasCafRefs :: GenStgTopBinding bndr Id -> Bool +topStgBindHasCafRefs (StgTopLifted (StgNonRec _ rhs)) = topRhsHasCafRefs rhs -topStgBindHasCafRefs (StgRec binds) +topStgBindHasCafRefs (StgTopLifted (StgRec binds)) = any topRhsHasCafRefs (map snd binds) +topStgBindHasCafRefs StgTopStringLit{} + = False topRhsHasCafRefs :: GenStgRhs bndr Id -> Bool topRhsHasCafRefs (StgRhsClosure _ _ _ upd _ body) @@ -550,6 +560,7 @@ data AltType This happens to be the only one we use at the moment. -} +type StgTopBinding = GenStgTopBinding Id Id type StgBinding = GenStgBinding Id Id type StgArg = GenStgArg Id type StgExpr = GenStgExpr Id Id @@ -561,16 +572,18 @@ type StgAlt = GenStgAlt Id Id See CoreSyn for precedence in Core land -} -type InStgBinding = StgBinding -type InStgArg = StgArg -type InStgExpr = StgExpr -type InStgRhs = StgRhs -type InStgAlt = StgAlt -type OutStgBinding = StgBinding -type OutStgArg = StgArg -type OutStgExpr = StgExpr -type OutStgRhs = StgRhs -type OutStgAlt = StgAlt +type InStgTopBinding = StgTopBinding +type InStgBinding = StgBinding +type InStgArg = StgArg +type InStgExpr = StgExpr +type InStgRhs = StgRhs +type InStgAlt = StgAlt +type OutStgTopBinding = StgTopBinding +type OutStgBinding = StgBinding +type OutStgArg = StgArg +type OutStgExpr = StgExpr +type OutStgRhs = StgRhs +type OutStgAlt = StgAlt {- @@ -635,6 +648,15 @@ Robin Popplestone asked for semi-colon separators on STG binds; here's hoping he likes terminators instead... Ditto for case alternatives. -} +pprGenStgTopBinding :: (OutputableBndr bndr, Outputable bdee, Ord bdee) + => GenStgTopBinding bndr bdee -> SDoc + +pprGenStgTopBinding (StgTopStringLit bndr str) + = hang (hsep [pprBndr LetBind bndr, equals]) + 4 (pprHsBytes str <> semi) +pprGenStgTopBinding (StgTopLifted bind) + = pprGenStgBinding bind + pprGenStgBinding :: (OutputableBndr bndr, Outputable bdee, Ord bdee) => GenStgBinding bndr bdee -> SDoc @@ -653,12 +675,17 @@ pprGenStgBinding (StgRec pairs) pprStgBinding :: StgBinding -> SDoc pprStgBinding bind = pprGenStgBinding bind -pprStgBindings :: [StgBinding] -> SDoc -pprStgBindings binds = vcat $ intersperse blankLine (map pprGenStgBinding binds) +pprStgTopBindings :: [StgTopBinding] -> SDoc +pprStgTopBindings binds + = vcat $ intersperse blankLine (map pprGenStgTopBinding binds) instance (Outputable bdee) => Outputable (GenStgArg bdee) where ppr = pprStgArg +instance (OutputableBndr bndr, Outputable bdee, Ord bdee) + => Outputable (GenStgTopBinding bndr bdee) where + ppr = pprGenStgTopBinding + instance (OutputableBndr bndr, Outputable bdee, Ord bdee) => Outputable (GenStgBinding bndr bdee) where ppr = pprGenStgBinding -- cgit v1.2.1