summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDimitrios Vytiniotis <dimitris@microsoft.com>2011-11-16 15:28:43 +0000
committerDimitrios Vytiniotis <dimitris@microsoft.com>2011-11-16 15:28:43 +0000
commit7ec5404a3fd277251a1ab353aa398adfc02b6d34 (patch)
tree78ff33800fad55d7dbb4e1b1732d4f82c4e092a2
parentdb892577a2effc2266533e355dad2c40f9fd3be1 (diff)
parent1bbb89f3ab009367fcca84b73b351ddcf5be16a4 (diff)
downloadhaskell-ghc-constraint-solver.tar.gz
Merge branch 'master' of http://darcs.haskell.org/ghc into ghc-constraint-solverghc-constraint-solver
-rw-r--r--compiler/basicTypes/Literal.lhs6
-rw-r--r--compiler/coreSyn/CoreArity.lhs193
-rw-r--r--compiler/coreSyn/CoreLint.lhs13
-rw-r--r--compiler/coreSyn/CoreSyn.lhs30
-rw-r--r--compiler/coreSyn/CoreUnfold.lhs106
-rw-r--r--compiler/coreSyn/CoreUtils.lhs84
-rw-r--r--compiler/deSugar/DsUtils.lhs8
-rw-r--r--compiler/main/DriverPipeline.hs61
-rw-r--r--compiler/main/DynFlags.hs2
-rw-r--r--compiler/prelude/PrelRules.lhs3
-rw-r--r--compiler/simplCore/SimplCore.lhs9
-rw-r--r--compiler/simplCore/SimplUtils.lhs71
-rw-r--r--compiler/simplCore/Simplify.lhs10
-rw-r--r--compiler/specialise/SpecConstr.lhs4
-rw-r--r--compiler/typecheck/TcGenDeriv.lhs27
-rw-r--r--docs/users_guide/flags.xml9
-rwxr-xr-xdocs/users_guide/glasgow_exts.xml15
-rw-r--r--docs/users_guide/using.xml14
-rw-r--r--includes/Rts.h1
-rw-r--r--includes/RtsAPI.h48
-rw-r--r--includes/RtsOpts.h20
-rw-r--r--includes/rts/Main.h (renamed from rts/RtsMain.h)4
-rw-r--r--mk/validate-settings.mk6
-rw-r--r--rts/Main.c24
-rw-r--r--rts/RtsFlags.c26
-rw-r--r--rts/RtsFlags.h4
-rw-r--r--rts/RtsMain.c19
-rwxr-xr-xrts/RtsStartup.c14
-rw-r--r--rts/ThreadPaused.c132
-rw-r--r--rts/ghc.mk13
-rw-r--r--rts/hooks/RtsOpts.c14
-rwxr-xr-xvalidate85
32 files changed, 624 insertions, 451 deletions
diff --git a/compiler/basicTypes/Literal.lhs b/compiler/basicTypes/Literal.lhs
index 417444542a..966dca1e71 100644
--- a/compiler/basicTypes/Literal.lhs
+++ b/compiler/basicTypes/Literal.lhs
@@ -33,7 +33,7 @@ module Literal
, pprLiteral
-- ** Predicates on Literals and their contents
- , litIsDupable, litIsTrivial
+ , litIsDupable, litIsTrivial, litIsLifted
, inIntRange, inWordRange, tARGET_MAX_INT, inCharRange
, isZeroLit
, litFitsInChar
@@ -368,6 +368,10 @@ litFitsInChar (MachInt i)
= fromInteger i <= ord minBound
&& fromInteger i >= ord maxBound
litFitsInChar _ = False
+
+litIsLifted :: Literal -> Bool
+litIsLifted (LitInteger {}) = True
+litIsLifted _ = False
\end{code}
Types
diff --git a/compiler/coreSyn/CoreArity.lhs b/compiler/coreSyn/CoreArity.lhs
index f8565cb4c8..249861a4e4 100644
--- a/compiler/coreSyn/CoreArity.lhs
+++ b/compiler/coreSyn/CoreArity.lhs
@@ -34,6 +34,7 @@ import TyCon ( isRecursiveTyCon, isClassTyCon )
import Coercion
import BasicTypes
import Unique
+import DynFlags ( DynFlags, DynFlag(..), dopt )
import Outputable
import FastString
import Pair
@@ -128,11 +129,12 @@ exprBotStrictness_maybe :: CoreExpr -> Maybe (Arity, StrictSig)
-- and gives them a suitable strictness signatures. It's used during
-- float-out
exprBotStrictness_maybe e
- = case getBotArity (arityType is_cheap e) of
+ = case getBotArity (arityType env e) of
Nothing -> Nothing
Just ar -> Just (ar, mkStrictSig (mkTopDmdType (replicate ar topDmd) BotRes))
where
- is_cheap _ _ = False -- Irrelevant for this purpose
+ env = AE { ae_bndrs = [], ae_ped_bot = True, ae_cheap_fn = \ _ _ -> False }
+ -- For this purpose we can be very simple
\end{code}
Note [exprArity invariant]
@@ -251,34 +253,33 @@ Or, to put it another way, in any context C
It's all a bit more subtle than it looks:
-Note [Arity of case expressions]
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-We treat the arity of
- case x of p -> \s -> ...
-as 1 (or more) because for I/O ish things we really want to get that
-\s to the top. We are prepared to evaluate x each time round the loop
-in order to get that.
+Note [One-shot lambdas]
+~~~~~~~~~~~~~~~~~~~~~~~
+Consider one-shot lambdas
+ let x = expensive in \y z -> E
+We want this to have arity 1 if the \y-abstraction is a 1-shot lambda.
+
+Note [Dealing with bottom]
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+A Big Deal with computing arities is expressions like
+
+ f = \x -> case x of
+ True -> \s -> e1
+ False -> \s -> e2
+
+This happens all the time when f :: Bool -> IO ()
+In this case we do eta-expand, in order to get that \s to the
+top, and give f arity 2.
This isn't really right in the presence of seq. Consider
- f = \x -> case x of
- True -> \y -> x+y
- False -> \y -> x-y
-Can we eta-expand here? At first the answer looks like "yes of course", but
-consider
(f bot) `seq` 1
-This should diverge! But if we eta-expand, it won't. Again, we ignore this
-"problem", because being scrupulous would lose an important transformation for
-many programs.
-1. Note [One-shot lambdas]
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-Consider one-shot lambdas
- let x = expensive in \y z -> E
-We want this to have arity 1 if the \y-abstraction is a 1-shot lambda.
+This should diverge! But if we eta-expand, it won't. We ignore this
+"problem" (unless -fpedantic-bottoms is on), because being scrupulous
+would lose an important transformation for many programs. (See
+Trac #5587 for an example.)
-3. Note [Dealing with bottom]
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-Consider
+Consider also
f = \x -> error "foo"
Here, arity 1 is fine. But if it is
f = \x -> case x of
@@ -290,22 +291,31 @@ should diverge, but it'll converge if we eta-expand f. Nevertheless, we
do so; it improves some programs significantly, and increasing convergence
isn't a bad thing. Hence the ABot/ATop in ArityType.
-However, this really isn't always the Right Thing, and we have several
-tickets reporting unexpected bahaviour resulting from this
-transformation. So we try to limit it as much as possible:
+So these two transformations aren't always the Right Thing, and we
+have several tickets reporting unexpected bahaviour resulting from
+this transformation. So we try to limit it as much as possible:
+
+ (1) Do NOT move a lambda outside a known-bottom case expression
+ case undefined of { (a,b) -> \y -> e }
+ This showed up in Trac #5557
- * Do NOT move a lambda outside a known-bottom case expression
- case undefined of { (a,b) -> \y -> e }
- This showed up in Trac #5557
+ (2) Do NOT move a lambda outside a case if all the branches of
+ the case are known to return bottom.
+ case x of { (a,b) -> \y -> error "urk" }
+ This case is less important, but the idea is that if the fn is
+ going to diverge eventually anyway then getting the best arity
+ isn't an issue, so we might as well play safe
- * Do NOT move a lambda outside a case if all the branches of
- the case are known to return bottom.
- case x of { (a,b) -> \y -> error "urk" }
- This case is less important, but the idea is that if the fn is
- going to diverge eventually anyway then getting the best arity
- isn't an issue, so we might as well play safe
+ (3) Do NOT move a lambda outside a case unless
+ (a) The scrutinee is ok-for-speculation, or
+ (b) There is an enclosing value \x, and the scrutinee is x
+ E.g. let x = case y of ( DEFAULT -> \v -> blah }
+ We don't move the \y out. This is pretty arbitrary; but it
+ catches the common case of doing `seq` on y.
+ This is the reason for the under_lam argument to arityType.
+ See Trac #5625
-Of course both these are readily defeated by disguising the bottoms.
+Of course both (1) and (2) are readily defeated by disguising the bottoms.
4. Note [Newtype arity]
~~~~~~~~~~~~~~~~~~~~~~~~
@@ -463,17 +473,21 @@ vanillaArityType = ATop [] -- Totally uninformative
-- ^ The Arity returned is the number of value args the
-- expression can be applied to without doing much work
-exprEtaExpandArity :: CheapFun -> CoreExpr -> Arity
+exprEtaExpandArity :: DynFlags -> CheapAppFun -> CoreExpr -> Arity
-- exprEtaExpandArity is used when eta expanding
-- e ==> \xy -> e x y
-exprEtaExpandArity cheap_fun e
- = case (arityType cheap_fun e) of
+exprEtaExpandArity dflags cheap_app e
+ = case (arityType env e) of
ATop (os:oss)
| os || has_lam e -> 1 + length oss -- Note [Eta expanding thunks]
| otherwise -> 0
ATop [] -> 0
ABot n -> n
where
+ env = AE { ae_bndrs = []
+ , ae_cheap_fn = mk_cheap_fn dflags cheap_app
+ , ae_ped_bot = dopt Opt_PedanticBottoms dflags }
+
has_lam (Tick _ e) = has_lam e
has_lam (Lam b e) = isId b || has_lam e
has_lam _ = False
@@ -482,8 +496,40 @@ getBotArity :: ArityType -> Maybe Arity
-- Arity of a divergent function
getBotArity (ABot n) = Just n
getBotArity _ = Nothing
+
+mk_cheap_fn :: DynFlags -> CheapAppFun -> CheapFun
+mk_cheap_fn dflags cheap_app
+ | not (dopt Opt_DictsCheap dflags)
+ = \e _ -> exprIsCheap' cheap_app e
+ | otherwise
+ = \e mb_ty -> exprIsCheap' cheap_app e
+ || case mb_ty of
+ Nothing -> False
+ Just ty -> isDictLikeTy ty
\end{code}
+Note [Eta expanding through dictionaries]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+If the experimental -fdicts-cheap flag is on, we eta-expand through
+dictionary bindings. This improves arities. Thereby, it also
+means that full laziness is less prone to floating out the
+application of a function to its dictionary arguments, which
+can thereby lose opportunities for fusion. Example:
+ foo :: Ord a => a -> ...
+ foo = /\a \(d:Ord a). let d' = ...d... in \(x:a). ....
+ -- So foo has arity 1
+
+ f = \x. foo dInt $ bar x
+
+The (foo DInt) is floated out, and makes ineffective a RULE
+ foo (bar x) = ...
+
+One could go further and make exprIsCheap reply True to any
+dictionary-typed expression, but that's more work.
+
+See Note [Dictionary-like types] in TcType.lhs for why we use
+isDictLikeTy here rather than isDictTy
+
Note [Eta expanding thunks]
~~~~~~~~~~~~~~~~~~~~~~~~~~~
When we see
@@ -558,10 +604,17 @@ type CheapFun = CoreExpr -> Maybe Type -> Bool
-- If the Maybe is Just, the type is the type
-- of the expression; Nothing means "don't know"
-arityType :: CheapFun -> CoreExpr -> ArityType
+data ArityEnv
+ = AE { ae_bndrs :: [Id] -- Enclosing value-lambda Ids
+ -- See Note [Dealing with bottom (3)]
+ , ae_cheap_fn :: CheapFun
+ , ae_ped_bot :: Bool -- True <=> be pedantic about bottoms
+ }
+
+arityType :: ArityEnv -> CoreExpr -> ArityType
-arityType cheap_fn (Cast e co)
- = case arityType cheap_fn e of
+arityType env (Cast e co)
+ = case arityType env e of
ATop os -> ATop (take co_arity os)
ABot n -> ABot (n `min` co_arity)
where
@@ -586,15 +639,20 @@ arityType _ (Var v)
one_shots = typeArity (idType v)
-- Lambdas; increase arity
-arityType cheap_fn (Lam x e)
- | isId x = arityLam x (arityType cheap_fn e)
- | otherwise = arityType cheap_fn e
+arityType env (Lam x e)
+ | isId x = arityLam x (arityType env' e)
+ | otherwise = arityType env e
+ where
+ env' = env { ae_bndrs = x : ae_bndrs env }
-- Applications; decrease arity, except for types
-arityType cheap_fn (App fun (Type _))
- = arityType cheap_fn fun
-arityType cheap_fn (App fun arg )
- = arityApp (arityType cheap_fn fun) (cheap_fn arg Nothing)
+arityType env (App fun (Type _))
+ = arityType env fun
+arityType env (App fun arg )
+ = arityApp (arityType env' fun) (ae_cheap_fn env arg Nothing)
+ where
+ env' = env { ae_bndrs = case ae_bndrs env of
+ { [] -> []; (_:xs) -> xs } }
-- Case/Let; keep arity if either the expression is cheap
-- or it's a 1-shot lambda
@@ -604,31 +662,40 @@ arityType cheap_fn (App fun arg )
-- f x y = case x of { (a,b) -> e }
-- The difference is observable using 'seq'
--
-arityType cheap_fn (Case scrut _ _ alts)
+arityType env (Case scrut _ _ alts)
| exprIsBottom scrut
= ABot 0 -- Do not eta expand
- -- See Note [Dealing with bottom]
+ -- See Note [Dealing with bottom (1)]
| otherwise
= case alts_type of
ABot n | n>0 -> ATop [] -- Don't eta expand
| otherwise -> ABot 0 -- if RHS is bottomming
- -- See Note [Dealing with bottom]
- ATop as | exprIsTrivial scrut -> ATop as
- | otherwise -> ATop (takeWhile id as)
+ -- See Note [Dealing with bottom (2)]
+
+ ATop as | not (ae_ped_bot env) -- Check -fpedantic-bottoms
+ , is_under scrut -> ATop as
+ | exprOkForSpeculation scrut -> ATop as
+ | otherwise -> ATop (takeWhile id as)
where
- alts_type = foldr1 andArityType [arityType cheap_fn rhs | (_,_,rhs) <- alts]
+ -- is_under implements Note [Dealing with bottom (3)]
+ is_under (Var f) = f `elem` ae_bndrs env
+ is_under (App f (Type {})) = is_under f
+ is_under (Cast f _) = is_under f
+ is_under _ = False
+
+ alts_type = foldr1 andArityType [arityType env rhs | (_,_,rhs) <- alts]
-arityType cheap_fn (Let b e)
- = floatIn (cheap_bind b) (arityType cheap_fn e)
+arityType env (Let b e)
+ = floatIn (cheap_bind b) (arityType env e)
where
cheap_bind (NonRec b e) = is_cheap (b,e)
cheap_bind (Rec prs) = all is_cheap prs
- is_cheap (b,e) = cheap_fn e (Just (idType b))
+ is_cheap (b,e) = ae_cheap_fn env e (Just (idType b))
-arityType cheap_fn (Tick t e)
- | not (tickishIsCode t) = arityType cheap_fn e
+arityType env (Tick t e)
+ | not (tickishIsCode t) = arityType env e
-arityType _ _ = vanillaArityType
+arityType _ _ = vanillaArityType
\end{code}
diff --git a/compiler/coreSyn/CoreLint.lhs b/compiler/coreSyn/CoreLint.lhs
index 7bd61fa351..77747aabf3 100644
--- a/compiler/coreSyn/CoreLint.lhs
+++ b/compiler/coreSyn/CoreLint.lhs
@@ -41,7 +41,6 @@ import Kind
import Type
import TypeRep
import TyCon
-import TcType
import BasicTypes
import StaticFlags
import ListSetOps
@@ -562,12 +561,12 @@ lintCoreAlt _ alt_ty (DEFAULT, args, rhs) =
; checkAltExpr rhs alt_ty }
lintCoreAlt scrut_ty alt_ty (LitAlt lit, args, rhs)
- | isIntegerTy scrut_ty
- = failWithL integerScrutinisedMsg
+ | litIsLifted lit
+ = failWithL integerScrutinisedMsg
| otherwise
- = do { checkL (null args) (mkDefaultArgsMsg args)
- ; checkTys lit_ty scrut_ty (mkBadPatMsg lit_ty scrut_ty)
- ; checkAltExpr rhs alt_ty }
+ = do { checkL (null args) (mkDefaultArgsMsg args)
+ ; checkTys lit_ty scrut_ty (mkBadPatMsg lit_ty scrut_ty)
+ ; checkAltExpr rhs alt_ty }
where
lit_ty = literalType lit
@@ -1196,7 +1195,7 @@ mkBadPatMsg con_result_ty scrut_ty
integerScrutinisedMsg :: Message
integerScrutinisedMsg
- = text "In a case alternative, scrutinee type is Integer"
+ = text "In a LitAlt, the literal is lifted (probably Integer)"
mkBadAltMsg :: Type -> CoreAlt -> Message
mkBadAltMsg scrut_ty alt
diff --git a/compiler/coreSyn/CoreSyn.lhs b/compiler/coreSyn/CoreSyn.lhs
index ea0ef2242f..a8dbbceb36 100644
--- a/compiler/coreSyn/CoreSyn.lhs
+++ b/compiler/coreSyn/CoreSyn.lhs
@@ -278,11 +278,16 @@ type Arg b = Expr b
type Alt b = (AltCon, [b], Expr b)
-- | A case alternative constructor (i.e. pattern match)
-data AltCon = DataAlt DataCon -- ^ A plain data constructor: @case e of { Foo x -> ... }@.
- -- Invariant: the 'DataCon' is always from a @data@ type, and never from a @newtype@
- | LitAlt Literal -- ^ A literal: @case e of { 1 -> ... }@
- | DEFAULT -- ^ Trivial alternative: @case e of { _ -> ... }@
- deriving (Eq, Ord, Data, Typeable)
+data AltCon
+ = DataAlt DataCon -- ^ A plain data constructor: @case e of { Foo x -> ... }@.
+ -- Invariant: the 'DataCon' is always from a @data@ type, and never from a @newtype@
+
+ | LitAlt Literal -- ^ A literal: @case e of { 1 -> ... }@
+ -- Invariant: always an *unlifted* literal
+ -- See Note [Literal alternatives]
+
+ | DEFAULT -- ^ Trivial alternative: @case e of { _ -> ... }@
+ deriving (Eq, Ord, Data, Typeable)
-- | Binding, used for top level bindings in a module and local bindings in a @let@.
data Bind b = NonRec b (Expr b)
@@ -290,6 +295,21 @@ data Bind b = NonRec b (Expr b)
deriving (Data, Typeable)
\end{code}
+Note [Literal alternatives]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Literal alternatives (LitAlt lit) are always for *un-lifted* literals.
+We have one literal, a literal Integer, that is lifted, and we don't
+allow in a LitAlt, because LitAlt cases don't do any evaluation. Also
+(see Trac #5603) if you say
+ case 3 of
+ S# x -> ...
+ J# _ _ -> ...
+(where S#, J# are the constructors for Integer) we don't want the
+simplifier calling findAlt with argument (LitAlt 3). No no. Integer
+literals are an opaque encoding of an algebraic data type, not of
+an unlifted literal, like all the others.
+
+
-------------------------- CoreSyn INVARIANTS ---------------------------
Note [CoreSyn top-level invariant]
diff --git a/compiler/coreSyn/CoreUnfold.lhs b/compiler/coreSyn/CoreUnfold.lhs
index 4f1dee3da3..930041dea4 100644
--- a/compiler/coreSyn/CoreUnfold.lhs
+++ b/compiler/coreSyn/CoreUnfold.lhs
@@ -174,8 +174,7 @@ mkUnfolding src top_lvl is_bottoming expr
uf_guidance = guidance }
where
is_cheap = exprIsCheap expr
- (arity, guidance) = calcUnfoldingGuidance is_cheap
- opt_UF_CreationThreshold expr
+ (arity, guidance) = calcUnfoldingGuidance expr
-- Sometimes during simplification, there's a large let-bound thing
-- which has been substituted, and so is now dead; so 'expr' contains
-- two copies of the thing while the occurrence-analysed expression doesn't
@@ -217,14 +216,13 @@ inlineBoringOk e
go _ _ = boringCxtNotOk
calcUnfoldingGuidance
- :: Bool -- True <=> the rhs is cheap, or we want to treat it
- -- as cheap (INLINE things)
- -> Int -- Bomb out if size gets bigger than this
- -> CoreExpr -- Expression to look at
+ :: CoreExpr -- Expression to look at
-> (Arity, UnfoldingGuidance)
-calcUnfoldingGuidance expr_is_cheap bOMB_OUT_SIZE expr
+calcUnfoldingGuidance expr
= case collectBinders expr of { (bndrs, body) ->
let
+ bOMB_OUT_SIZE = opt_UF_CreationThreshold
+ -- Bomb out if size gets bigger than this
val_bndrs = filter isId bndrs
n_val_bndrs = length val_bndrs
@@ -232,8 +230,7 @@ calcUnfoldingGuidance expr_is_cheap bOMB_OUT_SIZE expr
= case (sizeExpr (iUnbox bOMB_OUT_SIZE) val_bndrs body) of
TooBig -> UnfNever
SizeIs size cased_bndrs scrut_discount
- | uncondInline n_val_bndrs (iBox size)
- , expr_is_cheap
+ | uncondInline expr n_val_bndrs (iBox size)
-> UnfWhen unSaturatedOk boringCxtOk -- Note [INLINE for small functions]
| otherwise
-> UnfIfGoodArgs { ug_args = map (discount cased_bndrs) val_bndrs
@@ -278,9 +275,10 @@ Notice that 'x' counts 0, while (f x) counts 2. That's deliberate: there's
a function call to account for. Notice also that constructor applications
are very cheap, because exposing them to a caller is so valuable.
-[25/5/11] All sizes are now multiplied by 10, except for primops.
-This makes primops look cheap, and seems to be almost unversally
-beneficial. Done partly as a result of #4978.
+[25/5/11] All sizes are now multiplied by 10, except for primops
+(which have sizes like 1 or 4. This makes primops look fantastically
+cheap, and seems to be almost unversally beneficial. Done partly as a
+result of #4978.
Note [Do not inline top-level bottoming functions]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -289,7 +287,6 @@ and similar friends. See Note [Bottoming floats] in SetLevels.
Do not re-inline them! But we *do* still inline if they are very small
(the uncondInline stuff).
-
Note [INLINE for small functions]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Consider {-# INLINE f #-}
@@ -302,43 +299,54 @@ inline unconditionally, regardless of how boring the context is.
Things to note:
- * We inline *unconditionally* if inlined thing is smaller (using sizeExpr)
- than the thing it's replacing. Notice that
+(1) We inline *unconditionally* if inlined thing is smaller (using sizeExpr)
+ than the thing it's replacing. Notice that
(f x) --> (g 3) -- YES, unconditionally
(f x) --> x : [] -- YES, *even though* there are two
-- arguments to the cons
x --> g 3 -- NO
x --> Just v -- NO
- It's very important not to unconditionally replace a variable by
- a non-atomic term.
-
-* We do this even if the thing isn't saturated, else we end up with the
- silly situation that
- f x y = x
- ...map (f 3)...
- doesn't inline. Even in a boring context, inlining without being
- saturated will give a lambda instead of a PAP, and will be more
- efficient at runtime.
-
-* However, when the function's arity > 0, we do insist that it
- has at least one value argument at the call site. Otherwise we find this:
- f = /\a \x:a. x
- d = /\b. MkD (f b)
- If we inline f here we get
- d = /\b. MkD (\x:b. x)
- and then prepareRhs floats out the argument, abstracting the type
- variables, so we end up with the original again!
-
+ It's very important not to unconditionally replace a variable by
+ a non-atomic term.
+
+(2) We do this even if the thing isn't saturated, else we end up with the
+ silly situation that
+ f x y = x
+ ...map (f 3)...
+ doesn't inline. Even in a boring context, inlining without being
+ saturated will give a lambda instead of a PAP, and will be more
+ efficient at runtime.
+
+(3) However, when the function's arity > 0, we do insist that it
+ has at least one value argument at the call site. (This check is
+ made in the UnfWhen case of callSiteInline.) Otherwise we find this:
+ f = /\a \x:a. x
+ d = /\b. MkD (f b)
+ If we inline f here we get
+ d = /\b. MkD (\x:b. x)
+ and then prepareRhs floats out the argument, abstracting the type
+ variables, so we end up with the original again!
+
+(4) We must be much more cautious about arity-zero things. Consider
+ let x = y +# z in ...
+ In *size* terms primops look very small, because the generate a
+ single instruction, but we do not want to unconditionally replace
+ every occurrence of x with (y +# z). So we only do the
+ unconditional-inline thing for *trivial* expressions.
+
+ NB: you might think that PostInlineUnconditionally would do this
+ but it doesn't fire for top-level things; see SimplUtils
+ Note [Top level and postInlineUnconditionally]
\begin{code}
-uncondInline :: Arity -> Int -> Bool
+uncondInline :: CoreExpr -> Arity -> Int -> Bool
-- Inline unconditionally if there no size increase
-- Size of call is arity (+1 for the function)
-- See Note [INLINE for small functions]
-uncondInline arity size
- | arity == 0 = size == 0
- | otherwise = size <= 10 * (arity + 1)
+uncondInline rhs arity size
+ | arity > 0 = size <= 10 * (arity + 1) -- See Note [INLINE for small functions] (1)
+ | otherwise = exprIsTrivial rhs -- See Note [INLINE for small functions] (4)
\end{code}
@@ -747,17 +755,28 @@ smallEnoughToInline _
----------------
certainlyWillInline :: Unfolding -> Bool
-- Sees if the unfolding is pretty certain to inline
-certainlyWillInline (CoreUnfolding { uf_is_cheap = is_cheap, uf_arity = n_vals, uf_guidance = guidance })
+certainlyWillInline (CoreUnfolding { uf_arity = n_vals, uf_guidance = guidance })
= case guidance of
UnfNever -> False
UnfWhen {} -> True
UnfIfGoodArgs { ug_size = size}
- -> is_cheap && size - (10 * (n_vals +1)) <= opt_UF_UseThreshold
+ -> n_vals > 0 -- See Note [certainlyWillInline: be caseful of thunks]
+ && size - (10 * (n_vals +1)) <= opt_UF_UseThreshold
certainlyWillInline _
= False
\end{code}
+Note [certainlyWillInline: be caseful of thunks]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Don't claim that thunks will certainly inline, because that risks work
+duplication. Even if the work duplication is not great (eg is_cheap
+holds), it can make a big difference in an inner loop In Trac #5623 we
+found that the WorkWrap phase thought that
+ y = case x of F# v -> F# (v +# v)
+was certainlyWillInline, so the addition got duplicated.
+
+
%************************************************************************
%* *
\subsection{callSiteInline}
@@ -894,7 +913,7 @@ tryUnfolding dflags id lone_variable
UnfWhen unsat_ok boring_ok
-> (enough_args && (boring_ok || some_benefit), empty )
- where -- See Note [INLINE for small functions]
+ where -- See Note [INLINE for small functions (3)]
enough_args = saturated || (unsat_ok && n_val_args > 0)
UnfIfGoodArgs { ug_args = arg_discounts, ug_res = res_discount, ug_size = size }
@@ -1084,7 +1103,8 @@ to be cheap, and that's good because exprIsConApp_maybe doesn't
think that expression is a constructor application.
I used to test is_value rather than is_cheap, which was utterly
-wrong, because the above expression responds True to exprIsHNF.
+wrong, because the above expression responds True to exprIsHNF,
+which is what sets is_value.
This kind of thing can occur if you have
diff --git a/compiler/coreSyn/CoreUtils.lhs b/compiler/coreSyn/CoreUtils.lhs
index f0aa71133c..27026b2353 100644
--- a/compiler/coreSyn/CoreUtils.lhs
+++ b/compiler/coreSyn/CoreUtils.lhs
@@ -729,6 +729,7 @@ it's applied only to dictionaries.
%************************************************************************
\begin{code}
+-----------------------------
-- | 'exprOkForSpeculation' returns True of an expression that is:
--
-- * Safe to evaluate even if normal order eval might not
@@ -769,12 +770,8 @@ exprOkForSpeculation :: Expr b -> Bool
exprOkForSpeculation (Lit _) = True
exprOkForSpeculation (Type _) = True
exprOkForSpeculation (Coercion _) = True
-
-exprOkForSpeculation (Var v)
- = isUnLiftedType (idType v) -- c.f. the Var case of exprIsHNF
- || isDataConWorkId v -- Nullary constructors
- || idArity v > 0 -- Functions
- || isEvaldUnfolding (idUnfolding v) -- Let-bound values
+exprOkForSpeculation (Var v) = appOkForSpeculation v []
+exprOkForSpeculation (Cast e _) = exprOkForSpeculation e
-- Tick annotations that *tick* cannot be speculated, because these
-- are meant to identify whether or not (and how often) the particular
@@ -783,8 +780,6 @@ exprOkForSpeculation (Tick tickish e)
| tickishCounts tickish = False
| otherwise = exprOkForSpeculation e
-exprOkForSpeculation (Cast e _) = exprOkForSpeculation e
-
exprOkForSpeculation (Case e _ _ alts)
= exprOkForSpeculation e -- Note [exprOkForSpeculation: case expressions]
&& all (\(_,_,rhs) -> exprOkForSpeculation rhs) alts
@@ -792,37 +787,46 @@ exprOkForSpeculation (Case e _ _ alts)
exprOkForSpeculation other_expr
= case collectArgs other_expr of
- (Var f, args) -> spec_ok (idDetails f) args
+ (Var f, args) -> appOkForSpeculation f args
_ -> False
- where
- spec_ok (DataConWorkId _) _
- = True -- The strictness of the constructor has already
+-----------------------------
+appOkForSpeculation :: Id -> [Expr b] -> Bool
+appOkForSpeculation fun args
+ = case idDetails fun of
+ DFunId new_type -> not new_type
+ -- DFuns terminate, unless the dict is implemented
+ -- with a newtype in which case they may not
+
+ DataConWorkId {} -> True
+ -- The strictness of the constructor has already
-- been expressed by its "wrapper", so we don't need
-- to take the arguments into account
- spec_ok (PrimOpId op) args
- | isDivOp op, -- Special case for dividing operations that fail
- [arg1, Lit lit] <- args -- only if the divisor is zero
- = not (isZeroLit lit) && exprOkForSpeculation arg1
- -- Often there is a literal divisor, and this
- -- can get rid of a thunk in an inner looop
-
- | DataToTagOp <- op -- See Note [dataToTag speculation]
- = True
-
- | otherwise
- = primOpOkForSpeculation op &&
- all exprOkForSpeculation args
- -- A bit conservative: we don't really need
- -- to care about lazy arguments, but this is easy
-
- spec_ok (DFunId new_type) _ = not new_type
- -- DFuns terminate, unless the dict is implemented with a newtype
- -- in which case they may not
-
- spec_ok _ _ = False
-
+ PrimOpId op
+ | isDivOp op -- Special case for dividing operations that fail
+ , [arg1, Lit lit] <- args -- only if the divisor is zero
+ -> not (isZeroLit lit) && exprOkForSpeculation arg1
+ -- Often there is a literal divisor, and this
+ -- can get rid of a thunk in an inner looop
+
+ | DataToTagOp <- op -- See Note [dataToTag speculation]
+ -> True
+
+ | otherwise
+ -> primOpOkForSpeculation op &&
+ all exprOkForSpeculation args
+ -- A bit conservative: we don't really need
+ -- to care about lazy arguments, but this is easy
+
+ _other -> isUnLiftedType (idType fun) -- c.f. the Var case of exprIsHNF
+ || idArity fun > n_val_args -- Partial apps
+ || (n_val_args ==0 &&
+ isEvaldUnfolding (idUnfolding fun)) -- Let-bound values
+ where
+ n_val_args = valArgCount args
+
+-----------------------------
altsAreExhaustive :: [Alt b] -> Bool
-- True <=> the case alterantives are definiely exhaustive
-- False <=> they may or may not be
@@ -991,19 +995,19 @@ exprIsHNFlike is_con is_con_unf = is_hnf_like
-- we could get an infinite loop
is_hnf_like (Lit _) = True
- is_hnf_like (Type _) = True -- Types are honorary Values;
+ is_hnf_like (Type _) = True -- Types are honorary Values;
-- we don't mind copying them
is_hnf_like (Coercion _) = True -- Same for coercions
is_hnf_like (Lam b e) = isRuntimeVar b || is_hnf_like e
is_hnf_like (Tick tickish e) = not (tickishCounts tickish)
&& is_hnf_like e
-- See Note [exprIsHNF Tick]
- is_hnf_like (Cast e _) = is_hnf_like e
- is_hnf_like (App e (Type _)) = is_hnf_like e
+ is_hnf_like (Cast e _) = is_hnf_like e
+ is_hnf_like (App e (Type _)) = is_hnf_like e
is_hnf_like (App e (Coercion _)) = is_hnf_like e
- is_hnf_like (App e a) = app_is_value e [a]
- is_hnf_like (Let _ e) = is_hnf_like e -- Lazy let(rec)s don't affect us
- is_hnf_like _ = False
+ is_hnf_like (App e a) = app_is_value e [a]
+ is_hnf_like (Let _ e) = is_hnf_like e -- Lazy let(rec)s don't affect us
+ is_hnf_like _ = False
-- There is at least one value argument
app_is_value :: CoreExpr -> [CoreArg] -> Bool
diff --git a/compiler/deSugar/DsUtils.lhs b/compiler/deSugar/DsUtils.lhs
index 6a46bbe93d..626b6ee795 100644
--- a/compiler/deSugar/DsUtils.lhs
+++ b/compiler/deSugar/DsUtils.lhs
@@ -291,7 +291,7 @@ mkGuardedMatchResult pred_expr (MatchResult _ body_fn)
mkCoPrimCaseMatchResult :: Id -- Scrutinee
-> Type -- Type of the case
-> [(Literal, MatchResult)] -- Alternatives
- -> MatchResult
+ -> MatchResult -- Literals are all unlifted
mkCoPrimCaseMatchResult var ty match_alts
= MatchResult CanFail mk_case
where
@@ -300,8 +300,10 @@ mkCoPrimCaseMatchResult var ty match_alts
return (Case (Var var) var ty ((DEFAULT, [], fail) : alts))
sorted_alts = sortWith fst match_alts -- Right order for a Case
- mk_alt fail (lit, MatchResult _ body_fn) = do body <- body_fn fail
- return (LitAlt lit, [], body)
+ mk_alt fail (lit, MatchResult _ body_fn)
+ = ASSERT( not (litIsLifted lit) )
+ do body <- body_fn fail
+ return (LitAlt lit, [], body)
mkCoAlgCaseMatchResult
diff --git a/compiler/main/DriverPipeline.hs b/compiler/main/DriverPipeline.hs
index 8c0f3a6098..8103f66239 100644
--- a/compiler/main/DriverPipeline.hs
+++ b/compiler/main/DriverPipeline.hs
@@ -172,8 +172,7 @@ compile' (nothingCompiler, interactiveCompiler, batchCompiler)
handleBatch (HscRecomp hasStub _)
| isHsBoot src_flavour
= do when (isObjectTarget hsc_lang) $ -- interpreted reaches here too
- liftIO $ SysTools.touch dflags' "Touching object file"
- object_filename
+ liftIO $ touchObjectFile dflags' object_filename
return maybe_old_linkable
| otherwise
@@ -956,7 +955,7 @@ runPhase (Hsc src_flavour) input_fn dflags0
case result of
HscNoRecomp
- -> do io $ SysTools.touch dflags' "Touching object file" o_file
+ -> do io $ touchObjectFile dflags' o_file
-- The .o file must have a later modification date
-- than the source file (else we wouldn't be in HscNoRecomp)
-- but we touch it anyway, to keep 'make' happy (we think).
@@ -970,7 +969,7 @@ runPhase (Hsc src_flavour) input_fn dflags0
-- In the case of hs-boot files, generate a dummy .o-boot
-- stamp file for the benefit of Make
when (isHsBoot src_flavour) $
- io $ SysTools.touch dflags' "Touching object file" o_file
+ io $ touchObjectFile dflags' o_file
return (next_phase, output_fn)
-----------------------------------------------------------------------------
@@ -1437,25 +1436,39 @@ mkExtraCObj dflags xs
++ map (FileOption "-I") (includeDirs rtsDetails))
return oFile
+-- When linking a binary, we need to create a C main() function that
+-- starts everything off. This used to be compiled statically as part
+-- of the RTS, but that made it hard to change the -rtsopts setting,
+-- so now we generate and compile a main() stub as part of every
+-- binary and pass the -rtsopts setting directly to the RTS (#5373)
+--
mkExtraObjToLinkIntoBinary :: DynFlags -> [PackageId] -> IO FilePath
mkExtraObjToLinkIntoBinary dflags dep_packages = do
link_info <- getLinkInfo dflags dep_packages
- mkExtraCObj dflags (showSDoc (vcat [rts_opts_enabled,
- extra_rts_opts,
+
+ mkExtraCObj dflags (showSDoc (vcat [main,
link_opts link_info]
<> char '\n')) -- final newline, to
-- keep gcc happy
where
- rts_opts_enabled
- = vcat [text "#include \"Rts.h\"",
- text "#include \"RtsOpts.h\"",
- text "const RtsOptsEnabledEnum rtsOptsEnabled = " <>
- text (show (rtsOptsEnabled dflags)) <> semi ]
-
- extra_rts_opts = case rtsOpts dflags of
- Nothing -> empty
- Just opts -> text "char *ghc_rts_opts = " <> text (show opts) <> semi
+ main
+ | dopt Opt_NoHsMain dflags = empty
+ | otherwise = vcat [
+ ptext (sLit "#include \"Rts.h\""),
+ ptext (sLit "extern StgClosure ZCMain_main_closure;"),
+ ptext (sLit "int main(int argc, char *argv[])"),
+ char '{',
+ ptext (sLit " RtsConfig __conf = defaultRtsConfig;"),
+ ptext (sLit " __conf.rts_opts_enabled = ")
+ <> text (show (rtsOptsEnabled dflags)) <> semi,
+ case rtsOpts dflags of
+ Nothing -> empty
+ Just opts -> ptext (sLit " __conf.rts_opts= ") <>
+ text (show opts) <> semi,
+ ptext (sLit " return hs_main(argc, argv, &ZCMain_main_closure,__conf);"),
+ char '}'
+ ]
link_opts info
| not (platformSupportsSavingLinkOpts (platformOS (targetPlatform dflags)))
@@ -1607,13 +1620,6 @@ linkBinary dflags o_files dep_packages = do
let lib_paths = libraryPaths dflags
let lib_path_opts = map ("-L"++) lib_paths
- -- The C "main" function is not in the rts but in a separate static
- -- library libHSrtsmain.a that sits next to the rts lib files. Assuming
- -- we're using a Haskell main function then we need to link it in.
- let no_hs_main = dopt Opt_NoHsMain dflags
- let main_lib | no_hs_main = []
- | otherwise = [ "-lHSrtsmain" ]
-
extraLinkObj <- mkExtraObjToLinkIntoBinary dflags dep_packages
pkg_link_opts <- getPackageLinkOpts dflags dep_packages
@@ -1731,7 +1737,6 @@ linkBinary dflags o_files dep_packages = do
++ framework_path_opts
++ framework_opts
++ pkg_lib_path_opts
- ++ main_lib
++ [extraLinkObj]
++ pkg_link_opts
++ pkg_framework_path_opts
@@ -1852,8 +1857,6 @@ linkDynLib dflags o_files dep_packages = do
let extra_ld_opts = getOpts dflags opt_l
- extraLinkObj <- mkExtraObjToLinkIntoBinary dflags dep_packages
-
#if defined(mingw32_HOST_OS)
-----------------------------------------------------------------------------
-- Making a DLL
@@ -1880,7 +1883,6 @@ linkDynLib dflags o_files dep_packages = do
++ lib_path_opts
++ extra_ld_opts
++ pkg_lib_path_opts
- ++ [extraLinkObj]
++ pkg_link_opts
))
#elif defined(darwin_TARGET_OS)
@@ -1936,7 +1938,6 @@ linkDynLib dflags o_files dep_packages = do
++ lib_path_opts
++ extra_ld_opts
++ pkg_lib_path_opts
- ++ [extraLinkObj]
++ pkg_link_opts
))
#else
@@ -1970,7 +1971,6 @@ linkDynLib dflags o_files dep_packages = do
++ lib_path_opts
++ extra_ld_opts
++ pkg_lib_path_opts
- ++ [extraLinkObj]
++ pkg_link_opts
))
#endif
@@ -2083,3 +2083,8 @@ hscNextPhase dflags _ hsc_lang =
HscNothing -> StopLn
HscInterpreted -> StopLn
+touchObjectFile :: DynFlags -> FilePath -> IO ()
+touchObjectFile dflags path = do
+ createDirectoryHierarchy $ takeDirectory path
+ SysTools.touch dflags "Touching object file" path
+
diff --git a/compiler/main/DynFlags.hs b/compiler/main/DynFlags.hs
index 2c0cccb0ba..8de96d80b3 100644
--- a/compiler/main/DynFlags.hs
+++ b/compiler/main/DynFlags.hs
@@ -244,6 +244,7 @@ data DynFlag
| Opt_Vectorise
| Opt_RegsGraph -- do graph coloring register allocation
| Opt_RegsIterative -- do iterative coalescing graph coloring register allocation
+ | Opt_PedanticBottoms -- Be picky about how we treat bottom
-- Interface files
| Opt_IgnoreInterfacePragmas
@@ -1753,6 +1754,7 @@ fFlags = [
( "liberate-case", Opt_LiberateCase, nop ),
( "spec-constr", Opt_SpecConstr, nop ),
( "cse", Opt_CSE, nop ),
+ ( "pedantic-bottoms", Opt_PedanticBottoms, nop ),
( "ignore-interface-pragmas", Opt_IgnoreInterfacePragmas, nop ),
( "omit-interface-pragmas", Opt_OmitInterfacePragmas, nop ),
( "expose-all-unfoldings", Opt_ExposeAllUnfoldings, nop ),
diff --git a/compiler/prelude/PrelRules.lhs b/compiler/prelude/PrelRules.lhs
index 4e39966183..40ee5b0850 100644
--- a/compiler/prelude/PrelRules.lhs
+++ b/compiler/prelude/PrelRules.lhs
@@ -348,6 +348,9 @@ litEq op_name is_eq
rule_fn _ _ = Nothing
do_lit_eq lit expr
+ | litIsLifted lit
+ = Nothing
+ | otherwise
= Just (mkWildCase expr (literalType lit) boolTy
[(DEFAULT, [], val_if_neq),
(LitAlt lit, [], val_if_eq)])
diff --git a/compiler/simplCore/SimplCore.lhs b/compiler/simplCore/SimplCore.lhs
index 12d180642e..1081ce0752 100644
--- a/compiler/simplCore/SimplCore.lhs
+++ b/compiler/simplCore/SimplCore.lhs
@@ -191,8 +191,12 @@ getCoreToDo dflags
core_todo =
if opt_level == 0 then
- [vectorisation,
- simpl_phase 0 ["final"] max_iter]
+ [ vectorisation
+ , CoreDoSimplify max_iter
+ (base_mode { sm_phase = Phase 0
+ , sm_names = ["Non-opt simplification"] })
+ ]
+
else {- opt_level >= 1 -} [
-- We want to do the static argument transform before full laziness as it
@@ -296,7 +300,6 @@ getCoreToDo dflags
]
\end{code}
-
Loading plugins
\begin{code}
diff --git a/compiler/simplCore/SimplUtils.lhs b/compiler/simplCore/SimplUtils.lhs
index c326cbc74d..86dc88ddd1 100644
--- a/compiler/simplCore/SimplUtils.lhs
+++ b/compiler/simplCore/SimplUtils.lhs
@@ -43,7 +43,7 @@ import StaticFlags
import CoreSyn
import qualified CoreSubst
import PprCore
-import DataCon ( dataConCannotMatch )
+import DataCon ( dataConCannotMatch, dataConWorkId )
import CoreFVs
import CoreUtils
import CoreArity
@@ -1139,8 +1139,7 @@ tryEtaExpand env bndr rhs
= return (exprArity rhs, rhs)
| sm_eta_expand (getMode env) -- Provided eta-expansion is on
- , let dicts_cheap = dopt Opt_DictsCheap dflags
- new_arity = findArity dicts_cheap bndr rhs old_arity
+ , let new_arity = findArity dflags bndr rhs old_arity
, new_arity > manifest_arity -- And the curent manifest arity isn't enough
-- See Note [Eta expansion to manifes arity]
= do { tick (EtaExpansion bndr)
@@ -1152,16 +1151,21 @@ tryEtaExpand env bndr rhs
old_arity = idArity bndr
_dmd_arity = length $ fst $ splitStrictSig $ idStrictness bndr
-findArity :: Bool -> Id -> CoreExpr -> Arity -> Arity
+findArity :: DynFlags -> Id -> CoreExpr -> Arity -> Arity
-- This implements the fixpoint loop for arity analysis
-- See Note [Arity analysis]
-findArity dicts_cheap bndr rhs old_arity
- = go (exprEtaExpandArity (mk_cheap_fn dicts_cheap init_cheap_app) rhs)
+findArity dflags bndr rhs old_arity
+ = go (exprEtaExpandArity dflags init_cheap_app rhs)
-- We always call exprEtaExpandArity once, but usually
-- that produces a result equal to old_arity, and then
-- we stop right away (since arities should not decrease)
-- Result: the common case is that there is just one iteration
where
+ init_cheap_app :: CheapAppFun
+ init_cheap_app fn n_val_args
+ | fn == bndr = True -- On the first pass, this binder gets infinite arity
+ | otherwise = isCheapApp fn n_val_args
+
go :: Arity -> Arity
go cur_arity
| cur_arity <= old_arity = cur_arity
@@ -1172,46 +1176,12 @@ findArity dicts_cheap bndr rhs old_arity
, ppr rhs])
go new_arity
where
- new_arity = exprEtaExpandArity (mk_cheap_fn dicts_cheap cheap_app) rhs
-
+ new_arity = exprEtaExpandArity dflags cheap_app rhs
+
cheap_app :: CheapAppFun
cheap_app fn n_val_args
| fn == bndr = n_val_args < cur_arity
| otherwise = isCheapApp fn n_val_args
-
- init_cheap_app :: CheapAppFun
- init_cheap_app fn n_val_args
- | fn == bndr = True -- On the first pass, this binder gets infinite arity
- | otherwise = isCheapApp fn n_val_args
-
-mk_cheap_fn :: Bool -> CheapAppFun -> CheapFun
-mk_cheap_fn dicts_cheap cheap_app
- | not dicts_cheap
- = \e _ -> exprIsCheap' cheap_app e
- | otherwise
- = \e mb_ty -> exprIsCheap' cheap_app e
- || case mb_ty of
- Nothing -> False
- Just ty -> isDictLikeTy ty
- -- If the experimental -fdicts-cheap flag is on, we eta-expand through
- -- dictionary bindings. This improves arities. Thereby, it also
- -- means that full laziness is less prone to floating out the
- -- application of a function to its dictionary arguments, which
- -- can thereby lose opportunities for fusion. Example:
- -- foo :: Ord a => a -> ...
- -- foo = /\a \(d:Ord a). let d' = ...d... in \(x:a). ....
- -- -- So foo has arity 1
- --
- -- f = \x. foo dInt $ bar x
- --
- -- The (foo DInt) is floated out, and makes ineffective a RULE
- -- foo (bar x) = ...
- --
- -- One could go further and make exprIsCheap reply True to any
- -- dictionary-typed expression, but that's more work.
- --
- -- See Note [Dictionary-like types] in TcType.lhs for why we use
- -- isDictLikeTy here rather than isDictTy
\end{code}
Note [Eta-expanding at let bindings]
@@ -1747,14 +1717,15 @@ mkCase1 _dflags scrut case_bndr alts -- Identity case
= do { tick (CaseIdentity case_bndr)
; return (re_cast scrut rhs1) }
where
- identity_alt (con, args, rhs) = check_eq con args rhs
-
- check_eq con args (Cast e co) | not (any (`elemVarSet` tyCoVarsOfCo co) args)
- {- See Note [RHS casts] -} = check_eq con args e
- check_eq _ _ (Var v) = v == case_bndr
- check_eq (LitAlt lit') _ (Lit lit) = lit == lit'
- check_eq (DataAlt con) args rhs = rhs `cheapEqExpr` mkConApp con (arg_tys ++ varsToCoreExprs args)
- check_eq _ _ _ = False
+ identity_alt (con, args, rhs) = check_eq rhs con args
+
+ check_eq (Cast rhs co) con args = not (any (`elemVarSet` tyCoVarsOfCo co) args)
+ {- See Note [RHS casts] -} && check_eq rhs con args
+ check_eq (Lit lit) (LitAlt lit') _ = lit == lit'
+ check_eq (Var v) _ _ | v == case_bndr = True
+ check_eq (Var v) (DataAlt con) [] = v == dataConWorkId con -- Optimisation only
+ check_eq rhs (DataAlt con) args = rhs `cheapEqExpr` mkConApp con (arg_tys ++ varsToCoreExprs args)
+ check_eq _ _ _ = False
arg_tys = map Type (tyConAppArgs (idType case_bndr))
diff --git a/compiler/simplCore/Simplify.lhs b/compiler/simplCore/Simplify.lhs
index 60b6889d5c..a8f7761e61 100644
--- a/compiler/simplCore/Simplify.lhs
+++ b/compiler/simplCore/Simplify.lhs
@@ -21,6 +21,7 @@ import Type hiding ( substTy, extendTvSubst, substTyVar )
import SimplEnv
import SimplUtils
import FamInstEnv ( FamInstEnv )
+import Literal ( litIsLifted )
import Id
import MkId ( seqId, realWorldPrimId )
import MkCore ( mkImpossibleExpr )
@@ -1629,7 +1630,7 @@ to just
This particular example shows up in default methods for
comparision operations (e.g. in (>=) for Int.Int32)
-Note [CaseElimination: lifted case]
+Note [Case elimination: lifted case]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
We also make sure that we deal with this very common case,
where x has a lifted type:
@@ -1716,6 +1717,7 @@ rebuildCase, reallyRebuildCase
rebuildCase env scrut case_bndr alts cont
| Lit lit <- scrut -- No need for same treatment as constructors
-- because literals are inlined more vigorously
+ , not (litIsLifted lit)
= do { tick (KnownBranch case_bndr)
; case findAlt (LitAlt lit) alts of
Nothing -> missingAlt env case_bndr alts cont
@@ -1751,7 +1753,11 @@ rebuildCase env scrut case_bndr [(_, bndrs, rhs)] cont
, if isUnLiftedType (idType case_bndr)
then ok_for_spec -- Satisfy the let-binding invariant
else elim_lifted
- = do { tick (CaseElim case_bndr)
+ = do { -- pprTrace "case elim" (vcat [ppr case_bndr, ppr (exprIsHNF scrut),
+ -- ppr strict_case_bndr, ppr (scrut_is_var scrut),
+ -- ppr ok_for_spec,
+ -- ppr scrut]) $
+ tick (CaseElim case_bndr)
; env' <- simplNonRecX env case_bndr scrut
-- If case_bndr is deads, simplNonRecX will discard
; simplExprF env' rhs cont }
diff --git a/compiler/specialise/SpecConstr.lhs b/compiler/specialise/SpecConstr.lhs
index 12492836ab..d2c07bcc1b 100644
--- a/compiler/specialise/SpecConstr.lhs
+++ b/compiler/specialise/SpecConstr.lhs
@@ -31,6 +31,7 @@ import CoreUtils
import CoreUnfold ( couldBeSmallEnoughToInline )
import CoreFVs ( exprsFreeVars )
import CoreMonad
+import Literal ( litIsLifted )
import HscTypes ( ModGuts(..) )
import WwLib ( mkWorkerArgs )
import DataCon
@@ -1714,7 +1715,8 @@ argsToPats env in_scope val_env args occs
\begin{code}
isValue :: ValueEnv -> CoreExpr -> Maybe Value
isValue _env (Lit lit)
- = Just (ConVal (LitAlt lit) [])
+ | litIsLifted lit = Nothing
+ | otherwise = Just (ConVal (LitAlt lit) [])
isValue env (Var v)
| Just stuff <- lookupVarEnv env v
diff --git a/compiler/typecheck/TcGenDeriv.lhs b/compiler/typecheck/TcGenDeriv.lhs
index 202dace414..1cdb72c311 100644
--- a/compiler/typecheck/TcGenDeriv.lhs
+++ b/compiler/typecheck/TcGenDeriv.lhs
@@ -177,27 +177,32 @@ gen_Eq_binds :: SrcSpan -> TyCon -> (LHsBinds RdrName, BagDerivStuff)
gen_Eq_binds loc tycon
= (method_binds, aux_binds)
where
- (nullary_cons, nonnullary_cons)
+ (nullary_cons, non_nullary_cons)
| isNewTyCon tycon = ([], tyConDataCons tycon)
| otherwise = partition isNullarySrcDataCon (tyConDataCons tycon)
no_nullary_cons = null nullary_cons
- rest | no_nullary_cons
- = case tyConSingleDataCon_maybe tycon of
- Just _ -> []
- Nothing -> -- if cons don't match, then False
- [([nlWildPat, nlWildPat], false_Expr)]
- | otherwise -- calc. and compare the tags
- = [([a_Pat, b_Pat],
- untag_Expr tycon [(a_RDR,ah_RDR), (b_RDR,bh_RDR)]
- (genOpApp (nlHsVar ah_RDR) eqInt_RDR (nlHsVar bh_RDR)))]
+ fall_through_eqn
+ | no_nullary_cons -- All constructors have arguments
+ = case non_nullary_cons of
+ [] -> [] -- No constructors; no fall-though case
+ [_] -> [] -- One constructor; no fall-though case
+ _ -> -- Two or more constructors; add fall-through of
+ -- (==) _ _ = False
+ [([nlWildPat, nlWildPat], false_Expr)]
+
+ | otherwise -- One or more nullary cons; add fall-through of
+ -- extract tags compare for equality
+ = [([a_Pat, b_Pat],
+ untag_Expr tycon [(a_RDR,ah_RDR), (b_RDR,bh_RDR)]
+ (genOpApp (nlHsVar ah_RDR) eqInt_RDR (nlHsVar bh_RDR)))]
aux_binds | no_nullary_cons = emptyBag
| otherwise = unitBag $ DerivAuxBind $ DerivCon2Tag tycon
method_binds = listToBag [eq_bind, ne_bind]
- eq_bind = mk_FunBind loc eq_RDR (map pats_etc nonnullary_cons ++ rest)
+ eq_bind = mk_FunBind loc eq_RDR (map pats_etc non_nullary_cons ++ fall_through_eqn)
ne_bind = mk_easy_FunBind loc ne_RDR [a_Pat, b_Pat] (
nlHsApp (nlHsVar not_RDR) (nlHsPar (nlHsVarApps eq_RDR [a_RDR, b_RDR])))
diff --git a/docs/users_guide/flags.xml b/docs/users_guide/flags.xml
index e765525c13..1245d25fde 100644
--- a/docs/users_guide/flags.xml
+++ b/docs/users_guide/flags.xml
@@ -1538,6 +1538,15 @@
</row>
<row>
+ <entry><option>-fpedantic-bottoms</option></entry>
+ <entry>Make GHC be more precise about its treatment of bottom (but see also
+ <option>-fno-state-hack</option>). In particular, GHC will not
+ eta-expand through a case expression.</entry>
+ <entry>dynamic</entry>
+ <entry><option>-fno-pedantic-bottoms</option></entry>
+ </row>
+
+ <row>
<entry><option>-fomit-interface-pragmas</option></entry>
<entry>Don't generate interface pragmas</entry>
<entry>dynamic</entry>
diff --git a/docs/users_guide/glasgow_exts.xml b/docs/users_guide/glasgow_exts.xml
index 6d1b293701..1eb041bc27 100755
--- a/docs/users_guide/glasgow_exts.xml
+++ b/docs/users_guide/glasgow_exts.xml
@@ -8344,12 +8344,21 @@ happen.
</programlisting>
<para>A <literal>SPECIALIZE</literal> pragma for a function can
- be put anywhere its type signature could be put.</para>
+ be put anywhere its type signature could be put. Moreover, you
+ can also <literal>SPECIALIZE</literal> an <emphasis>imported</emphasis>
+ provided it was given an <literal>INLINABLE</literal> pragma at its definition site
+ (<xref linkend="inlinable-pragma"/>)</para>
<para>A <literal>SPECIALIZE</literal> has the effect of generating
(a) a specialised version of the function and (b) a rewrite rule
- (see <xref linkend="rewrite-rules"/>) that rewrites a call to the
- un-specialised function into a call to the specialised one.</para>
+ (see <xref linkend="rewrite-rules"/>) that rewrites a call to
+ the un-specialised function into a call to the specialised one.
+ Moreover, given a <literal>SPECIALIZE</literal> pragma for a
+ function <literal>f</literal>, GHC will automatically create
+ specialisations for any type-class-overloaded functions called
+ by <literal>f</literal>, if they are in the same module as
+ the <literal>SPECIALIZE</literal> pragma, or if they are
+ <literal>INLINABLE</literal>; and so on, transitively.</para>
<para>The type in a SPECIALIZE pragma can be any type that is less
polymorphic than the type of the original function. In concrete terms,
diff --git a/docs/users_guide/using.xml b/docs/users_guide/using.xml
index 4cace1ee88..2837842a0e 100644
--- a/docs/users_guide/using.xml
+++ b/docs/users_guide/using.xml
@@ -1856,6 +1856,20 @@ f "2" = 2
<varlistentry>
<term>
+ <option>-fpedantic-bottoms</option>
+ <indexterm><primary><option>-fpedantic-bottoms</option></primary></indexterm>
+ </term>
+ <listitem>
+ <para>Make GHC be more precise about its treatment of bottom (but see also
+ <option>-fno-state-hack</option>). In particular, stop GHC
+ eta-expanding through a case expression, which is good for
+ performance, but bad if you are using <literal>seq</literal> on
+ partial applications.</para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term>
<option>-fomit-interface-pragmas</option>
<indexterm><primary><option>-fomit-interface-pragmas</option></primary></indexterm>
</term>
diff --git a/includes/Rts.h b/includes/Rts.h
index 91ec76d467..5caba59dbe 100644
--- a/includes/Rts.h
+++ b/includes/Rts.h
@@ -213,6 +213,7 @@ void _assertFail(const char *filename, unsigned int linenum)
#include "rts/TTY.h"
#include "rts/Utils.h"
#include "rts/PrimFloat.h"
+#include "rts/Main.h"
/* Misc stuff without a home */
DLL_IMPORT_RTS extern char **prog_argv; /* so we can get at these from Haskell */
diff --git a/includes/RtsAPI.h b/includes/RtsAPI.h
index dc151faf07..329b1569ab 100644
--- a/includes/RtsAPI.h
+++ b/includes/RtsAPI.h
@@ -38,26 +38,64 @@ typedef struct StgClosure_ *HaskellObj;
typedef struct Capability_ Capability;
/* ----------------------------------------------------------------------------
+ RTS configuration settings, for passing to hs_init_ghc()
+ ------------------------------------------------------------------------- */
+
+typedef enum {
+ RtsOptsNone, // +RTS causes an error
+ RtsOptsSafeOnly, // safe RTS options allowed; others cause an error
+ RtsOptsAll // all RTS options allowed
+ } RtsOptsEnabledEnum;
+
+// The RtsConfig struct is passed (by value) to hs_init_ghc(). The
+// reason for using a struct is extensibility: we can add more
+// fields to this later without breaking existing client code.
+typedef struct {
+ RtsOptsEnabledEnum rts_opts_enabled;
+ const char *rts_opts;
+} RtsConfig;
+
+// Clients should start with defaultRtsConfig and then customise it.
+// Bah, I really wanted this to be a const struct value, but it seems
+// you can't do that in C (it generates code).
+extern const RtsConfig defaultRtsConfig;
+
+/* ----------------------------------------------------------------------------
Starting up and shutting down the Haskell RTS.
------------------------------------------------------------------------- */
-extern void startupHaskell ( int argc, char *argv[],
+
+/* DEPRECATED, use hs_init() or hs_init_ghc() instead */
+extern void startupHaskell ( int argc, char *argv[],
void (*init_root)(void) );
+
+/* DEPRECATED, use hs_exit() instead */
extern void shutdownHaskell ( void );
+
+/*
+ * GHC-specific version of hs_init() that allows specifying whether
+ * +RTS ... -RTS options are allowed or not (default: only "safe"
+ * options are allowed), and allows passing an option string that is
+ * to be interpreted by the RTS only, not passed to the program.
+ */
+extern void hs_init_ghc (int *argc, char **argv[], // program arguments
+ RtsConfig rts_config); // RTS configuration
+
extern void shutdownHaskellAndExit ( int exitCode )
#if __GNUC__ >= 3
__attribute__((__noreturn__))
#endif
;
+
+#ifndef mingw32_HOST_OS
+extern void shutdownHaskellAndSignal (int sig);
+#endif
+
extern void getProgArgv ( int *argc, char **argv[] );
extern void setProgArgv ( int argc, char *argv[] );
extern void getFullProgArgv ( int *argc, char **argv[] );
extern void setFullProgArgv ( int argc, char *argv[] );
extern void freeFullProgArgv ( void ) ;
-#ifndef mingw32_HOST_OS
-extern void shutdownHaskellAndSignal (int sig);
-#endif
-
/* exit() override */
extern void (*exitFn)(int);
diff --git a/includes/RtsOpts.h b/includes/RtsOpts.h
deleted file mode 100644
index b8eab68d3b..0000000000
--- a/includes/RtsOpts.h
+++ /dev/null
@@ -1,20 +0,0 @@
-/* -----------------------------------------------------------------------------
- *
- * (c) The GHC Team, 2010
- *
- * En/disable RTS options
- *
- * ---------------------------------------------------------------------------*/
-
-#ifndef RTSOPTS_H
-#define RTSOPTS_H
-
-typedef enum {
- RtsOptsNone, // +RTS causes an error
- RtsOptsSafeOnly, // safe RTS options allowed; others cause an error
- RtsOptsAll // all RTS options allowed
- } RtsOptsEnabledEnum;
-
-extern const RtsOptsEnabledEnum rtsOptsEnabled;
-
-#endif /* RTSOPTS_H */
diff --git a/rts/RtsMain.h b/includes/rts/Main.h
index e004480cce..1c332fc95c 100644
--- a/rts/RtsMain.h
+++ b/includes/rts/Main.h
@@ -13,7 +13,9 @@
* The entry point for Haskell programs that use a Haskell main function
* -------------------------------------------------------------------------- */
-int hs_main(int argc, char *argv[], StgClosure *main_closure)
+int hs_main (int argc, char *argv[], // program args
+ StgClosure *main_closure, // closure for Main.main
+ RtsConfig rts_config) // RTS configuration
GNUC3_ATTRIBUTE(__noreturn__);
#endif /* RTSMAIN_H */
diff --git a/mk/validate-settings.mk b/mk/validate-settings.mk
index 0516be8f56..86bb73ed60 100644
--- a/mk/validate-settings.mk
+++ b/mk/validate-settings.mk
@@ -27,7 +27,11 @@ GhcStage2HcOpts += -O -fwarn-tabs -dcore-lint
# running of the tests, and faster building of the utils to be installed
GhcLibHcOpts += -O -dcore-lint
+ifeq "$(ValidateSpeed)" "FAST"
+GhcLibWays := v
+else
GhcLibWays := $(filter v dyn,$(GhcLibWays))
+endif
SplitObjs = NO
NoFibWays =
STRIP_CMD = :
@@ -47,7 +51,7 @@ BUILD_DOCBOOK_PDF = NO
ifeq "$(ValidateHpc)" "YES"
GhcStage2HcOpts += -fhpc -hpcdir $(TOP)/testsuite/hpc_output/
endif
-ifeq "$(ValidateSlow)" "YES"
+ifeq "$(ValidateSpeed)" "SLOW"
GhcStage2HcOpts += -DDEBUG
endif
diff --git a/rts/Main.c b/rts/Main.c
deleted file mode 100644
index c7a559fc14..0000000000
--- a/rts/Main.c
+++ /dev/null
@@ -1,24 +0,0 @@
-/* -----------------------------------------------------------------------------
- *
- * (c) The GHC Team 2009
- *
- * The C main() function for a standalone Haskell program.
- *
- * Note that this is not part of the RTS. It calls into the RTS to get things
- * going. It is compiled to a separate Main.o which is linked into every
- * standalone Haskell program that uses a Haskell Main.main function
- * (as opposed to a mixed Haskell C program using a C main function).
- *
- * ---------------------------------------------------------------------------*/
-
-#include "PosixSource.h"
-#include "Rts.h"
-#include "RtsMain.h"
-
-/* Similarly, we can refer to the ZCMain_main_closure here */
-extern StgClosure ZCMain_main_closure;
-
-int main(int argc, char *argv[])
-{
- return hs_main(argc, argv, &ZCMain_main_closure);
-}
diff --git a/rts/RtsFlags.c b/rts/RtsFlags.c
index d2b4945c19..d8bcf1c915 100644
--- a/rts/RtsFlags.c
+++ b/rts/RtsFlags.c
@@ -10,7 +10,6 @@
#include "PosixSource.h"
#include "Rts.h"
-#include "RtsOpts.h"
#include "RtsUtils.h"
#include "Profiling.h"
#include "RtsFlags.h"
@@ -396,9 +395,10 @@ strequal(const char *a, const char * b)
return(strcmp(a, b) == 0);
}
-static void splitRtsFlags(char *s)
+static void splitRtsFlags(const char *s)
{
- char *c1, *c2;
+ const char *c1, *c2;
+ char *t;
c1 = s;
do {
@@ -408,10 +408,10 @@ static void splitRtsFlags(char *s)
if (c1 == c2) { break; }
- s = stgMallocBytes(c2-c1+1, "RtsFlags.c:splitRtsFlags()");
- strncpy(s, c1, c2-c1);
- s[c2-c1] = '\0';
- rts_argv[rts_argc++] = s;
+ t = stgMallocBytes(c2-c1+1, "RtsFlags.c:splitRtsFlags()");
+ strncpy(t, c1, c2-c1);
+ t[c2-c1] = '\0';
+ rts_argv[rts_argc++] = t;
c1 = c2;
} while (*c1 != '\0');
@@ -434,7 +434,9 @@ static void splitRtsFlags(char *s)
-------------------------------------------------------------------------- */
-void setupRtsFlags (int *argc, char *argv[])
+void setupRtsFlags (int *argc, char *argv[],
+ RtsOptsEnabledEnum rtsOptsEnabled,
+ const char *ghc_rts_opts)
{
nat mode;
nat total_arg;
@@ -554,14 +556,14 @@ static void checkUnsafe(RtsOptsEnabledEnum enabled)
}
}
-static void procRtsOpts (int rts_argc0, RtsOptsEnabledEnum enabled)
+static void procRtsOpts (int rts_argc0, RtsOptsEnabledEnum rtsOptsEnabled)
{
rtsBool error = rtsFalse;
int arg;
if (!(rts_argc0 < rts_argc)) return;
- if (enabled == RtsOptsNone) {
+ if (rtsOptsEnabled == RtsOptsNone) {
errorBelch("RTS options are disabled. Link with -rtsopts to enable them.");
stg_exit(EXIT_FAILURE);
}
@@ -578,7 +580,7 @@ static void procRtsOpts (int rts_argc0, RtsOptsEnabledEnum enabled)
rtsBool option_checked = rtsFalse;
#define OPTION_SAFE option_checked = rtsTrue;
-#define OPTION_UNSAFE checkUnsafe(enabled); option_checked = rtsTrue;
+#define OPTION_UNSAFE checkUnsafe(rtsOptsEnabled); option_checked = rtsTrue;
if (rts_argv[arg][0] != '-') {
fflush(stdout);
@@ -1142,7 +1144,7 @@ error = rtsTrue;
errorBelch("bad value for -N");
error = rtsTrue;
}
- if (enabled == RtsOptsSafeOnly &&
+ if (rtsOptsEnabled == RtsOptsSafeOnly &&
nNodes > (int)getNumberOfProcessors()) {
errorBelch("Using large values for -N is not allowed by default. Link with -rtsopts to allow full control.");
stg_exit(EXIT_FAILURE);
diff --git a/rts/RtsFlags.h b/rts/RtsFlags.h
index a6bfe0a924..73eb6688a6 100644
--- a/rts/RtsFlags.h
+++ b/rts/RtsFlags.h
@@ -15,7 +15,9 @@
/* Routines that operate-on/to-do-with RTS flags: */
void initRtsFlagsDefaults (void);
-void setupRtsFlags (int *argc, char *argv[]);
+void setupRtsFlags (int *argc, char *argv[],
+ RtsOptsEnabledEnum rtsOptsEnabled,
+ const char *ghc_rts_opts);
void setProgName (char *argv[]);
void freeRtsArgs (void);
diff --git a/rts/RtsMain.c b/rts/RtsMain.c
index a822da9749..0f6ca82382 100644
--- a/rts/RtsMain.c
+++ b/rts/RtsMain.c
@@ -13,7 +13,6 @@
#include "RtsAPI.h"
#include "RtsUtils.h"
-#include "RtsMain.h"
#include "Prelude.h"
#include "Task.h"
#if defined(mingw32_HOST_OS)
@@ -33,8 +32,9 @@
static int progargc;
static char **progargv;
static StgClosure *progmain_closure; /* This will be ZCMain_main_closure */
+static RtsConfig rtsconfig;
-/* Hack: we assume that we're building a batch-mode system unless
+/* Hack: we assume that we're building a batch-mode system unless
* INTERPRETER is set
*/
#ifndef INTERPRETER /* Hack */
@@ -43,9 +43,8 @@ static void real_main(void)
{
int exit_status;
SchedulerStatus status;
- /* all GranSim/GUM init is done in startupHaskell; sets IAmMainThread! */
- startupHaskell(progargc,progargv,NULL);
+ hs_init_ghc(&progargc, &progargv, rtsconfig);
/* kick off the computation by creating the main thread with a pointer
to mainIO_closure representing the computation of the overall program;
@@ -89,22 +88,26 @@ static void real_main(void)
shutdownHaskellAndExit(exit_status);
}
-/* The rts entry point from a compiled program using a Haskell main function.
- * This gets called from a tiny main function which gets linked into each
- * compiled Haskell program that uses a Haskell main function.
+/* The rts entry point from a compiled program using a Haskell main
+ * function. This gets called from a tiny main function generated by
+ * GHC and linked into each compiled Haskell program that uses a
+ * Haskell main function.
*
* We expect the caller to pass ZCMain_main_closure for
* main_closure. The reason we cannot refer to this symbol directly
* is because we're inside the rts and we do not know for sure that
* we'll be using a Haskell main function.
*/
-int hs_main(int argc, char *argv[], StgClosure *main_closure)
+int hs_main (int argc, char *argv[], // program args
+ StgClosure *main_closure, // closure for Main.main
+ RtsConfig rts_config) // RTS configuration
{
/* We do this dance with argc and argv as otherwise the SEH exception
stuff (the BEGIN/END CATCH below) on Windows gets confused */
progargc = argc;
progargv = argv;
progmain_closure = main_closure;
+ rtsconfig = rts_config;
#if defined(mingw32_HOST_OS)
BEGIN_CATCH
diff --git a/rts/RtsStartup.c b/rts/RtsStartup.c
index de8bf792c4..e8ed86c994 100755
--- a/rts/RtsStartup.c
+++ b/rts/RtsStartup.c
@@ -71,6 +71,11 @@ static int hs_init_count = 0;
static void flushStdHandles(void);
+const RtsConfig defaultRtsConfig = {
+ .rts_opts_enabled = RtsOptsSafeOnly,
+ .rts_opts = NULL
+};
+
/* -----------------------------------------------------------------------------
Initialise floating point unit on x86 (currently disabled; See Note
[x86 Floating point precision] in compiler/nativeGen/X86/Instr.hs)
@@ -106,6 +111,12 @@ x86_init_fpu ( void )
void
hs_init(int *argc, char **argv[])
{
+ hs_init_ghc(argc, argv, defaultRtsConfig);
+}
+
+void
+hs_init_ghc(int *argc, char **argv[], RtsConfig rts_config)
+{
hs_init_count++;
if (hs_init_count > 1) {
// second and subsequent inits are ignored
@@ -132,7 +143,8 @@ hs_init(int *argc, char **argv[])
/* Parse the flags, separating the RTS flags from the programs args */
if (argc != NULL && argv != NULL) {
setFullProgArgv(*argc,*argv);
- setupRtsFlags(argc, *argv);
+ setupRtsFlags(argc, *argv,
+ rts_config.rts_opts_enabled, rts_config.rts_opts);
}
/* Initialise the stats department, phase 1 */
diff --git a/rts/ThreadPaused.c b/rts/ThreadPaused.c
index a35a96232b..0507880e6a 100644
--- a/rts/ThreadPaused.c
+++ b/rts/ThreadPaused.c
@@ -28,14 +28,58 @@
struct stack_gap { StgWord gap_size; struct stack_gap *next_gap; };
+static struct stack_gap *
+updateAdjacentFrames (Capability *cap, StgTSO *tso,
+ StgUpdateFrame *upd, nat count, struct stack_gap *next)
+{
+ StgClosure *updatee;
+ struct stack_gap *gap;
+ nat i;
+
+ // The first one (highest address) is the frame we take the
+ // "master" updatee from; all the others will be made indirections
+ // to this one. It is essential that we do it this way around: we
+ // used to make the lowest-addressed frame the "master" frame and
+ // shuffle it down, but a bad case cropped up (#5505) where this
+ // happened repeatedly, generating a chain of indirections which
+ // the GC repeatedly traversed (indirection chains longer than one
+ // are not supposed to happen). So now after identifying a block
+ // of adjacent update frames we walk downwards again updating them
+ // all to point to the highest one, before squeezing out all but
+ // the highest one.
+ updatee = upd->updatee;
+ count--;
+
+ upd--;
+ gap = (struct stack_gap*)upd;
+
+ for (i = count; i > 0; i--, upd--) {
+ /*
+ * Check two things: that the two update frames
+ * don't point to the same object, and that the
+ * updatee_bypass isn't already an indirection.
+ * Both of these cases only happen when we're in a
+ * block hole-style loop (and there are multiple
+ * update frames on the stack pointing to the same
+ * closure), but they can both screw us up if we
+ * don't check.
+ */
+ if (upd->updatee != updatee && !closure_IND(upd->updatee)) {
+ updateThunk(cap, tso, upd->updatee, updatee);
+ }
+ }
+
+ gap->gap_size = count * sizeofW(StgUpdateFrame);
+ gap->next_gap = next;
+
+ return gap;
+}
+
static void
stackSqueeze(Capability *cap, StgTSO *tso, StgPtr bottom)
{
StgPtr frame;
- rtsBool prev_was_update_frame;
- StgClosure *updatee = NULL;
- StgRetInfoTable *info;
- StgWord current_gap_size;
+ nat adjacent_update_frames;
struct stack_gap *gap;
// Stage 1:
@@ -48,75 +92,43 @@ stackSqueeze(Capability *cap, StgTSO *tso, StgPtr bottom)
ASSERT(frame < bottom);
- prev_was_update_frame = rtsFalse;
- current_gap_size = 0;
+ adjacent_update_frames = 0;
gap = (struct stack_gap *) (frame - sizeofW(StgUpdateFrame));
- while (frame <= bottom) {
-
- info = get_ret_itbl((StgClosure *)frame);
- switch (info->i.type) {
+ while (frame <= bottom)
+ {
+ switch (get_ret_itbl((StgClosure *)frame)->i.type) {
- case UPDATE_FRAME:
+ case UPDATE_FRAME:
{
- StgUpdateFrame *upd = (StgUpdateFrame *)frame;
-
- if (prev_was_update_frame) {
+ if (adjacent_update_frames > 0) {
+ TICK_UPD_SQUEEZED();
+ }
+ adjacent_update_frames++;
- TICK_UPD_SQUEEZED();
- /* wasn't there something about update squeezing and ticky to be
- * sorted out? oh yes: we aren't counting each enter properly
- * in this case. See the log somewhere. KSW 1999-04-21
- *
- * Check two things: that the two update frames don't point to
- * the same object, and that the updatee_bypass isn't already an
- * indirection. Both of these cases only happen when we're in a
- * block hole-style loop (and there are multiple update frames
- * on the stack pointing to the same closure), but they can both
- * screw us up if we don't check.
- */
- if (upd->updatee != updatee && !closure_IND(upd->updatee)) {
- updateThunk(cap, tso, upd->updatee, updatee);
- }
-
- // now mark this update frame as a stack gap. The gap
- // marker resides in the bottom-most update frame of
- // the series of adjacent frames, and covers all the
- // frames in this series.
- current_gap_size += sizeofW(StgUpdateFrame);
- ((struct stack_gap *)frame)->gap_size = current_gap_size;
- ((struct stack_gap *)frame)->next_gap = gap;
-
- frame += sizeofW(StgUpdateFrame);
- continue;
- }
-
- // single update frame, or the topmost update frame in a series
- else {
- prev_was_update_frame = rtsTrue;
- updatee = upd->updatee;
- frame += sizeofW(StgUpdateFrame);
- continue;
- }
- }
+ frame += sizeofW(StgUpdateFrame);
+ continue;
+ }
default:
- prev_was_update_frame = rtsFalse;
-
- // we're not in a gap... check whether this is the end of a gap
+ // we're not in a gap... check whether this is the end of a gap
// (an update frame can't be the end of a gap).
- if (current_gap_size != 0) {
- gap = (struct stack_gap *) (frame - sizeofW(StgUpdateFrame));
- }
- current_gap_size = 0;
+ if (adjacent_update_frames > 1) {
+ gap = updateAdjacentFrames(cap, tso,
+ (StgUpdateFrame*)(frame - sizeofW(StgUpdateFrame)),
+ adjacent_update_frames, gap);
+ }
+ adjacent_update_frames = 0;
frame += stack_frame_sizeW((StgClosure *)frame);
continue;
}
}
- if (current_gap_size != 0) {
- gap = (struct stack_gap *) (frame - sizeofW(StgUpdateFrame));
+ if (adjacent_update_frames > 1) {
+ gap = updateAdjacentFrames(cap, tso,
+ (StgUpdateFrame*)(frame - sizeofW(StgUpdateFrame)),
+ adjacent_update_frames, gap);
}
// Now we have a stack with gaps in it, and we have to walk down
@@ -349,7 +361,7 @@ end:
debugTrace(DEBUG_squeeze,
"words_to_squeeze: %d, weight: %d, squeeze: %s",
words_to_squeeze, weight,
- weight < words_to_squeeze ? "YES" : "NO");
+ ((weight <= 8 && words_to_squeeze > 0) || weight < words_to_squeeze) ? "YES" : "NO");
// Should we squeeze or not? Arbitrary heuristic: we squeeze if
// the number of words we have to shift down is less than the
diff --git a/rts/ghc.mk b/rts/ghc.mk
index 54c941d019..40ff02fcc4 100644
--- a/rts/ghc.mk
+++ b/rts/ghc.mk
@@ -20,8 +20,7 @@ rts_dist_HC = $(GHC_STAGE1)
rts_WAYS = $(GhcLibWays) $(filter-out $(GhcLibWays),$(GhcRTSWays))
rts_dist_WAYS = $(rts_WAYS)
-ALL_RTS_LIBS = rts/dist/build/libHSrtsmain.a \
- $(foreach way,$(rts_WAYS),rts/dist/build/libHSrts$($(way)_libsuf))
+ALL_RTS_LIBS = $(foreach way,$(rts_WAYS),rts/dist/build/libHSrts$($(way)_libsuf))
all_rts : $(ALL_RTS_LIBS)
# -----------------------------------------------------------------------------
@@ -36,7 +35,6 @@ ALL_DIRS += posix
endif
EXCLUDED_SRCS :=
-EXCLUDED_SRCS += rts/Main.c
EXCLUDED_SRCS += rts/parallel/SysMan.c
EXCLUDED_SRCS += $(wildcard rts/Vis*.c)
@@ -485,15 +483,6 @@ $(DTRACEPROBES_H): $(DTRACEPROBES_SRC) includes/ghcplatform.h | $$(dir $$@)/.
endif
# -----------------------------------------------------------------------------
-# build the static lib containing the C main symbol
-
-ifneq "$(BINDIST)" "YES"
-rts/dist/build/libHSrtsmain.a : rts/dist/build/Main.o
- "$(RM)" $(RM_OPTS) $@
- "$(AR_STAGE1)" $(AR_OPTS_STAGE1) $(EXTRA_AR_ARGS_STAGE1) $@ $<
-endif
-
-# -----------------------------------------------------------------------------
# The RTS package config
# If -DDEBUG is in effect, adjust package conf accordingly..
diff --git a/rts/hooks/RtsOpts.c b/rts/hooks/RtsOpts.c
deleted file mode 100644
index 2aae37246e..0000000000
--- a/rts/hooks/RtsOpts.c
+++ /dev/null
@@ -1,14 +0,0 @@
-/* -----------------------------------------------------------------------------
- *
- * Default RTS options.
- *
- * ---------------------------------------------------------------------------*/
-
-#include "PosixSource.h"
-#include "Rts.h"
-
-#include <stdlib.h>
-
-// Default RTS options can be given by providing an alternate
-// definition for this variable, pointing to a string of RTS options.
-char *ghc_rts_opts = NULL;
diff --git a/validate b/validate
index 0010d743e4..eae6d01280 100755
--- a/validate
+++ b/validate
@@ -9,7 +9,8 @@
# compiler the test suite covers.
# 2008-07-01: 63% slower than the default.
# HTML generated here: testsuite/hpc_output/hpc_index.html
-# --fast: Default. Opposite to --slow.
+# --normal: Default settings
+# --fast: Omit dyn way, omit binary distribution
# --slow: Build stage2 with -DDEBUG.
# 2008-07-01: 14% slower than the default.
@@ -18,7 +19,7 @@ set -e
no_clean=0
testsuite_only=0
hpc=NO
-slow=NO
+speed=NORMAL
while [ $# -gt 0 ]
do
@@ -33,10 +34,13 @@ do
hpc=YES
;;
--slow)
- slow=YES
+ speed=SLOW
;;
--fast)
- slow=NO
+ speed=FAST
+ ;;
+ --normal)
+ speed=NORMAL
;;
*)
echo "Bad argument: $1" >&2
@@ -88,32 +92,41 @@ thisdir=`utils/ghc-pwd/dist-boot/ghc-pwd`
echo "Validating=YES" > mk/are-validating.mk
-$make -j$threads ValidateHpc=$hpc ValidateSlow=$slow
+$make -j$threads ValidateHpc=$hpc ValidateSpeed=$speed
# For a "debug make", add "--debug=b --debug=m"
-$make binary-dist-prep
-$make test_bindist TEST_PREP=YES
-
-#
-# Install the mtl package into the bindist, because it is used by some
-# tests. It isn't essential that we do this (the failing tests will
-# be treated as expected failures), but we get a bit more test
-# coverage, and also verify that we can install a package into the
-# bindist with Cabal.
-#
-bindistdir="bindisttest/install dir"
-cd libraries/mtl
-"$thisdir/$bindistdir/bin/ghc" --make Setup
-./Setup configure --with-ghc="$thisdir/$bindistdir/bin/ghc" --with-haddock="$thisdir/$bindistdir/bin/haddock" --global --builddir=dist-bindist --prefix="$thisdir/$bindistdir"
-./Setup build --builddir=dist-bindist
-./Setup haddock --builddir=dist-bindist
-./Setup install --builddir=dist-bindist
-./Setup clean --builddir=dist-bindist
-rm -f Setup Setup.exe Setup.hi Setup.o
-cd $thisdir
+# -----------------------------------------------------------------------------
+# Build and test a binary distribution (not --fast)
+
+if [ $speed != "FAST" ]; then
+
+ $make binary-dist-prep
+ $make test_bindist TEST_PREP=YES
+
+ #
+ # Install the mtl package into the bindist, because it is used by some
+ # tests. It isn't essential that we do this (the failing tests will
+ # be treated as expected failures), but we get a bit more test
+ # coverage, and also verify that we can install a package into the
+ # bindist with Cabal.
+ #
+ bindistdir="bindisttest/install dir"
+ cd libraries/mtl
+ "$thisdir/$bindistdir/bin/ghc" --make Setup
+ ./Setup configure --with-ghc="$thisdir/$bindistdir/bin/ghc" --with-haddock="$thisdir/$bindistdir/bin/haddock" --global --builddir=dist-bindist --prefix="$thisdir/$bindistdir"
+ ./Setup build --builddir=dist-bindist
+ ./Setup haddock --builddir=dist-bindist
+ ./Setup install --builddir=dist-bindist
+ ./Setup clean --builddir=dist-bindist
+ rm -f Setup Setup.exe Setup.hi Setup.o
+ cd $thisdir
+fi
fi # testsuite-only
+# -----------------------------------------------------------------------------
+# Run the testsuite
+
if [ "$hpc" = YES ]
then
# XXX With threads we'd need to give a different tix file to each thread
@@ -124,14 +137,22 @@ then
rm -f $HPCTIXFILE
fi
-if [ "$slow" = YES ]
-then
-MAKE_TEST_TARGET=fulltest
-else
-MAKE_TEST_TARGET=test
-fi
+case "$speed" in
+SLOW)
+ MAKE_TEST_TARGET=fulltest
+ BINDIST="BINDIST=YES"
+ ;;
+NORMAL)
+ MAKE_TEST_TARGET=test
+ BINDIST="BINDIST=YES"
+ ;;
+FAST)
+ MAKE_TEST_TARGET=test
+ BINDIST="BINDIST=NO"
+ ;;
+esac
-$make $MAKE_TEST_TARGET stage=2 BINDIST=YES THREADS=$threads 2>&1 | tee testlog
+$make $MAKE_TEST_TARGET stage=2 $BINDIST THREADS=$threads 2>&1 | tee testlog
if [ "$hpc" = YES ]
then