From 6752c1bf4fb3cafc5ff002158d47b1796cfebedf Mon Sep 17 00:00:00 2001 From: Simon Peyton Jones Date: Tue, 10 Jan 2023 21:57:34 +0000 Subject: Document the semantics of pattern bindings a bit better This MR is in response to the discussion on #22719 --- compiler/GHC/Hs/Utils.hs | 107 +++++++--- compiler/GHC/HsToCore/Expr.hs | 2 +- docs/users_guide/exts/primitives.rst | 15 +- docs/users_guide/exts/strict.rst | 215 +++++++++++++++------ testsuite/tests/deSugar/should_compile/T22719.hs | 21 ++ .../tests/deSugar/should_compile/T22719.stderr | 30 +++ testsuite/tests/deSugar/should_compile/all.T | 1 + 7 files changed, 295 insertions(+), 96 deletions(-) create mode 100644 testsuite/tests/deSugar/should_compile/T22719.hs create mode 100644 testsuite/tests/deSugar/should_compile/T22719.stderr diff --git a/compiler/GHC/Hs/Utils.hs b/compiler/GHC/Hs/Utils.hs index af222bf98a..5866243824 100644 --- a/compiler/GHC/Hs/Utils.hs +++ b/compiler/GHC/Hs/Utils.hs @@ -86,7 +86,7 @@ module GHC.Hs.Utils( mkLetStmt, -- * Collecting binders - isUnliftedHsBind, isBangedHsBind, + isUnliftedHsBind, isUnliftedHsBinds, isBangedHsBind, collectLocalBinders, collectHsValBinders, collectHsBindListBinders, collectHsIdBinders, @@ -905,55 +905,106 @@ to return a [Name] or [Id]. Before renaming the record punning and wild-card mechanism makes it hard to know what is bound. So these functions should not be applied to (HsSyn RdrName) -Note [Unlifted id check in isUnliftedHsBind] +Note [isUnliftedHsBind] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -The function isUnliftedHsBind is used to complain if we make a top-level -binding for a variable of unlifted type. +The function isUnliftedHsBind tells if the binding binds a variable of +unlifted type. e.g. -Such a binding is illegal if the top-level binding would be unlifted; -but also if the local letrec generated by desugaring AbsBinds would be. -E.g. - f :: Num a => (# a, a #) - g :: Num a => a -> a - f = ...g... - g = ...g... + - I# x = blah + - Just (I# x) = blah -The top-level bindings for f,g are not unlifted (because of the Num a =>), -but the local, recursive, monomorphic bindings are: +isUnliftedHsBind is used in two ways: +* To complain if we make a top-level binding for a variable of unlifted + type. E.g. any of the above bindings are illegal at top level + +* To generate a case expression for a non-recursive local let. E.g. + let Just (I# x) = blah in body + ==> + case blah of Just (I# x) -> body + See GHC.HsToCore.Expr.dsUnliftedBind. + +Wrinkles: + +(W1) For AbsBinds we must check if the local letrec generated by desugaring + AbsBinds would be unlifted; so we just recurse into the abs_binds. E.g. + f :: Num a => (# a, a #) + g :: Num a => a -> a + f = ...g... + g = ...g... + + The top-level bindings for f,g are not unlifted (because of the Num a =>), + but the local, recursive, monomorphic bindings are: t = /\a \(d:Num a). letrec fm :: (# a, a #) = ...g... gm :: a -> a = ...f... in (fm, gm) -Here the binding for 'fm' is illegal. So generally we check the abe_mono types. + Here the binding for 'fm' is illegal. So we recurse into the abs_binds + +(W2) BUT we have a special case when abs_sig is true; + see Note [The abs_sig field of AbsBinds] in GHC.Hs.Binds + +(W3) isUnliftedHsBind returns False even if the binding itself is + unlifted, provided it binds only lifted variables. E.g. + - (# a,b #) = (# reverse xs, xs #) + + - x = sqrt# y# :: Float# + + - type Unl :: UnliftedType + data Unl = MkUnl Int + MkUnl z = blah -BUT we have a special case when abs_sig is true; - see Note [The abs_sig field of AbsBinds] in GHC.Hs.Binds + In each case the RHS of the "=" has unlifted type, but isUnliftedHsBind + returns False. Reason: see GHC Proposal #35 + https://github.com/ghc-proposals/ghc-proposals/blob/master/ + proposals/0035-unbanged-strict-patterns.rst + +(W4) In particular, (W3) applies to a pattern that binds no variables at all. + So { _ = sqrt# y :: Float# } returns False from isUnliftedHsBind, but + { x = sqrt# y :: Float# } returns True. + This is arguably a bit confusing (see #22719) -} ----------------- Bindings -------------------------- -- | Should we treat this as an unlifted bind? This will be true for any -- bind that binds an unlifted variable, but we must be careful around --- AbsBinds. See Note [Unlifted id check in isUnliftedHsBind]. For usage +-- AbsBinds. See Note [isUnliftedHsBind]. For usage -- information, see Note [Strict binds checks] is GHC.HsToCore.Binds. isUnliftedHsBind :: HsBind GhcTc -> Bool -- works only over typechecked binds -isUnliftedHsBind bind - | XHsBindsLR (AbsBinds { abs_exports = exports, abs_sig = has_sig }) <- bind - = if has_sig - then any (is_unlifted_id . abe_poly) exports - else any (is_unlifted_id . abe_mono) exports +isUnliftedHsBind (XHsBindsLR (AbsBinds { abs_exports = exports + , abs_sig = has_sig + , abs_binds = binds })) + | has_sig = any (is_unlifted_id . abe_poly) exports + | otherwise = isUnliftedHsBinds binds + -- See wrinkle (W1) and (W2) in Note [isUnliftedHsBind] -- If has_sig is True we will never generate a binding for abe_mono, -- so we don't need to worry about it being unlifted. The abe_poly -- binding might not be: e.g. forall a. Num a => (# a, a #) + -- If has_sig is False, just recurse - | otherwise - = any is_unlifted_id (collectHsBindBinders CollNoDictBinders bind) - where - is_unlifted_id id = isUnliftedType (idType id) - -- bindings always have a fixed RuntimeRep, so it's OK - -- to call isUnliftedType here +isUnliftedHsBind (FunBind { fun_id = L _ fun }) + = is_unlifted_id fun + +isUnliftedHsBind (VarBind { var_id = var }) + = is_unlifted_id var + +isUnliftedHsBind (PatBind { pat_lhs = pat }) + = any is_unlifted_id (collectPatBinders CollNoDictBinders pat) + -- If we changed our view on (W3) you could add + -- || isUnliftedType pat_ty + -- to this check + +isUnliftedHsBind (PatSynBind {}) = panic "isUnliftedBind: PatSynBind" + +isUnliftedHsBinds :: LHsBinds GhcTc -> Bool +isUnliftedHsBinds = anyBag (isUnliftedHsBind . unLoc) + +is_unlifted_id :: Id -> Bool +is_unlifted_id id = isUnliftedType (idType id) + -- Bindings always have a fixed RuntimeRep, so it's OK + -- to call isUnliftedType here -- | Is a binding a strict variable or pattern bind (e.g. @!x = ...@)? isBangedHsBind :: HsBind GhcTc -> Bool diff --git a/compiler/GHC/HsToCore/Expr.hs b/compiler/GHC/HsToCore/Expr.hs index 54bd0e45b6..52a7822c1e 100644 --- a/compiler/GHC/HsToCore/Expr.hs +++ b/compiler/GHC/HsToCore/Expr.hs @@ -197,7 +197,7 @@ dsUnliftedBind (FunBind { fun_id = L l fun ; let rhs' = core_wrap (mkOptTickBox tick rhs) ; return (bindNonRec fun rhs' body) } -dsUnliftedBind (PatBind {pat_lhs = pat, pat_rhs = grhss +dsUnliftedBind (PatBind { pat_lhs = pat, pat_rhs = grhss , pat_ext = (ty, _) }) body = -- let C x# y# = rhs in body -- ==> case rhs of C x# y# -> body diff --git a/docs/users_guide/exts/primitives.rst b/docs/users_guide/exts/primitives.rst index a596c13f23..9caab6b146 100644 --- a/docs/users_guide/exts/primitives.rst +++ b/docs/users_guide/exts/primitives.rst @@ -136,7 +136,7 @@ There are some restrictions on the use of primitive types: f x = let !(Foo a b, w) = ..rhs.. in ..body.. - since ``b`` has type ``Int#``. + since ``b`` has type ``Int#``. See :ref:`recursive-and-polymorphic-let-bindings`. .. _unboxed-tuples: @@ -198,7 +198,8 @@ example desugars like this: q = snd t in ..body.. -Indeed, the bindings can even be recursive. +Indeed, the bindings can even be recursive. See :ref:`recursive-and-polymorphic-let-bindings` +for a more precise account. To refer to the unboxed tuple type constructors themselves, e.g. if you want to attach instances to them, use ``(# #)``, ``(#,#)``, ``(#,,#)``, etc. @@ -436,9 +437,13 @@ argument either way), GHC currently disallows the more general type ``PEither @l Int Bool -> Bool``. This is a consequence of the `representation-polymorphic binder restriction <#representation-polymorphism-restrictions>`__, -Due to :ghc-ticket:`19487`, it's -currently not possible to declare levity-polymorphic data types with nullary -data constructors. There's a workaround, though: :: +Pattern matching against an unlifted data type work just like that for lifted +types; but see :ref:`recursive-and-polymorphic-let-bindings` for the semantics of +pattern bindings involving unlifted data types. + +Due to :ghc-ticket:`19487`, it is +not currently possible to declare levity-polymorphic data types with nullary +data constructors. There is a workaround, though: :: type T :: TYPE (BoxedRep l) data T where diff --git a/docs/users_guide/exts/strict.rst b/docs/users_guide/exts/strict.rst index 8c0925fdba..1e7bca739b 100644 --- a/docs/users_guide/exts/strict.rst +++ b/docs/users_guide/exts/strict.rst @@ -116,12 +116,10 @@ In both cases ``e`` is evaluated before starting to evaluate ``body``. Note the following points: -- This form is not the same as a bang pattern: - The declarations ``f3 (x,y) = ...`` and ``f4 !(x,y) = ....`` - are equivalent (because the constructor pattern ``(x,y)`` forces the argument), - but the expressions ``let (p,q) = e in body`` and ``let !(p,q) = e in body`` - are different. The former will not evaluate ``e`` unless - ``p`` or ``q`` is forced in ``body``. +- A strict binding (with a top level ``!``) should not be thought of as a regular + pattern binding that happens to have a bang pattern (:ref:`bang-patterns-informal`) on the LHS. + Rather, the top level ``!`` should be considered part of the let-binding, rather than + part of the pattern. This makes a difference when we come to the rules in :ref:`bang-patterns-sem`. - Only a top-level bang (perhaps under parentheses) makes the binding strict; otherwise, it is considered a normal bang pattern. For example, :: @@ -354,8 +352,8 @@ Dynamic semantics of bang patterns ---------------------------------- The semantics of Haskell pattern matching is described in `Section -3.17.2 `__ of -the Haskell Report. To this description add one extra item 10, saying: +3.17.2 `__ of +the Haskell Report. To this description add one extra item 9, saying: - Matching the pattern ``!pat`` against a value ``v`` behaves as follows: @@ -365,8 +363,8 @@ the Haskell Report. To this description add one extra item 10, saying: - otherwise, ``pat`` is matched against ``v`` Similarly, in Figure 4 of `Section -3.17.3 `__, -add a new case (t): :: +3.17.3 `__, +add a new case (w): :: case v of { !pat -> e; _ -> e' } = v `seq` case v of { pat -> e; _ -> e' } @@ -377,20 +375,62 @@ Haskell Report. Replace the "Translation" there with the following one. Given ``let { bind1 ... bindn } in body``: -.. admonition:: FORCE +.. admonition:: SPLIT-LAZY - Replace any binding ``!p = e`` with ``v = case e of p -> (x1, ..., xn); (x1, ..., xn) = v`` and replace - ``body`` with ``v seq body``, where ``v`` is fresh. This translation works fine if - ``p`` is already a variable ``x``, but can obviously be optimised by not - introducing a fresh variable ``v``. + Given a lazy pattern binding ``p = e``, where ``p`` is not a variable, + and ``x1...xn`` are the variables bound by ``p``, + and all these binders have lifted type, + replace the binding with this (where ``v`` is fresh):: -.. admonition:: SPLIT + v = case e of { p -> (x1, ..., xn) } + x1 = case v of { (x1, ..., xn) -> x1 } + ... + xn = case v of { (x1, ..., xn) -> xn }`` - Replace any binding ``p = e``, where ``p`` is not a variable, with - ``v = e; x1 = case v of p -> x1; ...; xn = case v of p -> xn``, where - ``v`` is fresh and ``x1``.. ``xn`` are the bound variables of ``p``. - Again if ``e`` is a variable, this can be optimised by not introducing a - fresh variable. + If n=1 (i.e. exactly one variable is bound), + the desugaring uses the ``Solo`` type to make a 1-tuple. + +.. admonition:: SPLIT-STRICT + + Given a strict pattern binding ``!p = e``, where + ``x1...xn`` are the variables bound by ``p``, + and all these binders have lifted type: + + 1. Replace the binding with this (where ``v`` is fresh):: + + v = case e of { !p -> (x1, ..., xn) } + (x1, ..., xn) = v + + 2. Replace ``body`` with ``v `seq` body``. + + As in SPLIT-LAZY, if n=1 the desugaring uses the ``Solo`` type to make a 1-tuple. + + This transformation is illegal at the top + level of a module (since there is no ``body``), so strict bindings are illegal at top level. + + The transformation is correct when ``p`` is a variable ``x``, but can be optimised to:: + + let !x = e in body ==> let x = e in x `seq` body + +.. admonition:: CASE + + Given a non-recursive strict pattern binding ``!p = e``, + where ``x1...xn`` are the variables bound by ``p``, + and any of the binders has unlifted type: + replace the binding with nothing at all, and replace + ``body`` with ``case e of p -> body``. + + This transformation is illegal at the top + level of a module, so such bindings are rejected. + + The result of this transformation is ill-scoped if any of the binders + ``x1...xn`` appears in ``e``; hence the restriction to non-recursive pattern bindings. + + Exactly the same transformation applies to a non-recursive lazy pattern binding + (i.e. one lacking a top-level ``!``) that binds any unlifted variables; but + such a binding emits a warning :ghc-flag:`-Wunbanged-strict-patterns`. The + warning encourages the programmer to make visible the fact that this binding + is necessarily strict. The result will be a (possibly) recursive set of bindings, binding only simple variables on the left hand side. (One could go one step @@ -412,56 +452,106 @@ Here is a simple non-recursive case: :: !x = factorial y in body - ===> (FORCE) - let x = factorial y in x `seq` body + ===> (SPLIT-STRICT) + let x = factorial y in x `seq` body ===> (inline seq) - let x = factorial y in case x of x -> body + let x = factorial y in case x of !x -> body ===> (inline x) - case factorial y of x -> body + case factorial y of !x -> body Same again, only with a pattern binding: :: - let !(Just x, Left y) = e in body + let !(Just x) = e in body - ===> (FORCE) - let v = case e of (Just x, Left y) -> (x,y) - (x,y) = v - in v `seq` body + ===> (SPLIT-STRICT) + let v = case e of !(Just x) -> Solo x + Solo x = v + in v `seq` body - ===> (SPLIT) - let v = case e of (Just x, Left y) -> (x,y) - x = case v of (x,y) -> x - y = case v of (x,y) -> y - in v `seq` body + ===> (SPLIT-LAZY, drop redundant bang) + let v = case e of Just x -> Solo x + x = case v of Solo x -> x + in v `seq` body ===> (inline seq, float x,y bindings inwards) - let v = case e of (Just x, Left y) -> (x,y) - in case v of v -> let x = case v of (x,y) -> x - y = case v of (x,y) -> y - in body + let v = case e of Just x -> Solo x + in case v of !v -> let x = case v of Solo x -> x + in body ===> (fluff up v's pattern; this is a standard Core optimisation) - let v = case e of (Just x, Left y) -> (x,y) - in case v of v@(p,q) -> let x = case v of (x,y) -> x - y = case v of (x,y) -> y - in body + let v = case e of Just x -> Solo x + in case v of v@(Solo p) -> let x = case v of Solo x -> x + in body ===> (case of known constructor) - let v = case e of (Just x, Left y) -> (x,y) - in case v of v@(p,q) -> let x = p - y = q - in body + let v = case e of Just x -> Solo x + in case v of v@(Solo p) -> let x = p + in body - ===> (inline x,y, v) - case (case e of (Just x, Left y) -> (x,y) of - (p,q) -> body[p/x, q/y] + ===> (inline x, v) + case (case e of Just x -> Solo x) of + Solo p -> body[p/x] ===> (case of case) - case e of (Just x, Left y) -> body[p/x, q/y] + case e of Just x -> body[p/x] + +The final form is just what we want: a simple case expression. Notice, crucially, +that that *pattern* ``Just x`` is forced eagerly, but ``x`` itself is not evaluated +unless and until ``body`` does so. Note also that this example uses a pattern +that binds exactly one variable, and illustrates the use of the ``Solo`` 1-tuple. -The final form is just what we want: a simple case expression. +Rule (SPLIT-STRICT) applies even if the pattern binds no variables:: + + let !(True,False) = e in body + + ===> (SPLIT-STRICT) + let v = case e of !(True,False) -> (); () = v in v `seq` body + + ===> (inline, simplify, drop redundant bang) + case e of (True,False) -> body + +That is, we force ``e`` and check that it has the right form before proceeding with ``body``. +This happens even if the pattern is itself vacuous:: + + let !_ = e in body + + ===> (SPLIT-STRICT) + let v = case e of !_ -> (); () = v in v `seq` body + + ===> (inline, simplify) + case e of !_ -> body + +Again, ``e`` is forced before evaluating ``body``. This (along with ``!x = e``) is the reason +that (SPLIT-STRICT) uses a bang-pattern in the ``case`` in the desugared right-hand side. + +Note that rule (CASE) applies only when any of the *binders* is unlifted; +it is irrelevant whether the binding *itself* is unlifted (see +`GHC proposal #35 `__). +For example (see :ref:`primitives`):: + + let (# a::Int, b::Bool #) = e in body + ===> (SPLIT-LAZY) + let v = case e of (# a,b #) -> (a,b) + a = case v of (a,b) -> a + b = case v of (a,b) -> b + in body + +Even though the tuple pattern is unboxed, it is matched only when ``a`` or ``b`` are evaluated in ``body``. + +Here is an example with an unlifted data type:: + + type T :: UnliftedType + data T = MkT Int + f1 x = let MkT y = blah in body1 + f2 x = let z :: T = blah in body2 + f3 x = let _ :: T = blah in body3 + +In ``f1``, even though ``T`` is an unlifted type, the pattern ``MkT y`` binds a lifted +variable ``y``, so (SPLIT-LAZY) applies, and ``blah`` is not evaluated until ``body1`` evaluates ``y``. +In contrast, in ``f2`` the pattern ``z :: T`` binds a variable ``z`` of unlifted type, so (CASE) applies +and the let-binding is strict. In ``f3`` the pattern binds no variables, so again it is lazy like ``f1``. Here is a recursive case :: @@ -469,14 +559,14 @@ Here is a recursive case :: !xs = factorial y : xs in body - ===> (FORCE) - letrec xs = factorial y : xs in xs `seq` body + ===> (SPLIT-STRICT) + letrec xs = factorial y : xs in xs `seq` body ===> (inline seq) - letrec xs = factorial y : xs in case xs of xs -> body + letrec xs = factorial y : xs in case xs of xs -> body ===> (eliminate case of value) - letrec xs = factorial y : xs in body + letrec xs = factorial y : xs in body and a polymorphic one: :: @@ -484,10 +574,11 @@ and a polymorphic one: :: !f = fst (reverse, True) in body - ===> (FORCE) - let f = /\a. fst (reverse a, True) in f `seq` body + ===> (SPLIT-STRICT) + let f = /\a. fst (reverse a, True) in f `seq` body + ===> (inline seq, inline f) - case (/\a. fst (reverse a, True)) of f -> body + case (/\a. fst (reverse a, True)) of !f -> body Notice that the ``seq`` is added only in the translation to Core If we did it in Haskell source, thus :: @@ -507,10 +598,10 @@ intuitive: :: !f = fst (member, True) in body - ===> (FORCE) - let f = /\a \(d::Eq a). fst (member, True) in f `seq` body + ===> (SPLIT-STRICT) + let f = /\a \(d::Eq a). fst (member, True) in f `seq` body ===> (inline seq, case of value) - let f = /\a \(d::Eq a). fst (member, True) in body + let f = /\a \(d::Eq a). fst (member, True) in body Note that the bang has no effect at all in this case diff --git a/testsuite/tests/deSugar/should_compile/T22719.hs b/testsuite/tests/deSugar/should_compile/T22719.hs new file mode 100644 index 0000000000..2f0caf7e90 --- /dev/null +++ b/testsuite/tests/deSugar/should_compile/T22719.hs @@ -0,0 +1,21 @@ +{-# LANGUAGE UnliftedDatatypes #-} +{-# OPTIONS_GHC -Wall #-} + +module T22719 where + +import GHC.Exts + +type T :: UnliftedType +data T = T + +f :: Int -> T +f 0 = T +f n = f (n-1) + +-- ex1 is lazy in (f 7) +ex1 :: () +ex1 = let _ = f 7 in () + +-- ex2 is strict in (f 10) +ex2 :: () +ex2 = let _a = f 10 in () diff --git a/testsuite/tests/deSugar/should_compile/T22719.stderr b/testsuite/tests/deSugar/should_compile/T22719.stderr new file mode 100644 index 0000000000..090adb87c7 --- /dev/null +++ b/testsuite/tests/deSugar/should_compile/T22719.stderr @@ -0,0 +1,30 @@ + +==================== Tidy Core ==================== +Result size of Tidy Core + = {terms: 25, types: 10, coercions: 0, joins: 0/0} + +-- RHS size: {terms: 1, types: 0, coercions: 0, joins: 0/0} +ex1 :: () +[GblId, Unf=OtherCon []] +ex1 = GHC.Tuple.Prim.() + +Rec { +-- RHS size: {terms: 15, types: 5, coercions: 0, joins: 0/0} +f [Occ=LoopBreaker] :: Int -> T +[GblId, Arity=1, Unf=OtherCon []] +f = \ (ds :: Int) -> + case ds of wild { I# ds1 -> + case ds1 of { + __DEFAULT -> f (- @Int GHC.Num.$fNumInt wild (GHC.Types.I# 1#)); + 0# -> T22719.T + } + } +end Rec } + +-- RHS size: {terms: 6, types: 1, coercions: 0, joins: 0/0} +ex2 :: () +[GblId] +ex2 = case f (GHC.Types.I# 10#) of { T -> GHC.Tuple.Prim.() } + + + diff --git a/testsuite/tests/deSugar/should_compile/all.T b/testsuite/tests/deSugar/should_compile/all.T index 89583f4bb8..43ef2d495e 100644 --- a/testsuite/tests/deSugar/should_compile/all.T +++ b/testsuite/tests/deSugar/should_compile/all.T @@ -112,3 +112,4 @@ test('T16615', normal, compile, ['-ddump-ds -dsuppress-uniques']) test('T18112', [grep_errmsg('cast')], compile, ['-ddump-ds']) test('T19969', normal, compile, ['-ddump-simpl -dsuppress-uniques']) test('T19883', normal, compile, ['']) +test('T22719', normal, compile, ['-ddump-simpl -dsuppress-uniques -dno-typeable-binds']) -- cgit v1.2.1