-- Source: compiler/GHC/HsToCore/Pmc/Check.hs
-- (blob 772877d3e8b62ba5a59b73651d95e9826723cac3)
{-# LANGUAGE CPP               #-}
{-# LANGUAGE DeriveFunctor     #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE GADTs             #-}

-- | Coverage checking step of the
-- [Lower Your Guards paper](https://dl.acm.org/doi/abs/10.1145/3408989).
--
-- Coverage check guard trees (like @'PmMatch' 'Pre'@) to get a
-- 'CheckResult', containing
--
--   1. The set of uncovered values, 'cr_uncov'
--   2. And an annotated tree variant (like @'PmMatch' 'Post'@) that captures
--      redundancy and inaccessibility information as 'RedSets' annotations
--
-- Basically the UA function from Section 5.1, which is an optimised
-- interleaving of U and A from Section 3.2 (Figure 5).
-- The Normalised Refinement Types 'Nablas' are maintained in
-- "GHC.HsToCore.Pmc.Solver".
module GHC.HsToCore.Pmc.Check (
        CheckAction(..),
        checkMatchGroup, checkGRHSs, checkPatBind, checkEmptyCase
    ) where

import GHC.Prelude

import GHC.Builtin.Names ( hasKey, considerAccessibleIdKey, trueDataConKey )
import GHC.HsToCore.Monad ( DsM )
import GHC.HsToCore.Pmc.Types
import GHC.HsToCore.Pmc.Utils
import GHC.HsToCore.Pmc.Solver
import GHC.Driver.Session
import GHC.Utils.Outputable
import GHC.Tc.Utils.TcType (evVarPred)
import GHC.Data.OrdList

import qualified Data.Semigroup as Semi
import Data.List.NonEmpty ( NonEmpty(..) )
import qualified Data.List.NonEmpty as NE
import Data.Coerce

-- | A coverage checking action: given the incoming 'Nablas', it computes a
-- 'CheckResult' in 'DsM'.
-- Actions can be composed 'leftToRight' or 'topToBottom'.
newtype CheckAction a = CA
  { unCA :: Nablas -> DsM (CheckResult a)
  } deriving Functor

-- | Composes 'CheckAction's top-to-bottom (the semantics of the LYG branching
-- construct):
--
--   * A value falls through the composite only if it falls through the first
--     action and then also through the second action.
--   * A value matches the composite if it matches the first action or the
--     second action.
--
-- The first argument combines the annotated results of both actions.
topToBottom :: (top -> bot -> ret)
            -> CheckAction top
            -> CheckAction bot
            -> CheckAction ret
topToBottom combine upper lower = CA run
  where
    run incoming = do
      res_top <- unCA upper incoming
      -- Only what the upper action left uncovered reaches the lower action.
      res_bot <- unCA lower (cr_uncov res_top)
      let ret    = combine (cr_ret res_top) (cr_ret res_bot)
          approx = cr_approx res_top Semi.<> cr_approx res_bot
      pure CheckResult
        { cr_ret    = ret
        , cr_uncov  = cr_uncov res_bot
        , cr_approx = approx
        }


-- | Composes 'CheckAction's left-to-right (the semantics of the LYG guard
-- construct):
--
--   * A value falls through the composite if it falls through the first
--     action or through the second action.
--   * A value matches the composite only if it matches the first action and
--     then also matches the second action.
--
-- The combined uncovered set is passed through 'throttle', so the result may
-- be 'Approximate'; see Note [Countering exponential blowup].
leftToRight :: (RedSets -> right -> ret)
            -> CheckAction RedSets
            -> CheckAction right
            -> CheckAction ret
leftToRight combine lhs rhs = CA $ \incoming -> do
  res_l  <- unCA lhs incoming
  -- Only the values covered by the left action flow into the right action.
  res_r  <- unCA rhs (rs_cov (cr_ret res_l))
  dflags <- getDynFlags
  let fell_through = cr_uncov res_l Semi.<> cr_uncov res_r
      -- See Note [Countering exponential blowup]
      (throttled, uncov) =
        throttle (maxPmCheckModels dflags) incoming fell_through
  pure CheckResult
    { cr_ret    = combine (cr_ret res_l) (cr_ret res_r)
    , cr_uncov  = uncov
    , cr_approx = throttled Semi.<> cr_approx res_l Semi.<> cr_approx res_r
    }

-- | @throttle limit old new@ returns @new@ (tagged 'Precise') as long as the
-- number of 'Nabla's in @new@ does not exceed both the given @limit@ and the
-- number of 'Nabla's in @old@. Otherwise it falls back to @old@ and tags the
-- result as 'Approximate'.
-- See Note [Countering exponential blowup].
throttle :: Int -> Nablas -> Nablas -> (Precision, Nablas)
throttle limit old@(MkNablas old_ds) new@(MkNablas new_ds)
  --- | pprTrace "PmCheck:throttle" (ppr (length old_ds) <+> ppr (length new_ds) <+> ppr limit) False = undefined
  | n_new <= budget = (Precise,     new)
  | otherwise       = (Approximate, old)
  where
    n_new  = length new_ds
    -- We never throttle below the size we started out with.
    budget = max limit (length old_ds)

-- | Check a non-empty sequence of guard trees with the given action, composing
-- the individual checks 'topToBottom'.
--
-- The implementation is pretty similar to
-- @traverse1 :: Apply f => (a -> f b) -> NonEmpty a -> f (NonEmpty b)@
checkSequence :: (grdtree -> CheckAction anntree) -> NonEmpty grdtree -> CheckAction (NonEmpty anntree)
checkSequence act (t :| rest) = case NE.nonEmpty rest of
  -- Last tree: its result becomes a singleton.
  Nothing   -> fmap (:| []) (act t)
  -- Otherwise prepend this tree's result to those of the remaining trees.
  Just more -> topToBottom (NE.<|) (act t) (checkSequence act more)

-- | 'RedSets' in which every component is 'mempty'.
-- Deliberately a plain value: a Semigroup instance would be misleading!
emptyRedSets :: RedSets
emptyRedSets = RedSets
  { rs_cov   = mempty
  , rs_div   = mempty
  , rs_bangs = mempty
  }

-- | Check a single guard against the incoming set of values.
--
-- The returned 'RedSets' hold the values that match the guard ('rs_cov') and
-- the values on which it diverges ('rs_div'); 'cr_uncov' holds the values that
-- fall through the guard. Every alternative reports a 'Precise' result.
checkGrd :: PmGrd -> CheckAction RedSets
checkGrd grd = CA check
  where
    -- Package up annotations and fall-through values as a 'Precise' result.
    precise red_sets fall_through =
      CheckResult { cr_ret    = red_sets
                  , cr_uncov  = fall_through
                  , cr_approx = Precise }

    check inc = case grd of
      -- let x = e: Refine with x ~ e
      PmLet x e -> do
        covered <- addPhiCtNablas inc (PhiCoreCt x e)
        tracePm "check:Let" (ppr x <+> char '=' <+> ppr e)
        pure $ precise (emptyRedSets { rs_cov = covered }) mempty

      -- Bang x _: Diverge on x ~ ⊥, refine with x ≁ ⊥
      PmBang x mb_info -> do
        diverging <- addPhiCtNablas inc (PhiBotCt x)
        covered   <- addPhiCtNablas inc (PhiNotBotCt x)
        -- See Note [Dead bang patterns]
        -- mb_info = Just info <==> PmBang originates from bang pattern in source
        let src_bangs = maybe NilOL (\info -> unitOL (diverging, info)) mb_info
        tracePm "check:Bang" (ppr x <+> ppr diverging)
        pure $ precise
          (RedSets { rs_cov = covered, rs_div = diverging, rs_bangs = src_bangs })
          mempty

      -- See point (3) of Note [considerAccessible]
      PmCon x (PmAltConLike con) _ _ _
        | x `hasKey` considerAccessibleIdKey
        , con `hasKey` trueDataConKey
        -> pure $ precise (emptyRedSets { rs_cov = initNablas }) mempty

      -- Con: Fall through on x ≁ K and refine with x ~ K ys and type info
      PmCon x con tvs dicts args -> do
        let con_ct = PhiConCt x con tvs (map evVarPred dicts) args
        -- Only a strict constructor match can diverge on x ~ ⊥.
        !diverging <- if isPmAltConMatchStrict con
                        then addPhiCtNablas inc (PhiBotCt x)
                        else pure mempty
        !covered      <- addPhiCtNablas inc con_ct
        !fall_through <- addPhiCtNablas inc (PhiNotConCt x con)
        tracePm "check:Con" $ vcat
          [ ppr grd
          , ppr inc
          , hang (text "div") 2 (ppr diverging)
          , hang (text "matched") 2 (ppr covered)
          , hang (text "uncov") 2 (ppr fall_through)
          ]
        pure $ precise
          (emptyRedSets { rs_cov = covered, rs_div = diverging })
          fall_through

-- | Check a list of guards, composing the individual 'checkGrd' actions
-- 'leftToRight'. With no guards, every incoming value is covered and nothing
-- falls through.
checkGrds :: [PmGrd] -> CheckAction RedSets
checkGrds grds = foldr check_then all_covered grds
  where
    -- Check one guard, then the remaining guards on whatever it covers.
    check_then g rest = leftToRight merge (checkGrd g) rest

    -- No guards left: the incoming values are exactly the covered ones.
    all_covered = CA $ \inc ->
      pure CheckResult { cr_ret    = emptyRedSets { rs_cov = inc }
                       , cr_uncov  = mempty
                       , cr_approx = Precise }

    -- Keep the coverage of the later guards, but accumulate divergence and
    -- bang information. This operation would /not/ form a Semigroup!
    merge ri_g ri_grds = ri_grds
      { rs_div   = rs_div ri_g   Semi.<> rs_div ri_grds
      , rs_bangs = rs_bangs ri_g Semi.<> rs_bangs ri_grds
      }

-- | Check every match of a match group in sequence (see 'checkSequence') and
-- rewrap the annotated matches.
checkMatchGroup :: PmMatchGroup Pre -> CheckAction (PmMatchGroup Post)
checkMatchGroup (PmMatchGroup matches) = fmap PmMatchGroup checked_matches
  where
    checked_matches = checkSequence checkMatch matches

-- | Check a single match: first its pattern guards, then (on the values they
-- cover) its guarded right-hand sides.
checkMatch :: PmMatch Pre -> CheckAction (PmMatch Post)
checkMatch (PmMatch { pm_pats = GrdVec pat_grds, pm_grhss = rhss }) =
  leftToRight PmMatch check_pats check_rhss
  where
    check_pats = checkGrds pat_grds
    check_rhss = checkGRHSs rhss

-- | Check a group of guarded right-hand sides: first the guards in
-- 'pgs_lcls', then (on the values they cover) each GRHS in sequence.
checkGRHSs :: PmGRHSs Pre -> CheckAction (PmGRHSs Post)
checkGRHSs (PmGRHSs { pgs_lcls = GrdVec local_grds, pgs_grhss = alts }) =
  leftToRight PmGRHSs check_lcls check_alts
  where
    check_lcls = checkGrds local_grds
    check_alts = checkSequence checkGRHS alts

-- | Check the guards of a single guarded right-hand side and attach the
-- resulting 'RedSets' to it, keeping its 'pg_rhs' untouched.
checkGRHS :: PmGRHS Pre -> CheckAction (PmGRHS Post)
checkGRHS (PmGRHS { pg_grds = GrdVec grds, pg_rhs = rhs_info }) =
  fmap annotate (checkGrds grds)
  where
    annotate red_sets = PmGRHS red_sets rhs_info

-- | Check an empty case expression: the uncovered set is the incoming set
-- refined by @var ≁ ⊥@; the 'PmEmptyCase' itself is returned unchanged.
-- See Note [Checking EmptyCase]
checkEmptyCase :: PmEmptyCase -> CheckAction PmEmptyCase
checkEmptyCase pe@(PmEmptyCase { pe_var = var }) = CA $ \inc ->
  uncovered_by <$> addPhiCtNablas inc (PhiNotBotCt var)
  where
    uncovered_by unc =
      CheckResult { cr_ret = pe, cr_uncov = unc, cr_approx = mempty }

-- | Check a pattern binding by reusing 'checkGRHS' through 'coerce'.
-- NOTE(review): this relies on 'PmPatBind' being representationally equal to
-- 'PmGRHS' (presumably a newtype wrapper from "GHC.HsToCore.Pmc.Types");
-- confirm against its definition.
checkPatBind :: (PmPatBind Pre) -> CheckAction (PmPatBind Post)
checkPatBind = coerce checkGRHS

{- Note [Checking EmptyCase]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-XEmptyCase is useful for matching on empty data types like 'Void'. For example,
the following is a complete match:

    f :: Void -> ()
    f x = case x of {}

Really, -XEmptyCase is the only way to write a program that at the same time is
safe (@f _ = error "boom"@ is not because of ⊥), doesn't trigger a warning
(@f !_ = error "inaccessible"@ has an inaccessible RHS) and doesn't turn an
exception into divergence (@f x = f x@).

Semantically, unlike every other case expression, -XEmptyCase is strict in its
match var x, which rules out ⊥ as an inhabitant. So we add x ≁ ⊥ to the
initial Nabla and check if there are any values left to match on.

Note [Dead bang patterns]
~~~~~~~~~~~~~~~~~~~~~~~~~
Consider

  f :: Bool -> Int
  f True = 1
  f !x   = 2

Whenever we fall through to the second equation, we will already have evaluated
the argument. Thus, the bang pattern serves no purpose and should be warned
about. We call this kind of bang patterns "dead". Dead bangs are the ones
that under no circumstances can force a thunk that wasn't already forced.
Dead bangs are a form of redundant bangs; see below.

We can detect dead bang patterns by checking whether @x ~ ⊥@ is satisfiable
where the PmBang appears in 'checkGrd'. If not, then clearly the bang is
dead. So for a source bang, we add the refined Nablas and the source info to
the 'rs_bangs' field of the 'RedSets'. When collecting what to warn about, we
test those Nablas for inhabitants. If there are none, we'll warn that the bang
is redundant.

Note that we don't want to warn for a dead bang that appears on a redundant
clause. That is because in that case, we recommend to delete the clause wholly,
including its leading pattern match.

Dead bang patterns are redundant. But there are bang patterns which are
redundant that aren't dead, for example

  f !() = 0

the bang still forces the match variable, before we attempt to match on (). But
it is redundant with the forcing done by the () match. We currently don't
detect redundant bangs that aren't dead.

Note [Countering exponential blowup]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Precise pattern match exhaustiveness checking is necessarily exponential in
the size of some input programs. We implement a counter-measure in the form of
the -fmax-pmcheck-models flag, limiting the number of Nablas we check against
each pattern by a constant.

How do we do that? Consider

  f True True = ()
  f True True = ()

And imagine we set our limit to 1 for the sake of the example. The first clause
will be checked against the initial Nabla, {}. Doing so will produce an
Uncovered set of size 2, containing the models {x≁True} and {x~True,y≁True}.
Also we find the first clause to cover the model {x~True,y~True}.

But the Uncovered set we get out of the match is too big! We somehow have to
ensure that we don't make things worse than they already are, so we continue
checking with a singleton Uncovered set of the initial Nabla {}. Why is this
sound (wrt. the notion in GADTs Meet Their Match)? Well, it basically amounts
to forgetting that we matched against the first clause. The values represented
by {} are a superset of those represented by its two refinements {x≁True} and
{x~True,y≁True}.

This forgetfulness becomes very apparent in the example above: By continuing
with {} we don't detect the second clause as redundant, as it again covers the
same non-empty subset of {}. So we don't flag everything as redundant anymore,
but still will never flag something as redundant that isn't.

For exhaustivity, the converse applies: We will report @f@ as non-exhaustive
and report @f _ _@ as missing, which is a superset of the actual missing
matches. But soundness means we will never fail to report a missing match.

This mechanism is implemented in 'throttle'.

Guards are an extreme example in this regard, with #11195 being a particularly
dreadful example: Since their RHS are often pretty much unique, we split on a
variable (the one representing the RHS) that doesn't occur anywhere else in the
program, so we don't actually get useful information out of that split!

Note [considerAccessible]
~~~~~~~~~~~~~~~~~~~~~~~~~
Consider (T18610)

  f :: Bool -> Int
  f x = case (x, x) of
    (True,  True)  -> 1
    (False, False) -> 2
    (True,  False) -> 3 -- Warning: Redundant

The third case is detected as redundant. But it may be the intent of the
programmer to keep the dead code, in order for it not to bitrot or to support
debugging scenarios. But there is no way to communicate that to the
pattern-match checker! The only way is to deactivate pattern-match checking
whole-sale, which is quite annoying. Hence, we define in "GHC.Exts":

  considerAccessible = True

'considerAccessible' is treated specially by the pattern-match checker in that a
guard with it as the scrutinee expression will keep its parent clause alive:

  g :: Bool -> Int
  g x = case (x, x) of
    (True,  True)  -> 1
    (False, False) -> 2
    (True,  False) | GHC.Exts.considerAccessible -> 3 -- No warning

The key bits of the implementation are:

  1. Its definition is recognised as known-key (see "GHC.Builtin.Names").
  2. After "GHC.HsToCore.Pmc.Desugar", the guard will end up as a 'PmCon', where
     the match var is the known-key 'considerAccessible' and the constructor
     against which it matches is 'True'.
  3. We recognise the 'PmCon' in 'GHC.HsToCore.Pmc.Check.checkGrd' and inflate
     the incoming set of values for all guards downstream to the unconstrained
     'initNablas' set, i.e. /all/ values.
     (The set of values that falls through that particular guard is empty, as
     matching 'considerAccessible' against 'True' can't fail.)

Note that 'considerAccessible' breaks the invariant that incoming sets of values
reaching syntactic children are subsets of that of the syntactic ancestor:
A whole match, like that of the third clause of the example, might have no
incoming value, but its single RHS has incoming values because of (3).

That means the 'is_covered' flag computed in 'GHC.HsToCore.Pmc.cirbsMatch'
is irrelevant and should not be used to flag all children as redundant (which is
what we used to do).

We achieve great benefits with a very simple implementation.
There are caveats, though:

  (A) Putting potentially failing guards /after/ the
      'considerAccessible' guard might lead to weird check results, e.g.,

        h :: Bool -> Int
        h x = case (x, x) of
          (True,  True)  -> 1
          (False, False) -> 2
          (True,  False) | GHC.Exts.considerAccessible, False <- x -> 3
          -- Warning: Not matched: (_, _)

      That *is* fixable, although we would pay with a much more complicated
      implementation.
  (B) If the programmer puts a 'considerAccessible' marker on an accessible
      clause, the checker doesn't warn about it. E.g.,

        f :: Bool -> Int
        f True | considerAccessible = 0
        f False = 1

      will not emit any warning whatsoever. We could implement code that warns
      here, but it wouldn't be as simple as it is now.
-}