diff options
author | Sebastian Graf <sebastian.graf@kit.edu> | 2020-11-19 14:32:33 +0100 |
---|---|---|
committer | Marge Bot <ben+marge-bot@smart-cactus.org> | 2021-01-17 05:46:45 -0500 |
commit | 23a545df75002060ed7bcb8bcc0e0511b7f9fb7d (patch) | |
tree | cfd5a6eb4d7939517af036c0e27b21f756f50e92 | |
parent | fe344da9be83be4c7c0c7f76183acfe0a234cc5d (diff) | |
download | haskell-23a545df75002060ed7bcb8bcc0e0511b7f9fb7d.tar.gz |
PmCheck: Positive info doesn't imply there is an inhabitant (#18960)
Consider `T18960`:
```hs
pattern P :: a -> a
pattern P x = x
{-# COMPLETE P :: () #-}
foo :: ()
foo = case () of
P _ -> ()
```
We know about the match variable of the case match that it is equal to `()`.
After the match on `P`, we still know it's equal to `()` (positive info), but
also that it can't be `P` (negative info). By the `COMPLETE` pragma, we know
that implies that the refinement type of the match variable is empty after the
`P` case.
But in the PmCheck solver, we assumed that "has positive info" means
"is not empty", thus assuming we could omit a costly inhabitation test. Which
is wrong, as we saw above.
A bit of a complication arises because the "has positive info" spared us
from doing a lot of inhabitation tests in `T17836b`. So we keep that
check, but give it a lower priority than the check for dirty variables
that requires us doing an inhabitation test.
Needless to say: This doesn't impact soundness of the checker at all,
it just implements a better trade-off between efficiency and precision.
Fixes #18960.
Metric Decrease:
T17836
-rw-r--r-- | compiler/GHC/HsToCore/Pmc/Solver.hs | 180 | ||||
-rw-r--r-- | testsuite/tests/pmcheck/complete_sigs/T18960.hs | 13 | ||||
-rw-r--r-- | testsuite/tests/pmcheck/complete_sigs/T18960b.hs | 20 | ||||
-rw-r--r-- | testsuite/tests/pmcheck/complete_sigs/T18960b.stderr | 20 | ||||
-rw-r--r-- | testsuite/tests/pmcheck/complete_sigs/all.T | 2 |
5 files changed, 207 insertions, 28 deletions
diff --git a/compiler/GHC/HsToCore/Pmc/Solver.hs b/compiler/GHC/HsToCore/Pmc/Solver.hs index 3519f9250a..251fd0af83 100644 --- a/compiler/GHC/HsToCore/Pmc/Solver.hs +++ b/compiler/GHC/HsToCore/Pmc/Solver.hs @@ -86,8 +86,10 @@ import Control.Applicative ((<|>)) import Control.Monad (foldM, forM, guard, mzero, when) import Control.Monad.Trans.Class (lift) import Control.Monad.Trans.State.Strict +import Data.Coerce import Data.Either (partitionEithers) import Data.Foldable (foldlM, minimumBy, toList) +import Data.Monoid (Any(..)) import Data.List (sortBy, find) import qualified Data.List.NonEmpty as NE import Data.Ord (comparing) @@ -120,9 +122,19 @@ isInhabited (MkNablas ds) = pure (not (null ds)) -- See Note [Implementation of COMPLETE pragmas] --- | Update the COMPLETE sets of 'ResidualCompleteMatches'. -updRcm :: (CompleteMatch -> CompleteMatch) -> ResidualCompleteMatches -> ResidualCompleteMatches -updRcm f (RCM vanilla pragmas) = RCM (f <$> vanilla) (fmap f <$> pragmas) +-- | Update the COMPLETE sets of 'ResidualCompleteMatches', or 'Nothing' +-- if there was no change as per the update function. +updRcm :: (CompleteMatch -> (Bool, CompleteMatch)) + -> ResidualCompleteMatches -> (Maybe ResidualCompleteMatches) +updRcm f (RCM vanilla pragmas) + | not any_change = Nothing + | otherwise = Just (RCM vanilla' pragmas') + where + f' :: CompleteMatch -> (Any, CompleteMatch) + f' = coerce f + (chgd, vanilla') = traverse f' vanilla + (chgds, pragmas') = traverse (traverse f') pragmas + any_change = getAny $ chgd `mappend` chgds -- | A pseudo-'CompleteMatch' for the vanilla complete set of the given data -- 'TyCon'. @@ -160,10 +172,17 @@ addTyConMatches tc rcm = add_tc_match <$> addCompleteMatches rcm add_tc_match rcm = rcm{rcm_vanilla = rcm_vanilla rcm <|> vanillaCompleteMatchTC tc} -markMatched :: ConLike -> ResidualCompleteMatches -> DsM ResidualCompleteMatches -markMatched cl rcm = do +markMatched :: PmAltCon -> ResidualCompleteMatches -> DsM (Maybe ResidualCompleteMatches) +-- Nothing means the PmAltCon didn't occur in any COMPLETE set. +-- See Note [Shortcutting the inhabitation test] for how this is useful for +-- performance on T17836. +markMatched (PmAltLit _) _ = pure Nothing -- lits are never part of a COMPLETE set +markMatched (PmAltConLike cl) rcm = do rcm' <- addConLikeMatches cl rcm - pure $ updRcm (flip delOneFromUniqDSet cl) rcm' + let go cm = case lookupUniqDSet cm cl of + Nothing -> (False, cm) + Just _ -> (True, delOneFromUniqDSet cm cl) + pure $ updRcm go rcm' {- Note [Implementation of COMPLETE pragmas] @@ -747,6 +766,10 @@ addNotConCt nabla x nalt = do Just x -> markDirty x nabla' Nothing -> nabla' where + -- | Update `x`'s 'VarInfo' entry. Fail ('MaybeT') if contradiction, + -- otherwise return updated entry and `Just x'` if `x` should be marked dirty, + -- where `x'` is the representative of `x`. + go :: VarInfo -> MaybeT DsM (Maybe Id, VarInfo) go vi@(VI x' pos neg _ rcm) = do -- 1. Bail out quickly when nalt contradicts a solution let contradicts nalt sol = eqPmAltCon (paca_con sol) nalt == Equal @@ -762,13 +785,16 @@ addNotConCt nabla x nalt = do MASSERT( isPmAltConMatchStrict nalt ) let vi' = vi{ vi_neg = neg', vi_bot = IsNotBot } -- 3. Make sure there's at least one other possible constructor - case nalt of - PmAltConLike cl -> do - -- Mark dirty to force a delayed inhabitation test - rcm' <- lift (markMatched cl rcm) - pure (Just x', vi'{ vi_rcm = rcm' }) - _ -> - pure (Nothing, vi') + mb_rcm' <- lift (markMatched nalt rcm) + pure $ case mb_rcm' of + -- If nalt could be removed from a COMPLETE set, we'll get back Just and + -- have to mark x dirty, by returning Just x'. + Just rcm' -> (Just x', vi'{ vi_rcm = rcm' }) + -- Otherwise, nalt didn't occur in any residual COMPLETE set and we + -- don't have to mark it dirty. So we return Nothing, which in the case + -- above would have compromised precision. + -- See Note [Shortcutting the inhabitation test], grep for T17836. + Nothing -> (Nothing, vi') hasRequiredTheta :: PmAltCon -> Bool hasRequiredTheta (PmAltConLike cl) = notNull req_theta @@ -1226,25 +1252,20 @@ inhabitationTest fuel old_ty_st nabla@MkNabla{ nabla_tm_st = ts } = do test_one vi = lift (varNeedsTesting old_ty_st nabla vi) >>= \case True -> do - -- tracPm "test_one" (ppr vi) + -- lift $ tracePm "test_one" (ppr vi) -- No solution yet and needs testing -- We have to test with a Nabla where all dirty bits are cleared instantiate (fuel-1) nabla_not_dirty vi _ -> pure vi -- | Checks whether the given 'VarInfo' needs to be tested for inhabitants. --- --- 1. If it already has a solution, we don't have to test. --- 2. If it's marked dirty because of new negative term constraints, we have --- to test. --- 3. Otherwise, if the type state didn't change, we don't need to test. --- 4. If the type state changed, we compare normalised source types. No need --- to test if unchanged. +-- Returns `False` when we can skip the inhabitation test, presuming it would +-- say "yes" anyway. See Note [Shortcutting the inhabitation test]. varNeedsTesting :: TyState -> Nabla -> VarInfo -> DsM Bool -varNeedsTesting _ _ vi - | notNull (vi_pos vi) = pure False varNeedsTesting _ MkNabla{nabla_tm_st=tm_st} vi | elemDVarSet (vi_id vi) (ts_dirty tm_st) = pure True +varNeedsTesting _ _ vi + | notNull (vi_pos vi) = pure False varNeedsTesting old_ty_st MkNabla{nabla_ty_st=new_ty_st} _ -- Same type state => still inhabited | not (tyStateRefined old_ty_st new_ty_st) = pure False @@ -1517,11 +1538,114 @@ guessConLikeUnivTyArgsFromResTy _ norm_res_ty (PatSynCon ps) = do subst <- tcMatchTy con_res_ty norm_res_ty traverse (lookupTyVar subst) univ_tvs -{- Note [Fuel for the inhabitation test] -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Whether or not a type is inhabited is undecidable in general. As a result, we -can run into infinite loops in `inhabitationTest`. Therefore, we adopt a -fuel-based approach to prevent that. +{- Note [Soundness and completeness] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Soundness and completeness of the pattern-match checker depends entirely on the +soundness and completeness of the inhabitation test. + +Achieving both soundness and completeness at the same time is undecidable. +See also T17977 and Note [Fuel for the inhabitation test]. +Losing soundness would make the algorithm pointless; hence we give up on +completeness, but try to get as close as possible (how close is called +the 'precision' of the algorithm). + +Soundness means that you + 1. Can remove clauses flagged as redundant without changing program semantics + (no false positives). + 2. Can be sure that your program is free of incomplete pattern matches + when the checker doesn't flag any inexhaustive definitions + (no false negatives). + +A complete algorithm would mean that + 1. When a clause can be deleted without changing program semantics, it will + be flagged as redundant (no false negatives). + 2. A program that is free of incomplete pattern matches will never have a + definition be flagged as inexhaustive (no false positives). + +Via the LYG algorithm, we reduce both these properties to a property on +the inhabitation test of refinementment types: + *Soundness*: If the inhabitation test says "no" for a given refinement type + Nabla, then it provably has no inhabitant. + *Completeness*: If the inhabitation test says "yes" for a given refinement type + Nabla, then it provably has an inhabitant. +Our test is sound, but incomplete, so there are instances where we say +"yes" but in fact the Nabla is empty. Which entails false positive exhaustivity +and false negative redundancy warnings, as above. + +In summary, we have the following correspondence: + +Property | Exhaustiveness warnings | Redundancy warnings | Inhabitation test | +-------------|-------------------------|---------------------|-------------------| +Soundness | No false negatives | No false positives | Only says "no" | + | | | if there is no | + | | | inhabitant | +Completeness | No false positives | No false negatives | Only says "yes" | + | | | if there is an | + | | | inhabitant | + +Note [Shortcutting the inhabitation test] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Generally, we have to re-test a refinement type for inhabitants whenever we +add a new constraint. Often, we can say "no" early, upon trying to add a +contradicting constraint, see Note [The Pos/Neg invariant]. Still, COMPLETE +sets and type evidence are best handled in a delayed fashion, because of +the recursive nature of the test and our fuel-based approach. +But even then there are some cases in which we can skip the full test, +because we are sure that the refinement type is still inhabited. These +conditions are monitored by 'varNeedsTesting'. It returns + +- `True` whenever a full inhabitation test is needed +- `False` whenever the test can be skipped, amounting to an inhabitation test + that says "yes". + +According to Note [Soundness and Completeness], this test will never compromise +soundness: The `True` case just forwards to the actual inhabitation test and the +`False` case amounts to an inhabitation test that is trivially sound, because it +never says "no". + +Of course, if the returns says `False`, Completeness (and thus Precision) of the +algorithm is affected, but we get to skip costly inhabitation tests. We try to +trade as little Precision as possible against as much Performance as possible. +Here are the tests, in order: + + 1. If a variable is dirty (because of a newly added negative term constraint), + we have to test. + 2. If a variable has positive information, we don't have to test: The + positive information acts as constructive proof for inhabitation. + 3. If the type state didn't change, there is no need to test. + 4. If the variable's normalised type didn't change, there is no need to test. + 5. Otherwise, we have to test. + +Why (1) before (2)? +------------------- +Consider the reverse for (T18960): + pattern P x = x + {-# COMPLETE P :: () #-} + foo = case () of x@(P _) -> () +This should be exhaustive. But if we say "We know `x` has solution `()`, so it's +inhabited", then we'll get a warning saying that `()` wasn't matched. +But the match on `P` added the new negative information to the uncovered set, +in the process of which we marked `x` as dirty. By giving the dirty flag a +higher priority than positive info, we get to test again and see that `x` is +uninhabited and the match is exhaustive. + +But suppose that `P` wasn't mentioned in any COMPLETE set. Then we simply +don't mark `x` as dirty and will emit a warning again (which we would anyway), +without running a superfluous inhabitation test. That speeds up T17836 +considerably. + +Why (2) before (3) and (4)? +--------------------------- +Simply because (2) is more efficient to test than (3) (not by a lot), which +is more efficient to test than (4), which is still more efficient than running +the full inhabitation test (5). + +Note [Fuel for the inhabitation test] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Whether or not a type is inhabited is undecidable in general, see also +Note [Soundness and Completeness]. As a result, we can run into infinite +loops in `inhabitationTest`. Therefore, we adopt a fuel-based approach to +prevent that. Consider the following example: diff --git a/testsuite/tests/pmcheck/complete_sigs/T18960.hs b/testsuite/tests/pmcheck/complete_sigs/T18960.hs new file mode 100644 index 0000000000..ed441647d7 --- /dev/null +++ b/testsuite/tests/pmcheck/complete_sigs/T18960.hs @@ -0,0 +1,13 @@ +{-# OPTIONS_GHC -Wincomplete-patterns -fforce-recomp #-} +{-# LANGUAGE PatternSynonyms #-} + +module T18960 where + +pattern P :: a -> a +pattern P x = x +{-# COMPLETE P :: () #-} + +foo :: () +foo = case () of + P _ -> () + diff --git a/testsuite/tests/pmcheck/complete_sigs/T18960b.hs b/testsuite/tests/pmcheck/complete_sigs/T18960b.hs new file mode 100644 index 0000000000..0cccc8aff1 --- /dev/null +++ b/testsuite/tests/pmcheck/complete_sigs/T18960b.hs @@ -0,0 +1,20 @@ +{-# OPTIONS_GHC -Wincomplete-patterns -fforce-recomp #-} +{-# LANGUAGE PatternSynonyms #-} + +module T18960b where + +pattern P :: a -> a +pattern P x = x +{-# COMPLETE P :: () #-} + +bar :: () +bar = case undefined of + P ((), "hello") -> () + -- prints too many of apparently the same missing patterns, + -- because we prefer to report positive info (@((), [])@) rather than + -- negative info (@P ((), x:_) where x is not one of {'h'}@) + +baz :: () +baz = case undefined of + P ((), "hello") -> () + -- This function is proof that we in theory can provide a "better" warning. diff --git a/testsuite/tests/pmcheck/complete_sigs/T18960b.stderr b/testsuite/tests/pmcheck/complete_sigs/T18960b.stderr new file mode 100644 index 0000000000..6af7fa7bc1 --- /dev/null +++ b/testsuite/tests/pmcheck/complete_sigs/T18960b.stderr @@ -0,0 +1,20 @@ + +T18960b.hs:11:7: warning: [-Wincomplete-patterns (in -Wextra)] + Pattern match(es) are non-exhaustive + In a case alternative: + Patterns of type ‘((), String)’ not matched: + P ((), []) + P ((), (p : P _)) where p is not one of {'h'} + P ((), ['h']) + P ((), ('h' : p : P _)) where p is not one of {'e'} + ... + +T18960b.hs:18:7: warning: [-Wincomplete-patterns (in -Wextra)] + Pattern match(es) are non-exhaustive + In a case alternative: + Patterns of type ‘((), String)’ not matched: + P ((), []) + P ((), (p : P _)) where p is not one of {'h'} + P ((), ['h']) + P ((), ('h' : p : P _)) where p is not one of {'e'} + ... diff --git a/testsuite/tests/pmcheck/complete_sigs/all.T b/testsuite/tests/pmcheck/complete_sigs/all.T index 2728121160..dc80cb3d0e 100644 --- a/testsuite/tests/pmcheck/complete_sigs/all.T +++ b/testsuite/tests/pmcheck/complete_sigs/all.T @@ -26,3 +26,5 @@ test('T14851', normal, compile, ['']) test('T17149', normal, compile, ['']) test('T17386', normal, compile, ['']) test('T18277', normal, compile, ['']) +test('T18960', normal, compile, ['']) +test('T18960b', normal, compile, ['']) |