1 files changed, 849 insertions, 0 deletions
diff --git a/compiler/typecheck/TcTyDecls.hs b/compiler/typecheck/TcTyDecls.hs
new file mode 100644
index 0000000000..f7cde08c7b
--- /dev/null
+++ b/compiler/typecheck/TcTyDecls.hs
@@ -0,0 +1,849 @@
+{-
+(c) The University of Glasgow 2006
+(c) The GRASP/AQUA Project, Glasgow University, 1992-1999
+
+
+Analysis functions over data types.  Specficially, detecting recursive types.
+
+This stuff is only used for source-code decls; it's recorded in interface
+files for imported data types.
+-}
+
+{-# LANGUAGE CPP #-}
+
+module TcTyDecls(
+        calcRecFlags, RecTyInfo(..),
+        calcSynCycles, calcClassCycles,
+        RoleAnnots, extractRoleAnnots, emptyRoleAnnots, lookupRoleAnnots
+    ) where
+
+#include "HsVersions.h"
+
+import TypeRep
+import HsSyn
+import Class
+import Type
+import Kind
+import HscTypes
+import TyCon
+import DataCon
+import Var
+import Name
+import NameEnv
+import VarEnv
+import VarSet
+import NameSet
+import Coercion ( ltRole )
+import Avail
+import Digraph
+import BasicTypes
+import SrcLoc
+import Outputable
+import UniqSet
+import Util
+import Maybes
+import Data.List
+
+#if __GLASGOW_HASKELL__ < 709
+import Control.Applicative (Applicative(..))
+#endif
+
+import Control.Monad
+
+{-
+************************************************************************
+*                                                                      *
+        Cycles in class and type synonym declarations
+*                                                                      *
+************************************************************************
+
+Checking for class-decl loops is easy, because we don't allow class decls
+in interface files.
+
+We allow type synonyms in hi-boot files, but we *trust* hi-boot files,
+so we don't check for loops that involve them.  So we only look for synonym
+loops in the module being compiled.
+
+We check for type synonym and class cycles on the *source* code.
+Main reasons:
+
+  a) Otherwise we'd need a special function to extract type-synonym tycons
+     from a type, whereas we already have the free vars pinned on the decl
+
+  b) If we checked for type synonym loops after building the TyCon, we
+        can't do a hoistForAllTys on the type synonym rhs, (else we fall into
+        a black hole) which seems unclean.  Apart from anything else, it'd mean
+        that a type-synonym rhs could have for-alls to the right of an arrow,
+        which means adding new cases to the validity checker
+
+        Indeed, in general, checking for cycles beforehand means we need to
+        be less careful about black holes through synonym cycles.
+
+The main disadvantage is that a cycle that goes via a type synonym in an
+.hi-boot file can lead the compiler into a loop, because it assumes that cycles
+only occur entirely within the source code of the module being compiled.
+But hi-boot files are trusted anyway, so this isn't much worse than (say)
+a kind error.
+
+[  NOTE ----------------------------------------------
+If we reverse this decision, this comment came from tcTyDecl1, and should
+ go back there
+        -- dsHsType, not tcHsKindedType, to avoid a loop.  tcHsKindedType does hoisting,
+        -- which requires looking through synonyms... and therefore goes into a loop
+        -- on (erroneously) recursive synonyms.
+        -- Solution: do not hoist synonyms, because they'll be hoisted soon enough
+        --           when they are substituted
+
+We'd also need to add back in this definition
+
+synonymTyConsOfType :: Type -> [TyCon]
+-- Does not look through type synonyms at all
+-- Return a list of synonym tycons
+synonymTyConsOfType ty
+  = nameEnvElts (go ty)
+  where
+     go :: Type -> NameEnv TyCon  -- The NameEnv does duplicate elim
+     go (TyVarTy v)               = emptyNameEnv
+     go (TyConApp tc tys)         = go_tc tc tys
+     go (AppTy a b)               = go a `plusNameEnv` go b
+     go (FunTy a b)               = go a `plusNameEnv` go b
+     go (ForAllTy _ ty)           = go ty
+
+     go_tc tc tys | isTypeSynonymTyCon tc = extendNameEnv (go_s tys)
+                                                          (tyConName tc) tc
+                  | otherwise             = go_s tys
+     go_s tys = foldr (plusNameEnv . go) emptyNameEnv tys
+---------------------------------------- END NOTE ]
+-}
+
+mkSynEdges :: [LTyClDecl Name] -> [(LTyClDecl Name, Name, [Name])]
+mkSynEdges syn_decls = [ (ldecl, name, nameSetElems fvs)
+                       | ldecl@(L _ (SynDecl { tcdLName = L _ name
+                                             , tcdFVs = fvs })) <- syn_decls ]
+
+calcSynCycles :: [LTyClDecl Name] -> [SCC (LTyClDecl Name)]
+calcSynCycles = stronglyConnCompFromEdgedVertices . mkSynEdges
+
+{-
+Note [Superclass cycle check]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+We can't allow cycles via superclasses because it would result in the
+type checker looping when it canonicalises a class constraint (superclasses
+are added during canonicalisation).  More precisely, given a constraint
+    C ty1 .. tyn
+we want to instantiate all of C's superclasses, transitively, and
+that set must be finite.  So if
+     class (D b, E b a) => C a b
+then when we encounter the constraint
+     C ty1 ty2
+we'll instantiate the superclasses
+     (D ty2, E ty2 ty1)
+and then *their* superclasses, and so on.  This set must be finite!
+
+It is OK for superclasses to be type synonyms for other classes, so
+must "look through" type synonyms. Eg
+     type X a = C [a]
+     class X a => C a   -- No!  Recursive superclass!
+
+We want definitions such as:
+
+  class C cls a where cls a => a -> a
+  class C D a => D a where
+
+to be accepted, even though a naive acyclicity check would reject the
+program as having a cycle between D and its superclass.  Why? Because
+when we instantiate
+     D ty1
+we get the superclas
+     C D ty1
+and C has no superclasses, so we have terminated with a finite set.
+
+More precisely, the rule is this: the superclasses sup_C of a class C
+are rejected iff:
+
+  C \elem expand(sup_C)
+
+Where expand is defined as follows:
+
+(1)  expand(a ty1 ... tyN) = expand(ty1) \union ... \union expand(tyN)
+
+(2)  expand(D ty1 ... tyN) = {D}
+                             \union sup_D[ty1/x1, ..., tyP/xP]
+                             \union expand(ty(P+1)) ... \union expand(tyN)
+           where (D x1 ... xM) is a class, P = min(M,N)
+
+(3)  expand(T ty1 ... tyN) = expand(ty1) \union ... \union expand(tyN)
+        where T is not a class
+
+Eqn (1) is conservative; when there's a type variable at the head,
+look in all the argument types.  Eqn (2) expands superclasses; the
+third component of the union is like Eqn (1).  Eqn (3) happens mainly
+when the context is a (constraint) tuple, such as (Eq a, Show a).
+
+Furthermore, expand always looks through type synonyms.
+-}
+
+calcClassCycles :: Class -> [[TyCon]]
+calcClassCycles cls
+  = nubBy eqAsCycle $
+    expandTheta (unitUniqSet cls) [classTyCon cls] (classSCTheta cls) []
+  where
+    -- The last TyCon in the cycle is always the same as the first
+    eqAsCycle xs ys = any (xs ==) (cycles (tail ys))
+    cycles xs = take n . map (take n) . tails . cycle $ xs
+      where n = length xs
+
+    -- No more superclasses to expand ==> no problems with cycles
+    -- See Note [Superclass cycle check]
+    expandTheta :: UniqSet Class -- Path of Classes to here in set form
+                -> [TyCon]       -- Path to here
+                -> ThetaType     -- Superclass work list
+                -> [[TyCon]]     -- Input error paths
+                -> [[TyCon]]     -- Final error paths
+    expandTheta _    _    []           = id
+    expandTheta seen path (pred:theta) = expandType seen path pred . expandTheta seen path theta
+
+    expandType seen path (TyConApp tc tys)
+      -- Expand unsaturated classes to their superclass theta if they are yet unseen.
+      -- If they have already been seen then we have detected an error!
+      | Just cls <- tyConClass_maybe tc
+      , let (env, remainder) = papp (classTyVars cls) tys
+            rest_tys = either (const []) id remainder
+      = if cls `elementOfUniqSet` seen
+         then (reverse (classTyCon cls:path):)
+              . flip (foldr (expandType seen path)) tys
+         else expandTheta (addOneToUniqSet seen cls) (tc:path)
+                          (substTys (mkTopTvSubst env) (classSCTheta cls))
+              . flip (foldr (expandType seen path)) rest_tys
+
+      -- For synonyms, try to expand them: some arguments might be
+      -- phantoms, after all. We can expand with impunity because at
+      -- this point the type synonym cycle check has already happened.
+      | Just (tvs, rhs) <- synTyConDefn_maybe tc
+      , let (env, remainder) = papp tvs tys
+            rest_tys = either (const []) id remainder
+      = expandType seen (tc:path) (substTy (mkTopTvSubst env) rhs)
+        . flip (foldr (expandType seen path)) rest_tys
+
+      -- For non-class, non-synonyms, just check the arguments
+      | otherwise
+      = flip (foldr (expandType seen path)) tys
+
+    expandType _    _    (TyVarTy {})     = id
+    expandType _    _    (LitTy {})       = id
+    expandType seen path (AppTy t1 t2)    = expandType seen path t1 . expandType seen path t2
+    expandType seen path (FunTy t1 t2)    = expandType seen path t1 . expandType seen path t2
+    expandType seen path (ForAllTy _tv t) = expandType seen path t
+
+    papp :: [TyVar] -> [Type] -> ([(TyVar, Type)], Either [TyVar] [Type])
+    papp []       tys      = ([], Right tys)
+    papp tvs      []       = ([], Left tvs)
+    papp (tv:tvs) (ty:tys) = ((tv, ty):env, remainder)
+      where (env, remainder) = papp tvs tys
+
+{-
+************************************************************************
+*                                                                      *
+        Deciding which type constructors are recursive
+*                                                                      *
+************************************************************************
+
+Identification of recursive TyCons
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+The knot-tying parameters: @rec_details_list@ is an alist mapping @Name@s to
+@TyThing@s.
+
+Identifying a TyCon as recursive serves two purposes
+
+1.  Avoid infinite types.  Non-recursive newtypes are treated as
+"transparent", like type synonyms, after the type checker.  If we did
+this for all newtypes, we'd get infinite types.  So we figure out for
+each newtype whether it is "recursive", and add a coercion if so.  In
+effect, we are trying to "cut the loops" by identifying a loop-breaker.
+
+2.  Avoid infinite unboxing.  This has nothing to do with newtypes.
+Suppose we have
+        data T = MkT Int T
+        f (MkT x t) = f t
+Well, this function diverges, but we don't want the strictness analyser
+to diverge.  But the strictness analyser will diverge because it looks
+deeper and deeper into the structure of T.   (I believe there are
+examples where the function does something sane, and the strictness
+analyser still diverges, but I can't see one now.)
+
+Now, concerning (1), the FC2 branch currently adds a coercion for ALL
+newtypes.  I did this as an experiment, to try to expose cases in which
+the coercions got in the way of optimisations.  If it turns out that we
+can indeed always use a coercion, then we don't risk recursive types,
+and don't need to figure out what the loop breakers are.
+
+For newtype *families* though, we will always have a coercion, so they
+are always loop breakers!  So you can easily adjust the current
+algorithm by simply treating all newtype families as loop breakers (and
+indeed type families).  I think.
+
+
+
+For newtypes, we label some as "recursive" such that
+
+    INVARIANT: there is no cycle of non-recursive newtypes
+
+In any loop, only one newtype need be marked as recursive; it is
+a "loop breaker".  Labelling more than necessary as recursive is OK,
+provided the invariant is maintained.
+
+A newtype M.T is defined to be "recursive" iff
+        (a) it is declared in an hi-boot file (see RdrHsSyn.hsIfaceDecl)
+        (b) it is declared in a source file, but that source file has a
+            companion hi-boot file which declares the type
+   or   (c) one can get from T's rhs to T via type
+            synonyms, or non-recursive newtypes *in M*
+             e.g.  newtype T = MkT (T -> Int)
+
+(a) is conservative; declarations in hi-boot files are always
+        made loop breakers. That's why in (b) we can restrict attention
+        to tycons in M, because any loops through newtypes outside M
+        will be broken by those newtypes
+(b) ensures that a newtype is not treated as a loop breaker in one place
+and later as a non-loop-breaker.  This matters in GHCi particularly, when
+a newtype T might be embedded in many types in the environment, and then
+T's source module is compiled.  We don't want T's recursiveness to change.
+
+The "recursive" flag for algebraic data types is irrelevant (never consulted)
+for types with more than one constructor.
+
+
+An algebraic data type M.T is "recursive" iff
+        it has just one constructor, and
+        (a) it is declared in an hi-boot file (see RdrHsSyn.hsIfaceDecl)
+        (b) it is declared in a source file, but that source file has a
+            companion hi-boot file which declares the type
+ or     (c) one can get from its arg types to T via type synonyms,
+            or by non-recursive newtypes or non-recursive product types in M
+             e.g.  data T = MkT (T -> Int) Bool
+Just like newtype in fact
+
+A type synonym is recursive if one can get from its
+right hand side back to it via type synonyms.  (This is
+reported as an error.)
+
+A class is recursive if one can get from its superclasses
+back to it.  (This is an error too.)
+
+Hi-boot types
+~~~~~~~~~~~~~
+A data type read from an hi-boot file will have an AbstractTyCon as its AlgTyConRhs
+and will respond True to isAbstractTyCon. The idea is that we treat these as if one
+could get from these types to anywhere.  So when we see
+
+        module Baz where
+        import {-# SOURCE #-} Foo( T )
+        newtype S = MkS T
+
+then we mark S as recursive, just in case. What that means is that if we see
+
+        import Baz( S )
+        newtype R = MkR S
+
+then we don't need to look inside S to compute R's recursiveness.  Since S is imported
+(not from an hi-boot file), one cannot get from R back to S except via an hi-boot file,
+and that means that some data type will be marked recursive along the way.  So R is
+unconditionly non-recursive (i.e. there'll be a loop breaker elsewhere if necessary)
+
+This in turn means that we grovel through fewer interface files when computing
+recursiveness, because we need only look at the type decls in the module being
+compiled, plus the outer structure of directly-mentioned types.
+-}
+
+data RecTyInfo = RTI { rti_promotable :: Bool
+                     , rti_roles      :: Name -> [Role]
+                     , rti_is_rec     :: Name -> RecFlag }
+
+calcRecFlags :: ModDetails -> Bool  -- hs-boot file?
+             -> RoleAnnots -> [TyThing] -> RecTyInfo
+-- The 'boot_names' are the things declared in M.hi-boot, if M is the current module.
+-- Any type constructors in boot_names are automatically considered loop breakers
+calcRecFlags boot_details is_boot mrole_env tyclss
+  = RTI { rti_promotable = is_promotable
+        , rti_roles      = roles
+        , rti_is_rec     = is_rec }
+  where
+    rec_tycon_names = mkNameSet (map tyConName all_tycons)
+    all_tycons = mapMaybe getTyCon tyclss
+                   -- Recursion of newtypes/data types can happen via
+                   -- the class TyCon, so tyclss includes the class tycons
+
+    is_promotable = all (isPromotableTyCon rec_tycon_names) all_tycons
+
+    roles = inferRoles is_boot mrole_env all_tycons
+
+    ----------------- Recursion calculation ----------------
+    is_rec n | n `elemNameSet` rec_names = Recursive
+             | otherwise                 = NonRecursive
+
+    boot_name_set = availsToNameSet (md_exports boot_details)
+    rec_names = boot_name_set     `unionNameSet`
+                nt_loop_breakers  `unionNameSet`
+                prod_loop_breakers
+
+
+        -------------------------------------------------
+        --                      NOTE
+        -- These edge-construction loops rely on
+        -- every loop going via tyclss, the types and classes
+        -- in the module being compiled.  Stuff in interface
+        -- files should be correctly marked.  If not (e.g. a
+        -- type synonym in a hi-boot file) we can get an infinite
+        -- loop.  We could program round this, but it'd make the code
+        -- rather less nice, so I'm not going to do that yet.
+
+    single_con_tycons = [ tc | tc <- all_tycons
+                             , not (tyConName tc `elemNameSet` boot_name_set)
+                                 -- Remove the boot_name_set because they are
+                                 -- going to be loop breakers regardless.
+                             , isSingleton (tyConDataCons tc) ]
+        -- Both newtypes and data types, with exactly one data constructor
+
+    (new_tycons, prod_tycons) = partition isNewTyCon single_con_tycons
+        -- NB: we do *not* call isProductTyCon because that checks
+        --     for vanilla-ness of data constructors; and that depends
+        --     on empty existential type variables; and that is figured
+        --     out by tcResultType; which uses tcMatchTy; which uses
+        --     coreView; which calls coreExpandTyCon_maybe; which uses
+        --     the recursiveness of the TyCon.  Result... a black hole.
+        -- YUK YUK YUK
+
+        --------------- Newtypes ----------------------
+    nt_loop_breakers = mkNameSet (findLoopBreakers nt_edges)
+    is_rec_nt tc = tyConName tc  `elemNameSet` nt_loop_breakers
+        -- is_rec_nt is a locally-used helper function
+
+    nt_edges = [(t, mk_nt_edges t) | t <- new_tycons]
+
+    mk_nt_edges nt      -- Invariant: nt is a newtype
+        = [ tc | tc <- nameEnvElts (tyConsOfType (new_tc_rhs nt))
+                        -- tyConsOfType looks through synonyms
+               , tc `elem` new_tycons ]
+           -- If not (tc `elem` new_tycons) we know that either it's a local *data* type,
+           -- or it's imported.  Either way, it can't form part of a newtype cycle
+
+        --------------- Product types ----------------------
+    prod_loop_breakers = mkNameSet (findLoopBreakers prod_edges)
+
+    prod_edges = [(tc, mk_prod_edges tc) | tc <- prod_tycons]
+
+    mk_prod_edges tc    -- Invariant: tc is a product tycon
+        = concatMap (mk_prod_edges1 tc) (dataConOrigArgTys (head (tyConDataCons tc)))
+
+    mk_prod_edges1 ptc ty = concatMap (mk_prod_edges2 ptc) (nameEnvElts (tyConsOfType ty))
+
+    mk_prod_edges2 ptc tc
+        | tc `elem` prod_tycons   = [tc]                -- Local product
+        | tc `elem` new_tycons    = if is_rec_nt tc     -- Local newtype
+                                    then []
+                                    else mk_prod_edges1 ptc (new_tc_rhs tc)
+                -- At this point we know that either it's a local non-product data type,
+                -- or it's imported.  Either way, it can't form part of a cycle
+        | otherwise = []
+
+new_tc_rhs :: TyCon -> Type
+new_tc_rhs tc = snd (newTyConRhs tc)    -- Ignore the type variables
+
+getTyCon :: TyThing -> Maybe TyCon
+getTyCon (ATyCon tc) = Just tc
+getTyCon _           = Nothing
+
+findLoopBreakers :: [(TyCon, [TyCon])] -> [Name]
+-- Finds a set of tycons that cut all loops
+findLoopBreakers deps
+  = go [(tc,tc,ds) | (tc,ds) <- deps]
+  where
+    go edges = [ name
+               | CyclicSCC ((tc,_,_) : edges') <- stronglyConnCompFromEdgedVerticesR edges,
+                 name <- tyConName tc : go edges']
+
+{-
+************************************************************************
+*                                                                      *
+                  Promotion calculation
+*                                                                      *
+************************************************************************
+
+See Note [Checking whether a group is promotable]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+We only want to promote a TyCon if all its data constructors
+are promotable; it'd be very odd to promote some but not others.
+
+But the data constructors may mention this or other TyCons.
+
+So we treat the recursive uses as all OK (ie promotable) and
+do one pass to check that each TyCon is promotable.
+
+Currently type synonyms are not promotable, though that
+could change.
+-}
+
+isPromotableTyCon :: NameSet -> TyCon -> Bool
+isPromotableTyCon rec_tycons tc
+  =  isAlgTyCon tc    -- Only algebraic; not even synonyms
+                     -- (we could reconsider the latter)
+  && ok_kind (tyConKind tc)
+  && case algTyConRhs tc of
+       DataTyCon { data_cons = cs } -> all ok_con cs
+       NewTyCon { data_con = c }    -> ok_con c
+       AbstractTyCon {}             -> False
+       DataFamilyTyCon {}           -> False
+
+  where
+    ok_kind kind = all isLiftedTypeKind args && isLiftedTypeKind res
+            where  -- Checks for * -> ... -> * -> *
+              (args, res) = splitKindFunTys kind
+
+    -- See Note [Promoted data constructors] in TyCon
+    ok_con con = all (isLiftedTypeKind . tyVarKind) ex_tvs
+              && null eq_spec   -- No constraints
+              && null theta
+              && all (isPromotableType rec_tycons) orig_arg_tys
+       where
+         (_, ex_tvs, eq_spec, theta, orig_arg_tys, _) = dataConFullSig con
+
+
+isPromotableType :: NameSet -> Type -> Bool
+-- Must line up with DataCon.promoteType
+-- But the function lives here because we must treat the
+-- *recursive* tycons as promotable
+isPromotableType rec_tcs con_arg_ty
+  = go con_arg_ty
+  where
+    go (TyConApp tc tys) =  tys `lengthIs` tyConArity tc
+                         && (tyConName tc `elemNameSet` rec_tcs
+                             || isJust (promotableTyCon_maybe tc))
+                         && all go tys
+    go (FunTy arg res)   = go arg && go res
+    go (TyVarTy {})      = True
+    go _                 = False
+
+{-
+************************************************************************
+*                                                                      *
+        Role annotations
+*                                                                      *
+************************************************************************
+-}
+
+type RoleAnnots = NameEnv (LRoleAnnotDecl Name)
+
+extractRoleAnnots :: TyClGroup Name -> RoleAnnots
+extractRoleAnnots (TyClGroup { group_roles = roles })
+  = mkNameEnv [ (tycon, role_annot)
+              | role_annot@(L _ (RoleAnnotDecl (L _ tycon) _)) <- roles ]
+
+emptyRoleAnnots :: RoleAnnots
+emptyRoleAnnots = emptyNameEnv
+
+lookupRoleAnnots :: RoleAnnots -> Name -> Maybe (LRoleAnnotDecl Name)
+lookupRoleAnnots = lookupNameEnv
+
+{-
+************************************************************************
+*                                                                      *
+        Role inference
+*                                                                      *
+************************************************************************
+
+Note [Role inference]
+~~~~~~~~~~~~~~~~~~~~~
+The role inference algorithm datatype definitions to infer the roles on the
+parameters. Although these roles are stored in the tycons, we can perform this
+algorithm on the built tycons, as long as we don't peek at an as-yet-unknown
+roles field! Ah, the magic of laziness.
+
+First, we choose appropriate initial roles. For families and classes, roles
+(including initial roles) are N. For datatypes, we start with the role in the
+role annotation (if any), or otherwise use Phantom. This is done in
+initialRoleEnv1.
+
+The function irGroup then propagates role information until it reaches a
+fixpoint, preferring N over (R or P) and R over P. To aid in this, we have a
+monad RoleM, which is a combination reader and state monad. In its state are
+the current RoleEnv, which gets updated by role propagation, and an update
+bit, which we use to know whether or not we've reached the fixpoint. The
+environment of RoleM contains the tycon whose parameters we are inferring, and
+a VarEnv from parameters to their positions, so we can update the RoleEnv.
+Between tycons, this reader information is missing; it is added by
+addRoleInferenceInfo.
+
+There are two kinds of tycons to consider: algebraic ones (excluding classes)
+and type synonyms. (Remember, families don't participate -- all their parameters
+are N.) An algebraic tycon processes each of its datacons, in turn. Note that
+a datacon's universally quantified parameters might be different from the parent
+tycon's parameters, so we use the datacon's univ parameters in the mapping from
+vars to positions. Note also that we don't want to infer roles for existentials
+(they're all at N, too), so we put them in the set of local variables. As an
+optimisation, we skip any tycons whose roles are already all Nominal, as there
+nowhere else for them to go. For synonyms, we just analyse their right-hand sides.
+
+irType walks through a type, looking for uses of a variable of interest and
+propagating role information. Because anything used under a phantom position
+is at phantom and anything used under a nominal position is at nominal, the
+irType function can assume that anything it sees is at representational. (The
+other possibilities are pruned when they're encountered.)
+
+The rest of the code is just plumbing.
+
+How do we know that this algorithm is correct? It should meet the following
+specification:
+
+Let Z be a role context -- a mapping from variables to roles. The following
+rules define the property (Z |- t : r), where t is a type and r is a role:
+
+Z(a) = r'        r' <= r
+------------------------- RCVar
+Z |- a : r
+
+---------- RCConst
+Z |- T : r               -- T is a type constructor
+
+Z |- t1 : r
+Z |- t2 : N
+-------------- RCApp
+Z |- t1 t2 : r
+
+forall i<=n. (r_i is R or N) implies Z |- t_i : r_i
+roles(T) = r_1 .. r_n
+---------------------------------------------------- RCDApp
+Z |- T t_1 .. t_n : R
+
+Z, a:N |- t : r
+---------------------- RCAll
+Z |- forall a:k.t : r
+
+
+We also have the following rules:
+
+For all datacon_i in type T, where a_1 .. a_n are universally quantified
+and b_1 .. b_m are existentially quantified, and the arguments are t_1 .. t_p,
+then if forall j<=p, a_1 : r_1 .. a_n : r_n, b_1 : N .. b_m : N |- t_j : R,
+then roles(T) = r_1 .. r_n
+
+roles(->) = R, R
+roles(~#) = N, N
+
+With -dcore-lint on, the output of this algorithm is checked in checkValidRoles,
+called from checkValidTycon.
+
+Note [Role-checking data constructor arguments]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Consider
+  data T a where
+    MkT :: Eq b => F a -> (a->a) -> T (G a)
+
+Then we want to check the roles at which 'a' is used
+in MkT's type.  We want to work on the user-written type,
+so we need to take into account
+  * the arguments:   (F a) and (a->a)
+  * the context:     C a b
+  * the result type: (G a)   -- this is in the eq_spec
+-}
+
+type RoleEnv    = NameEnv [Role]        -- from tycon names to roles
+
+-- This, and any of the functions it calls, must *not* look at the roles
+-- field of a tycon we are inferring roles about!
+-- See Note [Role inference]
+inferRoles :: Bool -> RoleAnnots -> [TyCon] -> Name -> [Role]
+inferRoles is_boot annots tycons
+  = let role_env  = initialRoleEnv is_boot annots tycons
+        role_env' = irGroup role_env tycons in
+    \name -> case lookupNameEnv role_env' name of
+      Just roles -> roles
+      Nothing    -> pprPanic "inferRoles" (ppr name)
+
+initialRoleEnv :: Bool -> RoleAnnots -> [TyCon] -> RoleEnv
+initialRoleEnv is_boot annots = extendNameEnvList emptyNameEnv .
+                                map (initialRoleEnv1 is_boot annots)
+
+initialRoleEnv1 :: Bool -> RoleAnnots -> TyCon -> (Name, [Role])
+initialRoleEnv1 is_boot annots_env tc
+  | isFamilyTyCon tc      = (name, map (const Nominal) tyvars)
+  | isAlgTyCon tc         = (name, default_roles)
+  | isTypeSynonymTyCon tc = (name, default_roles)
+  | otherwise             = pprPanic "initialRoleEnv1" (ppr tc)
+  where name         = tyConName tc
+        tyvars       = tyConTyVars tc
+        (kvs, tvs)   = span isKindVar tyvars
+
+          -- if the number of annotations in the role annotation decl
+          -- is wrong, just ignore it. We check this in the validity check.
+        role_annots
+          = case lookupNameEnv annots_env name of
+              Just (L _ (RoleAnnotDecl _ annots))
+                | annots `equalLength` tvs -> map unLoc annots
+              _                            -> map (const Nothing) tvs
+        default_roles = map (const Nominal) kvs ++
+                        zipWith orElse role_annots (repeat default_role)
+
+        default_role
+          | isClassTyCon tc = Nominal
+          | is_boot         = Representational
+          | otherwise       = Phantom
+
+irGroup :: RoleEnv -> [TyCon] -> RoleEnv
+irGroup env tcs
+  = let (env', update) = runRoleM env $ mapM_ irTyCon tcs in
+    if update
+    then irGroup env' tcs
+    else env'
+
+irTyCon :: TyCon -> RoleM ()
+irTyCon tc
+  | isAlgTyCon tc
+  = do { old_roles <- lookupRoles tc
+       ; unless (all (== Nominal) old_roles) $  -- also catches data families,
+                                                -- which don't want or need role inference
+    do { whenIsJust (tyConClass_maybe tc) (irClass tc_name)
+       ; addRoleInferenceInfo tc_name (tyConTyVars tc) $
+         mapM_ (irType emptyVarSet) (tyConStupidTheta tc)  -- See #8958
+       ; mapM_ (irDataCon tc_name) (visibleDataCons $ algTyConRhs tc) }}
+
+  | Just ty <- synTyConRhs_maybe tc
+  = addRoleInferenceInfo tc_name (tyConTyVars tc) $
+    irType emptyVarSet ty
+
+  | otherwise
+  = return ()
+
+  where
+    tc_name = tyConName tc
+
+-- any type variable used in an associated type must be Nominal
+irClass :: Name -> Class -> RoleM ()
+irClass tc_name cls
+  = addRoleInferenceInfo tc_name cls_tvs $
+    mapM_ ir_at (classATs cls)
+  where
+    cls_tvs    = classTyVars cls
+    cls_tv_set = mkVarSet cls_tvs
+
+    ir_at at_tc
+      = mapM_ (updateRole Nominal) (varSetElems nvars)
+      where nvars = (mkVarSet $ tyConTyVars at_tc) `intersectVarSet` cls_tv_set
+
+-- See Note [Role inference]
+irDataCon :: Name -> DataCon -> RoleM ()
+irDataCon tc_name datacon
+  = addRoleInferenceInfo tc_name univ_tvs $
+    mapM_ (irType ex_var_set) (eqSpecPreds eq_spec ++ theta ++ arg_tys)
+      -- See Note [Role-checking data constructor arguments]
+  where
+    (univ_tvs, ex_tvs, eq_spec, theta, arg_tys, _res_ty) = dataConFullSig datacon
+    ex_var_set = mkVarSet ex_tvs
+
+irType :: VarSet -> Type -> RoleM ()
+irType = go
+  where
+    go lcls (TyVarTy tv) = unless (tv `elemVarSet` lcls) $
+                           updateRole Representational tv
+    go lcls (AppTy t1 t2) = go lcls t1 >> mark_nominal lcls t2
+    go lcls (TyConApp tc tys)
+      = do { roles <- lookupRolesX tc
+           ; zipWithM_ (go_app lcls) roles tys }
+    go lcls (FunTy t1 t2) = go lcls t1 >> go lcls t2
+    go lcls (ForAllTy tv ty) = go (extendVarSet lcls tv) ty
+    go _    (LitTy {}) = return ()
+
+    go_app _ Phantom _ = return ()                 -- nothing to do here
+    go_app lcls Nominal ty = mark_nominal lcls ty  -- all vars below here are N
+    go_app lcls Representational ty = go lcls ty
+
+    mark_nominal lcls ty = let nvars = tyVarsOfType ty `minusVarSet` lcls in
+                           mapM_ (updateRole Nominal) (varSetElems nvars)
+
+-- like lookupRoles, but with Nominal tags at the end for oversaturated TyConApps
+lookupRolesX :: TyCon -> RoleM [Role]
+lookupRolesX tc
+  = do { roles <- lookupRoles tc
+       ; return $ roles ++ repeat Nominal }
+
+-- gets the roles either from the environment or the tycon
+lookupRoles :: TyCon -> RoleM [Role]
+lookupRoles tc
+  = do { env <- getRoleEnv
+       ; case lookupNameEnv env (tyConName tc) of
+           Just roles -> return roles
+           Nothing    -> return $ tyConRoles tc }
+
+-- tries to update a role; won't ever update a role "downwards"
+updateRole :: Role -> TyVar -> RoleM ()
+updateRole role tv
+  = do { var_ns <- getVarNs
+       ; case lookupVarEnv var_ns tv of
+       { Nothing -> pprPanic "updateRole" (ppr tv)
+       ; Just n  -> do
+       { name <- getTyConName
+       ; updateRoleEnv name n role }}}
+
+-- the state in the RoleM monad
+data RoleInferenceState = RIS { role_env  :: RoleEnv
+                              , update    :: Bool }
+
+-- the environment in the RoleM monad
+type VarPositions = VarEnv Int
+data RoleInferenceInfo = RII { var_ns :: VarPositions
+                             , name   :: Name }
+
+-- See [Role inference]
+newtype RoleM a = RM { unRM :: Maybe RoleInferenceInfo
+                            -> RoleInferenceState
+                            -> (a, RoleInferenceState) }
+
+instance Functor RoleM where
+    fmap = liftM
+
+instance Applicative RoleM where
+    pure = return
+    (<*>) = ap
+
+instance Monad RoleM where
+  return x = RM $ \_ state -> (x, state)
+  a >>= f  = RM $ \m_info state -> let (a', state') = unRM a m_info state in
+                                   unRM (f a') m_info state'
+
+runRoleM :: RoleEnv -> RoleM () -> (RoleEnv, Bool)
+runRoleM env thing = (env', update)
+  where RIS { role_env = env', update = update } = snd $ unRM thing Nothing state
+        state = RIS { role_env  = env, update    = False }
+
+addRoleInferenceInfo :: Name -> [TyVar] -> RoleM a -> RoleM a
+addRoleInferenceInfo name tvs thing
+  = RM $ \_nothing state -> ASSERT( isNothing _nothing )
+                            unRM thing (Just info) state
+  where info = RII { var_ns = mkVarEnv (zip tvs [0..]), name = name }
+
+getRoleEnv :: RoleM RoleEnv
+getRoleEnv = RM $ \_ state@(RIS { role_env = env }) -> (env, state)
+
+getVarNs :: RoleM VarPositions
+getVarNs = RM $ \m_info state ->
+                case m_info of
+                  Nothing -> panic "getVarNs"
+                  Just (RII { var_ns = var_ns }) -> (var_ns, state)
+
+getTyConName :: RoleM Name
+getTyConName = RM $ \m_info state ->
+                    case m_info of
+                      Nothing -> panic "getTyConName"
+                      Just (RII { name = name }) -> (name, state)
+
+
+updateRoleEnv :: Name -> Int -> Role -> RoleM ()
+updateRoleEnv name n role
+  = RM $ \_ state@(RIS { role_env = role_env }) -> ((),
+         case lookupNameEnv role_env name of
+           Nothing -> pprPanic "updateRoleEnv" (ppr name)
+           Just roles -> let (before, old_role : after) = splitAt n roles in
+                         if role `ltRole` old_role
+                         then let roles' = before ++ role : after
+                                  role_env' = extendNameEnv role_env name roles' in
+                              RIS { role_env = role_env', update = True }
+                         else state )