diff options
Diffstat (limited to 'compiler/GHC/Tc/Deriv/Generics.hs')
-rw-r--r-- | compiler/GHC/Tc/Deriv/Generics.hs | 1039 |
1 files changed, 1039 insertions, 0 deletions
diff --git a/compiler/GHC/Tc/Deriv/Generics.hs b/compiler/GHC/Tc/Deriv/Generics.hs new file mode 100644 index 0000000000..d40824e3ea --- /dev/null +++ b/compiler/GHC/Tc/Deriv/Generics.hs @@ -0,0 +1,1039 @@ +{- +(c) The University of Glasgow 2011 + +-} + +{-# LANGUAGE CPP, ScopedTypeVariables, TupleSections #-} +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE TypeFamilies #-} + +{-# OPTIONS_GHC -Wno-incomplete-uni-patterns #-} + +-- | The deriving code for the Generic class +module GHC.Tc.Deriv.Generics + (canDoGenerics + , canDoGenerics1 + , GenericKind(..) + , gen_Generic_binds + , get_gen1_constrained_tys + ) +where + +import GhcPrelude + +import GHC.Hs +import GHC.Core.Type +import GHC.Tc.Utils.TcType +import GHC.Tc.Deriv.Generate +import GHC.Tc.Deriv.Functor +import GHC.Core.DataCon +import GHC.Core.TyCon +import GHC.Core.FamInstEnv ( FamInst, FamFlavor(..), mkSingleCoAxiom ) +import GHC.Tc.Instance.Family +import GHC.Types.Module ( moduleName, moduleNameFS + , moduleUnitId, unitIdFS, getModule ) +import GHC.Iface.Env ( newGlobalBinder ) +import GHC.Types.Name hiding ( varName ) +import GHC.Types.Name.Reader +import GHC.Types.Basic +import TysPrim +import TysWiredIn +import PrelNames +import GHC.Tc.Utils.Env +import GHC.Tc.Utils.Monad +import GHC.Driver.Types +import ErrUtils( Validity(..), andValid ) +import GHC.Types.SrcLoc +import Bag +import GHC.Types.Var.Env +import GHC.Types.Var.Set (elemVarSet) +import Outputable +import FastString +import Util + +import Control.Monad (mplus) +import Data.List (zip4, partition) +import Data.Maybe (isJust) + +#include "HsVersions.h" + +{- +************************************************************************ +* * +\subsection{Bindings for the new generic deriving mechanism} +* * +************************************************************************ + +For the generic representation we need to generate: +\begin{itemize} +\item A Generic instance +\item A Rep type instance +\item Many auxiliary datatypes and instances for them (for the meta-information) +\end{itemize} +-} + +gen_Generic_binds :: GenericKind -> TyCon -> [Type] + -> TcM (LHsBinds GhcPs, FamInst) +gen_Generic_binds gk tc inst_tys = do + repTyInsts <- tc_mkRepFamInsts gk tc inst_tys + return (mkBindsRep gk tc, repTyInsts) + +{- +************************************************************************ +* * +\subsection{Generating representation types} +* * +************************************************************************ +-} + +get_gen1_constrained_tys :: TyVar -> Type -> [Type] +-- called by GHC.Tc.Deriv.Infer.inferConstraints; generates a list of +-- types, each of which must be a Functor in order for the Generic1 instance to +-- work. +get_gen1_constrained_tys argVar + = argTyFold argVar $ ArgTyAlg { ata_rec0 = const [] + , ata_par1 = [], ata_rec1 = const [] + , ata_comp = (:) } + +{- + +Note [Requirements for deriving Generic and Rep] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +In the following, T, Tfun, and Targ are "meta-variables" ranging over type +expressions. + +(Generic T) and (Rep T) are derivable for some type expression T if the +following constraints are satisfied. + + (a) D is a type constructor *value*. In other words, D is either a type + constructor or it is equivalent to the head of a data family instance (up to + alpha-renaming). + + (b) D cannot have a "stupid context". + + (c) The right-hand side of D cannot include existential types, universally + quantified types, or "exotic" unlifted types. An exotic unlifted type + is one which is not listed in the definition of allowedUnliftedTy + (i.e., one for which we have no representation type). + See Note [Generics and unlifted types] + + (d) T :: *. + +(Generic1 T) and (Rep1 T) are derivable for some type expression T if the +following constraints are satisfied. + + (a),(b),(c) As above. + + (d) T must expect arguments, and its last parameter must have kind *. + + We use `a' to denote the parameter of D that corresponds to the last + parameter of T. + + (e) For any type-level application (Tfun Targ) in the right-hand side of D + where the head of Tfun is not a tuple constructor: + + (b1) `a' must not occur in Tfun. + + (b2) If `a' occurs in Targ, then Tfun :: * -> *. + +-} + +canDoGenerics :: TyCon -> Validity +-- canDoGenerics determines if Generic/Rep can be derived. +-- +-- Check (a) from Note [Requirements for deriving Generic and Rep] is taken +-- care of because canDoGenerics is applied to rep tycons. +-- +-- It returns IsValid if deriving is possible. It returns (NotValid reason) +-- if not. +canDoGenerics tc + = mergeErrors ( + -- Check (b) from Note [Requirements for deriving Generic and Rep]. + (if (not (null (tyConStupidTheta tc))) + then (NotValid (tc_name <+> text "must not have a datatype context")) + else IsValid) + -- See comment below + : (map bad_con (tyConDataCons tc))) + where + -- The tc can be a representation tycon. When we want to display it to the + -- user (in an error message) we should print its parent + tc_name = ppr $ case tyConFamInst_maybe tc of + Just (ptc, _) -> ptc + _ -> tc + + -- Check (c) from Note [Requirements for deriving Generic and Rep]. + -- + -- If any of the constructors has an exotic unlifted type as argument, + -- then we can't build the embedding-projection pair, because + -- it relies on instantiating *polymorphic* sum and product types + -- at the argument types of the constructors + bad_con dc = if (any bad_arg_type (dataConOrigArgTys dc)) + then (NotValid (ppr dc <+> text + "must not have exotic unlifted or polymorphic arguments")) + else (if (not (isVanillaDataCon dc)) + then (NotValid (ppr dc <+> text "must be a vanilla data constructor")) + else IsValid) + + -- Nor can we do the job if it's an existential data constructor, + -- Nor if the args are polymorphic types (I don't think) + bad_arg_type ty = (isUnliftedType ty && not (allowedUnliftedTy ty)) + || not (isTauTy ty) + +-- Returns True the Type argument is an unlifted type which has a +-- corresponding generic representation type. For example, +-- (allowedUnliftedTy Int#) would return True since there is the UInt +-- representation type. +allowedUnliftedTy :: Type -> Bool +allowedUnliftedTy = isJust . unboxedRepRDRs + +mergeErrors :: [Validity] -> Validity +mergeErrors [] = IsValid +mergeErrors (NotValid s:t) = case mergeErrors t of + IsValid -> NotValid s + NotValid s' -> NotValid (s <> text ", and" $$ s') +mergeErrors (IsValid : t) = mergeErrors t + +-- A datatype used only inside of canDoGenerics1. It's the result of analysing +-- a type term. +data Check_for_CanDoGenerics1 = CCDG1 + { _ccdg1_hasParam :: Bool -- does the parameter of interest occurs in + -- this type? + , _ccdg1_errors :: Validity -- errors generated by this type + } + +{- + +Note [degenerate use of FFoldType] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +We use foldDataConArgs here only for its ability to treat tuples +specially. foldDataConArgs also tracks covariance (though it assumes all +higher-order type parameters are covariant) and has hooks for special handling +of functions and polytypes, but we do *not* use those. + +The key issue is that Generic1 deriving currently offers no sophisticated +support for functions. For example, we cannot handle + + data F a = F ((a -> Int) -> Int) + +even though a is occurring covariantly. + +In fact, our rule is harsh: a is simply not allowed to occur within the first +argument of (->). We treat (->) the same as any other non-tuple tycon. + +Unfortunately, this means we have to track "the parameter occurs in this type" +explicitly, even though foldDataConArgs is also doing this internally. + +-} + +-- canDoGenerics1 determines if a Generic1/Rep1 can be derived. +-- +-- Checks (a) through (c) from Note [Requirements for deriving Generic and Rep] +-- are taken care of by the call to canDoGenerics. +-- +-- It returns IsValid if deriving is possible. It returns (NotValid reason) +-- if not. +canDoGenerics1 :: TyCon -> Validity +canDoGenerics1 rep_tc = + canDoGenerics rep_tc `andValid` additionalChecks + where + additionalChecks + -- check (d) from Note [Requirements for deriving Generic and Rep] + | null (tyConTyVars rep_tc) = NotValid $ + text "Data type" <+> quotes (ppr rep_tc) + <+> text "must have some type parameters" + + | otherwise = mergeErrors $ concatMap check_con data_cons + + data_cons = tyConDataCons rep_tc + check_con con = case check_vanilla con of + j@(NotValid {}) -> [j] + IsValid -> _ccdg1_errors `map` foldDataConArgs (ft_check con) con + + bad :: DataCon -> SDoc -> SDoc + bad con msg = text "Constructor" <+> quotes (ppr con) <+> msg + + check_vanilla :: DataCon -> Validity + check_vanilla con | isVanillaDataCon con = IsValid + | otherwise = NotValid (bad con existential) + + bmzero = CCDG1 False IsValid + bmbad con s = CCDG1 True $ NotValid $ bad con s + bmplus (CCDG1 b1 m1) (CCDG1 b2 m2) = CCDG1 (b1 || b2) (m1 `andValid` m2) + + -- check (e) from Note [Requirements for deriving Generic and Rep] + -- See also Note [degenerate use of FFoldType] + ft_check :: DataCon -> FFoldType Check_for_CanDoGenerics1 + ft_check con = FT + { ft_triv = bmzero + + , ft_var = caseVar, ft_co_var = caseVar + + -- (component_0,component_1,...,component_n) + , ft_tup = \_ components -> if any _ccdg1_hasParam (init components) + then bmbad con wrong_arg + else foldr bmplus bmzero components + + -- (dom -> rng), where the head of ty is not a tuple tycon + , ft_fun = \dom rng -> -- cf #8516 + if _ccdg1_hasParam dom + then bmbad con wrong_arg + else bmplus dom rng + + -- (ty arg), where head of ty is neither (->) nor a tuple constructor and + -- the parameter of interest does not occur in ty + , ft_ty_app = \_ _ arg -> arg + + , ft_bad_app = bmbad con wrong_arg + , ft_forall = \_ body -> body -- polytypes are handled elsewhere + } + where + caseVar = CCDG1 True IsValid + + + existential = text "must not have existential arguments" + wrong_arg = text "applies a type to an argument involving the last parameter" + $$ text "but the applied type is not of kind * -> *" + +{- +************************************************************************ +* * +\subsection{Generating the RHS of a generic default method} +* * +************************************************************************ +-} + +type US = Int -- Local unique supply, just a plain Int +type Alt = (LPat GhcPs, LHsExpr GhcPs) + +-- GenericKind serves to mark if a datatype derives Generic (Gen0) or +-- Generic1 (Gen1). +data GenericKind = Gen0 | Gen1 + +-- as above, but with a payload of the TyCon's name for "the" parameter +data GenericKind_ = Gen0_ | Gen1_ TyVar + +-- as above, but using a single datacon's name for "the" parameter +data GenericKind_DC = Gen0_DC | Gen1_DC TyVar + +forgetArgVar :: GenericKind_DC -> GenericKind +forgetArgVar Gen0_DC = Gen0 +forgetArgVar Gen1_DC{} = Gen1 + +-- When working only within a single datacon, "the" parameter's name should +-- match that datacon's name for it. +gk2gkDC :: GenericKind_ -> DataCon -> GenericKind_DC +gk2gkDC Gen0_ _ = Gen0_DC +gk2gkDC Gen1_{} d = Gen1_DC $ last $ dataConUnivTyVars d + + +-- Bindings for the Generic instance +mkBindsRep :: GenericKind -> TyCon -> LHsBinds GhcPs +mkBindsRep gk tycon = + unitBag (mkRdrFunBind (L loc from01_RDR) [from_eqn]) + `unionBags` + unitBag (mkRdrFunBind (L loc to01_RDR) [to_eqn]) + where + -- The topmost M1 (the datatype metadata) has the exact same type + -- across all cases of a from/to definition, and can be factored out + -- to save some allocations during typechecking. + -- See Note [Generics compilation speed tricks] + from_eqn = mkHsCaseAlt x_Pat $ mkM1_E + $ nlHsPar $ nlHsCase x_Expr from_matches + to_eqn = mkHsCaseAlt (mkM1_P x_Pat) $ nlHsCase x_Expr to_matches + + from_matches = [mkHsCaseAlt pat rhs | (pat,rhs) <- from_alts] + to_matches = [mkHsCaseAlt pat rhs | (pat,rhs) <- to_alts ] + loc = srcLocSpan (getSrcLoc tycon) + datacons = tyConDataCons tycon + + (from01_RDR, to01_RDR) = case gk of + Gen0 -> (from_RDR, to_RDR) + Gen1 -> (from1_RDR, to1_RDR) + + -- Recurse over the sum first + from_alts, to_alts :: [Alt] + (from_alts, to_alts) = mkSum gk_ (1 :: US) datacons + where gk_ = case gk of + Gen0 -> Gen0_ + Gen1 -> ASSERT(tyvars `lengthAtLeast` 1) + Gen1_ (last tyvars) + where tyvars = tyConTyVars tycon + +-------------------------------------------------------------------------------- +-- The type synonym instance and synonym +-- type instance Rep (D a b) = Rep_D a b +-- type Rep_D a b = ...representation type for D ... +-------------------------------------------------------------------------------- + +tc_mkRepFamInsts :: GenericKind -- Gen0 or Gen1 + -> TyCon -- The type to generate representation for + -> [Type] -- The type(s) to which Generic(1) is applied + -- in the generated instance + -> TcM FamInst -- Generated representation0 coercion +tc_mkRepFamInsts gk tycon inst_tys = + -- Consider the example input tycon `D`, where data D a b = D_ a + -- Also consider `R:DInt`, where { data family D x y :: * -> * + -- ; data instance D Int a b = D_ a } + do { -- `rep` = GHC.Generics.Rep or GHC.Generics.Rep1 (type family) + fam_tc <- case gk of + Gen0 -> tcLookupTyCon repTyConName + Gen1 -> tcLookupTyCon rep1TyConName + + ; fam_envs <- tcGetFamInstEnvs + + ; let -- If the derived instance is + -- instance Generic (Foo x) + -- then: + -- `arg_ki` = *, `inst_ty` = Foo x :: * + -- + -- If the derived instance is + -- instance Generic1 (Bar x :: k -> *) + -- then: + -- `arg_k` = k, `inst_ty` = Bar x :: k -> * + (arg_ki, inst_ty) = case (gk, inst_tys) of + (Gen0, [inst_t]) -> (liftedTypeKind, inst_t) + (Gen1, [arg_k, inst_t]) -> (arg_k, inst_t) + _ -> pprPanic "tc_mkRepFamInsts" (ppr inst_tys) + + ; let mbFamInst = tyConFamInst_maybe tycon + -- If we're examining a data family instance, we grab the parent + -- TyCon (ptc) and use it to determine the type arguments + -- (inst_args) for the data family *instance*'s type variables. + ptc = maybe tycon fst mbFamInst + (_, inst_args, _) = tcLookupDataFamInst fam_envs ptc $ snd + $ tcSplitTyConApp inst_ty + + ; let -- `tyvars` = [a,b] + (tyvars, gk_) = case gk of + Gen0 -> (all_tyvars, Gen0_) + Gen1 -> ASSERT(not $ null all_tyvars) + (init all_tyvars, Gen1_ $ last all_tyvars) + where all_tyvars = tyConTyVars tycon + + -- `repTy` = D1 ... (C1 ... (S1 ... (Rec0 a))) :: * -> * + ; repTy <- tc_mkRepTy gk_ tycon arg_ki + + -- `rep_name` is a name we generate for the synonym + ; mod <- getModule + ; loc <- getSrcSpanM + ; let tc_occ = nameOccName (tyConName tycon) + rep_occ = case gk of Gen0 -> mkGenR tc_occ; Gen1 -> mkGen1R tc_occ + ; rep_name <- newGlobalBinder mod rep_occ loc + + -- We make sure to substitute the tyvars with their user-supplied + -- type arguments before generating the Rep/Rep1 instance, since some + -- of the tyvars might have been instantiated when deriving. + -- See Note [Generating a correctly typed Rep instance]. + ; let (env_tyvars, env_inst_args) + = case gk_ of + Gen0_ -> (tyvars, inst_args) + Gen1_ last_tv + -- See the "wrinkle" in + -- Note [Generating a correctly typed Rep instance] + -> ( last_tv : tyvars + , anyTypeOfKind (tyVarKind last_tv) : inst_args ) + env = zipTyEnv env_tyvars env_inst_args + in_scope = mkInScopeSet (tyCoVarsOfTypes inst_tys) + subst = mkTvSubst in_scope env + repTy' = substTyUnchecked subst repTy + tcv' = tyCoVarsOfTypeList inst_ty + (tv', cv') = partition isTyVar tcv' + tvs' = scopedSort tv' + cvs' = scopedSort cv' + axiom = mkSingleCoAxiom Nominal rep_name tvs' [] cvs' + fam_tc inst_tys repTy' + + ; newFamInst SynFamilyInst axiom } + +-------------------------------------------------------------------------------- +-- Type representation +-------------------------------------------------------------------------------- + +-- | See documentation of 'argTyFold'; that function uses the fields of this +-- type to interpret the structure of a type when that type is considered as an +-- argument to a constructor that is being represented with 'Rep1'. +data ArgTyAlg a = ArgTyAlg + { ata_rec0 :: (Type -> a) + , ata_par1 :: a, ata_rec1 :: (Type -> a) + , ata_comp :: (Type -> a -> a) + } + +-- | @argTyFold@ implements a generalised and safer variant of the @arg@ +-- function from Figure 3 in <http://dreixel.net/research/pdf/gdmh.pdf>. @arg@ +-- is conceptually equivalent to: +-- +-- > arg t = case t of +-- > _ | isTyVar t -> if (t == argVar) then Par1 else Par0 t +-- > App f [t'] | +-- > representable1 f && +-- > t' == argVar -> Rec1 f +-- > App f [t'] | +-- > representable1 f && +-- > t' has tyvars -> f :.: (arg t') +-- > _ -> Rec0 t +-- +-- where @argVar@ is the last type variable in the data type declaration we are +-- finding the representation for. +-- +-- @argTyFold@ is more general than @arg@ because it uses 'ArgTyAlg' to +-- abstract out the concrete invocations of @Par0@, @Rec0@, @Par1@, @Rec1@, and +-- @:.:@. +-- +-- @argTyFold@ is safer than @arg@ because @arg@ would lead to a GHC panic for +-- some data types. The problematic case is when @t@ is an application of a +-- non-representable type @f@ to @argVar@: @App f [argVar]@ is caught by the +-- @_@ pattern, and ends up represented as @Rec0 t@. This type occurs /free/ in +-- the RHS of the eventual @Rep1@ instance, which is therefore ill-formed. Some +-- representable1 checks have been relaxed, and others were moved to +-- @canDoGenerics1@. +argTyFold :: forall a. TyVar -> ArgTyAlg a -> Type -> a +argTyFold argVar (ArgTyAlg {ata_rec0 = mkRec0, + ata_par1 = mkPar1, ata_rec1 = mkRec1, + ata_comp = mkComp}) = + -- mkRec0 is the default; use it if there is no interesting structure + -- (e.g. occurrences of parameters or recursive occurrences) + \t -> maybe (mkRec0 t) id $ go t where + go :: Type -> -- type to fold through + Maybe a -- the result (e.g. representation type), unless it's trivial + go t = isParam `mplus` isApp where + + isParam = do -- handles parameters + t' <- getTyVar_maybe t + Just $ if t' == argVar then mkPar1 -- moreover, it is "the" parameter + else mkRec0 t -- NB mkRec0 instead of the conventional mkPar0 + + isApp = do -- handles applications + (phi, beta) <- tcSplitAppTy_maybe t + + let interesting = argVar `elemVarSet` exactTyCoVarsOfType beta + + -- Does it have no interesting structure to represent? + if not interesting then Nothing + else -- Is the argument the parameter? Special case for mkRec1. + if Just argVar == getTyVar_maybe beta then Just $ mkRec1 phi + else mkComp phi `fmap` go beta -- It must be a composition. + + +tc_mkRepTy :: -- Gen0_ or Gen1_, for Rep or Rep1 + GenericKind_ + -- The type to generate representation for + -> TyCon + -- The kind of the representation type's argument + -- See Note [Handling kinds in a Rep instance] + -> Kind + -- Generated representation0 type + -> TcM Type +tc_mkRepTy gk_ tycon k = + do + d1 <- tcLookupTyCon d1TyConName + c1 <- tcLookupTyCon c1TyConName + s1 <- tcLookupTyCon s1TyConName + rec0 <- tcLookupTyCon rec0TyConName + rec1 <- tcLookupTyCon rec1TyConName + par1 <- tcLookupTyCon par1TyConName + u1 <- tcLookupTyCon u1TyConName + v1 <- tcLookupTyCon v1TyConName + plus <- tcLookupTyCon sumTyConName + times <- tcLookupTyCon prodTyConName + comp <- tcLookupTyCon compTyConName + uAddr <- tcLookupTyCon uAddrTyConName + uChar <- tcLookupTyCon uCharTyConName + uDouble <- tcLookupTyCon uDoubleTyConName + uFloat <- tcLookupTyCon uFloatTyConName + uInt <- tcLookupTyCon uIntTyConName + uWord <- tcLookupTyCon uWordTyConName + + let tcLookupPromDataCon = fmap promoteDataCon . tcLookupDataCon + + md <- tcLookupPromDataCon metaDataDataConName + mc <- tcLookupPromDataCon metaConsDataConName + ms <- tcLookupPromDataCon metaSelDataConName + pPrefix <- tcLookupPromDataCon prefixIDataConName + pInfix <- tcLookupPromDataCon infixIDataConName + pLA <- tcLookupPromDataCon leftAssociativeDataConName + pRA <- tcLookupPromDataCon rightAssociativeDataConName + pNA <- tcLookupPromDataCon notAssociativeDataConName + pSUpk <- tcLookupPromDataCon sourceUnpackDataConName + pSNUpk <- tcLookupPromDataCon sourceNoUnpackDataConName + pNSUpkness <- tcLookupPromDataCon noSourceUnpackednessDataConName + pSLzy <- tcLookupPromDataCon sourceLazyDataConName + pSStr <- tcLookupPromDataCon sourceStrictDataConName + pNSStrness <- tcLookupPromDataCon noSourceStrictnessDataConName + pDLzy <- tcLookupPromDataCon decidedLazyDataConName + pDStr <- tcLookupPromDataCon decidedStrictDataConName + pDUpk <- tcLookupPromDataCon decidedUnpackDataConName + + fix_env <- getFixityEnv + + let mkSum' a b = mkTyConApp plus [k,a,b] + mkProd a b = mkTyConApp times [k,a,b] + mkRec0 a = mkBoxTy uAddr uChar uDouble uFloat uInt uWord rec0 k a + mkRec1 a = mkTyConApp rec1 [k,a] + mkPar1 = mkTyConTy par1 + mkD a = mkTyConApp d1 [ k, metaDataTy, sumP (tyConDataCons a) ] + mkC a = mkTyConApp c1 [ k + , metaConsTy a + , prod (dataConInstOrigArgTys a + . mkTyVarTys . tyConTyVars $ tycon) + (dataConSrcBangs a) + (dataConImplBangs a) + (dataConFieldLabels a)] + mkS mlbl su ss ib a = mkTyConApp s1 [k, metaSelTy mlbl su ss ib, a] + + -- Sums and products are done in the same way for both Rep and Rep1 + sumP l = foldBal mkSum' (mkTyConApp v1 [k]) . map mkC $ l + -- The Bool is True if this constructor has labelled fields + prod :: [Type] -> [HsSrcBang] -> [HsImplBang] -> [FieldLabel] -> Type + prod l sb ib fl = foldBal mkProd (mkTyConApp u1 [k]) + [ ASSERT(null fl || lengthExceeds fl j) + arg t sb' ib' (if null fl + then Nothing + else Just (fl !! j)) + | (t,sb',ib',j) <- zip4 l sb ib [0..] ] + + arg :: Type -> HsSrcBang -> HsImplBang -> Maybe FieldLabel -> Type + arg t (HsSrcBang _ su ss) ib fl = mkS fl su ss ib $ case gk_ of + -- Here we previously used Par0 if t was a type variable, but we + -- realized that we can't always guarantee that we are wrapping-up + -- all type variables in Par0. So we decided to stop using Par0 + -- altogether, and use Rec0 all the time. + Gen0_ -> mkRec0 t + Gen1_ argVar -> argPar argVar t + where + -- Builds argument representation for Rep1 (more complicated due to + -- the presence of composition). + argPar argVar = argTyFold argVar $ ArgTyAlg + {ata_rec0 = mkRec0, ata_par1 = mkPar1, + ata_rec1 = mkRec1, ata_comp = mkComp comp k} + + tyConName_user = case tyConFamInst_maybe tycon of + Just (ptycon, _) -> tyConName ptycon + Nothing -> tyConName tycon + + dtName = mkStrLitTy . occNameFS . nameOccName $ tyConName_user + mdName = mkStrLitTy . moduleNameFS . moduleName + . nameModule . tyConName $ tycon + pkgName = mkStrLitTy . unitIdFS . moduleUnitId + . nameModule . tyConName $ tycon + isNT = mkTyConTy $ if isNewTyCon tycon + then promotedTrueDataCon + else promotedFalseDataCon + + ctName = mkStrLitTy . occNameFS . nameOccName . dataConName + ctFix c + | dataConIsInfix c + = case lookupFixity fix_env (dataConName c) of + Fixity _ n InfixL -> buildFix n pLA + Fixity _ n InfixR -> buildFix n pRA + Fixity _ n InfixN -> buildFix n pNA + | otherwise = mkTyConTy pPrefix + buildFix n assoc = mkTyConApp pInfix [ mkTyConTy assoc + , mkNumLitTy (fromIntegral n)] + + isRec c = mkTyConTy $ if dataConFieldLabels c `lengthExceeds` 0 + then promotedTrueDataCon + else promotedFalseDataCon + + selName = mkStrLitTy . flLabel + + mbSel Nothing = mkTyConApp promotedNothingDataCon [typeSymbolKind] + mbSel (Just s) = mkTyConApp promotedJustDataCon + [typeSymbolKind, selName s] + + metaDataTy = mkTyConApp md [dtName, mdName, pkgName, isNT] + metaConsTy c = mkTyConApp mc [ctName c, ctFix c, isRec c] + metaSelTy mlbl su ss ib = + mkTyConApp ms [mbSel mlbl, pSUpkness, pSStrness, pDStrness] + where + pSUpkness = mkTyConTy $ case su of + SrcUnpack -> pSUpk + SrcNoUnpack -> pSNUpk + NoSrcUnpack -> pNSUpkness + + pSStrness = mkTyConTy $ case ss of + SrcLazy -> pSLzy + SrcStrict -> pSStr + NoSrcStrict -> pNSStrness + + pDStrness = mkTyConTy $ case ib of + HsLazy -> pDLzy + HsStrict -> pDStr + HsUnpack{} -> pDUpk + + return (mkD tycon) + +mkComp :: TyCon -> Kind -> Type -> Type -> Type +mkComp comp k f g + | k1_first = mkTyConApp comp [k,liftedTypeKind,f,g] + | otherwise = mkTyConApp comp [liftedTypeKind,k,f,g] + where + -- Which of these is the case? + -- newtype (:.:) {k1} {k2} (f :: k2->*) (g :: k1->k2) (p :: k1) = ... + -- or newtype (:.:) {k2} {k1} (f :: k2->*) (g :: k1->k2) (p :: k1) = ... + -- We want to instantiate with k1=k, and k2=* + -- Reason for k2=*: see Note [Handling kinds in a Rep instance] + -- But we need to know which way round! + k1_first = k_first == p_kind_var + [k_first,_,_,_,p] = tyConTyVars comp + Just p_kind_var = getTyVar_maybe (tyVarKind p) + +-- Given the TyCons for each URec-related type synonym, check to see if the +-- given type is an unlifted type that generics understands. If so, return +-- its representation type. Otherwise, return Rec0. +-- See Note [Generics and unlifted types] +mkBoxTy :: TyCon -- UAddr + -> TyCon -- UChar + -> TyCon -- UDouble + -> TyCon -- UFloat + -> TyCon -- UInt + -> TyCon -- UWord + -> TyCon -- Rec0 + -> Kind -- What to instantiate Rec0's kind variable with + -> Type + -> Type +mkBoxTy uAddr uChar uDouble uFloat uInt uWord rec0 k ty + | ty `eqType` addrPrimTy = mkTyConApp uAddr [k] + | ty `eqType` charPrimTy = mkTyConApp uChar [k] + | ty `eqType` doublePrimTy = mkTyConApp uDouble [k] + | ty `eqType` floatPrimTy = mkTyConApp uFloat [k] + | ty `eqType` intPrimTy = mkTyConApp uInt [k] + | ty `eqType` wordPrimTy = mkTyConApp uWord [k] + | otherwise = mkTyConApp rec0 [k,ty] + +-------------------------------------------------------------------------------- +-- Dealing with sums +-------------------------------------------------------------------------------- + +mkSum :: GenericKind_ -- Generic or Generic1? + -> US -- Base for generating unique names + -> [DataCon] -- The data constructors + -> ([Alt], -- Alternatives for the T->Trep "from" function + [Alt]) -- Alternatives for the Trep->T "to" function + +-- Datatype without any constructors +mkSum _ _ [] = ([from_alt], [to_alt]) + where + from_alt = (x_Pat, nlHsCase x_Expr []) + to_alt = (x_Pat, nlHsCase x_Expr []) + -- These M1s are meta-information for the datatype + +-- Datatype with at least one constructor +mkSum gk_ us datacons = + -- switch the payload of gk_ to be datacon-centric instead of tycon-centric + unzip [ mk1Sum (gk2gkDC gk_ d) us i (length datacons) d + | (d,i) <- zip datacons [1..] ] + +-- Build the sum for a particular constructor +mk1Sum :: GenericKind_DC -- Generic or Generic1? + -> US -- Base for generating unique names + -> Int -- The index of this constructor + -> Int -- Total number of constructors + -> DataCon -- The data constructor + -> (Alt, -- Alternative for the T->Trep "from" function + Alt) -- Alternative for the Trep->T "to" function +mk1Sum gk_ us i n datacon = (from_alt, to_alt) + where + gk = forgetArgVar gk_ + + -- Existentials already excluded + argTys = dataConOrigArgTys datacon + n_args = dataConSourceArity datacon + + datacon_varTys = zip (map mkGenericLocal [us .. us+n_args-1]) argTys + datacon_vars = map fst datacon_varTys + + datacon_rdr = getRdrName datacon + + from_alt = (nlConVarPat datacon_rdr datacon_vars, from_alt_rhs) + from_alt_rhs = genLR_E i n (mkProd_E gk_ datacon_varTys) + + to_alt = ( genLR_P i n (mkProd_P gk datacon_varTys) + , to_alt_rhs + ) -- These M1s are meta-information for the datatype + to_alt_rhs = case gk_ of + Gen0_DC -> nlHsVarApps datacon_rdr datacon_vars + Gen1_DC argVar -> nlHsApps datacon_rdr $ map argTo datacon_varTys + where + argTo (var, ty) = converter ty `nlHsApp` nlHsVar var where + converter = argTyFold argVar $ ArgTyAlg + {ata_rec0 = nlHsVar . unboxRepRDR, + ata_par1 = nlHsVar unPar1_RDR, + ata_rec1 = const $ nlHsVar unRec1_RDR, + ata_comp = \_ cnv -> (nlHsVar fmap_RDR `nlHsApp` cnv) + `nlHsCompose` nlHsVar unComp1_RDR} + + +-- Generates the L1/R1 sum pattern +genLR_P :: Int -> Int -> LPat GhcPs -> LPat GhcPs +genLR_P i n p + | n == 0 = error "impossible" + | n == 1 = p + | i <= div n 2 = nlParPat $ nlConPat l1DataCon_RDR [genLR_P i (div n 2) p] + | otherwise = nlParPat $ nlConPat r1DataCon_RDR [genLR_P (i-m) (n-m) p] + where m = div n 2 + +-- Generates the L1/R1 sum expression +genLR_E :: Int -> Int -> LHsExpr GhcPs -> LHsExpr GhcPs +genLR_E i n e + | n == 0 = error "impossible" + | n == 1 = e + | i <= div n 2 = nlHsVar l1DataCon_RDR `nlHsApp` + nlHsPar (genLR_E i (div n 2) e) + | otherwise = nlHsVar r1DataCon_RDR `nlHsApp` + nlHsPar (genLR_E (i-m) (n-m) e) + where m = div n 2 + +-------------------------------------------------------------------------------- +-- Dealing with products +-------------------------------------------------------------------------------- + +-- Build a product expression +mkProd_E :: GenericKind_DC -- Generic or Generic1? + -> [(RdrName, Type)] + -- List of variables matched on the lhs and their types + -> LHsExpr GhcPs -- Resulting product expression +mkProd_E gk_ varTys = mkM1_E (foldBal prod (nlHsVar u1DataCon_RDR) appVars) + -- These M1s are meta-information for the constructor + where + appVars = map (wrapArg_E gk_) varTys + prod a b = prodDataCon_RDR `nlHsApps` [a,b] + +wrapArg_E :: GenericKind_DC -> (RdrName, Type) -> LHsExpr GhcPs +wrapArg_E Gen0_DC (var, ty) = mkM1_E $ + boxRepRDR ty `nlHsVarApps` [var] + -- This M1 is meta-information for the selector +wrapArg_E (Gen1_DC argVar) (var, ty) = mkM1_E $ + converter ty `nlHsApp` nlHsVar var + -- This M1 is meta-information for the selector + where converter = argTyFold argVar $ ArgTyAlg + {ata_rec0 = nlHsVar . boxRepRDR, + ata_par1 = nlHsVar par1DataCon_RDR, + ata_rec1 = const $ nlHsVar rec1DataCon_RDR, + ata_comp = \_ cnv -> nlHsVar comp1DataCon_RDR `nlHsCompose` + (nlHsVar fmap_RDR `nlHsApp` cnv)} + +boxRepRDR :: Type -> RdrName +boxRepRDR = maybe k1DataCon_RDR fst . unboxedRepRDRs + +unboxRepRDR :: Type -> RdrName +unboxRepRDR = maybe unK1_RDR snd . unboxedRepRDRs + +-- Retrieve the RDRs associated with each URec data family instance +-- constructor. See Note [Generics and unlifted types] +unboxedRepRDRs :: Type -> Maybe (RdrName, RdrName) +unboxedRepRDRs ty + | ty `eqType` addrPrimTy = Just (uAddrDataCon_RDR, uAddrHash_RDR) + | ty `eqType` charPrimTy = Just (uCharDataCon_RDR, uCharHash_RDR) + | ty `eqType` doublePrimTy = Just (uDoubleDataCon_RDR, uDoubleHash_RDR) + | ty `eqType` floatPrimTy = Just (uFloatDataCon_RDR, uFloatHash_RDR) + | ty `eqType` intPrimTy = Just (uIntDataCon_RDR, uIntHash_RDR) + | ty `eqType` wordPrimTy = Just (uWordDataCon_RDR, uWordHash_RDR) + | otherwise = Nothing + +-- Build a product pattern +mkProd_P :: GenericKind -- Gen0 or Gen1 + -> [(RdrName, Type)] -- List of variables to match, + -- along with their types + -> LPat GhcPs -- Resulting product pattern +mkProd_P gk varTys = mkM1_P (foldBal prod (nlNullaryConPat u1DataCon_RDR) appVars) + -- These M1s are meta-information for the constructor + where + appVars = unzipWith (wrapArg_P gk) varTys + prod a b = nlParPat $ prodDataCon_RDR `nlConPat` [a,b] + +wrapArg_P :: GenericKind -> RdrName -> Type -> LPat GhcPs +wrapArg_P Gen0 v ty = mkM1_P (nlParPat $ boxRepRDR ty `nlConVarPat` [v]) + -- This M1 is meta-information for the selector +wrapArg_P Gen1 v _ = nlParPat $ m1DataCon_RDR `nlConVarPat` [v] + +mkGenericLocal :: US -> RdrName +mkGenericLocal u = mkVarUnqual (mkFastString ("g" ++ show u)) + +x_RDR :: RdrName +x_RDR = mkVarUnqual (fsLit "x") + +x_Expr :: LHsExpr GhcPs +x_Expr = nlHsVar x_RDR + +x_Pat :: LPat GhcPs +x_Pat = nlVarPat x_RDR + +mkM1_E :: LHsExpr GhcPs -> LHsExpr GhcPs +mkM1_E e = nlHsVar m1DataCon_RDR `nlHsApp` e + +mkM1_P :: LPat GhcPs -> LPat GhcPs +mkM1_P p = nlParPat $ m1DataCon_RDR `nlConPat` [p] + +nlHsCompose :: LHsExpr GhcPs -> LHsExpr GhcPs -> LHsExpr GhcPs +nlHsCompose x y = compose_RDR `nlHsApps` [x, y] + +-- | Variant of foldr for producing balanced lists +foldBal :: (a -> a -> a) -> a -> [a] -> a +foldBal _ x [] = x +foldBal _ _ [y] = y +foldBal op x l = let (a,b) = splitAt (length l `div` 2) l + in foldBal op x a `op` foldBal op x b + +{- +Note [Generics and unlifted types] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Normally, all constants are marked with K1/Rec0. The exception to this rule is +when a data constructor has an unlifted argument (e.g., Int#, Char#, etc.). In +that case, we must use a data family instance of URec (from GHC.Generics) to +mark it. As a result, before we can generate K1 or unK1, we must first check +to see if the type is actually one of the unlifted types for which URec has a +data family instance; if so, we generate that instead. + +See wiki:commentary/compiler/generic-deriving#handling-unlifted-types for more +details on why URec is implemented the way it is. + +Note [Generating a correctly typed Rep instance] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +tc_mkRepTy derives the RHS of the Rep(1) type family instance when deriving +Generic(1). That is, it derives the ellipsis in the following: + + instance Generic Foo where + type Rep Foo = ... + +However, tc_mkRepTy only has knowledge of the *TyCon* of the type for which +a Generic(1) instance is being derived, not the fully instantiated type. As a +result, tc_mkRepTy builds the most generalized Rep(1) instance possible using +the type variables it learns from the TyCon (i.e., it uses tyConTyVars). This +can cause problems when the instance has instantiated type variables +(see #11732). As an example: + + data T a = MkT a + deriving instance Generic (T Int) + ==> + instance Generic (T Int) where + type Rep (T Int) = (... (Rec0 a)) -- wrong! + +-XStandaloneDeriving is one way for the type variables to become instantiated. +Another way is when Generic1 is being derived for a datatype with a visible +kind binder, e.g., + + data P k (a :: k) = MkP k deriving Generic1 + ==> + instance Generic1 (P *) where + type Rep1 (P *) = (... (Rec0 k)) -- wrong! + +See Note [Unify kinds in deriving] in GHC.Tc.Deriv. + +In any such scenario, we must prevent a discrepancy between the LHS and RHS of +a Rep(1) instance. To do so, we create a type variable substitution that maps +the tyConTyVars of the TyCon to their counterparts in the fully instantiated +type. (For example, using T above as example, you'd map a :-> Int.) We then +apply the substitution to the RHS before generating the instance. + +A wrinkle in all of this: when forming the type variable substitution for +Generic1 instances, we map the last type variable of the tycon to Any. Why? +It's because of wily data types like this one (#15012): + + data T a = MkT (FakeOut a) + type FakeOut a = Int + +If we ignore a, then we'll produce the following Rep1 instance: + + instance Generic1 T where + type Rep1 T = ... (Rec0 (FakeOut a)) + ... + +Oh no! Now we have `a` on the RHS, but it's completely unbound. Instead, we +ensure that `a` is mapped to Any: + + instance Generic1 T where + type Rep1 T = ... (Rec0 (FakeOut Any)) + ... + +And now all is good. + +Alternatively, we could have avoided this problem by expanding all type +synonyms on the RHSes of Rep1 instances. But we might blow up the size of +these types even further by doing this, so we choose not to do so. + +Note [Handling kinds in a Rep instance] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Because Generic1 is poly-kinded, the representation types were generalized to +be kind-polymorphic as well. As a result, tc_mkRepTy must explicitly apply +the kind of the instance being derived to all the representation type +constructors. For instance, if you have + + data Empty (a :: k) = Empty deriving Generic1 + +Then the generated code is now approximately (with -fprint-explicit-kinds +syntax): + + instance Generic1 k (Empty k) where + type Rep1 k (Empty k) = U1 k + +Most representation types have only one kind variable, making them easy to deal +with. The only non-trivial case is (:.:), which is only used in Generic1 +instances: + + newtype (:.:) (f :: k2 -> *) (g :: k1 -> k2) (p :: k1) = + Comp1 { unComp1 :: f (g p) } + +Here, we do something a bit counter-intuitive: we make k1 be the kind of the +instance being derived, and we always make k2 be *. Why *? It's because +the code that GHC generates using (:.:) is always of the form x :.: Rec1 y +for some types x and y. In other words, the second type to which (:.:) is +applied always has kind k -> *, for some kind k, so k2 cannot possibly be +anything other than * in a generated Generic1 instance. + +Note [Generics compilation speed tricks] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Deriving Generic(1) is known to have a large constant factor during +compilation, which contributes to noticeable compilation slowdowns when +deriving Generic(1) for large datatypes (see #5642). + +To ease the pain, there is a trick one can play when generating definitions for +to(1) and from(1). If you have a datatype like: + + data Letter = A | B | C | D + +then a naïve Generic instance for Letter would be: + + instance Generic Letter where + type Rep Letter = D1 ('MetaData ...) ... + + to (M1 (L1 (L1 (M1 U1)))) = A + to (M1 (L1 (R1 (M1 U1)))) = B + to (M1 (R1 (L1 (M1 U1)))) = C + to (M1 (R1 (R1 (M1 U1)))) = D + + from A = M1 (L1 (L1 (M1 U1))) + from B = M1 (L1 (R1 (M1 U1))) + from C = M1 (R1 (L1 (M1 U1))) + from D = M1 (R1 (R1 (M1 U1))) + +Notice that in every LHS pattern-match of the 'to' definition, and in every RHS +expression in the 'from' definition, the topmost constructor is M1. This +corresponds to the datatype-specific metadata (the D1 in the Rep Letter +instance). But this is wasteful from a typechecking perspective, since this +definition requires GHC to typecheck an application of M1 in every single case, +leading to an O(n) increase in the number of coercions the typechecker has to +solve, which in turn increases allocations and degrades compilation speed. + +Luckily, since the topmost M1 has the exact same type across every case, we can +factor it out reduce the typechecker's burden: + + instance Generic Letter where + type Rep Letter = D1 ('MetaData ...) ... + + to (M1 x) = case x of + L1 (L1 (M1 U1)) -> A + L1 (R1 (M1 U1)) -> B + R1 (L1 (M1 U1)) -> C + R1 (R1 (M1 U1)) -> D + + from x = M1 (case x of + A -> L1 (L1 (M1 U1)) + B -> L1 (R1 (M1 U1)) + C -> R1 (L1 (M1 U1)) + D -> R1 (R1 (M1 U1))) + +A simple change, but one that pays off, since it goes turns an O(n) amount of +coercions to an O(1) amount. +-} |