diff options
Diffstat (limited to 'compiler/GHC/Tc/Deriv/Generate.hs')
-rw-r--r-- | compiler/GHC/Tc/Deriv/Generate.hs | 819 |
1 files changed, 556 insertions, 263 deletions
diff --git a/compiler/GHC/Tc/Deriv/Generate.hs b/compiler/GHC/Tc/Deriv/Generate.hs index 7fa9975790..935298e2ba 100644 --- a/compiler/GHC/Tc/Deriv/Generate.hs +++ b/compiler/GHC/Tc/Deriv/Generate.hs @@ -46,8 +46,6 @@ import GHC.Types.Name.Reader import GHC.Types.Basic import GHC.Core.DataCon import GHC.Types.Name -import GHC.Utils.Fingerprint -import GHC.Utils.Encoding import GHC.Driver.Session import GHC.Builtin.Utils @@ -82,22 +80,77 @@ import Data.List ( find, partition, intersperse ) type BagDerivStuff = Bag DerivStuff +-- | A declarative description of an auxiliary binding that should be +-- generated. See @Note [Auxiliary binders]@ for a more detailed description +-- of how these are used. data AuxBindSpec - = DerivCon2Tag TyCon -- The con2Tag for given TyCon - | DerivTag2Con TyCon -- ...ditto tag2Con - | DerivMaxTag TyCon -- ...and maxTag - deriving( Eq ) + -- DerivCon2Tag, DerivTag2Con, and DerivMaxTag are used in derived Eq, Ord, + -- Enum, and Ix instances. -- All these generate ZERO-BASED tag operations -- I.e first constructor has tag 0 + -- | @$con2tag@: Computes the tag for a given constructor + = DerivCon2Tag + TyCon -- The type constructor of the data type to which the + -- constructors belong + RdrName -- The to-be-generated $con2tag binding's RdrName + + -- | @$tag2con@: Given a tag, computes the corresponding data constructor + | DerivTag2Con + TyCon -- The type constructor of the data type to which the + -- constructors belong + RdrName -- The to-be-generated $tag2con binding's RdrName + + -- | @$maxtag@: The maximum possible tag value among a data type's + -- constructors + | DerivMaxTag + TyCon -- The type constructor of the data type to which the + -- constructors belong + RdrName -- The to-be-generated $maxtag binding's RdrName + + -- DerivDataDataType and DerivDataConstr are only used in derived Data + -- instances + + -- | @$t@: The @DataType@ representation for a @Data@ instance + | DerivDataDataType + TyCon -- The type constructor of the data type to be represented + RdrName -- The to-be-generated $t binding's RdrName + [RdrName] -- The RdrNames of the to-be-generated $c bindings for each + -- data constructor. These are only used on the RHS of the + -- to-be-generated $t binding. + + -- | @$c@: The @Constr@ representation for a @Data@ instance + | DerivDataConstr + DataCon -- The data constructor to be represented + RdrName -- The to-be-generated $c binding's RdrName + RdrName -- The RdrName of the to-be-generated $t binding for the parent + -- data type. This is only used on the RHS of the + -- to-be-generated $c binding. + +-- | Retrieve the 'RdrName' of the binding that the supplied 'AuxBindSpec' +-- describes. +auxBindSpecRdrName :: AuxBindSpec -> RdrName +auxBindSpecRdrName (DerivCon2Tag _ con2tag_RDR) = con2tag_RDR +auxBindSpecRdrName (DerivTag2Con _ tag2con_RDR) = tag2con_RDR +auxBindSpecRdrName (DerivMaxTag _ maxtag_RDR) = maxtag_RDR +auxBindSpecRdrName (DerivDataDataType _ dataT_RDR _) = dataT_RDR +auxBindSpecRdrName (DerivDataConstr _ dataC_RDR _) = dataC_RDR + data DerivStuff -- Please add this auxiliary stuff = DerivAuxBind AuxBindSpec + -- ^ A new, top-level auxiliary binding. Used for deriving 'Eq', 'Ord', + -- 'Enum', 'Ix', and 'Data'. See Note [Auxiliary binders]. -- Generics and DeriveAnyClass | DerivFamInst FamInst -- New type family instances - - -- New top-level auxiliary bindings - | DerivHsBind (LHsBind GhcPs, LSig GhcPs) -- Also used for SYB + -- ^ A new type family instance. Used for: + -- + -- * @DeriveGeneric@, which generates instances of @Rep(1)@ + -- + -- * @DeriveAnyClass@, which can fill in associated type family defaults + -- + -- * @GeneralizedNewtypeDeriving@, which generates instances of associated + -- type families for newtypes {- @@ -161,8 +214,10 @@ produced don't get through the typechecker. gen_Eq_binds :: SrcSpan -> TyCon -> TcM (LHsBinds GhcPs, BagDerivStuff) gen_Eq_binds loc tycon = do - dflags <- getDynFlags - return (method_binds dflags, aux_binds) + -- See Note [Auxiliary binders] + con2tag_RDR <- new_con2tag_rdr_name loc tycon + + return (method_binds con2tag_RDR, aux_binds con2tag_RDR) where all_cons = tyConDataCons tycon (nullary_cons, non_nullary_cons) = partition isNullarySrcDataCon all_cons @@ -176,7 +231,7 @@ gen_Eq_binds loc tycon = do no_tag_match_cons = null tag_match_cons - fall_through_eqn dflags + fall_through_eqn con2tag_RDR | no_tag_match_cons -- All constructors have arguments = case pat_match_cons of [] -> [] -- No constructors; no fall-though case @@ -188,16 +243,18 @@ gen_Eq_binds loc tycon = do | otherwise -- One or more tag_match cons; add fall-through of -- extract tags compare for equality = [([a_Pat, b_Pat], - untag_Expr dflags tycon [(a_RDR,ah_RDR), (b_RDR,bh_RDR)] + untag_Expr con2tag_RDR [(a_RDR,ah_RDR), (b_RDR,bh_RDR)] (genPrimOpApp (nlHsVar ah_RDR) eqInt_RDR (nlHsVar bh_RDR)))] - aux_binds | no_tag_match_cons = emptyBag - | otherwise = unitBag $ DerivAuxBind $ DerivCon2Tag tycon + aux_binds con2tag_RDR + | no_tag_match_cons = emptyBag + | otherwise = unitBag $ DerivAuxBind $ DerivCon2Tag tycon con2tag_RDR - method_binds dflags = unitBag (eq_bind dflags) - eq_bind dflags = mkFunBindEC 2 loc eq_RDR (const true_Expr) - (map pats_etc pat_match_cons - ++ fall_through_eqn dflags) + method_binds con2tag_RDR = unitBag (eq_bind con2tag_RDR) + eq_bind con2tag_RDR + = mkFunBindEC 2 loc eq_RDR (const true_Expr) + (map pats_etc pat_match_cons + ++ fall_through_eqn con2tag_RDR) ------------------------------------------------------------------ pats_etc data_con @@ -341,21 +398,25 @@ gtResult OrdGT = true_Expr ------------ gen_Ord_binds :: SrcSpan -> TyCon -> TcM (LHsBinds GhcPs, BagDerivStuff) gen_Ord_binds loc tycon = do - dflags <- getDynFlags + -- See Note [Auxiliary binders] + con2tag_RDR <- new_con2tag_rdr_name loc tycon + return $ if null tycon_data_cons -- No data-cons => invoke bale-out case then ( unitBag $ mkFunBindEC 2 loc compare_RDR (const eqTag_Expr) [] , emptyBag) - else ( unitBag (mkOrdOp dflags OrdCompare) `unionBags` other_ops dflags - , aux_binds) + else ( unitBag (mkOrdOp con2tag_RDR OrdCompare) + `unionBags` other_ops con2tag_RDR + , aux_binds con2tag_RDR) where - aux_binds | single_con_type = emptyBag - | otherwise = unitBag $ DerivAuxBind $ DerivCon2Tag tycon + aux_binds con2tag_RDR + | single_con_type = emptyBag + | otherwise = unitBag $ DerivAuxBind $ DerivCon2Tag tycon con2tag_RDR -- Note [Game plan for deriving Ord] - other_ops dflags + other_ops con2tag_RDR | (last_tag - first_tag) <= 2 -- 1-3 constructors || null non_nullary_cons -- Or it's an enumeration - = listToBag [mkOrdOp dflags OrdLT, lE, gT, gE] + = listToBag [mkOrdOp con2tag_RDR OrdLT, lE, gT, gE] | otherwise = emptyBag @@ -381,39 +442,40 @@ gen_Ord_binds loc tycon = do (nullary_cons, non_nullary_cons) = partition isNullarySrcDataCon tycon_data_cons - mkOrdOp :: DynFlags -> OrdOp -> LHsBind GhcPs + mkOrdOp :: RdrName -> OrdOp -> LHsBind GhcPs -- Returns a binding op a b = ... compares a and b according to op .... - mkOrdOp dflags op = mkSimpleGeneratedFunBind loc (ordMethRdr op) [a_Pat, b_Pat] - (mkOrdOpRhs dflags op) + mkOrdOp con2tag_RDR op + = mkSimpleGeneratedFunBind loc (ordMethRdr op) [a_Pat, b_Pat] + (mkOrdOpRhs con2tag_RDR op) - mkOrdOpRhs :: DynFlags -> OrdOp -> LHsExpr GhcPs - mkOrdOpRhs dflags op -- RHS for comparing 'a' and 'b' according to op + mkOrdOpRhs :: RdrName -> OrdOp -> LHsExpr GhcPs + mkOrdOpRhs con2tag_RDR op -- RHS for comparing 'a' and 'b' according to op | nullary_cons `lengthAtMost` 2 -- Two nullary or fewer, so use cases = nlHsCase (nlHsVar a_RDR) $ - map (mkOrdOpAlt dflags op) tycon_data_cons + map (mkOrdOpAlt con2tag_RDR op) tycon_data_cons -- i.e. case a of { C1 x y -> case b of C1 x y -> ....compare x,y... -- C2 x -> case b of C2 x -> ....comopare x.... } | null non_nullary_cons -- All nullary, so go straight to comparing tags - = mkTagCmp dflags op + = mkTagCmp con2tag_RDR op | otherwise -- Mixed nullary and non-nullary = nlHsCase (nlHsVar a_RDR) $ - (map (mkOrdOpAlt dflags op) non_nullary_cons - ++ [mkHsCaseAlt nlWildPat (mkTagCmp dflags op)]) + (map (mkOrdOpAlt con2tag_RDR op) non_nullary_cons + ++ [mkHsCaseAlt nlWildPat (mkTagCmp con2tag_RDR op)]) - mkOrdOpAlt :: DynFlags -> OrdOp -> DataCon - -> LMatch GhcPs (LHsExpr GhcPs) + mkOrdOpAlt :: RdrName -> OrdOp -> DataCon + -> LMatch GhcPs (LHsExpr GhcPs) -- Make the alternative (Ki a1 a2 .. av -> - mkOrdOpAlt dflags op data_con + mkOrdOpAlt con2tag_RDR op data_con = mkHsCaseAlt (nlConVarPat data_con_RDR as_needed) - (mkInnerRhs dflags op data_con) + (mkInnerRhs con2tag_RDR op data_con) where as_needed = take (dataConSourceArity data_con) as_RDRs data_con_RDR = getRdrName data_con - mkInnerRhs dflags op data_con + mkInnerRhs con2tag_RDR op data_con | single_con_type = nlHsCase (nlHsVar b_RDR) [ mkInnerEqAlt op data_con ] @@ -436,14 +498,14 @@ gen_Ord_binds loc tycon = do , mkHsCaseAlt nlWildPat (gtResult op) ] | tag > last_tag `div` 2 -- lower range is larger - = untag_Expr dflags tycon [(b_RDR, bh_RDR)] $ + = untag_Expr con2tag_RDR [(b_RDR, bh_RDR)] $ nlHsIf (genPrimOpApp (nlHsVar bh_RDR) ltInt_RDR tag_lit) (gtResult op) $ -- Definitely GT nlHsCase (nlHsVar b_RDR) [ mkInnerEqAlt op data_con , mkHsCaseAlt nlWildPat (ltResult op) ] | otherwise -- upper range is larger - = untag_Expr dflags tycon [(b_RDR, bh_RDR)] $ + = untag_Expr con2tag_RDR [(b_RDR, bh_RDR)] $ nlHsIf (genPrimOpApp (nlHsVar bh_RDR) gtInt_RDR tag_lit) (ltResult op) $ -- Definitely LT nlHsCase (nlHsVar b_RDR) [ mkInnerEqAlt op data_con @@ -462,11 +524,11 @@ gen_Ord_binds loc tycon = do data_con_RDR = getRdrName data_con bs_needed = take (dataConSourceArity data_con) bs_RDRs - mkTagCmp :: DynFlags -> OrdOp -> LHsExpr GhcPs + mkTagCmp :: RdrName -> OrdOp -> LHsExpr GhcPs -- Both constructors known to be nullary -- generates (case data2Tag a of a# -> case data2Tag b of b# -> a# `op` b# - mkTagCmp dflags op = - untag_Expr dflags tycon[(a_RDR, ah_RDR),(b_RDR, bh_RDR)] $ + mkTagCmp con2tag_RDR op = + untag_Expr con2tag_RDR [(a_RDR, ah_RDR),(b_RDR, bh_RDR)] $ unliftedOrdOp intPrimTy op ah_RDR bh_RDR mkCompareFields :: OrdOp -> [Type] -> LHsExpr GhcPs @@ -586,78 +648,86 @@ For @enumFromTo@ and @enumFromThenTo@, we use the default methods. gen_Enum_binds :: SrcSpan -> TyCon -> TcM (LHsBinds GhcPs, BagDerivStuff) gen_Enum_binds loc tycon = do - dflags <- getDynFlags - return (method_binds dflags, aux_binds) + -- See Note [Auxiliary binders] + con2tag_RDR <- new_con2tag_rdr_name loc tycon + tag2con_RDR <- new_tag2con_rdr_name loc tycon + maxtag_RDR <- new_maxtag_rdr_name loc tycon + + return ( method_binds con2tag_RDR tag2con_RDR maxtag_RDR + , aux_binds con2tag_RDR tag2con_RDR maxtag_RDR ) where - method_binds dflags = listToBag - [ succ_enum dflags - , pred_enum dflags - , to_enum dflags - , enum_from dflags -- [0 ..] - , enum_from_then dflags -- [0, 1 ..] - , from_enum dflags + method_binds con2tag_RDR tag2con_RDR maxtag_RDR = listToBag + [ succ_enum con2tag_RDR tag2con_RDR maxtag_RDR + , pred_enum con2tag_RDR tag2con_RDR + , to_enum tag2con_RDR maxtag_RDR + , enum_from con2tag_RDR tag2con_RDR maxtag_RDR -- [0 ..] + , enum_from_then con2tag_RDR tag2con_RDR maxtag_RDR -- [0, 1 ..] + , from_enum con2tag_RDR + ] + aux_binds con2tag_RDR tag2con_RDR maxtag_RDR = listToBag $ map DerivAuxBind + [ DerivCon2Tag tycon con2tag_RDR + , DerivTag2Con tycon tag2con_RDR + , DerivMaxTag tycon maxtag_RDR ] - aux_binds = listToBag $ map DerivAuxBind - [DerivCon2Tag tycon, DerivTag2Con tycon, DerivMaxTag tycon] occ_nm = getOccString tycon - succ_enum dflags + succ_enum con2tag_RDR tag2con_RDR maxtag_RDR = mkSimpleGeneratedFunBind loc succ_RDR [a_Pat] $ - untag_Expr dflags tycon [(a_RDR, ah_RDR)] $ - nlHsIf (nlHsApps eq_RDR [nlHsVar (maxtag_RDR dflags tycon), + untag_Expr con2tag_RDR [(a_RDR, ah_RDR)] $ + nlHsIf (nlHsApps eq_RDR [nlHsVar maxtag_RDR, nlHsVarApps intDataCon_RDR [ah_RDR]]) (illegal_Expr "succ" occ_nm "tried to take `succ' of last tag in enumeration") - (nlHsApp (nlHsVar (tag2con_RDR dflags tycon)) + (nlHsApp (nlHsVar tag2con_RDR) (nlHsApps plus_RDR [nlHsVarApps intDataCon_RDR [ah_RDR], nlHsIntLit 1])) - pred_enum dflags + pred_enum con2tag_RDR tag2con_RDR = mkSimpleGeneratedFunBind loc pred_RDR [a_Pat] $ - untag_Expr dflags tycon [(a_RDR, ah_RDR)] $ + untag_Expr con2tag_RDR [(a_RDR, ah_RDR)] $ nlHsIf (nlHsApps eq_RDR [nlHsIntLit 0, nlHsVarApps intDataCon_RDR [ah_RDR]]) (illegal_Expr "pred" occ_nm "tried to take `pred' of first tag in enumeration") - (nlHsApp (nlHsVar (tag2con_RDR dflags tycon)) + (nlHsApp (nlHsVar tag2con_RDR) (nlHsApps plus_RDR [ nlHsVarApps intDataCon_RDR [ah_RDR] , nlHsLit (HsInt noExtField (mkIntegralLit (-1 :: Int)))])) - to_enum dflags + to_enum tag2con_RDR maxtag_RDR = mkSimpleGeneratedFunBind loc toEnum_RDR [a_Pat] $ nlHsIf (nlHsApps and_RDR [nlHsApps ge_RDR [nlHsVar a_RDR, nlHsIntLit 0], nlHsApps le_RDR [ nlHsVar a_RDR - , nlHsVar (maxtag_RDR dflags tycon)]]) - (nlHsVarApps (tag2con_RDR dflags tycon) [a_RDR]) - (illegal_toEnum_tag occ_nm (maxtag_RDR dflags tycon)) + , nlHsVar maxtag_RDR]]) + (nlHsVarApps tag2con_RDR [a_RDR]) + (illegal_toEnum_tag occ_nm maxtag_RDR) - enum_from dflags + enum_from con2tag_RDR tag2con_RDR maxtag_RDR = mkSimpleGeneratedFunBind loc enumFrom_RDR [a_Pat] $ - untag_Expr dflags tycon [(a_RDR, ah_RDR)] $ + untag_Expr con2tag_RDR [(a_RDR, ah_RDR)] $ nlHsApps map_RDR - [nlHsVar (tag2con_RDR dflags tycon), + [nlHsVar tag2con_RDR, nlHsPar (enum_from_to_Expr (nlHsVarApps intDataCon_RDR [ah_RDR]) - (nlHsVar (maxtag_RDR dflags tycon)))] + (nlHsVar maxtag_RDR))] - enum_from_then dflags + enum_from_then con2tag_RDR tag2con_RDR maxtag_RDR = mkSimpleGeneratedFunBind loc enumFromThen_RDR [a_Pat, b_Pat] $ - untag_Expr dflags tycon [(a_RDR, ah_RDR), (b_RDR, bh_RDR)] $ - nlHsApp (nlHsVarApps map_RDR [tag2con_RDR dflags tycon]) $ + untag_Expr con2tag_RDR [(a_RDR, ah_RDR), (b_RDR, bh_RDR)] $ + nlHsApp (nlHsVarApps map_RDR [tag2con_RDR]) $ nlHsPar (enum_from_then_to_Expr (nlHsVarApps intDataCon_RDR [ah_RDR]) (nlHsVarApps intDataCon_RDR [bh_RDR]) (nlHsIf (nlHsApps gt_RDR [nlHsVarApps intDataCon_RDR [ah_RDR], nlHsVarApps intDataCon_RDR [bh_RDR]]) (nlHsIntLit 0) - (nlHsVar (maxtag_RDR dflags tycon)) + (nlHsVar maxtag_RDR) )) - from_enum dflags + from_enum con2tag_RDR = mkSimpleGeneratedFunBind loc fromEnum_RDR [a_Pat] $ - untag_Expr dflags tycon [(a_RDR, ah_RDR)] $ + untag_Expr con2tag_RDR [(a_RDR, ah_RDR)] $ (nlHsVarApps intDataCon_RDR [ah_RDR]) {- @@ -758,35 +828,40 @@ we follow the scheme given in Figure~19 of the Haskell~1.2 report gen_Ix_binds :: SrcSpan -> TyCon -> TcM (LHsBinds GhcPs, BagDerivStuff) gen_Ix_binds loc tycon = do - dflags <- getDynFlags + -- See Note [Auxiliary binders] + con2tag_RDR <- new_con2tag_rdr_name loc tycon + tag2con_RDR <- new_tag2con_rdr_name loc tycon + return $ if isEnumerationTyCon tycon - then (enum_ixes dflags, listToBag $ map DerivAuxBind - [DerivCon2Tag tycon, DerivTag2Con tycon, DerivMaxTag tycon]) - else (single_con_ixes, unitBag (DerivAuxBind (DerivCon2Tag tycon))) + then (enum_ixes con2tag_RDR tag2con_RDR, listToBag $ map DerivAuxBind + [ DerivCon2Tag tycon con2tag_RDR + , DerivTag2Con tycon tag2con_RDR + ]) + else (single_con_ixes, unitBag (DerivAuxBind (DerivCon2Tag tycon con2tag_RDR))) where -------------------------------------------------------------- - enum_ixes dflags = listToBag - [ enum_range dflags - , enum_index dflags - , enum_inRange dflags + enum_ixes con2tag_RDR tag2con_RDR = listToBag + [ enum_range con2tag_RDR tag2con_RDR + , enum_index con2tag_RDR + , enum_inRange con2tag_RDR ] - enum_range dflags + enum_range con2tag_RDR tag2con_RDR = mkSimpleGeneratedFunBind loc range_RDR [nlTuplePat [a_Pat, b_Pat] Boxed] $ - untag_Expr dflags tycon [(a_RDR, ah_RDR)] $ - untag_Expr dflags tycon [(b_RDR, bh_RDR)] $ - nlHsApp (nlHsVarApps map_RDR [tag2con_RDR dflags tycon]) $ + untag_Expr con2tag_RDR [(a_RDR, ah_RDR)] $ + untag_Expr con2tag_RDR [(b_RDR, bh_RDR)] $ + nlHsApp (nlHsVarApps map_RDR [tag2con_RDR]) $ nlHsPar (enum_from_to_Expr (nlHsVarApps intDataCon_RDR [ah_RDR]) (nlHsVarApps intDataCon_RDR [bh_RDR])) - enum_index dflags + enum_index con2tag_RDR = mkSimpleGeneratedFunBind loc unsafeIndex_RDR [noLoc (AsPat noExtField (noLoc c_RDR) (nlTuplePat [a_Pat, nlWildPat] Boxed)), d_Pat] ( - untag_Expr dflags tycon [(a_RDR, ah_RDR)] ( - untag_Expr dflags tycon [(d_RDR, dh_RDR)] ( + untag_Expr con2tag_RDR [(a_RDR, ah_RDR)] ( + untag_Expr con2tag_RDR [(d_RDR, dh_RDR)] ( let rhs = nlHsVarApps intDataCon_RDR [c_RDR] in @@ -797,11 +872,11 @@ gen_Ix_binds loc tycon = do ) -- This produces something like `(ch >= ah) && (ch <= bh)` - enum_inRange dflags + enum_inRange con2tag_RDR = mkSimpleGeneratedFunBind loc inRange_RDR [nlTuplePat [a_Pat, b_Pat] Boxed, c_Pat] $ - untag_Expr dflags tycon [(a_RDR, ah_RDR)] ( - untag_Expr dflags tycon [(b_RDR, bh_RDR)] ( - untag_Expr dflags tycon [(c_RDR, ch_RDR)] ( + untag_Expr con2tag_RDR [(a_RDR, ah_RDR)] ( + untag_Expr con2tag_RDR [(b_RDR, bh_RDR)] ( + untag_Expr con2tag_RDR [(c_RDR, ch_RDR)] ( -- This used to use `if`, which interacts badly with RebindableSyntax. -- See #11396. nlHsApps and_RDR @@ -1313,66 +1388,24 @@ gen_Data_binds :: SrcSpan -> TcM (LHsBinds GhcPs, -- The method bindings BagDerivStuff) -- Auxiliary bindings gen_Data_binds loc rep_tc - = do { dflags <- getDynFlags - - -- Make unique names for the data type and constructor - -- auxiliary bindings. Start with the name of the TyCon/DataCon - -- but that might not be unique: see #12245. - ; dt_occ <- chooseUniqueOccTc (mkDataTOcc (getOccName rep_tc)) - ; dc_occs <- mapM (chooseUniqueOccTc . mkDataCOcc . getOccName) - (tyConDataCons rep_tc) - ; let dt_rdr = mkRdrUnqual dt_occ - dc_rdrs = map mkRdrUnqual dc_occs - - -- OK, now do the work - ; return (gen_data dflags dt_rdr dc_rdrs loc rep_tc) } - -gen_data :: DynFlags -> RdrName -> [RdrName] - -> SrcSpan -> TyCon - -> (LHsBinds GhcPs, -- The method bindings - BagDerivStuff) -- Auxiliary bindings -gen_data dflags data_type_name constr_names loc rep_tc - = (listToBag [gfoldl_bind, gunfold_bind, toCon_bind, dataTypeOf_bind] - `unionBags` gcast_binds, - -- Auxiliary definitions: the data type and constructors - listToBag ( genDataTyCon - : zipWith genDataDataCon data_cons constr_names ) ) + = do { -- See Note [Auxiliary binders] + dataT_RDR <- new_dataT_rdr_name loc rep_tc + ; dataC_RDRs <- traverse (new_dataC_rdr_name loc) data_cons + + ; pure ( listToBag [ gfoldl_bind, gunfold_bind + , toCon_bind dataC_RDRs, dataTypeOf_bind dataT_RDR ] + `unionBags` gcast_binds + -- Auxiliary definitions: the data type and constructors + , listToBag $ map DerivAuxBind + ( DerivDataDataType rep_tc dataT_RDR dataC_RDRs + : zipWith (\data_con dataC_RDR -> + DerivDataConstr data_con dataC_RDR dataT_RDR) + data_cons dataC_RDRs ) + ) } where data_cons = tyConDataCons rep_tc n_cons = length data_cons one_constr = n_cons == 1 - genDataTyCon :: DerivStuff - genDataTyCon -- $dT - = DerivHsBind (mkHsVarBind loc data_type_name rhs, - L loc (TypeSig noExtField [L loc data_type_name] sig_ty)) - - sig_ty = mkLHsSigWcType (nlHsTyVar dataType_RDR) - ctx = initDefaultSDocContext dflags - rhs = nlHsVar mkDataType_RDR - `nlHsApp` nlHsLit (mkHsString (showSDocOneLine ctx (ppr rep_tc))) - `nlHsApp` nlList (map nlHsVar constr_names) - - genDataDataCon :: DataCon -> RdrName -> DerivStuff - genDataDataCon dc constr_name -- $cT1 etc - = DerivHsBind (mkHsVarBind loc constr_name rhs, - L loc (TypeSig noExtField [L loc constr_name] sig_ty)) - where - sig_ty = mkLHsSigWcType (nlHsTyVar constr_RDR) - rhs = nlHsApps mkConstr_RDR constr_args - - constr_args - = [ -- nlHsIntLit (toInteger (dataConTag dc)), -- Tag - nlHsVar (data_type_name) -- DataType - , nlHsLit (mkHsString (occNameString dc_occ)) -- String name - , nlList labels -- Field labels - , nlHsVar fixity ] -- Fixity - - labels = map (nlHsLit . mkHsString . unpackFS . flLabel) - (dataConFieldLabels dc) - dc_occ = getOccName dc - is_infix = isDataSymOcc dc_occ - fixity | is_infix = infix_RDR - | otherwise = prefix_RDR ------------ gfoldl gfoldl_bind = mkFunBindEC 3 loc gfoldl_RDR id (map gfoldl_eqn data_cons) @@ -1420,16 +1453,18 @@ gen_data dflags data_type_name constr_names loc rep_tc tag = dataConTag dc ------------ toConstr - toCon_bind = mkFunBindEC 1 loc toConstr_RDR id - (zipWith to_con_eqn data_cons constr_names) + toCon_bind dataC_RDRs + = mkFunBindEC 1 loc toConstr_RDR id + (zipWith to_con_eqn data_cons dataC_RDRs) to_con_eqn dc con_name = ([nlWildConPat dc], nlHsVar con_name) ------------ dataTypeOf - dataTypeOf_bind = mkSimpleGeneratedFunBind - loc - dataTypeOf_RDR - [nlWildPat] - (nlHsVar data_type_name) + dataTypeOf_bind dataT_RDR + = mkSimpleGeneratedFunBind + loc + dataTypeOf_RDR + [nlWildPat] + (nlHsVar dataT_RDR) ------------ gcast1/2 -- Make the binding dataCast1 x = gcast1 x -- if T :: * -> * @@ -1944,7 +1979,7 @@ mkCoerceClassMethEqn cls inst_tvs inst_tys rhs_ty id {- ************************************************************************ * * -\subsection{Generating extra binds (@con2tag@ and @tag2con@)} +\subsection{Generating extra binds (@con2tag@, @tag2con@, etc.)} * * ************************************************************************ @@ -1960,80 +1995,142 @@ The `tags' here start at zero, hence the @fIRST_TAG@ (currently one) fiddling around. -} -genAuxBindSpec :: DynFlags -> SrcSpan -> AuxBindSpec - -> (LHsBind GhcPs, LSig GhcPs) -genAuxBindSpec dflags loc (DerivCon2Tag tycon) - = (mkFunBindSE 0 loc rdr_name eqns, - L loc (TypeSig noExtField [L loc rdr_name] sig_ty)) +-- | Generate the full code for an auxiliary binding. +-- See @Note [Auxiliary binders] (Wrinkle: Reducing code duplication)@. +genAuxBindSpecOriginal :: DynFlags -> SrcSpan -> AuxBindSpec + -> (LHsBind GhcPs, LSig GhcPs) +genAuxBindSpecOriginal dflags loc spec + = (gen_bind spec, + L loc (TypeSig noExtField [L loc (auxBindSpecRdrName spec)] + (genAuxBindSpecSig loc spec))) where - rdr_name = con2tag_RDR dflags tycon + gen_bind :: AuxBindSpec -> LHsBind GhcPs + gen_bind (DerivCon2Tag tycon con2tag_RDR) + = mkFunBindSE 0 loc con2tag_RDR eqns + where + lots_of_constructors = tyConFamilySize tycon > 8 + -- was: mAX_FAMILY_SIZE_FOR_VEC_RETURNS + -- but we don't do vectored returns any more. - sig_ty = mkLHsSigWcType $ L loc $ XHsType $ NHsCoreTy $ - mkSpecSigmaTy (tyConTyVars tycon) (tyConStupidTheta tycon) $ - mkParentType tycon `mkVisFunTyMany` intPrimTy + eqns | lots_of_constructors = [get_tag_eqn] + | otherwise = map mk_eqn (tyConDataCons tycon) - lots_of_constructors = tyConFamilySize tycon > 8 - -- was: mAX_FAMILY_SIZE_FOR_VEC_RETURNS - -- but we don't do vectored returns any more. + get_tag_eqn = ([nlVarPat a_RDR], nlHsApp (nlHsVar getTag_RDR) a_Expr) - eqns | lots_of_constructors = [get_tag_eqn] - | otherwise = map mk_eqn (tyConDataCons tycon) + mk_eqn :: DataCon -> ([LPat GhcPs], LHsExpr GhcPs) + mk_eqn con = ([nlWildConPat con], + nlHsLit (HsIntPrim NoSourceText + (toInteger ((dataConTag con) - fIRST_TAG)))) - get_tag_eqn = ([nlVarPat a_RDR], nlHsApp (nlHsVar getTag_RDR) a_Expr) + gen_bind (DerivTag2Con _ tag2con_RDR) + = mkFunBindSE 0 loc tag2con_RDR + [([nlConVarPat intDataCon_RDR [a_RDR]], + nlHsApp (nlHsVar tagToEnum_RDR) a_Expr)] - mk_eqn :: DataCon -> ([LPat GhcPs], LHsExpr GhcPs) - mk_eqn con = ([nlWildConPat con], - nlHsLit (HsIntPrim NoSourceText - (toInteger ((dataConTag con) - fIRST_TAG)))) + gen_bind (DerivMaxTag tycon maxtag_RDR) + = mkHsVarBind loc maxtag_RDR rhs + where + rhs = nlHsApp (nlHsVar intDataCon_RDR) + (nlHsLit (HsIntPrim NoSourceText max_tag)) + max_tag = case (tyConDataCons tycon) of + data_cons -> toInteger ((length data_cons) - fIRST_TAG) -genAuxBindSpec dflags loc (DerivTag2Con tycon) - = (mkFunBindSE 0 loc rdr_name - [([nlConVarPat intDataCon_RDR [a_RDR]], - nlHsApp (nlHsVar tagToEnum_RDR) a_Expr)], - L loc (TypeSig noExtField [L loc rdr_name] sig_ty)) - where - sig_ty = mkLHsSigWcType $ L loc $ - XHsType $ NHsCoreTy $ mkSpecForAllTys (tyConTyVars tycon) $ - intTy `mkVisFunTyMany` mkParentType tycon + gen_bind (DerivDataDataType tycon dataT_RDR dataC_RDRs) + = mkHsVarBind loc dataT_RDR rhs + where + ctx = initDefaultSDocContext dflags + rhs = nlHsVar mkDataType_RDR + `nlHsApp` nlHsLit (mkHsString (showSDocOneLine ctx (ppr tycon))) + `nlHsApp` nlList (map nlHsVar dataC_RDRs) + + gen_bind (DerivDataConstr dc dataC_RDR dataT_RDR) + = mkHsVarBind loc dataC_RDR rhs + where + rhs = nlHsApps mkConstr_RDR constr_args - rdr_name = tag2con_RDR dflags tycon + constr_args + = [ -- nlHsIntLit (toInteger (dataConTag dc)), -- Tag + nlHsVar dataT_RDR -- DataType + , nlHsLit (mkHsString (occNameString dc_occ)) -- String name + , nlList labels -- Field labels + , nlHsVar fixity ] -- Fixity + + labels = map (nlHsLit . mkHsString . unpackFS . flLabel) + (dataConFieldLabels dc) + dc_occ = getOccName dc + is_infix = isDataSymOcc dc_occ + fixity | is_infix = infix_RDR + | otherwise = prefix_RDR -genAuxBindSpec dflags loc (DerivMaxTag tycon) - = (mkHsVarBind loc rdr_name rhs, - L loc (TypeSig noExtField [L loc rdr_name] sig_ty)) +-- | Generate the code for an auxiliary binding that is a duplicate of another +-- auxiliary binding. +-- See @Note [Auxiliary binders] (Wrinkle: Reducing code duplication)@. +genAuxBindSpecDup :: SrcSpan -> RdrName -> AuxBindSpec + -> (LHsBind GhcPs, LSig GhcPs) +genAuxBindSpecDup loc original_rdr_name dup_spec + = (mkHsVarBind loc dup_rdr_name (nlHsVar original_rdr_name), + L loc (TypeSig noExtField [L loc dup_rdr_name] + (genAuxBindSpecSig loc dup_spec))) where - rdr_name = maxtag_RDR dflags tycon - sig_ty = mkLHsSigWcType (L loc (XHsType (NHsCoreTy intTy))) - rhs = nlHsApp (nlHsVar intDataCon_RDR) - (nlHsLit (HsIntPrim NoSourceText max_tag)) - max_tag = case (tyConDataCons tycon) of - data_cons -> toInteger ((length data_cons) - fIRST_TAG) + dup_rdr_name = auxBindSpecRdrName dup_spec + +-- | Generate the type signature of an auxiliary binding. +-- See @Note [Auxiliary binders]@. +genAuxBindSpecSig :: SrcSpan -> AuxBindSpec -> LHsSigWcType GhcPs +genAuxBindSpecSig loc spec = case spec of + DerivCon2Tag tycon _ + -> mkLHsSigWcType $ L loc $ XHsType $ NHsCoreTy $ + mkSpecSigmaTy (tyConTyVars tycon) (tyConStupidTheta tycon) $ + mkParentType tycon `mkVisFunTyMany` intPrimTy + DerivTag2Con tycon _ + -> mkLHsSigWcType $ L loc $ + XHsType $ NHsCoreTy $ mkSpecForAllTys (tyConTyVars tycon) $ + intTy `mkVisFunTyMany` mkParentType tycon + DerivMaxTag _ _ + -> mkLHsSigWcType (L loc (XHsType (NHsCoreTy intTy))) + DerivDataDataType _ _ _ + -> mkLHsSigWcType (nlHsTyVar dataType_RDR) + DerivDataConstr _ _ _ + -> mkLHsSigWcType (nlHsTyVar constr_RDR) type SeparateBagsDerivStuff = - -- AuxBinds and SYB bindings + -- DerivAuxBinds ( Bag (LHsBind GhcPs, LSig GhcPs) - -- Extra family instances (used by Generic and DeriveAnyClass) - , Bag (FamInst) ) + -- Extra family instances (used by DeriveGeneric, DeriveAnyClass, and + -- GeneralizedNewtypeDeriving) + , Bag FamInst ) + +-- | Take a 'BagDerivStuff' and partition it into 'SeparateBagsDerivStuff'. +-- Also generate the code for auxiliary bindings based on the declarative +-- descriptions in the supplied 'AuxBindSpec's. See @Note [Auxiliary binders]@. genAuxBinds :: DynFlags -> SrcSpan -> BagDerivStuff -> SeparateBagsDerivStuff -genAuxBinds dflags loc b = genAuxBinds' b2 where +genAuxBinds dflags loc b = (gen_aux_bind_specs b1, b2) where (b1,b2) = partitionBagWith splitDerivAuxBind b splitDerivAuxBind (DerivAuxBind x) = Left x - splitDerivAuxBind x = Right x - - rm_dups = foldr dup_check emptyBag - dup_check a b = if anyBag (== a) b then b else consBag a b - - genAuxBinds' :: BagDerivStuff -> SeparateBagsDerivStuff - genAuxBinds' = foldr f ( mapBag (genAuxBindSpec dflags loc) (rm_dups b1) - , emptyBag ) - f :: DerivStuff -> SeparateBagsDerivStuff -> SeparateBagsDerivStuff - f (DerivAuxBind _) = panic "genAuxBinds'" -- We have removed these before - f (DerivHsBind b) = add1 b - f (DerivFamInst t) = add2 t - - add1 x (a,b) = (x `consBag` a,b) - add2 x (a,b) = (a,x `consBag` b) + splitDerivAuxBind (DerivFamInst t) = Right t + + gen_aux_bind_specs = snd . foldr gen_aux_bind_spec (emptyOccEnv, emptyBag) + + -- Perform a CSE-like pass over the generated auxiliary bindings to avoid + -- code duplication, as described in + -- Note [Auxiliary binders] (Wrinkle: Reducing code duplication). + -- The OccEnv remembers the first occurrence of each sort of auxiliary + -- binding and maps it to the unique RdrName for that binding. + gen_aux_bind_spec :: AuxBindSpec + -> (OccEnv RdrName, Bag (LHsBind GhcPs, LSig GhcPs)) + -> (OccEnv RdrName, Bag (LHsBind GhcPs, LSig GhcPs)) + gen_aux_bind_spec spec (original_rdr_name_env, spec_bag) = + case lookupOccEnv original_rdr_name_env spec_occ of + Nothing + -> ( extendOccEnv original_rdr_name_env spec_occ spec_rdr_name + , genAuxBindSpecOriginal dflags loc spec `consBag` spec_bag ) + Just original_rdr_name + -> ( original_rdr_name_env + , genAuxBindSpecDup loc original_rdr_name spec `consBag` spec_bag ) + where + spec_rdr_name = auxBindSpecRdrName spec + spec_occ = rdrNameOcc spec_rdr_name mkParentType :: TyCon -> Type -- Turn the representation tycon of a family into @@ -2268,13 +2365,12 @@ eq_Expr ty a b where (_, _, prim_eq, _, _) = primOrdOps "Eq" ty -untag_Expr :: DynFlags -> TyCon -> [( RdrName, RdrName)] - -> LHsExpr GhcPs -> LHsExpr GhcPs -untag_Expr _ _ [] expr = expr -untag_Expr dflags tycon ((untag_this, put_tag_here) : more) expr - = nlHsCase (nlHsPar (nlHsVarApps (con2tag_RDR dflags tycon) - [untag_this])) {-of-} - [mkHsCaseAlt (nlVarPat put_tag_here) (untag_Expr dflags tycon more expr)] +untag_Expr :: RdrName -> [(RdrName, RdrName)] + -> LHsExpr GhcPs -> LHsExpr GhcPs +untag_Expr _ [] expr = expr +untag_Expr con2tag_RDR ((untag_this, put_tag_here) : more) expr + = nlHsCase (nlHsPar (nlHsVarApps con2tag_RDR [untag_this])) {-of-} + [mkHsCaseAlt (nlVarPat put_tag_here) (untag_Expr con2tag_RDR more expr)] enum_from_to_Expr :: LHsExpr GhcPs -> LHsExpr GhcPs @@ -2386,54 +2482,251 @@ minusInt_RDR, tagToEnum_RDR :: RdrName minusInt_RDR = getRdrName (primOpId IntSubOp ) tagToEnum_RDR = getRdrName (primOpId TagToEnumOp) -con2tag_RDR, tag2con_RDR, maxtag_RDR :: DynFlags -> TyCon -> RdrName --- Generates Orig s RdrName, for the binding positions -con2tag_RDR dflags tycon = mk_tc_deriv_name dflags tycon mkCon2TagOcc -tag2con_RDR dflags tycon = mk_tc_deriv_name dflags tycon mkTag2ConOcc -maxtag_RDR dflags tycon = mk_tc_deriv_name dflags tycon mkMaxTagOcc - -mk_tc_deriv_name :: DynFlags -> TyCon -> (OccName -> OccName) -> RdrName -mk_tc_deriv_name dflags tycon occ_fun = - mkAuxBinderName dflags (tyConName tycon) occ_fun - -mkAuxBinderName :: DynFlags -> Name -> (OccName -> OccName) -> RdrName --- ^ Make a top-level binder name for an auxiliary binding for a parent name --- See Note [Auxiliary binders] -mkAuxBinderName dflags parent occ_fun - = mkRdrUnqual (occ_fun stable_parent_occ) - where - stable_parent_occ = mkOccName (occNameSpace parent_occ) stable_string - stable_string - | hasPprDebug dflags = parent_stable - | otherwise = parent_stable_hash - parent_stable = nameStableString parent - parent_stable_hash = - let Fingerprint high low = fingerprintString parent_stable - in toBase62 high ++ toBase62Padded low - -- See Note [Base 62 encoding 128-bit integers] in GHC.Utils.Encoding - parent_occ = nameOccName parent +new_con2tag_rdr_name, new_tag2con_rdr_name, new_maxtag_rdr_name + :: SrcSpan -> TyCon -> TcM RdrName +-- Generates Exact RdrNames, for the binding positions +new_con2tag_rdr_name dflags tycon = new_tc_deriv_rdr_name dflags tycon mkCon2TagOcc +new_tag2con_rdr_name dflags tycon = new_tc_deriv_rdr_name dflags tycon mkTag2ConOcc +new_maxtag_rdr_name dflags tycon = new_tc_deriv_rdr_name dflags tycon mkMaxTagOcc + +new_dataT_rdr_name :: SrcSpan -> TyCon -> TcM RdrName +new_dataT_rdr_name dflags tycon = new_tc_deriv_rdr_name dflags tycon mkDataTOcc + +new_dataC_rdr_name :: SrcSpan -> DataCon -> TcM RdrName +new_dataC_rdr_name dflags dc = new_dc_deriv_rdr_name dflags dc mkDataCOcc + +new_tc_deriv_rdr_name :: SrcSpan -> TyCon -> (OccName -> OccName) -> TcM RdrName +new_tc_deriv_rdr_name loc tycon occ_fun + = newAuxBinderRdrName loc (tyConName tycon) occ_fun + +new_dc_deriv_rdr_name :: SrcSpan -> DataCon -> (OccName -> OccName) -> TcM RdrName +new_dc_deriv_rdr_name loc dc occ_fun + = newAuxBinderRdrName loc (dataConName dc) occ_fun + +-- | Generate the name for an auxiliary binding, giving it a fresh 'Unique'. +-- Returns an 'Exact' 'RdrName' with an underlying 'System' 'Name'. +-- See @Note [Auxiliary binders]@. +newAuxBinderRdrName :: SrcSpan -> Name -> (OccName -> OccName) -> TcM RdrName +newAuxBinderRdrName loc parent occ_fun = do + uniq <- newUnique + pure $ Exact $ mkSystemNameAt uniq (occ_fun (nameOccName parent)) loc {- Note [Auxiliary binders] ~~~~~~~~~~~~~~~~~~~~~~~~ -We often want to make a top-level auxiliary binding. E.g. for comparison we have +We often want to make top-level auxiliary bindings in derived instances. +For example, derived Eq instances sometimes generate code like this: + + data T = ... + deriving instance Eq T + + ==> + + instance Eq T where + a == b = $con2tag_T a == $con2tag_T b + + $con2tag_T :: T -> Int + $con2tag_T = ...code.... + +Note that multiple instances of the same type might need to use the same sort +of auxiliary binding. For example, $con2tag is used not only in derived Eq +instances, but also in derived Ord instances: + + deriving instance Ord T + + ==> instance Ord T where - compare a b = $con2tag a `compare` $con2tag b + compare a b = $con2tag_T a `compare` $con2tag_T b + + $con2tag_T :: T -> Int + $con2tag_T = ...code.... + +How do we ensure that the two usages of $con2tag_T do not conflict with each +other? We do so by generating a separate $con2tag_T definition for each +instance, giving each definition an Exact RdrName with a separate Unique to +avoid name clashes: + + instance Eq T where + a == b = $con2tag_T{Uniq1} a == $con2tag_T{Uniq1} b - $con2tag :: T -> Int - $con2tag = ...code.... + instance Ord T where + compare a b = $con2tag_T{Uniq2} a `compare` $con2tag_T{Uniq2} b -Of course these top-level bindings should all have distinct name, and we are -generating RdrNames here. We can't just use the TyCon or DataCon to distinguish -because with standalone deriving two imported TyCons might both be called T! -(See #7947.) + -- $con2tag_T{Uniq1} and $con2tag_T{Uniq2} are Exact RdrNames with + -- underyling System Names -So we use package name, module name and the name of the parent -(T in this example) as part of the OccName we generate for the new binding. -To make the symbol names short we take a base62 hash of the full name. + $con2tag_T{Uniq1} :: T -> Int + $con2tag_T{Uniq1} = ...code.... -In the past we used the *unique* from the parent, but that's not stable across -recompilations as uniques are nondeterministic. + $con2tag_T{Uniq2} :: T -> Int + $con2tag_T{Uniq2} = ...code.... + +Note that: + +* This is /precisely/ the same mechanism that we use for + Template Haskell–generated code. + See Note [Binders in Template Haskell] in GHC.ThToHs. + There we explain why we use a 'System' flavour of the Name we generate. + +* See "Wrinkle: Reducing code duplication" for how we can avoid generating + lots of duplicated code in common situations. + +* See "Wrinkle: Why we sometimes do generated duplicate code" for why this + de-duplication mechanism isn't perfect, so we fall back to CSE + (which is very effective within a single module). + +* Note that the "_T" part of "$con2tag_T" is just for debug-printing + purposes. We could call them all "$con2tag", or even just "aux". + The Unique is enough to keep them separate. + + This is important: we might be generating an Eq instance for two + completely-distinct imported type constructors T. + +At first glance, it might appear that this plan is infeasible, as it would +require generating multiple top-level declarations with the same OccName. But +what if auxiliary bindings /weren't/ top-level? Conceptually, we could imagine +that auxiliary bindings are /local/ to the instance declarations in which they +are used. Using some hypothetical Haskell syntax, it might look like this: + + let { + $con2tag_T{Uniq1} :: T -> Int + $con2tag_T{Uniq1} = ...code.... + + $con2tag_T{Uniq2} :: T -> Int + $con2tag_T{Uniq2} = ...code.... + } in { + instance Eq T where + a == b = $con2tag_T{Uniq1} a == $con2tag_T{Uniq1} b + + instance Ord T where + compare a b = $con2tag_T{Uniq2} a `compare` $con2tag_T{Uniq2} b + } + +Making auxiliary bindings local is key to making this work, since GHC will +not reject local bindings with duplicate names provided that: + +* Each binding has a distinct unique, and +* Each binding has an Exact RdrName with a System Name. + +Even though the hypothetical Haskell syntax above does not exist, we can +accomplish the same end result through some sleight of hand in renameDeriv: +we rename auxiliary bindings with rnLocalValBindsLHS. (If we had used +rnTopBindsLHS instead, then GHC would spuriously reject auxiliary bindings +with the same OccName as duplicates.) Luckily, no special treatment is needed +to typecheck them; we can typecheck them as normal top-level bindings +(using tcTopBinds) without danger. + +----- +-- Wrinkle: Reducing code duplication +----- + +While the approach of generating copies of each sort of auxiliary binder per +derived instance is simpler, it can lead to code bloat if done naïvely. +Consider this example: + + data T = ... + deriving instance Eq T + deriving instance Ord T + + ==> + + instance Eq T where + a == b = $con2tag_T{Uniq1} a == $con2tag_T{Uniq1} b + + instance Ord T where + compare a b = $con2tag_T{Uniq2} a `compare` $con2tag_T{Uniq2} b + + $con2tag_T{Uniq1} :: T -> Int + $con2tag_T{Uniq1} = ...code.... + + $con2tag_T{Uniq2} :: T -> Int + $con2tag_T{Uniq2} = ...code.... + +$con2tag_T{Uniq1} and $con2tag_T{Uniq2} are blatant duplicates of each other, +which is not ideal. Surely GHC can do better than that at the very least! And +indeed it does. Within the genAuxBinds function, GHC performs a small CSE-like +pass to define duplicate auxiliary binders in terms of the original one. On +the example above, that would look like this: + + $con2tag_T{Uniq1} :: T -> Int + $con2tag_T{Uniq1} = ...code.... + + $con2tag_T{Uniq2} :: T -> Int + $con2tag_T{Uniq2} = $con2tag_T{Uniq1} + +(Note that this pass does not cover all possible forms of code duplication. +See "Wrinkle: Why we sometimes do generate duplicate code" for situations +where genAuxBinds does not deduplicate code.) + +To start, genAuxBinds is given a list of AuxBindSpecs, which describe the sort +of auxiliary bindings that must be generates along with their RdrNames. As +genAuxBinds processes this list, it marks the first occurrence of each sort of +auxiliary binding as the "original". For example, if genAuxBinds sees a +DerivCon2Tag for the first time (with the RdrName $con2tag_T{Uniq1}), then it +will generate the full code for a $con2tag binding: + + $con2tag_T{Uniq1} :: T -> Int + $con2tag_T{Uniq1} = ...code.... + +Later, if genAuxBinds sees any additional DerivCon2Tag values, it will treat +them as duplicates. For example, if genAuxBinds later sees a DerivCon2Tag with +the RdrName $con2tag_T{Uniq2}, it will generate this code, which is much more +compact: + + $con2tag_T{Uniq2} :: T -> Int + $con2tag_T{Uniq2} = $con2tag_T{Uniq1} + +An alternative approach would be /not/ performing any kind of deduplication in +genAuxBinds at all and simply relying on GHC's simplifier to perform this kind +of CSE. But this is a more expensive analysis in general, while genAuxBinds can +accomplish the same result with a simple check. + +----- +-- Wrinkle: Why we sometimes do generate duplicate code +----- + +It is worth noting that deduplicating auxiliary binders is difficult in the +general case. Here are two particular examples where GHC cannot easily remove +duplicate copies of an auxiliary binding: + +1. When derived instances are contained in different modules, as in the + following example: + + module A where + data T = ... + module B where + import A + deriving instance Eq T + module C where + import B + deriving instance Enum T + + The derived Eq and Enum instances for T make use of $con2tag_T, and since + they are defined in separate modules, each module must produce its own copy + of $con2tag_T. + +2. When derived instances are separated by TH splices (#18321), as in the + following example: + + module M where + + data T = ... + deriving instance Eq T + $(pure []) + deriving instance Enum T + + Due to the way that GHC typechecks TyClGroups, genAuxBinds will run twice + in this program: once for all the declarations before the TH splice, and + once again for all the declarations after the TH splice. As a result, + $con2tag_T will be generated twice, since genAuxBinds will be unable to + recognize the presence of duplicates. + +These situations are much rarer, so we do not spend any effort to deduplicate +auxiliary bindings there. Instead, we focus on the common case of multiple +derived instances within the same module, not separated by any TH splices. +(This is the case described in "Wrinkle: Reducing code duplication".) In +situation (1), we can at least fall back on GHC's simplifier to pick up +genAuxBinds' slack. -} |