diff options
author | Vladislav Zavialov <vlad.z.4096@gmail.com> | 2019-12-05 03:06:40 +0300 |
---|---|---|
committer | Ben Gamari <ben@smart-cactus.org> | 2020-07-21 14:50:01 -0400 |
commit | 19e80b9af252eee760dc047765a9930ef00067ec (patch) | |
tree | cb45fce4b1e74e1a82c5bd926fda0e92de1964c1 /compiler/GHC/Parser | |
parent | 58235d46bd4e9fbf69bd82969b29cd9c6ab051e1 (diff) | |
download | haskell-19e80b9af252eee760dc047765a9930ef00067ec.tar.gz |
Accumulate Haddock comments in P (#17544, #17561, #8944)
Haddock comments are, first and foremost, comments. It's very annoying
to incorporate them into the grammar. We can take advantage of an
important property: adding a Haddock comment does not change the parse
tree in any way other than wrapping some nodes in HsDocTy and the like
(and if it does, that's a bug).
This patch implements the following:
* Accumulate Haddock comments with their locations in the P monad.
This is handled in the lexer.
* After parsing, do a pass over the AST to associate Haddock comments
with AST nodes using location info.
* Report the leftover comments to the user as a warning (-Winvalid-haddock).
Diffstat (limited to 'compiler/GHC/Parser')
-rw-r--r-- | compiler/GHC/Parser/Lexer.x | 106 | ||||
-rw-r--r-- | compiler/GHC/Parser/PostProcess.hs | 214 | ||||
-rw-r--r-- | compiler/GHC/Parser/PostProcess/Haddock.hs | 1565 |
3 files changed, 1676 insertions, 209 deletions
diff --git a/compiler/GHC/Parser/Lexer.x b/compiler/GHC/Parser/Lexer.x index ef9f1803bf..7265e1dffb 100644 --- a/compiler/GHC/Parser/Lexer.x +++ b/compiler/GHC/Parser/Lexer.x @@ -66,7 +66,8 @@ module GHC.Parser.Lexer ( lexTokenStream, AddAnn(..),mkParensApiAnn, addAnnsAt, - commentToAnnotation + commentToAnnotation, + HdkComment(..), ) where import GHC.Prelude @@ -97,6 +98,8 @@ import GHC.Utils.Outputable import GHC.Data.StringBuffer import GHC.Data.FastString import GHC.Types.Unique.FM +import GHC.Data.Maybe +import GHC.Data.OrdList import GHC.Utils.Misc ( readRational, readHexRational ) -- compiler/main @@ -109,6 +112,7 @@ import GHC.Unit import GHC.Types.Basic ( InlineSpec(..), RuleMatchInfo(..), IntegralLit(..), FractionalLit(..), SourceText(..) ) +import GHC.Hs.Doc -- compiler/parser import GHC.Parser.CharClass @@ -363,10 +367,8 @@ $tab { warnTab } -- Haddock comments -<0,option_prags> { - "-- " $docsym / { ifExtension HaddockBit } { multiline_doc_comment } - "{-" \ ? $docsym / { ifExtension HaddockBit } { nested_doc_comment } -} +"-- " $docsym / { ifExtension HaddockBit } { multiline_doc_comment } +"{-" \ ? $docsym / { ifExtension HaddockBit } { nested_doc_comment } -- "special" symbols @@ -1271,11 +1273,8 @@ nested_comment cont span buf len = do go (reverse $ lexemeToString buf len) (1::Int) input where go commentAcc 0 input = do - setInput input - b <- getBit RawTokenStreamBit - if b - then docCommentEnd input commentAcc ITblockComment buf span - else cont + let finalizeComment str = (Nothing, ITblockComment str) + commentEnd cont input commentAcc finalizeComment buf span go commentAcc n input = case alexGetChar' input of Nothing -> errBrace input (psRealSpan span) Just ('-',input) -> case alexGetChar' input of @@ -1365,24 +1364,37 @@ return control to parseNestedPragma by returning the ITcomment_line_prag token. See #314 for more background on the bug this fixes. -} -withLexedDocType :: (AlexInput -> (String -> Token) -> Bool -> P (PsLocated Token)) +withLexedDocType :: (AlexInput -> (String -> (HdkComment, Token)) -> Bool -> P (PsLocated Token)) -> P (PsLocated Token) withLexedDocType lexDocComment = do input@(AI _ buf) <- getInput case prevChar buf ' ' of -- The `Bool` argument to lexDocComment signals whether or not the next -- line of input might also belong to this doc comment. - '|' -> lexDocComment input ITdocCommentNext True - '^' -> lexDocComment input ITdocCommentPrev True - '$' -> lexDocComment input ITdocCommentNamed True + '|' -> lexDocComment input mkHdkCommentNext True + '^' -> lexDocComment input mkHdkCommentPrev True + '$' -> lexDocComment input mkHdkCommentNamed True '*' -> lexDocSection 1 input _ -> panic "withLexedDocType: Bad doc type" where lexDocSection n input = case alexGetChar' input of Just ('*', input) -> lexDocSection (n+1) input - Just (_, _) -> lexDocComment input (ITdocSection n) False + Just (_, _) -> lexDocComment input (mkHdkCommentSection n) False Nothing -> do setInput input; lexToken -- eof reached, lex it normally +mkHdkCommentNext, mkHdkCommentPrev :: String -> (HdkComment, Token) +mkHdkCommentNext str = (HdkCommentNext (mkHsDocString str), ITdocCommentNext str) +mkHdkCommentPrev str = (HdkCommentPrev (mkHsDocString str), ITdocCommentPrev str) + +mkHdkCommentNamed :: String -> (HdkComment, Token) +mkHdkCommentNamed str = + let (name, rest) = break isSpace str + in (HdkCommentNamed name (mkHsDocString rest), ITdocCommentNamed str) + +mkHdkCommentSection :: Int -> String -> (HdkComment, Token) +mkHdkCommentSection n str = + (HdkCommentSection n (mkHsDocString str), ITdocSection n str) + -- RULES pragmas turn on the forall and '.' keywords, and we turn them -- off again at the end of the pragma. rulePrag :: Action @@ -1425,17 +1437,34 @@ endPrag span _buf _len = do -- it writes the wrong token length to the parser state. This function is -- called afterwards, so it can just update the state. -docCommentEnd :: AlexInput -> String -> (String -> Token) -> StringBuffer -> - PsSpan -> P (PsLocated Token) -docCommentEnd input commentAcc docType buf span = do +commentEnd :: P (PsLocated Token) + -> AlexInput + -> String + -> (String -> (Maybe HdkComment, Token)) + -> StringBuffer + -> PsSpan + -> P (PsLocated Token) +commentEnd cont input commentAcc finalizeComment buf span = do setInput input let (AI loc nextBuf) = input comment = reverse commentAcc span' = mkPsSpan (psSpanStart span) loc last_len = byteDiff buf nextBuf - span `seq` setLastToken span' last_len - return (L span' (docType comment)) + let (m_hdk_comment, hdk_token) = finalizeComment comment + whenIsJust m_hdk_comment $ \hdk_comment -> + P $ \s -> POk (s {hdk_comments = hdk_comments s `snocOL` L span' hdk_comment}) () + b <- getBit RawTokenStreamBit + if b then return (L span' hdk_token) + else cont + +docCommentEnd :: AlexInput -> String -> (String -> (HdkComment, Token)) -> StringBuffer -> + PsSpan -> P (PsLocated Token) +docCommentEnd input commentAcc docType buf span = do + let finalizeComment str = + let (hdk_comment, token) = docType str + in (Just hdk_comment, token) + commentEnd lexToken input commentAcc finalizeComment buf span errBrace :: AlexInput -> RealSrcSpan -> P a errBrace (AI end _) span = failLocMsgP (realSrcSpanStart span) (psRealLoc end) "unterminated `{-'" @@ -2170,6 +2199,15 @@ data ParserFlags = ParserFlags { , pExtsBitmap :: !ExtsBitmap -- ^ bitmap of permitted extensions } +-- | Haddock comment as produced by the lexer. These are accumulated in +-- 'PState' and then processed in "GHC.Parser.PostProcess.Haddock". +data HdkComment + = HdkCommentNext HsDocString + | HdkCommentPrev HsDocString + | HdkCommentNamed String HsDocString + | HdkCommentSection Int HsDocString + deriving Show + data PState = PState { buffer :: StringBuffer, options :: ParserFlags, @@ -2211,7 +2249,13 @@ data PState = PState { annotations :: [(ApiAnnKey,[RealSrcSpan])], eof_pos :: Maybe RealSrcSpan, comment_q :: [RealLocated AnnotationComment], - annotations_comments :: [(RealSrcSpan,[RealLocated AnnotationComment])] + annotations_comments :: [(RealSrcSpan,[RealLocated AnnotationComment])], + + -- Haddock comments accumulated in ascending order of their location + -- (BufPos). We use OrdList to get O(1) snoc. + -- + -- See Note [Adding Haddock comments to the syntax tree] in GHC.Parser.PostProcess.Haddock + hdk_comments :: OrdList (PsLocated HdkComment) } -- last_loc and last_len are used when generating error messages, -- and in pushCurrentContext only. Sigh, if only Happy passed the @@ -2698,7 +2742,8 @@ mkPStatePure options buf loc = annotations = [], eof_pos = Nothing, comment_q = [], - annotations_comments = [] + annotations_comments = [], + hdk_comments = nilOL } where init_loc = PsLoc loc (BufPos 0) @@ -2917,10 +2962,6 @@ lexer queueComments cont = do (L span tok) <- lexTokenFun --trace ("token: " ++ show tok) $ do - if (queueComments && isDocComment tok) - then queueComment (L (psRealSpan span) tok) - else return () - if (queueComments && isComment tok) then queueComment (L (psRealSpan span) tok) >> lexer queueComments cont else cont (L (mkSrcSpanPs span) tok) @@ -3372,13 +3413,10 @@ commentToAnnotation _ = panic "commentToAnnotation" isComment :: Token -> Bool isComment (ITlineComment _) = True isComment (ITblockComment _) = True +isComment (ITdocCommentNext _) = True +isComment (ITdocCommentPrev _) = True +isComment (ITdocCommentNamed _) = True +isComment (ITdocSection _ _) = True +isComment (ITdocOptions _) = True isComment _ = False - -isDocComment :: Token -> Bool -isDocComment (ITdocCommentNext _) = True -isDocComment (ITdocCommentPrev _) = True -isDocComment (ITdocCommentNamed _) = True -isDocComment (ITdocSection _ _) = True -isDocComment (ITdocOptions _) = True -isDocComment _ = False } diff --git a/compiler/GHC/Parser/PostProcess.hs b/compiler/GHC/Parser/PostProcess.hs index 398bd78ddc..3cf5b30b06 100644 --- a/compiler/GHC/Parser/PostProcess.hs +++ b/compiler/GHC/Parser/PostProcess.hs @@ -126,7 +126,6 @@ import GHC.Builtin.Names ( allNameStrings ) import GHC.Types.SrcLoc import GHC.Types.Unique ( hasKey ) import GHC.Data.OrdList ( OrdList, fromOL ) -import GHC.Data.Bag ( emptyBag, consBag ) import GHC.Utils.Outputable as Outputable import GHC.Data.FastString import GHC.Data.Maybe @@ -172,16 +171,18 @@ mkClassDecl :: SrcSpan -> Located (Maybe (LHsContext GhcPs), LHsType GhcPs) -> Located (a,[LHsFunDep GhcPs]) -> OrdList (LHsDecl GhcPs) + -> LayoutInfo -> P (LTyClDecl GhcPs) -mkClassDecl loc (L _ (mcxt, tycl_hdr)) fds where_cls +mkClassDecl loc (L _ (mcxt, tycl_hdr)) fds where_cls layoutInfo = do { (binds, sigs, ats, at_defs, _, docs) <- cvBindsAndSigs where_cls ; let cxt = fromMaybe (noLoc []) mcxt ; (cls, tparams, fixity, ann) <- checkTyClHdr True tycl_hdr ; addAnnsAt loc ann -- Add any API Annotations to the top SrcSpan ; (tyvars,annst) <- checkTyVars (text "class") whereDots cls tparams ; addAnnsAt loc annst -- Add any API Annotations to the top SrcSpan - ; return (L loc (ClassDecl { tcdCExt = noExtField, tcdCtxt = cxt + ; return (L loc (ClassDecl { tcdCExt = layoutInfo + , tcdCtxt = cxt , tcdLName = cls, tcdTyVars = tyvars , tcdFixity = fixity , tcdFDs = snd (unLoc fds) @@ -418,14 +419,7 @@ fromSpecTyVarBndr bndr = case bndr of -- | Groups together bindings for a single function cvTopDecls :: OrdList (LHsDecl GhcPs) -> [LHsDecl GhcPs] -cvTopDecls decls = go (fromOL decls) - where - go :: [LHsDecl GhcPs] -> [LHsDecl GhcPs] - go [] = [] - go ((L l (ValD x b)) : ds) - = L l' (ValD x b') : go ds' - where (L l' b', ds') = getMonoBind (L l b) ds - go (d : ds) = d : go ds +cvTopDecls decls = getMonoBindAll (fromOL decls) -- Declaration list may only contain value bindings and signatures. cvBindGroup :: OrdList (LHsDecl GhcPs) -> P (HsValBinds GhcPs) @@ -441,33 +435,32 @@ cvBindsAndSigs :: OrdList (LHsDecl GhcPs) -- Input decls contain just value bindings and signatures -- and in case of class or instance declarations also -- associated type declarations. They might also contain Haddock comments. -cvBindsAndSigs fb = go (fromOL fb) +cvBindsAndSigs fb = do + fb' <- drop_bad_decls (fromOL fb) + return (partitionBindsAndSigs (getMonoBindAll fb')) where - go [] = return (emptyBag, [], [], [], [], []) - go ((L l (ValD _ b)) : ds) - = do { (bs, ss, ts, tfis, dfis, docs) <- go ds' - ; return (b' `consBag` bs, ss, ts, tfis, dfis, docs) } - where - (b', ds') = getMonoBind (L l b) ds - go ((L l decl) : ds) - = do { (bs, ss, ts, tfis, dfis, docs) <- go ds - ; case decl of - SigD _ s - -> return (bs, L l s : ss, ts, tfis, dfis, docs) - TyClD _ (FamDecl _ t) - -> return (bs, ss, L l t : ts, tfis, dfis, docs) - InstD _ (TyFamInstD { tfid_inst = tfi }) - -> return (bs, ss, ts, L l tfi : tfis, dfis, docs) - InstD _ (DataFamInstD { dfid_inst = dfi }) - -> return (bs, ss, ts, tfis, L l dfi : dfis, docs) - DocD _ d - -> return (bs, ss, ts, tfis, dfis, L l d : docs) - SpliceD _ d - -> addFatalError l $ - hang (text "Declaration splices are allowed only" <+> - text "at the top level:") - 2 (ppr d) - _ -> pprPanic "cvBindsAndSigs" (ppr decl) } + -- cvBindsAndSigs is called in several places in the parser, + -- and its items can be produced by various productions: + -- + -- * decl (when parsing a where clause or a let-expression) + -- * decl_inst (when parsing an instance declaration) + -- * decl_cls (when parsing a class declaration) + -- + -- partitionBindsAndSigs can handle almost all declaration forms produced + -- by the aforementioned productions, except for SpliceD, which we filter + -- out here (in drop_bad_decls). + -- + -- We're not concerned with every declaration form possible, such as those + -- produced by the topdecl parser production, because cvBindsAndSigs is not + -- called on top-level declarations. + drop_bad_decls [] = return [] + drop_bad_decls (L l (SpliceD _ d) : ds) = do + addError l $ + hang (text "Declaration splices are allowed only" <+> + text "at the top level:") + 2 (ppr d) + drop_bad_decls ds + drop_bad_decls (d:ds) = (d:) <$> drop_bad_decls ds ----------------------------------------------------------------------------- -- Group function bindings into equation groups @@ -512,6 +505,14 @@ getMonoBind (L loc1 (FunBind { fun_id = fun_id1@(L _ f1) getMonoBind bind binds = (bind, binds) +-- Group together adjacent FunBinds for every function. +getMonoBindAll :: [LHsDecl GhcPs] -> [LHsDecl GhcPs] +getMonoBindAll [] = [] +getMonoBindAll (L l (ValD _ b) : ds) = + let (L l' b', ds') = getMonoBind (L l b) ds + in L l' (ValD noExtField b') : getMonoBindAll ds' +getMonoBindAll (d : ds) = d : getMonoBindAll ds + has_args :: [LMatch GhcPs (LHsExpr GhcPs)] -> Bool has_args [] = panic "GHC.Parser.PostProcess.has_args" has_args (L _ (Match { m_pats = args }) : _) = not (null args) @@ -1035,21 +1036,7 @@ checkContext (L l orig_t) else (anns ++ mkParensApiAnn lp1) -- no need for anns, returning original - check _anns t = checkNoDocs msg t *> return ([],L l [L l orig_t]) - - msg = text "data constructor context" - --- | Check recursively if there are any 'HsDocTy's in the given type. --- This only works on a subset of types produced by 'btype_no_ops' -checkNoDocs :: SDoc -> LHsType GhcPs -> P () -checkNoDocs msg ty = go ty - where - go (L _ (HsAppKindTy _ ty ki)) = go ty *> go ki - go (L _ (HsAppTy _ t1 t2)) = go t1 *> go t2 - go (L l (HsDocTy _ t ds)) = addError l $ hsep - [ text "Unexpected haddock", quotes (ppr ds) - , text "on", msg, quotes (ppr t) ] - go _ = pure () + check _anns _t = return ([],L l [L l orig_t]) checkImportDecl :: Maybe (Located Token) -> Maybe (Located Token) @@ -1338,7 +1325,6 @@ data TyEl = TyElOpr RdrName | TyElOpd (HsType GhcPs) | TyElKindApp SrcSpan (LHsType GhcPs) -- See Note [TyElKindApp SrcSpan interpretation] | TyElUnpackedness ([AddAnn], SourceText, SrcUnpackedness) - | TyElDocPrev HsDocString {- Note [TyElKindApp SrcSpan interpretation] @@ -1360,7 +1346,6 @@ instance Outputable TyEl where ppr (TyElOpd ty) = ppr ty ppr (TyElKindApp _ ki) = text "@" <> ppr ki ppr (TyElUnpackedness (_, _, unpk)) = ppr unpk - ppr (TyElDocPrev doc) = ppr doc -- | Extract a strictness/unpackedness annotation from the front of a reversed -- 'TyEl' list. @@ -1447,11 +1432,6 @@ mergeOps all_xs = go (0 :: Int) [] id all_xs -- See Note [Impossible case in mergeOps clause [unpk]] panic "mergeOps.UNPACK: impossible position" - -- clause [doc]: - -- we do not expect to encounter any docs - go _ _ _ ((L l (TyElDocPrev _)):_) = - failOpDocPrev l - -- clause [opr]: -- when we encounter an operator, we must have accumulated -- something for its rhs, and there must be something left @@ -1571,13 +1551,6 @@ pLHsTypeArg (L l (TyElOpd a)) = Just (HsValArg (L l a)) pLHsTypeArg (L _ (TyElKindApp l a)) = Just (HsTypeArg l a) pLHsTypeArg _ = Nothing -pDocPrev :: [Located TyEl] -> (Maybe LHsDocString, [Located TyEl]) -pDocPrev = go Nothing - where - go mTrailingDoc ((L l (TyElDocPrev doc)):xs) = - go (mTrailingDoc `mplus` Just (L l doc)) xs - go mTrailingDoc xs = (mTrailingDoc, xs) - orErr :: Maybe a -> b -> Either b a orErr (Just a) _ = Right a orErr Nothing b = Left b @@ -1594,123 +1567,77 @@ mergeDataCon :: [Located TyEl] -> P ( Located RdrName -- constructor name , HsConDeclDetails GhcPs -- constructor field information - , Maybe LHsDocString -- docstring to go on the constructor ) mergeDataCon all_xs = do { (addAnns, a) <- eitherToP res ; addAnns ; return a } where - -- We start by splitting off the trailing documentation comment, - -- if any exists. - (mTrailingDoc, all_xs') = pDocPrev all_xs - - -- Determine whether the trailing documentation comment exists and is the - -- only docstring in this constructor declaration. - -- - -- When true, it means that it applies to the constructor itself: - -- data T = C - -- A - -- B -- ^ Comment on C (singleDoc == True) - -- - -- When false, it means that it applies to the last field: - -- data T = C -- ^ Comment on C - -- A -- ^ Comment on A - -- B -- ^ Comment on B (singleDoc == False) - singleDoc = isJust mTrailingDoc && - null [ () | (L _ (TyElDocPrev _)) <- all_xs' ] - -- The result of merging the list of reversed TyEl into a -- data constructor, along with [AddAnn]. - res = goFirst all_xs' - - -- Take the trailing docstring into account when interpreting - -- the docstring near the constructor. - -- - -- data T = C -- ^ docstring right after C - -- A - -- B -- ^ trailing docstring - -- - -- 'mkConDoc' must be applied to the docstring right after C, so that it - -- falls back to the trailing docstring when appropriate (see singleDoc). - mkConDoc mDoc | singleDoc = mDoc `mplus` mTrailingDoc - | otherwise = mDoc - - -- The docstring for the last field of a data constructor. - trailingFieldDoc | singleDoc = Nothing - | otherwise = mTrailingDoc + res = goFirst all_xs goFirst [ L l (TyElOpd (HsTyVar _ _ (L _ tc))) ] = do { data_con <- tyConToDataCon l tc - ; return (pure (), (data_con, PrefixCon [], mTrailingDoc)) } + ; return (pure (), (data_con, PrefixCon [])) } goFirst ((L l (TyElOpd (HsRecTy _ fields))):xs) - | (mConDoc, xs') <- pDocPrev xs - , [ L l' (TyElOpd (HsTyVar _ _ (L _ tc))) ] <- xs' + | [ L l' (TyElOpd (HsTyVar _ _ (L _ tc))) ] <- xs = do { data_con <- tyConToDataCon l' tc - ; let mDoc = mTrailingDoc `mplus` mConDoc - ; return (pure (), (data_con, RecCon (L l fields), mDoc)) } + ; return (pure (), (data_con, RecCon (L l fields))) } goFirst [L l (TyElOpd (HsTupleTy _ HsBoxedOrConstraintTuple ts))] = return ( pure () , ( L l (getRdrName (tupleDataCon Boxed (length ts))) - , PrefixCon (map hsLinear ts) - , mTrailingDoc ) ) + , PrefixCon (map hsLinear ts) ) ) goFirst ((L l (TyElOpd t)):xs) | (_, t', addAnns, xs') <- pBangTy (L l t) xs - = go addAnns Nothing [mkLHsDocTyMaybe t' trailingFieldDoc] xs' + = go addAnns [t'] xs' goFirst (L l (TyElKindApp _ _):_) = goInfix Monoid.<> Left (l, kindAppErr) goFirst xs - = go (pure ()) mTrailingDoc [] xs + = go (pure ()) [] xs - go addAnns mLastDoc ts [ L l (TyElOpd (HsTyVar _ _ (L _ tc))) ] + go addAnns ts [ L l (TyElOpd (HsTyVar _ _ (L _ tc))) ] = do { data_con <- tyConToDataCon l tc - ; return (addAnns, (data_con, PrefixCon (map hsLinear ts), mkConDoc mLastDoc)) } - go addAnns mLastDoc ts ((L l (TyElDocPrev doc)):xs) = - go addAnns (mLastDoc `mplus` Just (L l doc)) ts xs - go addAnns mLastDoc ts ((L l (TyElOpd t)):xs) + ; return (addAnns, (data_con, PrefixCon (map hsLinear ts))) } + go addAnns ts ((L l (TyElOpd t)):xs) | (_, t', addAnns', xs') <- pBangTy (L l t) xs - , t'' <- mkLHsDocTyMaybe t' mLastDoc - = go (addAnns >> addAnns') Nothing (t'':ts) xs' - go _ _ _ ((L _ (TyElOpr _)):_) = + = go (addAnns >> addAnns') (t':ts) xs' + go _ _ ((L _ (TyElOpr _)):_) = -- Encountered an operator: backtrack to the beginning and attempt -- to parse as an infix definition. goInfix - go _ _ _ (L l (TyElKindApp _ _):_) = goInfix Monoid.<> Left (l, kindAppErr) - go _ _ _ _ = Left malformedErr + go _ _ (L l (TyElKindApp _ _):_) = goInfix Monoid.<> Left (l, kindAppErr) + go _ _ _ = Left malformedErr where malformedErr = - ( foldr combineSrcSpans noSrcSpan (map getLoc all_xs') + ( foldr combineSrcSpans noSrcSpan (map getLoc all_xs) , text "Cannot parse data constructor" <+> text "in a data/newtype declaration:" $$ - nest 2 (hsep . reverse $ map ppr all_xs')) + nest 2 (hsep . reverse $ map ppr all_xs)) goInfix = - do { let xs0 = all_xs' - ; (rhs_t, rhs_addAnns, xs1) <- pInfixSide xs0 `orErr` malformedErr - ; let (mOpDoc, xs2) = pDocPrev xs1 - ; (op, xs3) <- case xs2 of + do { let xs0 = all_xs + ; (rhs, rhs_addAnns, xs1) <- pInfixSide xs0 `orErr` malformedErr + ; (op, xs3) <- case xs1 of (L l (TyElOpr op)) : xs3 -> do { data_con <- tyConToDataCon l op ; return (data_con, xs3) } _ -> Left malformedErr - ; let (mLhsDoc, xs4) = pDocPrev xs3 - ; (lhs_t, lhs_addAnns, xs5) <- pInfixSide xs4 `orErr` malformedErr + ; (lhs, lhs_addAnns, xs5) <- pInfixSide xs3 `orErr` malformedErr ; unless (null xs5) (Left malformedErr) - ; let rhs = mkLHsDocTyMaybe rhs_t trailingFieldDoc - lhs = mkLHsDocTyMaybe lhs_t mLhsDoc - addAnns = lhs_addAnns >> rhs_addAnns - ; return (addAnns, (op, InfixCon (hsLinear lhs) (hsLinear rhs), mkConDoc mOpDoc)) } + ; let addAnns = lhs_addAnns >> rhs_addAnns + ; return (addAnns, (op, InfixCon (hsLinear lhs) (hsLinear rhs))) } where malformedErr = - ( foldr combineSrcSpans noSrcSpan (map getLoc all_xs') + ( foldr combineSrcSpans noSrcSpan (map getLoc all_xs) , text "Cannot parse an infix data constructor" <+> text "in a data/newtype declaration:" $$ - nest 2 (hsep . reverse $ map ppr all_xs')) + nest 2 (hsep . reverse $ map ppr all_xs)) kindAppErr = text "Unexpected kind application" <+> text "in a data/newtype declaration:" $$ - nest 2 (hsep . reverse $ map ppr all_xs') + nest 2 (hsep . reverse $ map ppr all_xs) --------------------------------------------------------------------------- -- | Check for monad comprehensions @@ -2902,11 +2829,6 @@ failOpFewArgs (L loc op) = where too_few = text "Operator applied to too few arguments:" <+> ppr op -failOpDocPrev :: SrcSpan -> P a -failOpDocPrev loc = addFatalError loc msg - where - msg = text "Unexpected documentation comment." - ----------------------------------------------------------------------------- -- Misc utils @@ -3140,14 +3062,6 @@ mkLHsOpTy x op y = let loc = getLoc x `combineSrcSpans` getLoc op `combineSrcSpans` getLoc y in L loc (mkHsOpTy x op y) -mkLHsDocTy :: LHsType GhcPs -> LHsDocString -> LHsType GhcPs -mkLHsDocTy t doc = - let loc = getLoc t `combineSrcSpans` getLoc doc - in L loc (HsDocTy noExtField t doc) - -mkLHsDocTyMaybe :: LHsType GhcPs -> Maybe LHsDocString -> LHsType GhcPs -mkLHsDocTyMaybe t = maybe t (mkLHsDocTy t) - ----------------------------------------------------------------------------- -- Token symbols diff --git a/compiler/GHC/Parser/PostProcess/Haddock.hs b/compiler/GHC/Parser/PostProcess/Haddock.hs index 409b0c120f..e109fada55 100644 --- a/compiler/GHC/Parser/PostProcess/Haddock.hs +++ b/compiler/GHC/Parser/PostProcess/Haddock.hs @@ -1,39 +1,1554 @@ -{-# OPTIONS_GHC -Wno-incomplete-record-updates #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE TypeApplications #-} +{-# LANGUAGE NamedFieldPuns #-} +{-# LANGUAGE GeneralizedNewtypeDeriving #-} +{-# LANGUAGE ApplicativeDo #-} +{-# LANGUAGE DeriveFunctor #-} +{-# LANGUAGE FlexibleInstances #-} +{-# LANGUAGE TypeFamilies #-} +{-# LANGUAGE ViewPatterns #-} +{-# LANGUAGE DerivingVia #-} -module GHC.Parser.PostProcess.Haddock where +{- | This module implements 'addHaddockToModule', which inserts Haddock + comments accumulated during parsing into the AST (#17544). -import GHC.Prelude +We process Haddock comments in two phases: + +1. Parse the program (via the Happy parser in `Parser.y`), generating + an AST, and (quite separately) a list of all the Haddock comments + found in the file. More precisely, the Haddock comments are + accumulated in the `hdk_comments` field of the `PState`, the parser + state (see Lexer.x): + + data PState = PState { ... + , hdk_comments :: [PsLocated HdkComment] } + + Each of these Haddock comments has a `PsSpan`, which gives the `BufPos` of + the beginning and end of the Haddock comment. + +2. Walk over the AST, attaching the Haddock comments to the correct + parts of the tree. This step is called `addHaddockToModule`, and is + implemented in this module. + + See Note [Adding Haddock comments to the syntax tree]. + +This approach codifies an important principle: + + The presence or absence of a Haddock comment should never change the parsing + of a program. + +Alternative approaches that did not work properly: + +1. Using 'RealSrcLoc' instead of 'BufPos'. This led to failures in presence + of {-# LANGUAGE CPP #-} and other sources of line pragmas. See documentation + on 'BufPos' (in GHC.Types.SrcLoc) for the details. + +2. In earlier versions of GHC, the Haddock comments were incorporated into the + Parser.y grammar. The parser constructed the AST and attached comments to it in + a single pass. See Note [Old solution: Haddock in the grammar] for the details. +-} +module GHC.Parser.PostProcess.Haddock (addHaddockToModule) where + +import GHC.Prelude hiding (mod) import GHC.Hs import GHC.Types.SrcLoc +import GHC.Driver.Session ( WarningFlag(..) ) +import GHC.Utils.Outputable hiding ( (<>) ) +import GHC.Data.Bag +import Data.Semigroup +import Data.Foldable +import Data.Traversable +import Data.Maybe import Control.Monad +import Control.Monad.Trans.State.Strict +import Control.Monad.Trans.Reader +import Control.Monad.Trans.Writer +import Data.Functor.Identity +import Data.Coerce +import qualified Data.Monoid + +import GHC.Parser.Lexer +import GHC.Utils.Misc (mergeListsBy, filterOut, mapLastM, (<&&>)) + +{- Note [Adding Haddock comments to the syntax tree] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +'addHaddock' traverses the AST in concrete syntax order, building a computation +(represented by HdkA) that reconstructs the AST but with Haddock comments +inserted in appropriate positions: + + addHaddock :: HasHaddock a => a -> HdkA a + +Consider this code example: + + f :: Int -- ^ comment on argument + -> Bool -- ^ comment on result + +In the AST, the "Int" part of this snippet is represented like this +(pseudo-code): + + L (BufSpan 6 8) (HsTyVar "Int") :: LHsType GhcPs + +And the comments are represented like this (pseudo-code): + + L (BufSpan 11 35) (HdkCommentPrev "comment on argument") + L (BufSpan 46 69) (HdkCommentPrev "comment on result") + +So when we are traversing the AST and 'addHaddock' is applied to HsTyVar "Int", +how does it know to associate it with "comment on argument" but not with +"comment on result"? + +The trick is to look in the space between syntactic elements. In the example above, +the location range in which we search for HdkCommentPrev is as follows: + + f :: Int████████████████████████ + ████Bool -- ^ comment on result + +We search for comments after HsTyVar "Int" and until the next syntactic +element, in this case HsTyVar "Bool". + +Ignoring the "->" allows us to accomodate alternative coding styles: + + f :: Int -> -- ^ comment on argument + Bool -- ^ comment on result + +Sometimes we also need to take indentation information into account. +Compare the following examples: + + class C a where + f :: a -> Int + -- ^ comment on f + + class C a where + f :: a -> Int + -- ^ comment on C + +Notice how "comment on f" and "comment on C" differ only by indentation level. + +Therefore, in order to know the location range in which the comments are applicable +to a syntactic elements, we need three nuggets of information: + 1. lower bound on the BufPos of a comment + 2. upper bound on the BufPos of a comment + 3. minimum indentation level of a comment + +This information is represented by the 'LocRange' type. + +In order to propagate this information, we have the 'HdkA' applicative. +'HdkA' is defined as follows: + + data HdkA a = HdkA (Maybe BufSpan) (HdkM a) + +The first field contains a 'BufSpan', which represents the location +span taken by a syntactic element: + + addHaddock (L bufSpan ...) = HdkA (Just bufSpan) ... + +The second field, 'HdkM', is a stateful computation that looks up Haddock +comments in the specified location range: + + HdkM a ≈ + LocRange -- The allowed location range + -> [PsLocated HdkComment] -- Unallocated comments + -> (a, -- AST with comments inserted into it + [PsLocated HdkComment]) -- Leftover comments + +The 'Applicative' instance for 'HdkA' is defined in such a way that the +location range of every computation is defined by its neighbours: + + addHaddock aaa <*> addHaddock bbb <*> addHaddock ccc + +Here, the 'LocRange' passed to the 'HdkM' computation of addHaddock bbb +is determined by the BufSpan recorded in addHaddock aaa and addHaddock ccc. + +This is why it's important to traverse the AST in the order of the concrete +syntax. In the example above we assume that aaa, bbb, ccc are ordered by location: + + * getBufSpan (getLoc aaa) < getBufSpan (getLoc bbb) + * getBufSpan (getLoc bbb) < getBufSpan (getLoc ccc) + +Violation of this assumption would lead to bugs, and care must be taken to +traverse the AST correctly. For example, when dealing with class declarations, +we have to use 'flattenBindsAndSigs' to traverse it in the correct order. +-} + +-- | Add Haddock documentation accumulated in the parser state +-- to a parsed HsModule. +-- +-- Reports badly positioned comments when -Winvalid-haddock is enabled. +addHaddockToModule :: Located HsModule -> P (Located HsModule) +addHaddockToModule lmod = do + pState <- getPState + let all_comments = toList (hdk_comments pState) + initial_hdk_st = HdkSt all_comments [] + (lmod', final_hdk_st) = runHdkA (addHaddock lmod) initial_hdk_st + hdk_warnings = collectHdkWarnings final_hdk_st + -- lmod': module with Haddock comments inserted into the AST + -- hdk_warnings: warnings accumulated during AST/comment processing + mapM_ reportHdkWarning hdk_warnings + return lmod' + +reportHdkWarning :: HdkWarn -> P () +reportHdkWarning (HdkWarnInvalidComment (L l _)) = + addWarning Opt_WarnInvalidHaddock (mkSrcSpanPs l) $ + text "A Haddock comment cannot appear in this position and will be ignored." +reportHdkWarning (HdkWarnExtraComment (L l _)) = + addWarning Opt_WarnInvalidHaddock l $ + text "Multiple Haddock comments for a single entity are not allowed." $$ + text "The extraneous comment will be ignored." + +collectHdkWarnings :: HdkSt -> [HdkWarn] +collectHdkWarnings HdkSt{ hdk_st_pending, hdk_st_warnings } = + map HdkWarnInvalidComment hdk_st_pending -- leftover Haddock comments not inserted into the AST + ++ hdk_st_warnings + +{- ********************************************************************* +* * +* addHaddock: a family of functions that processes the AST * +* in concrete syntax order, adding documentation comments to it * +* * +********************************************************************* -} + +-- HasHaddock is a convenience class for overloading the addHaddock operation. +-- Alternatively, we could define a family of monomorphic functions: +-- +-- addHaddockSomeTypeX :: SomeTypeX -> HdkA SomeTypeX +-- addHaddockAnotherTypeY :: AnotherTypeY -> HdkA AnotherTypeY +-- addHaddockOneMoreTypeZ :: OneMoreTypeZ -> HdkA OneMoreTypeZ +-- +-- But having a single name for all of them is just easier to read, and makes it clear +-- that they all are of the form t -> HdkA t for some t. +-- +-- If you need to handle a more complicated scenario that doesn't fit this +-- pattern, it's always possible to define separate functions outside of this +-- class, as is done in case of e.g. addHaddockConDeclField. +-- +-- See Note [Adding Haddock comments to the syntax tree]. +class HasHaddock a where + addHaddock :: a -> HdkA a + +instance HasHaddock a => HasHaddock [a] where + addHaddock = traverse addHaddock + +-- -- | Module header comment +-- module M ( +-- -- * Export list comment +-- Item1, +-- Item2, +-- -- * Export list comment +-- item3, +-- item4 +-- ) where +-- +instance HasHaddock (Located HsModule) where + addHaddock (L l_mod mod) = do + -- Step 1, get the module header documentation comment: + -- + -- -- | Module header comment + -- module M where + -- + -- Only do this when the module header exists. + headerDocs <- + for @Maybe (hsmodName mod) $ \(L l_name _) -> + extendHdkA l_name $ liftHdkA $ do + -- todo: register keyword location of 'module', see Note [Register keyword location] + docs <- + inLocRange (locRangeTo (getBufPos (srcSpanStart l_name))) $ + takeHdkComments mkDocNext + selectDocString docs + + -- Step 2, process documentation comments in the export list: + -- + -- module M ( + -- -- * Export list comment + -- Item1, + -- Item2, + -- -- * Export list comment + -- item3, + -- item4 + -- ) where + -- + -- Only do this when the export list exists. + hsmodExports' <- traverse @Maybe addHaddock (hsmodExports mod) + + -- Step 3, register the import section to reject invalid comments: + -- + -- import Data.Maybe + -- -- | rejected comment (cannot appear here) + -- import Data.Bool + -- + traverse_ registerHdkA (hsmodImports mod) + + -- Step 4, process declarations: + -- + -- module M where + -- -- | Comment on D + -- data D = MkD -- ^ Comment on MkD + -- data C = MkC -- ^ Comment on MkC + -- -- ^ Comment on C + -- + let layout_info = hsmodLayout mod + hsmodDecls' <- addHaddockInterleaveItems layout_info (mkDocHsDecl layout_info) (hsmodDecls mod) + + pure $ L l_mod $ + mod { hsmodExports = hsmodExports' + , hsmodDecls = hsmodDecls' + , hsmodHaddockModHeader = join @Maybe headerDocs } + +-- Only for module exports, not module imports. +-- +-- module M (a, b, c) where -- use on this [LIE GhcPs] +-- import I (a, b, c) -- do not use here! +-- +-- Imports cannot have documentation comments anyway. +instance HasHaddock (Located [LIE GhcPs]) where + addHaddock (L l_exports exports) = + extendHdkA l_exports $ do + exports' <- addHaddockInterleaveItems NoLayoutInfo mkDocIE exports + registerLocHdkA (srcLocSpan (srcSpanEnd l_exports)) -- Do not consume comments after the closing parenthesis + pure $ L l_exports exports' + +-- Needed to use 'addHaddockInterleaveItems' in 'instance HasHaddock (Located [LIE GhcPs])'. +instance HasHaddock (LIE GhcPs) where + addHaddock a = a <$ registerHdkA a + +{- Add Haddock items to a list of non-Haddock items. +Used to process export lists (with mkDocIE) and declarations (with mkDocHsDecl). + +For example: + + module M where + -- | Comment on D + data D = MkD -- ^ Comment on MkD + data C = MkC -- ^ Comment on MkC + -- ^ Comment on C + +In this case, we should produce four HsDecl items (pseudo-code): + + 1. DocD (DocCommentNext "Comment on D") + 2. TyClD (DataDecl "D" ... [ConDeclH98 "MkD" ... (Just "Comment on MkD")]) + 3. TyClD (DataDecl "C" ... [ConDeclH98 "MkC" ... (Just "Comment on MkC")]) + 4. DocD (DocCommentPrev "Comment on C") + +The inputs to addHaddockInterleaveItems are: + + * layout_info :: LayoutInfo + + In the example above, note that the indentation level inside the module is + 2 spaces. It would be represented as layout_info = VirtualBraces 2. + + It is used to delimit the search space for comments when processing + declarations. Here, we restrict indentation levels to >=(2+1), so that when + we look up comment on MkC, we get "Comment on MkC" but not "Comment on C". + + * get_doc_item :: PsLocated HdkComment -> Maybe a + + This is the function used to look up documentation comments. + In the above example, get_doc_item = mkDocHsDecl layout_info, + and it will produce the following parts of the output: + + DocD (DocCommentNext "Comment on D") + DocD (DocCommentPrev "Comment on C") + + * The list of items. These are the declarations that will be annotated with + documentation comments. + + Before processing: + TyClD (DataDecl "D" ... [ConDeclH98 "MkD" ... Nothing]) + TyClD (DataDecl "C" ... [ConDeclH98 "MkC" ... Nothing]) + + After processing: + TyClD (DataDecl "D" ... [ConDeclH98 "MkD" ... (Just "Comment on MkD")]) + TyClD (DataDecl "C" ... [ConDeclH98 "MkC" ... (Just "Comment on MkC")]) +-} +addHaddockInterleaveItems + :: forall a. + HasHaddock a + => LayoutInfo + -> (PsLocated HdkComment -> Maybe a) -- Get a documentation item + -> [a] -- Unprocessed (non-documentation) items + -> HdkA [a] -- Documentation items & processed non-documentation items +addHaddockInterleaveItems layout_info get_doc_item = go + where + go :: [a] -> HdkA [a] + go [] = liftHdkA (takeHdkComments get_doc_item) + go (item : items) = do + docItems <- liftHdkA (takeHdkComments get_doc_item) + item' <- with_layout_info (addHaddock item) + other_items <- go items + pure $ docItems ++ item':other_items + + with_layout_info :: HdkA a -> HdkA a + with_layout_info = case layout_info of + NoLayoutInfo -> id + ExplicitBraces -> id + VirtualBraces n -> + let loc_range = mempty { loc_range_col = ColumnFrom (n+1) } + in hoistHdkA (inLocRange loc_range) + +instance HasHaddock (LHsDecl GhcPs) where + addHaddock ldecl = + extendHdkA (getLoc ldecl) $ + traverse @Located addHaddock ldecl + +-- Process documentation comments *inside* a declaration, for example: +-- +-- data T = MkT -- ^ Comment on MkT (inside DataDecl) +-- f, g +-- :: Int -- ^ Comment on Int (inside TypeSig) +-- -> Bool -- ^ Comment on Bool (inside TypeSig) +-- +-- Comments that relate to the entire declaration are processed elsewhere: +-- +-- -- | Comment on T (not processed in this instance) +-- data T = MkT +-- +-- -- | Comment on f, g (not processed in this instance) +-- f, g :: Int -> Bool +-- f = ... +-- g = ... +-- +-- Such comments are inserted into the syntax tree as DocD declarations +-- by addHaddockInterleaveItems, and then associated with other declarations +-- in GHC.HsToCore.Docs (see DeclDocMap). +-- +-- In this instance, we only process comments that relate to parts of the +-- declaration, not to the declaration itself. +instance HasHaddock (HsDecl GhcPs) where + + -- Type signatures: + -- + -- f, g + -- :: Int -- ^ Comment on Int + -- -> Bool -- ^ Comment on Bool + -- + addHaddock (SigD _ (TypeSig _ names t)) = do + traverse_ registerHdkA names + t' <- addHaddock t + pure (SigD noExtField (TypeSig noExtField names t')) + + -- Pattern synonym type signatures: + -- + -- pattern MyPat + -- :: Bool -- ^ Comment on Bool + -- -> Maybe Bool -- ^ Comment on Maybe Bool + -- + addHaddock (SigD _ (PatSynSig _ names t)) = do + traverse_ registerHdkA names + t' <- addHaddock t + pure (SigD noExtField (PatSynSig noExtField names t')) + + -- Class method signatures and default signatures: + -- + -- class C x where + -- method_of_c + -- :: Maybe x -- ^ Comment on Maybe x + -- -> IO () -- ^ Comment on IO () + -- default method_of_c + -- :: Eq x + -- => Maybe x -- ^ Comment on Maybe x + -- -> IO () -- ^ Comment on IO () + -- + addHaddock (SigD _ (ClassOpSig _ is_dflt names t)) = do + traverse_ registerHdkA names + t' <- addHaddock t + pure (SigD noExtField (ClassOpSig noExtField is_dflt names t')) + + -- Data/newtype declarations: + -- + -- data T = MkT -- ^ Comment on MkT + -- A -- ^ Comment on A + -- B -- ^ Comment on B + -- + -- data G where + -- -- | Comment on MkG + -- MkG :: A -- ^ Comment on A + -- -> B -- ^ Comment on B + -- -> G + -- + -- newtype N = MkN { getN :: Natural } -- ^ Comment on N + -- deriving newtype (Eq {- ^ Comment on Eq N -}) + -- deriving newtype (Ord {- ^ Comment on Ord N -}) + -- + addHaddock (TyClD _ decl) + | DataDecl { tcdLName, tcdTyVars, tcdFixity, tcdDataDefn = defn } <- decl + = do + registerHdkA tcdLName + defn' <- addHaddock defn + pure $ + TyClD noExtField (DataDecl { + tcdDExt = noExtField, + tcdLName, tcdTyVars, tcdFixity, + tcdDataDefn = defn' }) + + -- Class declarations: + -- + -- class C a where + -- -- | Comment on the first method + -- first_method :: a -> Bool + -- second_method :: a -> String + -- -- ^ Comment on the second method + -- + addHaddock (TyClD _ decl) + | ClassDecl { tcdCExt = tcdLayout, + tcdCtxt, tcdLName, tcdTyVars, tcdFixity, tcdFDs, + tcdSigs, tcdMeths, tcdATs, tcdATDefs } <- decl + = do + registerHdkA tcdLName + -- todo: register keyword location of 'where', see Note [Register keyword location] + where_cls' <- + addHaddockInterleaveItems tcdLayout (mkDocHsDecl tcdLayout) $ + flattenBindsAndSigs (tcdMeths, tcdSigs, tcdATs, tcdATDefs, [], []) + pure $ + let (tcdMeths', tcdSigs', tcdATs', tcdATDefs', _, tcdDocs) = partitionBindsAndSigs where_cls' + decl' = ClassDecl { tcdCExt = tcdLayout + , tcdCtxt, tcdLName, tcdTyVars, tcdFixity, tcdFDs + , tcdSigs = tcdSigs' + , tcdMeths = tcdMeths' + , tcdATs = tcdATs' + , tcdATDefs = tcdATDefs' + , tcdDocs } + in TyClD noExtField decl' + + -- Data family instances: + -- + -- data instance D Bool where ... (same as data/newtype declarations) + -- data instance D Bool = ... (same as data/newtype declarations) + -- + addHaddock (InstD _ decl) + | DataFamInstD { dfid_inst } <- decl + , DataFamInstDecl { dfid_eqn } <- dfid_inst + = do + dfid_eqn' <- case dfid_eqn of + HsIB _ (FamEqn { feqn_tycon, feqn_bndrs, feqn_pats, feqn_fixity, feqn_rhs }) + -> do + registerHdkA feqn_tycon + feqn_rhs' <- addHaddock feqn_rhs + pure $ + HsIB noExtField (FamEqn { + feqn_ext = noExtField, + feqn_tycon, feqn_bndrs, feqn_pats, feqn_fixity, + feqn_rhs = feqn_rhs' }) + pure $ InstD noExtField (DataFamInstD { + dfid_ext = noExtField, + dfid_inst = DataFamInstDecl { dfid_eqn = dfid_eqn' } }) + + -- Type synonyms: + -- + -- type T = Int -- ^ Comment on Int + -- + addHaddock (TyClD _ decl) + | SynDecl { tcdLName, tcdTyVars, tcdFixity, tcdRhs } <- decl + = do + registerHdkA tcdLName + -- todo: register keyword location of '=', see Note [Register keyword location] + tcdRhs' <- addHaddock tcdRhs + pure $ + TyClD noExtField (SynDecl { + tcdSExt = noExtField, + tcdLName, tcdTyVars, tcdFixity, + tcdRhs = tcdRhs' }) + + -- Foreign imports: + -- + -- foreign import ccall unsafe + -- o :: Float -- ^ The input float + -- -> IO Float -- ^ The output float + -- + addHaddock (ForD _ decl) = do + registerHdkA (fd_name decl) + fd_sig_ty' <- addHaddock (fd_sig_ty decl) + pure $ ForD noExtField (decl{ fd_sig_ty = fd_sig_ty' }) + + -- Other declarations + addHaddock d = pure d + +-- The right-hand side of a data/newtype declaration or data family instance. +instance HasHaddock (HsDataDefn GhcPs) where + addHaddock defn@HsDataDefn{} = do + + -- Register the kind signature: + -- data D :: Type -> Type where ... + -- data instance D Bool :: Type where ... + traverse_ @Maybe registerHdkA (dd_kindSig defn) + -- todo: register keyword location of '=' or 'where', see Note [Register keyword location] + + -- Process the data constructors: + -- + -- data T + -- = MkT1 Int Bool -- ^ Comment on MkT1 + -- | MkT2 Char Int -- ^ Comment on MkT2 + -- + dd_cons' <- addHaddock (dd_cons defn) + + -- Process the deriving clauses: + -- + -- newtype N = MkN Natural + -- deriving (Eq {- ^ Comment on Eq N -}) + -- deriving (Ord {- ^ Comment on Ord N -}) + -- + dd_derivs' <- addHaddock (dd_derivs defn) + + pure $ defn { dd_cons = dd_cons', + dd_derivs = dd_derivs' } + +-- Process the deriving clauses of a data/newtype declaration. +-- Not used for standalone deriving. +instance HasHaddock (HsDeriving GhcPs) where + addHaddock lderivs = + extendHdkA (getLoc lderivs) $ + traverse @Located addHaddock lderivs + +-- Process a single deriving clause of a data/newtype declaration: +-- +-- newtype N = MkN Natural +-- deriving newtype (Eq {- ^ Comment on Eq N -}) +-- deriving (Ord {- ^ Comment on Ord N -}) via Down N +-- +-- Not used for standalone deriving. +instance HasHaddock (LHsDerivingClause GhcPs) where + addHaddock lderiv = + extendHdkA (getLoc lderiv) $ + for @Located lderiv $ \deriv -> + case deriv of + HsDerivingClause { deriv_clause_strategy, deriv_clause_tys } -> do + let + -- 'stock', 'anyclass', and 'newtype' strategies come + -- before the clause types. + -- + -- 'via' comes after. + -- + -- See tests/.../T11768.hs + (register_strategy_before, register_strategy_after) = + case deriv_clause_strategy of + Nothing -> (pure (), pure ()) + Just (L l (ViaStrategy _)) -> (pure (), registerLocHdkA l) + Just (L l _) -> (registerLocHdkA l, pure ()) + register_strategy_before + deriv_clause_tys' <- + extendHdkA (getLoc deriv_clause_tys) $ + traverse @Located addHaddock deriv_clause_tys + register_strategy_after + pure HsDerivingClause + { deriv_clause_ext = noExtField, + deriv_clause_strategy, + deriv_clause_tys = deriv_clause_tys' } + +-- Process a single data constructor declaration, which may come in one of the +-- following forms: +-- +-- 1. H98-syntax PrefixCon: +-- data T = +-- MkT -- ^ Comment on MkT +-- Int -- ^ Comment on Int +-- Bool -- ^ Comment on Bool +-- +-- 2. H98-syntax InfixCon: +-- data T = +-- Int -- ^ Comment on Int +-- :+ -- ^ Comment on (:+) +-- Bool -- ^ Comment on Bool +-- +-- 3. H98-syntax RecCon: +-- data T = +-- MkT { int_field :: Int, -- ^ Comment on int_field +-- bool_field :: Bool } -- ^ Comment on bool_field +-- +-- 4. GADT-syntax PrefixCon: +-- data T where +-- -- | Comment on MkT +-- MkT :: Int -- ^ Comment on Int +-- -> Bool -- ^ Comment on Bool +-- -> T +-- +-- 5. GADT-syntax RecCon: +-- data T where +-- -- | Comment on MkT +-- MkT :: { int_field :: Int, -- ^ Comment on int_field +-- bool_field :: Bool } -- ^ Comment on bool_field +-- -> T +-- +instance HasHaddock (LConDecl GhcPs) where + addHaddock (L l_con_decl con_decl) = + extendHdkA l_con_decl $ + case con_decl of + ConDeclGADT { con_g_ext, con_names, con_forall, con_qvars, con_mb_cxt, con_args, con_res_ty } -> do + -- discardHasInnerDocs is ok because we don't need this info for GADTs. + con_doc' <- discardHasInnerDocs $ getConDoc (getLoc (head con_names)) + con_args' <- + case con_args of + PrefixCon ts -> PrefixCon <$> addHaddock ts + RecCon (L l_rec flds) -> do + -- discardHasInnerDocs is ok because we don't need this info for GADTs. + flds' <- traverse (discardHasInnerDocs . addHaddockConDeclField) flds + pure $ RecCon (L l_rec flds') + InfixCon _ _ -> panic "ConDeclGADT InfixCon" + con_res_ty' <- addHaddock con_res_ty + pure $ L l_con_decl $ + ConDeclGADT { con_g_ext, con_names, con_forall, con_qvars, con_mb_cxt, + con_doc = con_doc', + con_args = con_args', + con_res_ty = con_res_ty' } + ConDeclH98 { con_ext, con_name, con_forall, con_ex_tvs, con_mb_cxt, con_args } -> + addConTrailingDoc (srcSpanEnd l_con_decl) $ + case con_args of + PrefixCon ts -> do + con_doc' <- getConDoc (getLoc con_name) + ts' <- traverse addHaddockConDeclFieldTy ts + pure $ L l_con_decl $ + ConDeclH98 { con_ext, con_name, con_forall, con_ex_tvs, con_mb_cxt, + con_doc = con_doc', + con_args = PrefixCon ts' } + InfixCon t1 t2 -> do + t1' <- addHaddockConDeclFieldTy t1 + con_doc' <- getConDoc (getLoc con_name) + t2' <- addHaddockConDeclFieldTy t2 + pure $ L l_con_decl $ + ConDeclH98 { con_ext, con_name, con_forall, con_ex_tvs, con_mb_cxt, + con_doc = con_doc', + con_args = InfixCon t1' t2' } + RecCon (L l_rec flds) -> do + con_doc' <- getConDoc (getLoc con_name) + flds' <- traverse addHaddockConDeclField flds + pure $ L l_con_decl $ + ConDeclH98 { con_ext, con_name, con_forall, con_ex_tvs, con_mb_cxt, + con_doc = con_doc', + con_args = RecCon (L l_rec flds') } + XConDecl (ConDeclGADTPrefixPs { con_gp_names, con_gp_ty }) -> do + -- discardHasInnerDocs is ok because we don't need this info for GADTs. + con_gp_doc' <- discardHasInnerDocs $ getConDoc (getLoc (head con_gp_names)) + con_gp_ty' <- addHaddock con_gp_ty + pure $ L l_con_decl $ + XConDecl (ConDeclGADTPrefixPs + { con_gp_names, + con_gp_ty = con_gp_ty', + con_gp_doc = con_gp_doc' }) + +-- Keep track of documentation comments on the data constructor or any of its +-- fields. +-- +-- See Note [Trailing comment on constructor declaration] +type ConHdkA = WriterT HasInnerDocs HdkA + +-- Does the data constructor declaration have any inner (non-trailing) +-- documentation comments? +-- +-- Example when HasInnerDocs is True: +-- +-- data X = +-- MkX -- ^ inner comment +-- Field1 -- ^ inner comment +-- Field2 -- ^ inner comment +-- Field3 -- ^ trailing comment +-- +-- Example when HasInnerDocs is False: +-- +-- data Y = MkY Field1 Field2 Field3 -- ^ trailing comment +-- +-- See Note [Trailing comment on constructor declaration] +newtype HasInnerDocs = HasInnerDocs Bool + deriving (Semigroup, Monoid) via Data.Monoid.Any + +-- Run ConHdkA by discarding the HasInnerDocs info when we have no use for it. +-- +-- We only do this when processing data declarations that use GADT syntax, +-- because only the H98 syntax declarations have special treatment for the +-- trailing documentation comment. +-- +-- See Note [Trailing comment on constructor declaration] +discardHasInnerDocs :: ConHdkA a -> HdkA a +discardHasInnerDocs = fmap fst . runWriterT + +-- Get the documentation comment associated with the data constructor in a +-- data/newtype declaration. +getConDoc + :: SrcSpan -- Location of the data constructor + -> ConHdkA (Maybe LHsDocString) +getConDoc l = + WriterT $ extendHdkA l $ liftHdkA $ do + mDoc <- getPrevNextDoc l + return (mDoc, HasInnerDocs (isJust mDoc)) + +-- Add documentation comment to a data constructor field. +-- Used for PrefixCon and InfixCon. +addHaddockConDeclFieldTy + :: HsScaled GhcPs (LHsType GhcPs) + -> ConHdkA (HsScaled GhcPs (LHsType GhcPs)) +addHaddockConDeclFieldTy (HsScaled mult (L l t)) = + WriterT $ extendHdkA l $ liftHdkA $ do + mDoc <- getPrevNextDoc l + return (HsScaled mult (mkLHsDocTy (L l t) mDoc), + HasInnerDocs (isJust mDoc)) + +-- Add documentation comment to a data constructor field. +-- Used for RecCon. +addHaddockConDeclField + :: LConDeclField GhcPs + -> ConHdkA (LConDeclField GhcPs) +addHaddockConDeclField (L l_fld fld) = + WriterT $ extendHdkA l_fld $ liftHdkA $ do + cd_fld_doc <- getPrevNextDoc l_fld + return (L l_fld (fld { cd_fld_doc }), + HasInnerDocs (isJust cd_fld_doc)) + +-- 1. Process a H98-syntax data constructor declaration in a context with no +-- access to the trailing documentation comment (by running the provided +-- ConHdkA computation). +-- +-- 2. Then grab the trailing comment (if it exists) and attach it where +-- appropriate: either to the data constructor itself or to its last field, +-- depending on HasInnerDocs. +-- +-- See Note [Trailing comment on constructor declaration] +addConTrailingDoc + :: SrcLoc -- The end of a data constructor declaration. + -- Any docprev comment past this point is considered trailing. + -> ConHdkA (LConDecl GhcPs) + -> HdkA (LConDecl GhcPs) +addConTrailingDoc l_sep = + hoistHdkA add_trailing_doc . runWriterT + where + add_trailing_doc + :: HdkM (LConDecl GhcPs, HasInnerDocs) + -> HdkM (LConDecl GhcPs) + add_trailing_doc m = do + (L l con_decl, HasInnerDocs has_inner_docs) <- + inLocRange (locRangeTo (getBufPos l_sep)) m + -- inLocRange delimits the context so that the inner computation + -- will not consume the trailing documentation comment. + case con_decl of + ConDeclH98{} -> do + trailingDocs <- + inLocRange (locRangeFrom (getBufPos l_sep)) $ + takeHdkComments mkDocPrev + if null trailingDocs + then return (L l con_decl) + else do + if has_inner_docs then do + let mk_doc_ty :: HsScaled GhcPs (LHsType GhcPs) + -> HdkM (HsScaled GhcPs (LHsType GhcPs)) + mk_doc_ty x@(HsScaled _ (L _ HsDocTy{})) = + -- Happens in the following case: + -- + -- data T = + -- MkT + -- -- | Comment on SomeField + -- SomeField + -- -- ^ Another comment on SomeField? (rejected) + -- + -- See tests/.../haddockExtraDocs.hs + x <$ reportExtraDocs trailingDocs + mk_doc_ty (HsScaled mult (L l' t)) = do + doc <- selectDocString trailingDocs + return $ HsScaled mult (mkLHsDocTy (L l' t) doc) + let mk_doc_fld :: LConDeclField GhcPs + -> HdkM (LConDeclField GhcPs) + mk_doc_fld x@(L _ (ConDeclField { cd_fld_doc = Just _ })) = + -- Happens in the following case: + -- + -- data T = + -- MkT { + -- -- | Comment on SomeField + -- someField :: SomeField + -- } -- ^ Another comment on SomeField? (rejected) + -- + -- See tests/.../haddockExtraDocs.hs + x <$ reportExtraDocs trailingDocs + mk_doc_fld (L l' con_fld) = do + doc <- selectDocString trailingDocs + return $ L l' (con_fld { cd_fld_doc = doc }) + con_args' <- case con_args con_decl of + x@(PrefixCon []) -> x <$ reportExtraDocs trailingDocs + x@(RecCon (L _ [])) -> x <$ reportExtraDocs trailingDocs + PrefixCon ts -> PrefixCon <$> mapLastM mk_doc_ty ts + InfixCon t1 t2 -> InfixCon t1 <$> mk_doc_ty t2 + RecCon (L l_rec flds) -> do + flds' <- mapLastM mk_doc_fld flds + return (RecCon (L l_rec flds')) + return $ L l (con_decl{ con_args = con_args' }) + else do + con_doc' <- selectDocString (con_doc con_decl `mcons` trailingDocs) + return $ L l (con_decl{ con_doc = con_doc' }) + _ -> panic "addConTrailingDoc: non-H98 ConDecl" + +{- Note [Trailing comment on constructor declaration] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +The trailing comment after a constructor declaration is associated with the +constructor itself when there are no other comments inside the declaration: + + data T = MkT A B -- ^ Comment on MkT + data T = MkT { x :: A } -- ^ Comment on MkT + +When there are other comments, the trailing comment applies to the last field: + + data T = MkT -- ^ Comment on MkT + A -- ^ Comment on A + B -- ^ Comment on B + + data T = + MkT { a :: A -- ^ Comment on a + , b :: B -- ^ Comment on b + , c :: C } -- ^ Comment on c + +This makes the trailing comment context-sensitive. Example: + data T = + -- | comment 1 + MkT Int Bool -- ^ comment 2 + + Here, "comment 2" applies to the Bool field. + But if we removed "comment 1", then "comment 2" would be apply to the data + constructor rather than its field. + +All of this applies to H98-style data declarations only. +GADTSyntax data constructors don't have any special treatment for the trailing comment. + +We implement this in two steps: + + 1. Process the data constructor declaration in a delimited context where the + trailing documentation comment is not visible. Delimiting the context is done + in addConTrailingDoc. + + When processing the declaration, track whether the constructor or any of + its fields have a documentation comment associated with them. + This is done using WriterT HasInnerDocs, see ConHdkA. + + 2. Depending on whether HasInnerDocs is True or False, attach the + trailing documentation comment to the data constructor itself + or to its last field. +-} + +instance HasHaddock a => HasHaddock (HsScaled GhcPs a) where + addHaddock (HsScaled mult a) = HsScaled mult <$> addHaddock a + +instance HasHaddock (LHsSigWcType GhcPs) where + addHaddock (HsWC _ t) = HsWC noExtField <$> addHaddock t + +instance HasHaddock (LHsSigType GhcPs) where + addHaddock (HsIB _ t) = HsIB noExtField <$> addHaddock t + +-- Process a type, adding documentation comments to function arguments +-- and the result. Many formatting styles are supported. +-- +-- my_function :: +-- forall a. +-- Eq a => +-- Maybe a -> -- ^ Comment on Maybe a (function argument) +-- Bool -> -- ^ Comment on Bool (function argument) +-- String -- ^ Comment on String (the result) +-- +-- my_function +-- :: forall a. Eq a +-- => Maybe a -- ^ Comment on Maybe a (function argument) +-- -> Bool -- ^ Comment on Bool (function argument) +-- -> String -- ^ Comment on String (the result) +-- +-- my_function :: +-- forall a. Eq a => +-- -- | Comment on Maybe a (function argument) +-- Maybe a -> +-- -- | Comment on Bool (function argument) +-- Bool -> +-- -- | Comment on String (the result) +-- String +-- +-- This is achieved by simply ignoring (not registering the location of) the +-- function arrow (->). +instance HasHaddock (LHsType GhcPs) where + addHaddock (L l t) = + extendHdkA l $ + case t of + + -- forall a b c. t + HsForAllTy _ tele body -> do + registerLocHdkA (getForAllTeleLoc tele) + body' <- addHaddock body + pure $ L l (HsForAllTy noExtField tele body') + + -- (Eq a, Num a) => t + HsQualTy _ lhs rhs -> do + registerHdkA lhs + rhs' <- addHaddock rhs + pure $ L l (HsQualTy noExtField lhs rhs') + + -- arg -> res + HsFunTy _ mult lhs rhs -> do + lhs' <- addHaddock lhs + rhs' <- addHaddock rhs + pure $ L l (HsFunTy noExtField mult lhs' rhs') + + -- other types + _ -> liftHdkA $ do + mDoc <- getPrevNextDoc l + return (mkLHsDocTy (L l t) mDoc) + +{- ********************************************************************* +* * +* HdkA: a layer over HdkM that propagates location information * +* * +********************************************************************* -} + +-- See Note [Adding Haddock comments to the syntax tree]. +-- +-- 'HdkA' provides a way to propagate location information from surrounding +-- computations: +-- +-- left_neighbour <*> HdkA inner_span inner_m <*> right_neighbour +-- +-- Here, the following holds: +-- +-- * the 'left_neighbour' will only see Haddock comments until 'bufSpanStart' of 'inner_span' +-- * the 'right_neighbour' will only see Haddock comments after 'bufSpanEnd' of 'inner_span' +-- * the 'inner_m' will only see Haddock comments between its 'left_neighbour' and its 'right_neighbour' +-- +-- In other words, every computation: +-- +-- * delimits the surrounding computations +-- * is delimited by the surrounding computations +-- +-- Therefore, a 'HdkA' computation must be always considered in the context in +-- which it is used. +data HdkA a = + HdkA + !(Maybe BufSpan) -- Just b <=> BufSpan occupied by the processed AST element. + -- The surrounding computations will not look inside. + -- + -- Nothing <=> No BufSpan (e.g. when the HdkA is constructed by 'pure' or 'liftHdkA'). + -- The surrounding computations are not delimited. + + !(HdkM a) -- The stateful computation that looks up Haddock comments and + -- adds them to the resulting AST node. + + deriving (Functor) + +instance Applicative HdkA where + HdkA l1 m1 <*> HdkA l2 m2 = + HdkA + (l1 <> l2) -- The combined BufSpan that covers both subcomputations. + -- + -- The Semigroup instance for Maybe quite conveniently does the right thing: + -- Nothing <> b = b + -- a <> Nothing = a + -- Just a <> Just b = Just (a <> b) + + (delim1 m1 <*> delim2 m2) -- Stateful computations are run in left-to-right order, + -- without any smart reordering strategy. So users of this + -- operation must take care to traverse the AST + -- in concrete syntax order. + -- See Note [Smart reordering in HdkA (or lack of thereof)] + -- + -- Each computation is delimited ("sandboxed") + -- in a way that it doesn't see any Haddock + -- comments past the neighbouring AST node. + -- These delim1/delim2 are key to how HdkA operates. + where + -- Delimit the LHS by the location information from the RHS + delim1 = inLocRange (locRangeTo (fmap @Maybe bufSpanStart l2)) + -- Delimit the RHS by the location information from the LHS + delim2 = inLocRange (locRangeFrom (fmap @Maybe bufSpanEnd l1)) + + pure a = + -- Return a value without performing any stateful computation, and without + -- any delimiting effect on the surrounding computations. + liftHdkA (pure a) + +{- Note [Smart reordering in HdkA (or lack of thereof)] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +When traversing the AST, the user must take care to traverse it in concrete +syntax order. + +For example, when processing HsFunTy, it's important to get it right and write +it like so: + + HsFunTy _ mult lhs rhs -> do + lhs' <- addHaddock lhs + rhs' <- addHaddock rhs + pure $ L l (HsFunTy noExtField mult lhs' rhs') + +Rather than like so: + + HsFunTy _ mult lhs rhs -> do + rhs' <- addHaddock rhs -- bad! wrong order + lhs' <- addHaddock lhs -- bad! wrong order + pure $ L l (HsFunTy noExtField mult lhs' rhs') + +This is somewhat bug-prone, so we could try to fix this with some Applicative +magic. When we define (<*>) for HdkA, why not reorder the computations as +necessary? In pseudo-code: + + a1 <*> a2 | a1 `before` a2 = ... normal processing ... + | otherwise = a1 <**> a2 + +While this trick could work for any two *adjacent* AST elements out of order +(as in HsFunTy example above), it would fail in more elaborate scenarios (e.g. +processing a list of declarations out of order). + +If it's not obvious why this trick doesn't work, ponder this: it's a bit like trying to get +a sorted list by defining a 'smart' concatenation operator in the following manner: + + a ?++ b | a <= b = a ++ b + | otherwise = b ++ a + +At first glance it seems to work: + + ghci> [1] ?++ [2] ?++ [3] + [1,2,3] + + ghci> [2] ?++ [1] ?++ [3] + [1,2,3] -- wow, sorted! + +But it actually doesn't: + + ghci> [3] ?++ [1] ?++ [2] + [1,3,2] -- not sorted... +-} + +-- Run a HdkA computation in an unrestricted LocRange. This is only used at the +-- top level to run the final computation for the entire module. +runHdkA :: HdkA a -> HdkSt -> (a, HdkSt) +runHdkA (HdkA _ m) = unHdkM m mempty + +-- Let the neighbours know about an item at this location. +-- +-- Consider this example: +-- +-- class -- | peculiarly placed comment +-- MyClass a where +-- my_method :: a -> a +-- +-- How do we know to reject the "peculiarly placed comment" instead of +-- associating it with my_method? Its indentation level matches. +-- +-- But clearly, there's "MyClass a where" separating the comment and my_method. +-- To take it into account, we must register its location using registerLocHdkA +-- or registerHdkA. +-- +-- See Note [Register keyword location]. +-- See Note [Adding Haddock comments to the syntax tree]. +registerLocHdkA :: SrcSpan -> HdkA () +registerLocHdkA l = HdkA (getBufSpan l) (pure ()) + +-- Let the neighbours know about an item at this location. +-- A small wrapper over registerLocHdkA. +-- +-- See Note [Adding Haddock comments to the syntax tree]. +registerHdkA :: Located a -> HdkA () +registerHdkA a = registerLocHdkA (getLoc a) + +-- Modify the action of a HdkA computation. +hoistHdkA :: (HdkM a -> HdkM b) -> HdkA a -> HdkA b +hoistHdkA f (HdkA l m) = HdkA l (f m) + +-- Lift a HdkM computation to HdkA. +liftHdkA :: HdkM a -> HdkA a +liftHdkA = HdkA mempty + +-- Extend the declared location span of a 'HdkA' computation: +-- +-- left_neighbour <*> extendHdkA l x <*> right_neighbour +-- +-- The declared location of 'x' now includes 'l', so that the surrounding +-- computations 'left_neighbour' and 'right_neighbour' will not look for +-- Haddock comments inside the 'l' location span. +extendHdkA :: SrcSpan -> HdkA a -> HdkA a +extendHdkA l' (HdkA l m) = HdkA (getBufSpan l' <> l) m + + +{- ********************************************************************* +* * +* HdkM: a stateful computation to associate * +* accumulated documentation comments with AST nodes * +* * +********************************************************************* -} + +-- The state of 'HdkM' contains a list of pending Haddock comments. We go +-- over the AST, looking up these comments using 'takeHdkComments' and removing +-- them from the state. The remaining, un-removed ones are ignored with a +-- warning (-Winvalid-haddock). Also, using a state means we never use the same +-- Haddock twice. +-- +-- See Note [Adding Haddock comments to the syntax tree]. +newtype HdkM a = HdkM (ReaderT LocRange (State HdkSt) a) + deriving (Functor, Applicative, Monad) + +-- | The state of HdkM. +data HdkSt = + HdkSt + { hdk_st_pending :: [PsLocated HdkComment] + -- a list of pending (unassociated with an AST node) + -- Haddock comments, sorted by location: in ascending order of the starting 'BufPos' + , hdk_st_warnings :: [HdkWarn] + -- accumulated warnings (order doesn't matter) + } + +-- | Warnings accumulated in HdkM. +data HdkWarn + = HdkWarnInvalidComment (PsLocated HdkComment) + | HdkWarnExtraComment LHsDocString + +-- 'HdkM' without newtype wrapping/unwrapping. +type InlineHdkM a = LocRange -> HdkSt -> (a, HdkSt) + +mkHdkM :: InlineHdkM a -> HdkM a +unHdkM :: HdkM a -> InlineHdkM a +mkHdkM = coerce +unHdkM = coerce + +-- Restrict the range in which a HdkM computation will look up comments: +-- +-- inLocRange r1 $ +-- inLocRange r2 $ +-- takeHdkComments ... -- Only takes comments in the (r1 <> r2) location range. +-- +-- Note that it does not blindly override the range but tightens it using (<>). +-- At many use sites, you will see something along the lines of: +-- +-- inLocRange (locRangeTo end_pos) $ ... +-- +-- And 'locRangeTo' defines a location range from the start of the file to +-- 'end_pos'. This does not mean that we now search for every comment from the +-- start of the file, as this restriction will be combined with other +-- restrictions. Somewhere up the callstack we might have: +-- +-- inLocRange (locRangeFrom start_pos) $ ... +-- +-- The net result is that the location range is delimited by 'start_pos' on +-- one side and by 'end_pos' on the other side. +-- +-- In 'HdkA', every (<*>) may restrict the location range of its +-- subcomputations. +inLocRange :: LocRange -> HdkM a -> HdkM a +inLocRange r (HdkM m) = HdkM (local (mappend r) m) + +-- Take the Haddock comments that satisfy the matching function, +-- leaving the rest pending. +takeHdkComments :: forall a. (PsLocated HdkComment -> Maybe a) -> HdkM [a] +takeHdkComments f = + mkHdkM $ + \(LocRange hdk_from hdk_to hdk_col) -> + \hdk_st -> + let + comments = hdk_st_pending hdk_st + (comments_before_range, comments') = break (is_after hdk_from) comments + (comments_in_range, comments_after_range) = span (is_before hdk_to <&&> is_indented hdk_col) comments' + (items, other_comments) = foldr add_comment ([], []) comments_in_range + remaining_comments = comments_before_range ++ other_comments ++ comments_after_range + hdk_st' = hdk_st{ hdk_st_pending = remaining_comments } + in + (items, hdk_st') + where + is_after StartOfFile _ = True + is_after (StartLoc l) (L l_comment _) = bufSpanStart (psBufSpan l_comment) >= l + is_before EndOfFile _ = True + is_before (EndLoc l) (L l_comment _) = bufSpanStart (psBufSpan l_comment) <= l + is_indented (ColumnFrom n) (L l_comment _) = srcSpanStartCol (psRealSpan l_comment) >= n + + add_comment + :: PsLocated HdkComment + -> ([a], [PsLocated HdkComment]) + -> ([a], [PsLocated HdkComment]) + add_comment hdk_comment (items, other_hdk_comments) = + case f hdk_comment of + Just item -> (item : items, other_hdk_comments) + Nothing -> (items, hdk_comment : other_hdk_comments) + +-- Get the docnext or docprev comment for an AST node at the given source span. +getPrevNextDoc :: SrcSpan -> HdkM (Maybe LHsDocString) +getPrevNextDoc l = do + let (l_start, l_end) = (srcSpanStart l, srcSpanEnd l) + before_t = locRangeTo (getBufPos l_start) + after_t = locRangeFrom (getBufPos l_end) + nextDocs <- inLocRange before_t $ takeHdkComments mkDocNext + prevDocs <- inLocRange after_t $ takeHdkComments mkDocPrev + selectDocString (nextDocs ++ prevDocs) + +appendHdkWarning :: HdkWarn -> HdkM () +appendHdkWarning e = HdkM (ReaderT (\_ -> modify append_warn)) + where + append_warn hdk_st = hdk_st { hdk_st_warnings = e : hdk_st_warnings hdk_st } + +selectDocString :: [LHsDocString] -> HdkM (Maybe LHsDocString) +selectDocString = select . filterOut (isEmptyDocString . unLoc) + where + select [] = return Nothing + select [doc] = return (Just doc) + select (doc : extra_docs) = do + reportExtraDocs extra_docs + return (Just doc) + +reportExtraDocs :: [LHsDocString] -> HdkM () +reportExtraDocs = + traverse_ (\extra_doc -> appendHdkWarning (HdkWarnExtraComment extra_doc)) + +{- ********************************************************************* +* * +* Matching functions for extracting documentation comments * +* * +********************************************************************* -} + +mkDocHsDecl :: LayoutInfo -> PsLocated HdkComment -> Maybe (LHsDecl GhcPs) +mkDocHsDecl layout_info a = mapLoc (DocD noExtField) <$> mkDocDecl layout_info a + +mkDocDecl :: LayoutInfo -> PsLocated HdkComment -> Maybe LDocDecl +mkDocDecl layout_info (L l_comment hdk_comment) + | indent_mismatch = Nothing + | otherwise = + Just $ L (mkSrcSpanPs l_comment) $ + case hdk_comment of + HdkCommentNext doc -> DocCommentNext doc + HdkCommentPrev doc -> DocCommentPrev doc + HdkCommentNamed s doc -> DocCommentNamed s doc + HdkCommentSection n doc -> DocGroup n doc + where + -- 'indent_mismatch' checks if the documentation comment has the exact + -- indentation level expected by the parent node. + -- + -- For example, when extracting documentation comments between class + -- method declarations, there are three cases to consider: + -- + -- 1. Indent matches (indent_mismatch=False): + -- class C a where + -- f :: a -> a + -- -- ^ doc on f + -- + -- 2. Indented too much (indent_mismatch=True): + -- class C a where + -- f :: a -> a + -- -- ^ indent mismatch + -- + -- 3. Indented too little (indent_mismatch=True): + -- class C a where + -- f :: a -> a + -- -- ^ indent mismatch + indent_mismatch = case layout_info of + NoLayoutInfo -> False + ExplicitBraces -> False + VirtualBraces n -> n /= srcSpanStartCol (psRealSpan l_comment) + +mkDocIE :: PsLocated HdkComment -> Maybe (LIE GhcPs) +mkDocIE (L l_comment hdk_comment) = + case hdk_comment of + HdkCommentSection n doc -> Just $ L l (IEGroup noExtField n doc) + HdkCommentNamed s _doc -> Just $ L l (IEDocNamed noExtField s) + HdkCommentNext doc -> Just $ L l (IEDoc noExtField doc) + _ -> Nothing + where l = mkSrcSpanPs l_comment + +mkDocNext :: PsLocated HdkComment -> Maybe LHsDocString +mkDocNext (L l (HdkCommentNext doc)) = Just $ L (mkSrcSpanPs l) doc +mkDocNext _ = Nothing + +mkDocPrev :: PsLocated HdkComment -> Maybe LHsDocString +mkDocPrev (L l (HdkCommentPrev doc)) = Just $ L (mkSrcSpanPs l) doc +mkDocPrev _ = Nothing + + +{- ********************************************************************* +* * +* LocRange: a location range * +* * +********************************************************************* -} + +-- A location range for extracting documentation comments. +data LocRange = + LocRange + { loc_range_from :: !LowerLocBound, + loc_range_to :: !UpperLocBound, + loc_range_col :: !ColumnBound } + +instance Semigroup LocRange where + LocRange from1 to1 col1 <> LocRange from2 to2 col2 = + LocRange (from1 <> from2) (to1 <> to2) (col1 <> col2) + +instance Monoid LocRange where + mempty = LocRange mempty mempty mempty + +-- The location range from the specified position to the end of the file. +locRangeFrom :: Maybe BufPos -> LocRange +locRangeFrom (Just l) = mempty { loc_range_from = StartLoc l } +locRangeFrom Nothing = mempty + +-- The location range from the start of the file to the specified position. +locRangeTo :: Maybe BufPos -> LocRange +locRangeTo (Just l) = mempty { loc_range_to = EndLoc l } +locRangeTo Nothing = mempty + +-- Represents a predicate on BufPos: +-- +-- LowerLocBound | BufPos -> Bool +-- --------------+----------------- +-- StartOfFile | const True +-- StartLoc p | (>= p) +-- +-- The semigroup instance corresponds to (&&). +-- +-- We don't use the BufPos -> Bool representation +-- as it would lead to redundant checks. +-- +-- That is, instead of +-- +-- (pos >= 20) && (pos >= 30) && (pos >= 40) +-- +-- We'd rather only do the (>=40) check. So we reify the predicate to make +-- sure we only check for the most restrictive bound. +data LowerLocBound = StartOfFile | StartLoc !BufPos + +instance Semigroup LowerLocBound where + StartOfFile <> l = l + l <> StartOfFile = l + StartLoc l1 <> StartLoc l2 = StartLoc (max l1 l2) + +instance Monoid LowerLocBound where + mempty = StartOfFile + +-- Represents a predicate on BufPos: +-- +-- UpperLocBound | BufPos -> Bool +-- --------------+----------------- +-- EndOfFile | const True +-- EndLoc p | (<= p) +-- +-- The semigroup instance corresponds to (&&). +-- +-- We don't use the BufPos -> Bool representation +-- as it would lead to redundant checks. +-- +-- That is, instead of +-- +-- (pos <= 40) && (pos <= 30) && (pos <= 20) +-- +-- We'd rather only do the (<=20) check. So we reify the predicate to make +-- sure we only check for the most restrictive bound. +data UpperLocBound = EndOfFile | EndLoc !BufPos + +instance Semigroup UpperLocBound where + EndOfFile <> l = l + l <> EndOfFile = l + EndLoc l1 <> EndLoc l2 = EndLoc (min l1 l2) + +instance Monoid UpperLocBound where + mempty = EndOfFile + +-- | Represents a predicate on the column number. +-- +-- ColumnBound | Int -> Bool +-- --------------+----------------- +-- ColumnFrom n | (>=n) +-- +-- The semigroup instance corresponds to (&&). +-- +newtype ColumnBound = ColumnFrom Int -- n >= GHC.Types.SrcLoc.leftmostColumn + +instance Semigroup ColumnBound where + ColumnFrom n <> ColumnFrom m = ColumnFrom (max n m) + +instance Monoid ColumnBound where + mempty = ColumnFrom leftmostColumn + + +{- ********************************************************************* +* * +* AST manipulation utilities * +* * +********************************************************************* -} + +mkLHsDocTy :: LHsType GhcPs -> Maybe LHsDocString -> LHsType GhcPs +mkLHsDocTy t Nothing = t +mkLHsDocTy t (Just doc) = L (getLoc t) (HsDocTy noExtField t doc) + +getForAllTeleLoc :: HsForAllTelescope GhcPs -> SrcSpan +getForAllTeleLoc tele = + foldr combineSrcSpans noSrcSpan $ + case tele of + HsForAllVis{ hsf_vis_bndrs } -> map getLoc hsf_vis_bndrs + HsForAllInvis { hsf_invis_bndrs } -> map getLoc hsf_invis_bndrs + +-- | The inverse of 'partitionBindsAndSigs' that merges partitioned items back +-- into a flat list. Elements are put back into the order in which they +-- appeared in the original program before partitioning, using BufPos to order +-- them. +-- +-- Precondition (unchecked): the input lists are already sorted. +flattenBindsAndSigs + :: (LHsBinds GhcPs, [LSig GhcPs], [LFamilyDecl GhcPs], + [LTyFamInstDecl GhcPs], [LDataFamInstDecl GhcPs], [LDocDecl]) + -> [LHsDecl GhcPs] +flattenBindsAndSigs (all_bs, all_ss, all_ts, all_tfis, all_dfis, all_docs) = + -- 'cmpBufSpan' is safe here with the following assumptions: + -- + -- * 'LHsDecl' produced by 'decl_cls' in Parser.y always have a 'BufSpan' + -- * 'partitionBindsAndSigs' does not discard this 'BufSpan' + mergeListsBy cmpBufSpan [ + mapLL (\b -> ValD noExtField b) (bagToList all_bs), + mapLL (\s -> SigD noExtField s) all_ss, + mapLL (\t -> TyClD noExtField (FamDecl noExtField t)) all_ts, + mapLL (\tfi -> InstD noExtField (TyFamInstD noExtField tfi)) all_tfis, + mapLL (\dfi -> InstD noExtField (DataFamInstD noExtField dfi)) all_dfis, + mapLL (\d -> DocD noExtField d) all_docs + ] + +{- ********************************************************************* +* * +* General purpose utilities * +* * +********************************************************************* -} + +-- Cons an element to a list, if exists. +mcons :: Maybe a -> [a] -> [a] +mcons = maybe id (:) + +-- Map a function over a list of located items. +mapLL :: (a -> b) -> [Located a] -> [Located b] +mapLL f = map (mapLoc f) + +{- Note [Old solution: Haddock in the grammar] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +In the past, Haddock comments were incorporated into the grammar (Parser.y). +This led to excessive complexity and duplication. + +For example, here's the grammar production for types without documentation: + + type : btype + | btype '->' ctype + +To support Haddock, we had to also maintain an additional grammar production +for types with documentation on function arguments and function result: + + typedoc : btype + | btype docprev + | docnext btype + | btype '->' ctypedoc + | btype docprev '->' ctypedoc + | docnext btype '->' ctypedoc + +Sometimes handling documentation comments during parsing led to bugs (#17561), +and sometimes it simply made it hard to modify and extend the grammar. + +Another issue was that sometimes Haddock would fail to parse code +that GHC could parse succesfully: --- ----------------------------------------------------------------------------- --- Adding documentation to record fields (used in parsing). + class BadIndent where + f :: a -> Int + -- ^ comment + g :: a -> Int -addFieldDoc :: LConDeclField GhcPs -> Maybe LHsDocString -> LConDeclField GhcPs -addFieldDoc (L l fld) doc - = L l (fld { cd_fld_doc = cd_fld_doc fld `mplus` doc }) +This declaration was accepted by ghc but rejected by ghc -haddock. +-} -addFieldDocs :: [LConDeclField GhcPs] -> Maybe LHsDocString -> [LConDeclField GhcPs] -addFieldDocs [] _ = [] -addFieldDocs (x:xs) doc = addFieldDoc x doc : xs +{- Note [Register keyword location] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +At the moment, 'addHaddock' erroneously associates some comments with +constructs that are separated by a keyword. For example: + data Foo -- | Comment for MkFoo + where MkFoo :: Foo -addConDoc :: LConDecl GhcPs -> Maybe LHsDocString -> LConDecl GhcPs -addConDoc decl Nothing = decl -addConDoc (L p c) doc = L p $ case c of - ConDeclH98 { con_doc = old_doc } -> c { con_doc = old_doc `mplus` doc } - ConDeclGADT { con_doc = old_doc } -> c { con_doc = old_doc `mplus` doc } - XConDecl x@(ConDeclGADTPrefixPs { con_gp_doc = old_doc }) -> - XConDecl (x { con_gp_doc = old_doc `mplus` doc }) +The issue stems from the lack of location information for keywords. We could +utilize API Annotations for this purpose, but not without modification. For +example, API Annotations operate on RealSrcSpan, whereas we need BufSpan. -addConDocs :: [LConDecl GhcPs] -> Maybe LHsDocString -> [LConDecl GhcPs] -addConDocs [] _ = [] -addConDocs [x] doc = [addConDoc x doc] -addConDocs (x:xs) doc = x : addConDocs xs doc +Also, there's work towards making API Annotations available in-tree (not in +a separate Map), see #17638. This change should make the fix very easy (it +is not as easy with the current design). -addConDocFirst :: [LConDecl GhcPs] -> Maybe LHsDocString -> [LConDecl GhcPs] -addConDocFirst [] _ = [] -addConDocFirst (x:xs) doc = addConDoc x doc : xs +See also testsuite/tests/haddock/should_compile_flag_haddock/T17544_kw.hs +-} |