summaryrefslogtreecommitdiff
path: root/compiler/GHC/Parser
diff options
context:
space:
mode:
authorVladislav Zavialov <vlad.z.4096@gmail.com>2019-12-05 03:06:40 +0300
committerBen Gamari <ben@smart-cactus.org>2020-07-21 14:50:01 -0400
commit19e80b9af252eee760dc047765a9930ef00067ec (patch)
treecb45fce4b1e74e1a82c5bd926fda0e92de1964c1 /compiler/GHC/Parser
parent58235d46bd4e9fbf69bd82969b29cd9c6ab051e1 (diff)
downloadhaskell-19e80b9af252eee760dc047765a9930ef00067ec.tar.gz
Accumulate Haddock comments in P (#17544, #17561, #8944)
Haddock comments are, first and foremost, comments. It's very annoying to incorporate them into the grammar. We can take advantage of an important property: adding a Haddock comment does not change the parse tree in any way other than wrapping some nodes in HsDocTy and the like (and if it does, that's a bug). This patch implements the following: * Accumulate Haddock comments with their locations in the P monad. This is handled in the lexer. * After parsing, do a pass over the AST to associate Haddock comments with AST nodes using location info. * Report the leftover comments to the user as a warning (-Winvalid-haddock).
Diffstat (limited to 'compiler/GHC/Parser')
-rw-r--r--compiler/GHC/Parser/Lexer.x106
-rw-r--r--compiler/GHC/Parser/PostProcess.hs214
-rw-r--r--compiler/GHC/Parser/PostProcess/Haddock.hs1565
3 files changed, 1676 insertions, 209 deletions
diff --git a/compiler/GHC/Parser/Lexer.x b/compiler/GHC/Parser/Lexer.x
index ef9f1803bf..7265e1dffb 100644
--- a/compiler/GHC/Parser/Lexer.x
+++ b/compiler/GHC/Parser/Lexer.x
@@ -66,7 +66,8 @@ module GHC.Parser.Lexer (
lexTokenStream,
AddAnn(..),mkParensApiAnn,
addAnnsAt,
- commentToAnnotation
+ commentToAnnotation,
+ HdkComment(..),
) where
import GHC.Prelude
@@ -97,6 +98,8 @@ import GHC.Utils.Outputable
import GHC.Data.StringBuffer
import GHC.Data.FastString
import GHC.Types.Unique.FM
+import GHC.Data.Maybe
+import GHC.Data.OrdList
import GHC.Utils.Misc ( readRational, readHexRational )
-- compiler/main
@@ -109,6 +112,7 @@ import GHC.Unit
import GHC.Types.Basic ( InlineSpec(..), RuleMatchInfo(..),
IntegralLit(..), FractionalLit(..),
SourceText(..) )
+import GHC.Hs.Doc
-- compiler/parser
import GHC.Parser.CharClass
@@ -363,10 +367,8 @@ $tab { warnTab }
-- Haddock comments
-<0,option_prags> {
- "-- " $docsym / { ifExtension HaddockBit } { multiline_doc_comment }
- "{-" \ ? $docsym / { ifExtension HaddockBit } { nested_doc_comment }
-}
+"-- " $docsym / { ifExtension HaddockBit } { multiline_doc_comment }
+"{-" \ ? $docsym / { ifExtension HaddockBit } { nested_doc_comment }
-- "special" symbols
@@ -1271,11 +1273,8 @@ nested_comment cont span buf len = do
go (reverse $ lexemeToString buf len) (1::Int) input
where
go commentAcc 0 input = do
- setInput input
- b <- getBit RawTokenStreamBit
- if b
- then docCommentEnd input commentAcc ITblockComment buf span
- else cont
+ let finalizeComment str = (Nothing, ITblockComment str)
+ commentEnd cont input commentAcc finalizeComment buf span
go commentAcc n input = case alexGetChar' input of
Nothing -> errBrace input (psRealSpan span)
Just ('-',input) -> case alexGetChar' input of
@@ -1365,24 +1364,37 @@ return control to parseNestedPragma by returning the ITcomment_line_prag token.
See #314 for more background on the bug this fixes.
-}
-withLexedDocType :: (AlexInput -> (String -> Token) -> Bool -> P (PsLocated Token))
+withLexedDocType :: (AlexInput -> (String -> (HdkComment, Token)) -> Bool -> P (PsLocated Token))
-> P (PsLocated Token)
withLexedDocType lexDocComment = do
input@(AI _ buf) <- getInput
case prevChar buf ' ' of
-- The `Bool` argument to lexDocComment signals whether or not the next
-- line of input might also belong to this doc comment.
- '|' -> lexDocComment input ITdocCommentNext True
- '^' -> lexDocComment input ITdocCommentPrev True
- '$' -> lexDocComment input ITdocCommentNamed True
+ '|' -> lexDocComment input mkHdkCommentNext True
+ '^' -> lexDocComment input mkHdkCommentPrev True
+ '$' -> lexDocComment input mkHdkCommentNamed True
'*' -> lexDocSection 1 input
_ -> panic "withLexedDocType: Bad doc type"
where
lexDocSection n input = case alexGetChar' input of
Just ('*', input) -> lexDocSection (n+1) input
- Just (_, _) -> lexDocComment input (ITdocSection n) False
+ Just (_, _) -> lexDocComment input (mkHdkCommentSection n) False
Nothing -> do setInput input; lexToken -- eof reached, lex it normally
+mkHdkCommentNext, mkHdkCommentPrev :: String -> (HdkComment, Token)
+mkHdkCommentNext str = (HdkCommentNext (mkHsDocString str), ITdocCommentNext str)
+mkHdkCommentPrev str = (HdkCommentPrev (mkHsDocString str), ITdocCommentPrev str)
+
+mkHdkCommentNamed :: String -> (HdkComment, Token)
+mkHdkCommentNamed str =
+ let (name, rest) = break isSpace str
+ in (HdkCommentNamed name (mkHsDocString rest), ITdocCommentNamed str)
+
+mkHdkCommentSection :: Int -> String -> (HdkComment, Token)
+mkHdkCommentSection n str =
+ (HdkCommentSection n (mkHsDocString str), ITdocSection n str)
+
-- RULES pragmas turn on the forall and '.' keywords, and we turn them
-- off again at the end of the pragma.
rulePrag :: Action
@@ -1425,17 +1437,34 @@ endPrag span _buf _len = do
-- it writes the wrong token length to the parser state. This function is
-- called afterwards, so it can just update the state.
-docCommentEnd :: AlexInput -> String -> (String -> Token) -> StringBuffer ->
- PsSpan -> P (PsLocated Token)
-docCommentEnd input commentAcc docType buf span = do
+commentEnd :: P (PsLocated Token)
+ -> AlexInput
+ -> String
+ -> (String -> (Maybe HdkComment, Token))
+ -> StringBuffer
+ -> PsSpan
+ -> P (PsLocated Token)
+commentEnd cont input commentAcc finalizeComment buf span = do
setInput input
let (AI loc nextBuf) = input
comment = reverse commentAcc
span' = mkPsSpan (psSpanStart span) loc
last_len = byteDiff buf nextBuf
-
span `seq` setLastToken span' last_len
- return (L span' (docType comment))
+ let (m_hdk_comment, hdk_token) = finalizeComment comment
+ whenIsJust m_hdk_comment $ \hdk_comment ->
+ P $ \s -> POk (s {hdk_comments = hdk_comments s `snocOL` L span' hdk_comment}) ()
+ b <- getBit RawTokenStreamBit
+ if b then return (L span' hdk_token)
+ else cont
+
+docCommentEnd :: AlexInput -> String -> (String -> (HdkComment, Token)) -> StringBuffer ->
+ PsSpan -> P (PsLocated Token)
+docCommentEnd input commentAcc docType buf span = do
+ let finalizeComment str =
+ let (hdk_comment, token) = docType str
+ in (Just hdk_comment, token)
+ commentEnd lexToken input commentAcc finalizeComment buf span
errBrace :: AlexInput -> RealSrcSpan -> P a
errBrace (AI end _) span = failLocMsgP (realSrcSpanStart span) (psRealLoc end) "unterminated `{-'"
@@ -2170,6 +2199,15 @@ data ParserFlags = ParserFlags {
, pExtsBitmap :: !ExtsBitmap -- ^ bitmap of permitted extensions
}
+-- | Haddock comment as produced by the lexer. These are accumulated in
+-- 'PState' and then processed in "GHC.Parser.PostProcess.Haddock".
+data HdkComment
+ = HdkCommentNext HsDocString
+ | HdkCommentPrev HsDocString
+ | HdkCommentNamed String HsDocString
+ | HdkCommentSection Int HsDocString
+ deriving Show
+
data PState = PState {
buffer :: StringBuffer,
options :: ParserFlags,
@@ -2211,7 +2249,13 @@ data PState = PState {
annotations :: [(ApiAnnKey,[RealSrcSpan])],
eof_pos :: Maybe RealSrcSpan,
comment_q :: [RealLocated AnnotationComment],
- annotations_comments :: [(RealSrcSpan,[RealLocated AnnotationComment])]
+ annotations_comments :: [(RealSrcSpan,[RealLocated AnnotationComment])],
+
+ -- Haddock comments accumulated in ascending order of their location
+ -- (BufPos). We use OrdList to get O(1) snoc.
+ --
+ -- See Note [Adding Haddock comments to the syntax tree] in GHC.Parser.PostProcess.Haddock
+ hdk_comments :: OrdList (PsLocated HdkComment)
}
-- last_loc and last_len are used when generating error messages,
-- and in pushCurrentContext only. Sigh, if only Happy passed the
@@ -2698,7 +2742,8 @@ mkPStatePure options buf loc =
annotations = [],
eof_pos = Nothing,
comment_q = [],
- annotations_comments = []
+ annotations_comments = [],
+ hdk_comments = nilOL
}
where init_loc = PsLoc loc (BufPos 0)
@@ -2917,10 +2962,6 @@ lexer queueComments cont = do
(L span tok) <- lexTokenFun
--trace ("token: " ++ show tok) $ do
- if (queueComments && isDocComment tok)
- then queueComment (L (psRealSpan span) tok)
- else return ()
-
if (queueComments && isComment tok)
then queueComment (L (psRealSpan span) tok) >> lexer queueComments cont
else cont (L (mkSrcSpanPs span) tok)
@@ -3372,13 +3413,10 @@ commentToAnnotation _ = panic "commentToAnnotation"
isComment :: Token -> Bool
isComment (ITlineComment _) = True
isComment (ITblockComment _) = True
+isComment (ITdocCommentNext _) = True
+isComment (ITdocCommentPrev _) = True
+isComment (ITdocCommentNamed _) = True
+isComment (ITdocSection _ _) = True
+isComment (ITdocOptions _) = True
isComment _ = False
-
-isDocComment :: Token -> Bool
-isDocComment (ITdocCommentNext _) = True
-isDocComment (ITdocCommentPrev _) = True
-isDocComment (ITdocCommentNamed _) = True
-isDocComment (ITdocSection _ _) = True
-isDocComment (ITdocOptions _) = True
-isDocComment _ = False
}
diff --git a/compiler/GHC/Parser/PostProcess.hs b/compiler/GHC/Parser/PostProcess.hs
index 398bd78ddc..3cf5b30b06 100644
--- a/compiler/GHC/Parser/PostProcess.hs
+++ b/compiler/GHC/Parser/PostProcess.hs
@@ -126,7 +126,6 @@ import GHC.Builtin.Names ( allNameStrings )
import GHC.Types.SrcLoc
import GHC.Types.Unique ( hasKey )
import GHC.Data.OrdList ( OrdList, fromOL )
-import GHC.Data.Bag ( emptyBag, consBag )
import GHC.Utils.Outputable as Outputable
import GHC.Data.FastString
import GHC.Data.Maybe
@@ -172,16 +171,18 @@ mkClassDecl :: SrcSpan
-> Located (Maybe (LHsContext GhcPs), LHsType GhcPs)
-> Located (a,[LHsFunDep GhcPs])
-> OrdList (LHsDecl GhcPs)
+ -> LayoutInfo
-> P (LTyClDecl GhcPs)
-mkClassDecl loc (L _ (mcxt, tycl_hdr)) fds where_cls
+mkClassDecl loc (L _ (mcxt, tycl_hdr)) fds where_cls layoutInfo
= do { (binds, sigs, ats, at_defs, _, docs) <- cvBindsAndSigs where_cls
; let cxt = fromMaybe (noLoc []) mcxt
; (cls, tparams, fixity, ann) <- checkTyClHdr True tycl_hdr
; addAnnsAt loc ann -- Add any API Annotations to the top SrcSpan
; (tyvars,annst) <- checkTyVars (text "class") whereDots cls tparams
; addAnnsAt loc annst -- Add any API Annotations to the top SrcSpan
- ; return (L loc (ClassDecl { tcdCExt = noExtField, tcdCtxt = cxt
+ ; return (L loc (ClassDecl { tcdCExt = layoutInfo
+ , tcdCtxt = cxt
, tcdLName = cls, tcdTyVars = tyvars
, tcdFixity = fixity
, tcdFDs = snd (unLoc fds)
@@ -418,14 +419,7 @@ fromSpecTyVarBndr bndr = case bndr of
-- | Groups together bindings for a single function
cvTopDecls :: OrdList (LHsDecl GhcPs) -> [LHsDecl GhcPs]
-cvTopDecls decls = go (fromOL decls)
- where
- go :: [LHsDecl GhcPs] -> [LHsDecl GhcPs]
- go [] = []
- go ((L l (ValD x b)) : ds)
- = L l' (ValD x b') : go ds'
- where (L l' b', ds') = getMonoBind (L l b) ds
- go (d : ds) = d : go ds
+cvTopDecls decls = getMonoBindAll (fromOL decls)
-- Declaration list may only contain value bindings and signatures.
cvBindGroup :: OrdList (LHsDecl GhcPs) -> P (HsValBinds GhcPs)
@@ -441,33 +435,32 @@ cvBindsAndSigs :: OrdList (LHsDecl GhcPs)
-- Input decls contain just value bindings and signatures
-- and in case of class or instance declarations also
-- associated type declarations. They might also contain Haddock comments.
-cvBindsAndSigs fb = go (fromOL fb)
+cvBindsAndSigs fb = do
+ fb' <- drop_bad_decls (fromOL fb)
+ return (partitionBindsAndSigs (getMonoBindAll fb'))
where
- go [] = return (emptyBag, [], [], [], [], [])
- go ((L l (ValD _ b)) : ds)
- = do { (bs, ss, ts, tfis, dfis, docs) <- go ds'
- ; return (b' `consBag` bs, ss, ts, tfis, dfis, docs) }
- where
- (b', ds') = getMonoBind (L l b) ds
- go ((L l decl) : ds)
- = do { (bs, ss, ts, tfis, dfis, docs) <- go ds
- ; case decl of
- SigD _ s
- -> return (bs, L l s : ss, ts, tfis, dfis, docs)
- TyClD _ (FamDecl _ t)
- -> return (bs, ss, L l t : ts, tfis, dfis, docs)
- InstD _ (TyFamInstD { tfid_inst = tfi })
- -> return (bs, ss, ts, L l tfi : tfis, dfis, docs)
- InstD _ (DataFamInstD { dfid_inst = dfi })
- -> return (bs, ss, ts, tfis, L l dfi : dfis, docs)
- DocD _ d
- -> return (bs, ss, ts, tfis, dfis, L l d : docs)
- SpliceD _ d
- -> addFatalError l $
- hang (text "Declaration splices are allowed only" <+>
- text "at the top level:")
- 2 (ppr d)
- _ -> pprPanic "cvBindsAndSigs" (ppr decl) }
+ -- cvBindsAndSigs is called in several places in the parser,
+ -- and its items can be produced by various productions:
+ --
+ -- * decl (when parsing a where clause or a let-expression)
+ -- * decl_inst (when parsing an instance declaration)
+ -- * decl_cls (when parsing a class declaration)
+ --
+ -- partitionBindsAndSigs can handle almost all declaration forms produced
+ -- by the aforementioned productions, except for SpliceD, which we filter
+ -- out here (in drop_bad_decls).
+ --
+ -- We're not concerned with every declaration form possible, such as those
+ -- produced by the topdecl parser production, because cvBindsAndSigs is not
+ -- called on top-level declarations.
+ drop_bad_decls [] = return []
+ drop_bad_decls (L l (SpliceD _ d) : ds) = do
+ addError l $
+ hang (text "Declaration splices are allowed only" <+>
+ text "at the top level:")
+ 2 (ppr d)
+ drop_bad_decls ds
+ drop_bad_decls (d:ds) = (d:) <$> drop_bad_decls ds
-----------------------------------------------------------------------------
-- Group function bindings into equation groups
@@ -512,6 +505,14 @@ getMonoBind (L loc1 (FunBind { fun_id = fun_id1@(L _ f1)
getMonoBind bind binds = (bind, binds)
+-- Group together adjacent FunBinds for every function.
+getMonoBindAll :: [LHsDecl GhcPs] -> [LHsDecl GhcPs]
+getMonoBindAll [] = []
+getMonoBindAll (L l (ValD _ b) : ds) =
+ let (L l' b', ds') = getMonoBind (L l b) ds
+ in L l' (ValD noExtField b') : getMonoBindAll ds'
+getMonoBindAll (d : ds) = d : getMonoBindAll ds
+
has_args :: [LMatch GhcPs (LHsExpr GhcPs)] -> Bool
has_args [] = panic "GHC.Parser.PostProcess.has_args"
has_args (L _ (Match { m_pats = args }) : _) = not (null args)
@@ -1035,21 +1036,7 @@ checkContext (L l orig_t)
else (anns ++ mkParensApiAnn lp1)
-- no need for anns, returning original
- check _anns t = checkNoDocs msg t *> return ([],L l [L l orig_t])
-
- msg = text "data constructor context"
-
--- | Check recursively if there are any 'HsDocTy's in the given type.
--- This only works on a subset of types produced by 'btype_no_ops'
-checkNoDocs :: SDoc -> LHsType GhcPs -> P ()
-checkNoDocs msg ty = go ty
- where
- go (L _ (HsAppKindTy _ ty ki)) = go ty *> go ki
- go (L _ (HsAppTy _ t1 t2)) = go t1 *> go t2
- go (L l (HsDocTy _ t ds)) = addError l $ hsep
- [ text "Unexpected haddock", quotes (ppr ds)
- , text "on", msg, quotes (ppr t) ]
- go _ = pure ()
+ check _anns _t = return ([],L l [L l orig_t])
checkImportDecl :: Maybe (Located Token)
-> Maybe (Located Token)
@@ -1338,7 +1325,6 @@ data TyEl = TyElOpr RdrName | TyElOpd (HsType GhcPs)
| TyElKindApp SrcSpan (LHsType GhcPs)
-- See Note [TyElKindApp SrcSpan interpretation]
| TyElUnpackedness ([AddAnn], SourceText, SrcUnpackedness)
- | TyElDocPrev HsDocString
{- Note [TyElKindApp SrcSpan interpretation]
@@ -1360,7 +1346,6 @@ instance Outputable TyEl where
ppr (TyElOpd ty) = ppr ty
ppr (TyElKindApp _ ki) = text "@" <> ppr ki
ppr (TyElUnpackedness (_, _, unpk)) = ppr unpk
- ppr (TyElDocPrev doc) = ppr doc
-- | Extract a strictness/unpackedness annotation from the front of a reversed
-- 'TyEl' list.
@@ -1447,11 +1432,6 @@ mergeOps all_xs = go (0 :: Int) [] id all_xs
-- See Note [Impossible case in mergeOps clause [unpk]]
panic "mergeOps.UNPACK: impossible position"
- -- clause [doc]:
- -- we do not expect to encounter any docs
- go _ _ _ ((L l (TyElDocPrev _)):_) =
- failOpDocPrev l
-
-- clause [opr]:
-- when we encounter an operator, we must have accumulated
-- something for its rhs, and there must be something left
@@ -1571,13 +1551,6 @@ pLHsTypeArg (L l (TyElOpd a)) = Just (HsValArg (L l a))
pLHsTypeArg (L _ (TyElKindApp l a)) = Just (HsTypeArg l a)
pLHsTypeArg _ = Nothing
-pDocPrev :: [Located TyEl] -> (Maybe LHsDocString, [Located TyEl])
-pDocPrev = go Nothing
- where
- go mTrailingDoc ((L l (TyElDocPrev doc)):xs) =
- go (mTrailingDoc `mplus` Just (L l doc)) xs
- go mTrailingDoc xs = (mTrailingDoc, xs)
-
orErr :: Maybe a -> b -> Either b a
orErr (Just a) _ = Right a
orErr Nothing b = Left b
@@ -1594,123 +1567,77 @@ mergeDataCon
:: [Located TyEl]
-> P ( Located RdrName -- constructor name
, HsConDeclDetails GhcPs -- constructor field information
- , Maybe LHsDocString -- docstring to go on the constructor
)
mergeDataCon all_xs =
do { (addAnns, a) <- eitherToP res
; addAnns
; return a }
where
- -- We start by splitting off the trailing documentation comment,
- -- if any exists.
- (mTrailingDoc, all_xs') = pDocPrev all_xs
-
- -- Determine whether the trailing documentation comment exists and is the
- -- only docstring in this constructor declaration.
- --
- -- When true, it means that it applies to the constructor itself:
- -- data T = C
- -- A
- -- B -- ^ Comment on C (singleDoc == True)
- --
- -- When false, it means that it applies to the last field:
- -- data T = C -- ^ Comment on C
- -- A -- ^ Comment on A
- -- B -- ^ Comment on B (singleDoc == False)
- singleDoc = isJust mTrailingDoc &&
- null [ () | (L _ (TyElDocPrev _)) <- all_xs' ]
-
-- The result of merging the list of reversed TyEl into a
-- data constructor, along with [AddAnn].
- res = goFirst all_xs'
-
- -- Take the trailing docstring into account when interpreting
- -- the docstring near the constructor.
- --
- -- data T = C -- ^ docstring right after C
- -- A
- -- B -- ^ trailing docstring
- --
- -- 'mkConDoc' must be applied to the docstring right after C, so that it
- -- falls back to the trailing docstring when appropriate (see singleDoc).
- mkConDoc mDoc | singleDoc = mDoc `mplus` mTrailingDoc
- | otherwise = mDoc
-
- -- The docstring for the last field of a data constructor.
- trailingFieldDoc | singleDoc = Nothing
- | otherwise = mTrailingDoc
+ res = goFirst all_xs
goFirst [ L l (TyElOpd (HsTyVar _ _ (L _ tc))) ]
= do { data_con <- tyConToDataCon l tc
- ; return (pure (), (data_con, PrefixCon [], mTrailingDoc)) }
+ ; return (pure (), (data_con, PrefixCon [])) }
goFirst ((L l (TyElOpd (HsRecTy _ fields))):xs)
- | (mConDoc, xs') <- pDocPrev xs
- , [ L l' (TyElOpd (HsTyVar _ _ (L _ tc))) ] <- xs'
+ | [ L l' (TyElOpd (HsTyVar _ _ (L _ tc))) ] <- xs
= do { data_con <- tyConToDataCon l' tc
- ; let mDoc = mTrailingDoc `mplus` mConDoc
- ; return (pure (), (data_con, RecCon (L l fields), mDoc)) }
+ ; return (pure (), (data_con, RecCon (L l fields))) }
goFirst [L l (TyElOpd (HsTupleTy _ HsBoxedOrConstraintTuple ts))]
= return ( pure ()
, ( L l (getRdrName (tupleDataCon Boxed (length ts)))
- , PrefixCon (map hsLinear ts)
- , mTrailingDoc ) )
+ , PrefixCon (map hsLinear ts) ) )
goFirst ((L l (TyElOpd t)):xs)
| (_, t', addAnns, xs') <- pBangTy (L l t) xs
- = go addAnns Nothing [mkLHsDocTyMaybe t' trailingFieldDoc] xs'
+ = go addAnns [t'] xs'
goFirst (L l (TyElKindApp _ _):_)
= goInfix Monoid.<> Left (l, kindAppErr)
goFirst xs
- = go (pure ()) mTrailingDoc [] xs
+ = go (pure ()) [] xs
- go addAnns mLastDoc ts [ L l (TyElOpd (HsTyVar _ _ (L _ tc))) ]
+ go addAnns ts [ L l (TyElOpd (HsTyVar _ _ (L _ tc))) ]
= do { data_con <- tyConToDataCon l tc
- ; return (addAnns, (data_con, PrefixCon (map hsLinear ts), mkConDoc mLastDoc)) }
- go addAnns mLastDoc ts ((L l (TyElDocPrev doc)):xs) =
- go addAnns (mLastDoc `mplus` Just (L l doc)) ts xs
- go addAnns mLastDoc ts ((L l (TyElOpd t)):xs)
+ ; return (addAnns, (data_con, PrefixCon (map hsLinear ts))) }
+ go addAnns ts ((L l (TyElOpd t)):xs)
| (_, t', addAnns', xs') <- pBangTy (L l t) xs
- , t'' <- mkLHsDocTyMaybe t' mLastDoc
- = go (addAnns >> addAnns') Nothing (t'':ts) xs'
- go _ _ _ ((L _ (TyElOpr _)):_) =
+ = go (addAnns >> addAnns') (t':ts) xs'
+ go _ _ ((L _ (TyElOpr _)):_) =
-- Encountered an operator: backtrack to the beginning and attempt
-- to parse as an infix definition.
goInfix
- go _ _ _ (L l (TyElKindApp _ _):_) = goInfix Monoid.<> Left (l, kindAppErr)
- go _ _ _ _ = Left malformedErr
+ go _ _ (L l (TyElKindApp _ _):_) = goInfix Monoid.<> Left (l, kindAppErr)
+ go _ _ _ = Left malformedErr
where
malformedErr =
- ( foldr combineSrcSpans noSrcSpan (map getLoc all_xs')
+ ( foldr combineSrcSpans noSrcSpan (map getLoc all_xs)
, text "Cannot parse data constructor" <+>
text "in a data/newtype declaration:" $$
- nest 2 (hsep . reverse $ map ppr all_xs'))
+ nest 2 (hsep . reverse $ map ppr all_xs))
goInfix =
- do { let xs0 = all_xs'
- ; (rhs_t, rhs_addAnns, xs1) <- pInfixSide xs0 `orErr` malformedErr
- ; let (mOpDoc, xs2) = pDocPrev xs1
- ; (op, xs3) <- case xs2 of
+ do { let xs0 = all_xs
+ ; (rhs, rhs_addAnns, xs1) <- pInfixSide xs0 `orErr` malformedErr
+ ; (op, xs3) <- case xs1 of
(L l (TyElOpr op)) : xs3 ->
do { data_con <- tyConToDataCon l op
; return (data_con, xs3) }
_ -> Left malformedErr
- ; let (mLhsDoc, xs4) = pDocPrev xs3
- ; (lhs_t, lhs_addAnns, xs5) <- pInfixSide xs4 `orErr` malformedErr
+ ; (lhs, lhs_addAnns, xs5) <- pInfixSide xs3 `orErr` malformedErr
; unless (null xs5) (Left malformedErr)
- ; let rhs = mkLHsDocTyMaybe rhs_t trailingFieldDoc
- lhs = mkLHsDocTyMaybe lhs_t mLhsDoc
- addAnns = lhs_addAnns >> rhs_addAnns
- ; return (addAnns, (op, InfixCon (hsLinear lhs) (hsLinear rhs), mkConDoc mOpDoc)) }
+ ; let addAnns = lhs_addAnns >> rhs_addAnns
+ ; return (addAnns, (op, InfixCon (hsLinear lhs) (hsLinear rhs))) }
where
malformedErr =
- ( foldr combineSrcSpans noSrcSpan (map getLoc all_xs')
+ ( foldr combineSrcSpans noSrcSpan (map getLoc all_xs)
, text "Cannot parse an infix data constructor" <+>
text "in a data/newtype declaration:" $$
- nest 2 (hsep . reverse $ map ppr all_xs'))
+ nest 2 (hsep . reverse $ map ppr all_xs))
kindAppErr =
text "Unexpected kind application" <+>
text "in a data/newtype declaration:" $$
- nest 2 (hsep . reverse $ map ppr all_xs')
+ nest 2 (hsep . reverse $ map ppr all_xs)
---------------------------------------------------------------------------
-- | Check for monad comprehensions
@@ -2902,11 +2829,6 @@ failOpFewArgs (L loc op) =
where
too_few = text "Operator applied to too few arguments:" <+> ppr op
-failOpDocPrev :: SrcSpan -> P a
-failOpDocPrev loc = addFatalError loc msg
- where
- msg = text "Unexpected documentation comment."
-
-----------------------------------------------------------------------------
-- Misc utils
@@ -3140,14 +3062,6 @@ mkLHsOpTy x op y =
let loc = getLoc x `combineSrcSpans` getLoc op `combineSrcSpans` getLoc y
in L loc (mkHsOpTy x op y)
-mkLHsDocTy :: LHsType GhcPs -> LHsDocString -> LHsType GhcPs
-mkLHsDocTy t doc =
- let loc = getLoc t `combineSrcSpans` getLoc doc
- in L loc (HsDocTy noExtField t doc)
-
-mkLHsDocTyMaybe :: LHsType GhcPs -> Maybe LHsDocString -> LHsType GhcPs
-mkLHsDocTyMaybe t = maybe t (mkLHsDocTy t)
-
-----------------------------------------------------------------------------
-- Token symbols
diff --git a/compiler/GHC/Parser/PostProcess/Haddock.hs b/compiler/GHC/Parser/PostProcess/Haddock.hs
index 409b0c120f..e109fada55 100644
--- a/compiler/GHC/Parser/PostProcess/Haddock.hs
+++ b/compiler/GHC/Parser/PostProcess/Haddock.hs
@@ -1,39 +1,1554 @@
-{-# OPTIONS_GHC -Wno-incomplete-record-updates #-}
+{-# LANGUAGE ScopedTypeVariables #-}
+{-# LANGUAGE TypeApplications #-}
+{-# LANGUAGE NamedFieldPuns #-}
+{-# LANGUAGE GeneralizedNewtypeDeriving #-}
+{-# LANGUAGE ApplicativeDo #-}
+{-# LANGUAGE DeriveFunctor #-}
+{-# LANGUAGE FlexibleInstances #-}
+{-# LANGUAGE TypeFamilies #-}
+{-# LANGUAGE ViewPatterns #-}
+{-# LANGUAGE DerivingVia #-}
-module GHC.Parser.PostProcess.Haddock where
+{- | This module implements 'addHaddockToModule', which inserts Haddock
+ comments accumulated during parsing into the AST (#17544).
-import GHC.Prelude
+We process Haddock comments in two phases:
+
+1. Parse the program (via the Happy parser in `Parser.y`), generating
+ an AST, and (quite separately) a list of all the Haddock comments
+ found in the file. More precisely, the Haddock comments are
+ accumulated in the `hdk_comments` field of the `PState`, the parser
+ state (see Lexer.x):
+
+ data PState = PState { ...
+ , hdk_comments :: [PsLocated HdkComment] }
+
+ Each of these Haddock comments has a `PsSpan`, which gives the `BufPos` of
+ the beginning and end of the Haddock comment.
+
+2. Walk over the AST, attaching the Haddock comments to the correct
+ parts of the tree. This step is called `addHaddockToModule`, and is
+ implemented in this module.
+
+ See Note [Adding Haddock comments to the syntax tree].
+
+This approach codifies an important principle:
+
+ The presence or absence of a Haddock comment should never change the parsing
+ of a program.
+
+Alternative approaches that did not work properly:
+
+1. Using 'RealSrcLoc' instead of 'BufPos'. This led to failures in presence
+ of {-# LANGUAGE CPP #-} and other sources of line pragmas. See documentation
+ on 'BufPos' (in GHC.Types.SrcLoc) for the details.
+
+2. In earlier versions of GHC, the Haddock comments were incorporated into the
+ Parser.y grammar. The parser constructed the AST and attached comments to it in
+ a single pass. See Note [Old solution: Haddock in the grammar] for the details.
+-}
+module GHC.Parser.PostProcess.Haddock (addHaddockToModule) where
+
+import GHC.Prelude hiding (mod)
import GHC.Hs
import GHC.Types.SrcLoc
+import GHC.Driver.Session ( WarningFlag(..) )
+import GHC.Utils.Outputable hiding ( (<>) )
+import GHC.Data.Bag
+import Data.Semigroup
+import Data.Foldable
+import Data.Traversable
+import Data.Maybe
import Control.Monad
+import Control.Monad.Trans.State.Strict
+import Control.Monad.Trans.Reader
+import Control.Monad.Trans.Writer
+import Data.Functor.Identity
+import Data.Coerce
+import qualified Data.Monoid
+
+import GHC.Parser.Lexer
+import GHC.Utils.Misc (mergeListsBy, filterOut, mapLastM, (<&&>))
+
+{- Note [Adding Haddock comments to the syntax tree]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+'addHaddock' traverses the AST in concrete syntax order, building a computation
+(represented by HdkA) that reconstructs the AST but with Haddock comments
+inserted in appropriate positions:
+
+ addHaddock :: HasHaddock a => a -> HdkA a
+
+Consider this code example:
+
+ f :: Int -- ^ comment on argument
+ -> Bool -- ^ comment on result
+
+In the AST, the "Int" part of this snippet is represented like this
+(pseudo-code):
+
+ L (BufSpan 6 8) (HsTyVar "Int") :: LHsType GhcPs
+
+And the comments are represented like this (pseudo-code):
+
+ L (BufSpan 11 35) (HdkCommentPrev "comment on argument")
+ L (BufSpan 46 69) (HdkCommentPrev "comment on result")
+
+So when we are traversing the AST and 'addHaddock' is applied to HsTyVar "Int",
+how does it know to associate it with "comment on argument" but not with
+"comment on result"?
+
+The trick is to look in the space between syntactic elements. In the example above,
+the location range in which we search for HdkCommentPrev is as follows:
+
+ f :: Int████████████████████████
+ ████Bool -- ^ comment on result
+
+We search for comments after HsTyVar "Int" and until the next syntactic
+element, in this case HsTyVar "Bool".
+
+Ignoring the "->" allows us to accomodate alternative coding styles:
+
+ f :: Int -> -- ^ comment on argument
+ Bool -- ^ comment on result
+
+Sometimes we also need to take indentation information into account.
+Compare the following examples:
+
+ class C a where
+ f :: a -> Int
+ -- ^ comment on f
+
+ class C a where
+ f :: a -> Int
+ -- ^ comment on C
+
+Notice how "comment on f" and "comment on C" differ only by indentation level.
+
+Therefore, in order to know the location range in which the comments are applicable
+to a syntactic elements, we need three nuggets of information:
+ 1. lower bound on the BufPos of a comment
+ 2. upper bound on the BufPos of a comment
+ 3. minimum indentation level of a comment
+
+This information is represented by the 'LocRange' type.
+
+In order to propagate this information, we have the 'HdkA' applicative.
+'HdkA' is defined as follows:
+
+ data HdkA a = HdkA (Maybe BufSpan) (HdkM a)
+
+The first field contains a 'BufSpan', which represents the location
+span taken by a syntactic element:
+
+ addHaddock (L bufSpan ...) = HdkA (Just bufSpan) ...
+
+The second field, 'HdkM', is a stateful computation that looks up Haddock
+comments in the specified location range:
+
+ HdkM a ≈
+ LocRange -- The allowed location range
+ -> [PsLocated HdkComment] -- Unallocated comments
+ -> (a, -- AST with comments inserted into it
+ [PsLocated HdkComment]) -- Leftover comments
+
+The 'Applicative' instance for 'HdkA' is defined in such a way that the
+location range of every computation is defined by its neighbours:
+
+ addHaddock aaa <*> addHaddock bbb <*> addHaddock ccc
+
+Here, the 'LocRange' passed to the 'HdkM' computation of addHaddock bbb
+is determined by the BufSpan recorded in addHaddock aaa and addHaddock ccc.
+
+This is why it's important to traverse the AST in the order of the concrete
+syntax. In the example above we assume that aaa, bbb, ccc are ordered by location:
+
+ * getBufSpan (getLoc aaa) < getBufSpan (getLoc bbb)
+ * getBufSpan (getLoc bbb) < getBufSpan (getLoc ccc)
+
+Violation of this assumption would lead to bugs, and care must be taken to
+traverse the AST correctly. For example, when dealing with class declarations,
+we have to use 'flattenBindsAndSigs' to traverse it in the correct order.
+-}
+
+-- | Add Haddock documentation accumulated in the parser state
+-- to a parsed HsModule.
+--
+-- Reports badly positioned comments when -Winvalid-haddock is enabled.
+addHaddockToModule :: Located HsModule -> P (Located HsModule)
+addHaddockToModule lmod = do
+ pState <- getPState
+ let all_comments = toList (hdk_comments pState)
+ initial_hdk_st = HdkSt all_comments []
+ (lmod', final_hdk_st) = runHdkA (addHaddock lmod) initial_hdk_st
+ hdk_warnings = collectHdkWarnings final_hdk_st
+ -- lmod': module with Haddock comments inserted into the AST
+ -- hdk_warnings: warnings accumulated during AST/comment processing
+ mapM_ reportHdkWarning hdk_warnings
+ return lmod'
+
+reportHdkWarning :: HdkWarn -> P ()
+reportHdkWarning (HdkWarnInvalidComment (L l _)) =
+ addWarning Opt_WarnInvalidHaddock (mkSrcSpanPs l) $
+ text "A Haddock comment cannot appear in this position and will be ignored."
+reportHdkWarning (HdkWarnExtraComment (L l _)) =
+ addWarning Opt_WarnInvalidHaddock l $
+ text "Multiple Haddock comments for a single entity are not allowed." $$
+ text "The extraneous comment will be ignored."
+
+collectHdkWarnings :: HdkSt -> [HdkWarn]
+collectHdkWarnings HdkSt{ hdk_st_pending, hdk_st_warnings } =
+ map HdkWarnInvalidComment hdk_st_pending -- leftover Haddock comments not inserted into the AST
+ ++ hdk_st_warnings
+
+{- *********************************************************************
+* *
+* addHaddock: a family of functions that processes the AST *
+* in concrete syntax order, adding documentation comments to it *
+* *
+********************************************************************* -}
+
+-- HasHaddock is a convenience class for overloading the addHaddock operation.
+-- Alternatively, we could define a family of monomorphic functions:
+--
+-- addHaddockSomeTypeX :: SomeTypeX -> HdkA SomeTypeX
+-- addHaddockAnotherTypeY :: AnotherTypeY -> HdkA AnotherTypeY
+-- addHaddockOneMoreTypeZ :: OneMoreTypeZ -> HdkA OneMoreTypeZ
+--
+-- But having a single name for all of them is just easier to read, and makes it clear
+-- that they all are of the form t -> HdkA t for some t.
+--
+-- If you need to handle a more complicated scenario that doesn't fit this
+-- pattern, it's always possible to define separate functions outside of this
+-- class, as is done in case of e.g. addHaddockConDeclField.
+--
+-- See Note [Adding Haddock comments to the syntax tree].
+class HasHaddock a where
+ addHaddock :: a -> HdkA a
+
+instance HasHaddock a => HasHaddock [a] where
+ addHaddock = traverse addHaddock
+
+-- -- | Module header comment
+-- module M (
+-- -- * Export list comment
+-- Item1,
+-- Item2,
+-- -- * Export list comment
+-- item3,
+-- item4
+-- ) where
+--
+instance HasHaddock (Located HsModule) where
+ addHaddock (L l_mod mod) = do
+ -- Step 1, get the module header documentation comment:
+ --
+ -- -- | Module header comment
+ -- module M where
+ --
+ -- Only do this when the module header exists.
+ headerDocs <-
+ for @Maybe (hsmodName mod) $ \(L l_name _) ->
+ extendHdkA l_name $ liftHdkA $ do
+ -- todo: register keyword location of 'module', see Note [Register keyword location]
+ docs <-
+ inLocRange (locRangeTo (getBufPos (srcSpanStart l_name))) $
+ takeHdkComments mkDocNext
+ selectDocString docs
+
+ -- Step 2, process documentation comments in the export list:
+ --
+ -- module M (
+ -- -- * Export list comment
+ -- Item1,
+ -- Item2,
+ -- -- * Export list comment
+ -- item3,
+ -- item4
+ -- ) where
+ --
+ -- Only do this when the export list exists.
+ hsmodExports' <- traverse @Maybe addHaddock (hsmodExports mod)
+
+ -- Step 3, register the import section to reject invalid comments:
+ --
+ -- import Data.Maybe
+ -- -- | rejected comment (cannot appear here)
+ -- import Data.Bool
+ --
+ traverse_ registerHdkA (hsmodImports mod)
+
+ -- Step 4, process declarations:
+ --
+ -- module M where
+ -- -- | Comment on D
+ -- data D = MkD -- ^ Comment on MkD
+ -- data C = MkC -- ^ Comment on MkC
+ -- -- ^ Comment on C
+ --
+ let layout_info = hsmodLayout mod
+ hsmodDecls' <- addHaddockInterleaveItems layout_info (mkDocHsDecl layout_info) (hsmodDecls mod)
+
+ pure $ L l_mod $
+ mod { hsmodExports = hsmodExports'
+ , hsmodDecls = hsmodDecls'
+ , hsmodHaddockModHeader = join @Maybe headerDocs }
+
+-- Only for module exports, not module imports.
+--
+-- module M (a, b, c) where -- use on this [LIE GhcPs]
+-- import I (a, b, c) -- do not use here!
+--
+-- Imports cannot have documentation comments anyway.
+instance HasHaddock (Located [LIE GhcPs]) where
+ addHaddock (L l_exports exports) =
+ extendHdkA l_exports $ do
+ exports' <- addHaddockInterleaveItems NoLayoutInfo mkDocIE exports
+ registerLocHdkA (srcLocSpan (srcSpanEnd l_exports)) -- Do not consume comments after the closing parenthesis
+ pure $ L l_exports exports'
+
+-- Needed to use 'addHaddockInterleaveItems' in 'instance HasHaddock (Located [LIE GhcPs])'.
+instance HasHaddock (LIE GhcPs) where
+ addHaddock a = a <$ registerHdkA a
+
+{- Add Haddock items to a list of non-Haddock items.
+Used to process export lists (with mkDocIE) and declarations (with mkDocHsDecl).
+
+For example:
+
+ module M where
+ -- | Comment on D
+ data D = MkD -- ^ Comment on MkD
+ data C = MkC -- ^ Comment on MkC
+ -- ^ Comment on C
+
+In this case, we should produce four HsDecl items (pseudo-code):
+
+ 1. DocD (DocCommentNext "Comment on D")
+ 2. TyClD (DataDecl "D" ... [ConDeclH98 "MkD" ... (Just "Comment on MkD")])
+ 3. TyClD (DataDecl "C" ... [ConDeclH98 "MkC" ... (Just "Comment on MkC")])
+ 4. DocD (DocCommentPrev "Comment on C")
+
+The inputs to addHaddockInterleaveItems are:
+
+ * layout_info :: LayoutInfo
+
+ In the example above, note that the indentation level inside the module is
+ 2 spaces. It would be represented as layout_info = VirtualBraces 2.
+
+ It is used to delimit the search space for comments when processing
+ declarations. Here, we restrict indentation levels to >=(2+1), so that when
+ we look up comment on MkC, we get "Comment on MkC" but not "Comment on C".
+
+ * get_doc_item :: PsLocated HdkComment -> Maybe a
+
+ This is the function used to look up documentation comments.
+ In the above example, get_doc_item = mkDocHsDecl layout_info,
+ and it will produce the following parts of the output:
+
+ DocD (DocCommentNext "Comment on D")
+ DocD (DocCommentPrev "Comment on C")
+
+ * The list of items. These are the declarations that will be annotated with
+ documentation comments.
+
+ Before processing:
+ TyClD (DataDecl "D" ... [ConDeclH98 "MkD" ... Nothing])
+ TyClD (DataDecl "C" ... [ConDeclH98 "MkC" ... Nothing])
+
+ After processing:
+ TyClD (DataDecl "D" ... [ConDeclH98 "MkD" ... (Just "Comment on MkD")])
+ TyClD (DataDecl "C" ... [ConDeclH98 "MkC" ... (Just "Comment on MkC")])
+-}
+addHaddockInterleaveItems
+ :: forall a.
+ HasHaddock a
+ => LayoutInfo
+ -> (PsLocated HdkComment -> Maybe a) -- Get a documentation item
+ -> [a] -- Unprocessed (non-documentation) items
+ -> HdkA [a] -- Documentation items & processed non-documentation items
+addHaddockInterleaveItems layout_info get_doc_item = go
+ where
+ go :: [a] -> HdkA [a]
+ go [] = liftHdkA (takeHdkComments get_doc_item)
+ go (item : items) = do
+ docItems <- liftHdkA (takeHdkComments get_doc_item)
+ item' <- with_layout_info (addHaddock item)
+ other_items <- go items
+ pure $ docItems ++ item':other_items
+
+ with_layout_info :: HdkA a -> HdkA a
+ with_layout_info = case layout_info of
+ NoLayoutInfo -> id
+ ExplicitBraces -> id
+ VirtualBraces n ->
+ let loc_range = mempty { loc_range_col = ColumnFrom (n+1) }
+ in hoistHdkA (inLocRange loc_range)
+
+instance HasHaddock (LHsDecl GhcPs) where
+ addHaddock ldecl =
+ extendHdkA (getLoc ldecl) $
+ traverse @Located addHaddock ldecl
+
+-- Process documentation comments *inside* a declaration, for example:
+--
+-- data T = MkT -- ^ Comment on MkT (inside DataDecl)
+-- f, g
+-- :: Int -- ^ Comment on Int (inside TypeSig)
+-- -> Bool -- ^ Comment on Bool (inside TypeSig)
+--
+-- Comments that relate to the entire declaration are processed elsewhere:
+--
+-- -- | Comment on T (not processed in this instance)
+-- data T = MkT
+--
+-- -- | Comment on f, g (not processed in this instance)
+-- f, g :: Int -> Bool
+-- f = ...
+-- g = ...
+--
+-- Such comments are inserted into the syntax tree as DocD declarations
+-- by addHaddockInterleaveItems, and then associated with other declarations
+-- in GHC.HsToCore.Docs (see DeclDocMap).
+--
+-- In this instance, we only process comments that relate to parts of the
+-- declaration, not to the declaration itself.
+instance HasHaddock (HsDecl GhcPs) where
+
+ -- Type signatures:
+ --
+ -- f, g
+ -- :: Int -- ^ Comment on Int
+ -- -> Bool -- ^ Comment on Bool
+ --
+ addHaddock (SigD _ (TypeSig _ names t)) = do
+ traverse_ registerHdkA names
+ t' <- addHaddock t
+ pure (SigD noExtField (TypeSig noExtField names t'))
+
+ -- Pattern synonym type signatures:
+ --
+ -- pattern MyPat
+ -- :: Bool -- ^ Comment on Bool
+ -- -> Maybe Bool -- ^ Comment on Maybe Bool
+ --
+ addHaddock (SigD _ (PatSynSig _ names t)) = do
+ traverse_ registerHdkA names
+ t' <- addHaddock t
+ pure (SigD noExtField (PatSynSig noExtField names t'))
+
+ -- Class method signatures and default signatures:
+ --
+ -- class C x where
+ -- method_of_c
+ -- :: Maybe x -- ^ Comment on Maybe x
+ -- -> IO () -- ^ Comment on IO ()
+ -- default method_of_c
+ -- :: Eq x
+ -- => Maybe x -- ^ Comment on Maybe x
+ -- -> IO () -- ^ Comment on IO ()
+ --
+ addHaddock (SigD _ (ClassOpSig _ is_dflt names t)) = do
+ traverse_ registerHdkA names
+ t' <- addHaddock t
+ pure (SigD noExtField (ClassOpSig noExtField is_dflt names t'))
+
+ -- Data/newtype declarations:
+ --
+ -- data T = MkT -- ^ Comment on MkT
+ -- A -- ^ Comment on A
+ -- B -- ^ Comment on B
+ --
+ -- data G where
+ -- -- | Comment on MkG
+ -- MkG :: A -- ^ Comment on A
+ -- -> B -- ^ Comment on B
+ -- -> G
+ --
+ -- newtype N = MkN { getN :: Natural } -- ^ Comment on N
+ -- deriving newtype (Eq {- ^ Comment on Eq N -})
+ -- deriving newtype (Ord {- ^ Comment on Ord N -})
+ --
+ addHaddock (TyClD _ decl)
+ | DataDecl { tcdLName, tcdTyVars, tcdFixity, tcdDataDefn = defn } <- decl
+ = do
+ registerHdkA tcdLName
+ defn' <- addHaddock defn
+ pure $
+ TyClD noExtField (DataDecl {
+ tcdDExt = noExtField,
+ tcdLName, tcdTyVars, tcdFixity,
+ tcdDataDefn = defn' })
+
+ -- Class declarations:
+ --
+ -- class C a where
+ -- -- | Comment on the first method
+ -- first_method :: a -> Bool
+ -- second_method :: a -> String
+ -- -- ^ Comment on the second method
+ --
+ addHaddock (TyClD _ decl)
+ | ClassDecl { tcdCExt = tcdLayout,
+ tcdCtxt, tcdLName, tcdTyVars, tcdFixity, tcdFDs,
+ tcdSigs, tcdMeths, tcdATs, tcdATDefs } <- decl
+ = do
+ registerHdkA tcdLName
+ -- todo: register keyword location of 'where', see Note [Register keyword location]
+ where_cls' <-
+ addHaddockInterleaveItems tcdLayout (mkDocHsDecl tcdLayout) $
+ flattenBindsAndSigs (tcdMeths, tcdSigs, tcdATs, tcdATDefs, [], [])
+ pure $
+ let (tcdMeths', tcdSigs', tcdATs', tcdATDefs', _, tcdDocs) = partitionBindsAndSigs where_cls'
+ decl' = ClassDecl { tcdCExt = tcdLayout
+ , tcdCtxt, tcdLName, tcdTyVars, tcdFixity, tcdFDs
+ , tcdSigs = tcdSigs'
+ , tcdMeths = tcdMeths'
+ , tcdATs = tcdATs'
+ , tcdATDefs = tcdATDefs'
+ , tcdDocs }
+ in TyClD noExtField decl'
+
+ -- Data family instances:
+ --
+ -- data instance D Bool where ... (same as data/newtype declarations)
+ -- data instance D Bool = ... (same as data/newtype declarations)
+ --
+ addHaddock (InstD _ decl)
+ | DataFamInstD { dfid_inst } <- decl
+ , DataFamInstDecl { dfid_eqn } <- dfid_inst
+ = do
+ dfid_eqn' <- case dfid_eqn of
+ HsIB _ (FamEqn { feqn_tycon, feqn_bndrs, feqn_pats, feqn_fixity, feqn_rhs })
+ -> do
+ registerHdkA feqn_tycon
+ feqn_rhs' <- addHaddock feqn_rhs
+ pure $
+ HsIB noExtField (FamEqn {
+ feqn_ext = noExtField,
+ feqn_tycon, feqn_bndrs, feqn_pats, feqn_fixity,
+ feqn_rhs = feqn_rhs' })
+ pure $ InstD noExtField (DataFamInstD {
+ dfid_ext = noExtField,
+ dfid_inst = DataFamInstDecl { dfid_eqn = dfid_eqn' } })
+
+ -- Type synonyms:
+ --
+ -- type T = Int -- ^ Comment on Int
+ --
+ addHaddock (TyClD _ decl)
+ | SynDecl { tcdLName, tcdTyVars, tcdFixity, tcdRhs } <- decl
+ = do
+ registerHdkA tcdLName
+ -- todo: register keyword location of '=', see Note [Register keyword location]
+ tcdRhs' <- addHaddock tcdRhs
+ pure $
+ TyClD noExtField (SynDecl {
+ tcdSExt = noExtField,
+ tcdLName, tcdTyVars, tcdFixity,
+ tcdRhs = tcdRhs' })
+
+ -- Foreign imports:
+ --
+ -- foreign import ccall unsafe
+ -- o :: Float -- ^ The input float
+ -- -> IO Float -- ^ The output float
+ --
+ addHaddock (ForD _ decl) = do
+ registerHdkA (fd_name decl)
+ fd_sig_ty' <- addHaddock (fd_sig_ty decl)
+ pure $ ForD noExtField (decl{ fd_sig_ty = fd_sig_ty' })
+
+ -- Other declarations
+ addHaddock d = pure d
+
+-- The right-hand side of a data/newtype declaration or data family instance.
+instance HasHaddock (HsDataDefn GhcPs) where
+ addHaddock defn@HsDataDefn{} = do
+
+ -- Register the kind signature:
+ -- data D :: Type -> Type where ...
+ -- data instance D Bool :: Type where ...
+ traverse_ @Maybe registerHdkA (dd_kindSig defn)
+ -- todo: register keyword location of '=' or 'where', see Note [Register keyword location]
+
+ -- Process the data constructors:
+ --
+ -- data T
+ -- = MkT1 Int Bool -- ^ Comment on MkT1
+ -- | MkT2 Char Int -- ^ Comment on MkT2
+ --
+ dd_cons' <- addHaddock (dd_cons defn)
+
+ -- Process the deriving clauses:
+ --
+ -- newtype N = MkN Natural
+ -- deriving (Eq {- ^ Comment on Eq N -})
+ -- deriving (Ord {- ^ Comment on Ord N -})
+ --
+ dd_derivs' <- addHaddock (dd_derivs defn)
+
+ pure $ defn { dd_cons = dd_cons',
+ dd_derivs = dd_derivs' }
+
+-- Process the deriving clauses of a data/newtype declaration.
+-- Not used for standalone deriving.
+instance HasHaddock (HsDeriving GhcPs) where
+ addHaddock lderivs =
+ extendHdkA (getLoc lderivs) $
+ traverse @Located addHaddock lderivs
+
+-- Process a single deriving clause of a data/newtype declaration:
+--
+-- newtype N = MkN Natural
+-- deriving newtype (Eq {- ^ Comment on Eq N -})
+-- deriving (Ord {- ^ Comment on Ord N -}) via Down N
+--
+-- Not used for standalone deriving.
+instance HasHaddock (LHsDerivingClause GhcPs) where
+ addHaddock lderiv =
+ extendHdkA (getLoc lderiv) $
+ for @Located lderiv $ \deriv ->
+ case deriv of
+ HsDerivingClause { deriv_clause_strategy, deriv_clause_tys } -> do
+ let
+ -- 'stock', 'anyclass', and 'newtype' strategies come
+ -- before the clause types.
+ --
+ -- 'via' comes after.
+ --
+ -- See tests/.../T11768.hs
+ (register_strategy_before, register_strategy_after) =
+ case deriv_clause_strategy of
+ Nothing -> (pure (), pure ())
+ Just (L l (ViaStrategy _)) -> (pure (), registerLocHdkA l)
+ Just (L l _) -> (registerLocHdkA l, pure ())
+ register_strategy_before
+ deriv_clause_tys' <-
+ extendHdkA (getLoc deriv_clause_tys) $
+ traverse @Located addHaddock deriv_clause_tys
+ register_strategy_after
+ pure HsDerivingClause
+ { deriv_clause_ext = noExtField,
+ deriv_clause_strategy,
+ deriv_clause_tys = deriv_clause_tys' }
+
+-- Process a single data constructor declaration, which may come in one of the
+-- following forms:
+--
+-- 1. H98-syntax PrefixCon:
+-- data T =
+-- MkT -- ^ Comment on MkT
+-- Int -- ^ Comment on Int
+-- Bool -- ^ Comment on Bool
+--
+-- 2. H98-syntax InfixCon:
+-- data T =
+-- Int -- ^ Comment on Int
+-- :+ -- ^ Comment on (:+)
+-- Bool -- ^ Comment on Bool
+--
+-- 3. H98-syntax RecCon:
+-- data T =
+-- MkT { int_field :: Int, -- ^ Comment on int_field
+-- bool_field :: Bool } -- ^ Comment on bool_field
+--
+-- 4. GADT-syntax PrefixCon:
+-- data T where
+-- -- | Comment on MkT
+-- MkT :: Int -- ^ Comment on Int
+-- -> Bool -- ^ Comment on Bool
+-- -> T
+--
+-- 5. GADT-syntax RecCon:
+-- data T where
+-- -- | Comment on MkT
+-- MkT :: { int_field :: Int, -- ^ Comment on int_field
+-- bool_field :: Bool } -- ^ Comment on bool_field
+-- -> T
+--
+instance HasHaddock (LConDecl GhcPs) where
+ addHaddock (L l_con_decl con_decl) =
+ extendHdkA l_con_decl $
+ case con_decl of
+ ConDeclGADT { con_g_ext, con_names, con_forall, con_qvars, con_mb_cxt, con_args, con_res_ty } -> do
+ -- discardHasInnerDocs is ok because we don't need this info for GADTs.
+ con_doc' <- discardHasInnerDocs $ getConDoc (getLoc (head con_names))
+ con_args' <-
+ case con_args of
+ PrefixCon ts -> PrefixCon <$> addHaddock ts
+ RecCon (L l_rec flds) -> do
+ -- discardHasInnerDocs is ok because we don't need this info for GADTs.
+ flds' <- traverse (discardHasInnerDocs . addHaddockConDeclField) flds
+ pure $ RecCon (L l_rec flds')
+ InfixCon _ _ -> panic "ConDeclGADT InfixCon"
+ con_res_ty' <- addHaddock con_res_ty
+ pure $ L l_con_decl $
+ ConDeclGADT { con_g_ext, con_names, con_forall, con_qvars, con_mb_cxt,
+ con_doc = con_doc',
+ con_args = con_args',
+ con_res_ty = con_res_ty' }
+ ConDeclH98 { con_ext, con_name, con_forall, con_ex_tvs, con_mb_cxt, con_args } ->
+ addConTrailingDoc (srcSpanEnd l_con_decl) $
+ case con_args of
+ PrefixCon ts -> do
+ con_doc' <- getConDoc (getLoc con_name)
+ ts' <- traverse addHaddockConDeclFieldTy ts
+ pure $ L l_con_decl $
+ ConDeclH98 { con_ext, con_name, con_forall, con_ex_tvs, con_mb_cxt,
+ con_doc = con_doc',
+ con_args = PrefixCon ts' }
+ InfixCon t1 t2 -> do
+ t1' <- addHaddockConDeclFieldTy t1
+ con_doc' <- getConDoc (getLoc con_name)
+ t2' <- addHaddockConDeclFieldTy t2
+ pure $ L l_con_decl $
+ ConDeclH98 { con_ext, con_name, con_forall, con_ex_tvs, con_mb_cxt,
+ con_doc = con_doc',
+ con_args = InfixCon t1' t2' }
+ RecCon (L l_rec flds) -> do
+ con_doc' <- getConDoc (getLoc con_name)
+ flds' <- traverse addHaddockConDeclField flds
+ pure $ L l_con_decl $
+ ConDeclH98 { con_ext, con_name, con_forall, con_ex_tvs, con_mb_cxt,
+ con_doc = con_doc',
+ con_args = RecCon (L l_rec flds') }
+ XConDecl (ConDeclGADTPrefixPs { con_gp_names, con_gp_ty }) -> do
+ -- discardHasInnerDocs is ok because we don't need this info for GADTs.
+ con_gp_doc' <- discardHasInnerDocs $ getConDoc (getLoc (head con_gp_names))
+ con_gp_ty' <- addHaddock con_gp_ty
+ pure $ L l_con_decl $
+ XConDecl (ConDeclGADTPrefixPs
+ { con_gp_names,
+ con_gp_ty = con_gp_ty',
+ con_gp_doc = con_gp_doc' })
+
+-- Keep track of documentation comments on the data constructor or any of its
+-- fields.
+--
+-- See Note [Trailing comment on constructor declaration]
+type ConHdkA = WriterT HasInnerDocs HdkA
+
+-- Does the data constructor declaration have any inner (non-trailing)
+-- documentation comments?
+--
+-- Example when HasInnerDocs is True:
+--
+-- data X =
+-- MkX -- ^ inner comment
+-- Field1 -- ^ inner comment
+-- Field2 -- ^ inner comment
+-- Field3 -- ^ trailing comment
+--
+-- Example when HasInnerDocs is False:
+--
+-- data Y = MkY Field1 Field2 Field3 -- ^ trailing comment
+--
+-- See Note [Trailing comment on constructor declaration]
+newtype HasInnerDocs = HasInnerDocs Bool
+ deriving (Semigroup, Monoid) via Data.Monoid.Any
+
+-- Run ConHdkA by discarding the HasInnerDocs info when we have no use for it.
+--
+-- We only do this when processing data declarations that use GADT syntax,
+-- because only the H98 syntax declarations have special treatment for the
+-- trailing documentation comment.
+--
+-- See Note [Trailing comment on constructor declaration]
+discardHasInnerDocs :: ConHdkA a -> HdkA a
+discardHasInnerDocs = fmap fst . runWriterT
+
+-- Get the documentation comment associated with the data constructor in a
+-- data/newtype declaration.
+getConDoc
+ :: SrcSpan -- Location of the data constructor
+ -> ConHdkA (Maybe LHsDocString)
+getConDoc l =
+ WriterT $ extendHdkA l $ liftHdkA $ do
+ mDoc <- getPrevNextDoc l
+ return (mDoc, HasInnerDocs (isJust mDoc))
+
+-- Add documentation comment to a data constructor field.
+-- Used for PrefixCon and InfixCon.
+addHaddockConDeclFieldTy
+ :: HsScaled GhcPs (LHsType GhcPs)
+ -> ConHdkA (HsScaled GhcPs (LHsType GhcPs))
+addHaddockConDeclFieldTy (HsScaled mult (L l t)) =
+ WriterT $ extendHdkA l $ liftHdkA $ do
+ mDoc <- getPrevNextDoc l
+ return (HsScaled mult (mkLHsDocTy (L l t) mDoc),
+ HasInnerDocs (isJust mDoc))
+
+-- Add documentation comment to a data constructor field.
+-- Used for RecCon.
+addHaddockConDeclField
+ :: LConDeclField GhcPs
+ -> ConHdkA (LConDeclField GhcPs)
+addHaddockConDeclField (L l_fld fld) =
+ WriterT $ extendHdkA l_fld $ liftHdkA $ do
+ cd_fld_doc <- getPrevNextDoc l_fld
+ return (L l_fld (fld { cd_fld_doc }),
+ HasInnerDocs (isJust cd_fld_doc))
+
+-- 1. Process a H98-syntax data constructor declaration in a context with no
+-- access to the trailing documentation comment (by running the provided
+-- ConHdkA computation).
+--
+-- 2. Then grab the trailing comment (if it exists) and attach it where
+-- appropriate: either to the data constructor itself or to its last field,
+-- depending on HasInnerDocs.
+--
+-- See Note [Trailing comment on constructor declaration]
+addConTrailingDoc
+ :: SrcLoc -- The end of a data constructor declaration.
+ -- Any docprev comment past this point is considered trailing.
+ -> ConHdkA (LConDecl GhcPs)
+ -> HdkA (LConDecl GhcPs)
+addConTrailingDoc l_sep =
+ hoistHdkA add_trailing_doc . runWriterT
+ where
+ add_trailing_doc
+ :: HdkM (LConDecl GhcPs, HasInnerDocs)
+ -> HdkM (LConDecl GhcPs)
+ add_trailing_doc m = do
+ (L l con_decl, HasInnerDocs has_inner_docs) <-
+ inLocRange (locRangeTo (getBufPos l_sep)) m
+ -- inLocRange delimits the context so that the inner computation
+ -- will not consume the trailing documentation comment.
+ case con_decl of
+ ConDeclH98{} -> do
+ trailingDocs <-
+ inLocRange (locRangeFrom (getBufPos l_sep)) $
+ takeHdkComments mkDocPrev
+ if null trailingDocs
+ then return (L l con_decl)
+ else do
+ if has_inner_docs then do
+ let mk_doc_ty :: HsScaled GhcPs (LHsType GhcPs)
+ -> HdkM (HsScaled GhcPs (LHsType GhcPs))
+ mk_doc_ty x@(HsScaled _ (L _ HsDocTy{})) =
+ -- Happens in the following case:
+ --
+ -- data T =
+ -- MkT
+ -- -- | Comment on SomeField
+ -- SomeField
+ -- -- ^ Another comment on SomeField? (rejected)
+ --
+ -- See tests/.../haddockExtraDocs.hs
+ x <$ reportExtraDocs trailingDocs
+ mk_doc_ty (HsScaled mult (L l' t)) = do
+ doc <- selectDocString trailingDocs
+ return $ HsScaled mult (mkLHsDocTy (L l' t) doc)
+ let mk_doc_fld :: LConDeclField GhcPs
+ -> HdkM (LConDeclField GhcPs)
+ mk_doc_fld x@(L _ (ConDeclField { cd_fld_doc = Just _ })) =
+ -- Happens in the following case:
+ --
+ -- data T =
+ -- MkT {
+ -- -- | Comment on SomeField
+ -- someField :: SomeField
+ -- } -- ^ Another comment on SomeField? (rejected)
+ --
+ -- See tests/.../haddockExtraDocs.hs
+ x <$ reportExtraDocs trailingDocs
+ mk_doc_fld (L l' con_fld) = do
+ doc <- selectDocString trailingDocs
+ return $ L l' (con_fld { cd_fld_doc = doc })
+ con_args' <- case con_args con_decl of
+ x@(PrefixCon []) -> x <$ reportExtraDocs trailingDocs
+ x@(RecCon (L _ [])) -> x <$ reportExtraDocs trailingDocs
+ PrefixCon ts -> PrefixCon <$> mapLastM mk_doc_ty ts
+ InfixCon t1 t2 -> InfixCon t1 <$> mk_doc_ty t2
+ RecCon (L l_rec flds) -> do
+ flds' <- mapLastM mk_doc_fld flds
+ return (RecCon (L l_rec flds'))
+ return $ L l (con_decl{ con_args = con_args' })
+ else do
+ con_doc' <- selectDocString (con_doc con_decl `mcons` trailingDocs)
+ return $ L l (con_decl{ con_doc = con_doc' })
+ _ -> panic "addConTrailingDoc: non-H98 ConDecl"
+
+{- Note [Trailing comment on constructor declaration]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+The trailing comment after a constructor declaration is associated with the
+constructor itself when there are no other comments inside the declaration:
+
+ data T = MkT A B -- ^ Comment on MkT
+ data T = MkT { x :: A } -- ^ Comment on MkT
+
+When there are other comments, the trailing comment applies to the last field:
+
+ data T = MkT -- ^ Comment on MkT
+ A -- ^ Comment on A
+ B -- ^ Comment on B
+
+ data T =
+ MkT { a :: A -- ^ Comment on a
+ , b :: B -- ^ Comment on b
+ , c :: C } -- ^ Comment on c
+
+This makes the trailing comment context-sensitive. Example:
+ data T =
+ -- | comment 1
+ MkT Int Bool -- ^ comment 2
+
+ Here, "comment 2" applies to the Bool field.
+ But if we removed "comment 1", then "comment 2" would be apply to the data
+ constructor rather than its field.
+
+All of this applies to H98-style data declarations only.
+GADTSyntax data constructors don't have any special treatment for the trailing comment.
+
+We implement this in two steps:
+
+ 1. Process the data constructor declaration in a delimited context where the
+ trailing documentation comment is not visible. Delimiting the context is done
+ in addConTrailingDoc.
+
+ When processing the declaration, track whether the constructor or any of
+ its fields have a documentation comment associated with them.
+ This is done using WriterT HasInnerDocs, see ConHdkA.
+
+ 2. Depending on whether HasInnerDocs is True or False, attach the
+ trailing documentation comment to the data constructor itself
+ or to its last field.
+-}
+
+instance HasHaddock a => HasHaddock (HsScaled GhcPs a) where
+ addHaddock (HsScaled mult a) = HsScaled mult <$> addHaddock a
+
+instance HasHaddock (LHsSigWcType GhcPs) where
+ addHaddock (HsWC _ t) = HsWC noExtField <$> addHaddock t
+
+instance HasHaddock (LHsSigType GhcPs) where
+ addHaddock (HsIB _ t) = HsIB noExtField <$> addHaddock t
+
+-- Process a type, adding documentation comments to function arguments
+-- and the result. Many formatting styles are supported.
+--
+-- my_function ::
+-- forall a.
+-- Eq a =>
+-- Maybe a -> -- ^ Comment on Maybe a (function argument)
+-- Bool -> -- ^ Comment on Bool (function argument)
+-- String -- ^ Comment on String (the result)
+--
+-- my_function
+-- :: forall a. Eq a
+-- => Maybe a -- ^ Comment on Maybe a (function argument)
+-- -> Bool -- ^ Comment on Bool (function argument)
+-- -> String -- ^ Comment on String (the result)
+--
+-- my_function ::
+-- forall a. Eq a =>
+-- -- | Comment on Maybe a (function argument)
+-- Maybe a ->
+-- -- | Comment on Bool (function argument)
+-- Bool ->
+-- -- | Comment on String (the result)
+-- String
+--
+-- This is achieved by simply ignoring (not registering the location of) the
+-- function arrow (->).
+instance HasHaddock (LHsType GhcPs) where
+ addHaddock (L l t) =
+ extendHdkA l $
+ case t of
+
+ -- forall a b c. t
+ HsForAllTy _ tele body -> do
+ registerLocHdkA (getForAllTeleLoc tele)
+ body' <- addHaddock body
+ pure $ L l (HsForAllTy noExtField tele body')
+
+ -- (Eq a, Num a) => t
+ HsQualTy _ lhs rhs -> do
+ registerHdkA lhs
+ rhs' <- addHaddock rhs
+ pure $ L l (HsQualTy noExtField lhs rhs')
+
+ -- arg -> res
+ HsFunTy _ mult lhs rhs -> do
+ lhs' <- addHaddock lhs
+ rhs' <- addHaddock rhs
+ pure $ L l (HsFunTy noExtField mult lhs' rhs')
+
+ -- other types
+ _ -> liftHdkA $ do
+ mDoc <- getPrevNextDoc l
+ return (mkLHsDocTy (L l t) mDoc)
+
+{- *********************************************************************
+* *
+* HdkA: a layer over HdkM that propagates location information *
+* *
+********************************************************************* -}
+
+-- See Note [Adding Haddock comments to the syntax tree].
+--
+-- 'HdkA' provides a way to propagate location information from surrounding
+-- computations:
+--
+-- left_neighbour <*> HdkA inner_span inner_m <*> right_neighbour
+--
+-- Here, the following holds:
+--
+-- * the 'left_neighbour' will only see Haddock comments until 'bufSpanStart' of 'inner_span'
+-- * the 'right_neighbour' will only see Haddock comments after 'bufSpanEnd' of 'inner_span'
+-- * the 'inner_m' will only see Haddock comments between its 'left_neighbour' and its 'right_neighbour'
+--
+-- In other words, every computation:
+--
+-- * delimits the surrounding computations
+-- * is delimited by the surrounding computations
+--
+-- Therefore, a 'HdkA' computation must be always considered in the context in
+-- which it is used.
+data HdkA a =
+ HdkA
+ !(Maybe BufSpan) -- Just b <=> BufSpan occupied by the processed AST element.
+ -- The surrounding computations will not look inside.
+ --
+ -- Nothing <=> No BufSpan (e.g. when the HdkA is constructed by 'pure' or 'liftHdkA').
+ -- The surrounding computations are not delimited.
+
+ !(HdkM a) -- The stateful computation that looks up Haddock comments and
+ -- adds them to the resulting AST node.
+
+ deriving (Functor)
+
+instance Applicative HdkA where
+ HdkA l1 m1 <*> HdkA l2 m2 =
+ HdkA
+ (l1 <> l2) -- The combined BufSpan that covers both subcomputations.
+ --
+ -- The Semigroup instance for Maybe quite conveniently does the right thing:
+ -- Nothing <> b = b
+ -- a <> Nothing = a
+ -- Just a <> Just b = Just (a <> b)
+
+ (delim1 m1 <*> delim2 m2) -- Stateful computations are run in left-to-right order,
+ -- without any smart reordering strategy. So users of this
+ -- operation must take care to traverse the AST
+ -- in concrete syntax order.
+ -- See Note [Smart reordering in HdkA (or lack of thereof)]
+ --
+ -- Each computation is delimited ("sandboxed")
+ -- in a way that it doesn't see any Haddock
+ -- comments past the neighbouring AST node.
+ -- These delim1/delim2 are key to how HdkA operates.
+ where
+ -- Delimit the LHS by the location information from the RHS
+ delim1 = inLocRange (locRangeTo (fmap @Maybe bufSpanStart l2))
+ -- Delimit the RHS by the location information from the LHS
+ delim2 = inLocRange (locRangeFrom (fmap @Maybe bufSpanEnd l1))
+
+ pure a =
+ -- Return a value without performing any stateful computation, and without
+ -- any delimiting effect on the surrounding computations.
+ liftHdkA (pure a)
+
+{- Note [Smart reordering in HdkA (or lack of thereof)]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+When traversing the AST, the user must take care to traverse it in concrete
+syntax order.
+
+For example, when processing HsFunTy, it's important to get it right and write
+it like so:
+
+ HsFunTy _ mult lhs rhs -> do
+ lhs' <- addHaddock lhs
+ rhs' <- addHaddock rhs
+ pure $ L l (HsFunTy noExtField mult lhs' rhs')
+
+Rather than like so:
+
+ HsFunTy _ mult lhs rhs -> do
+ rhs' <- addHaddock rhs -- bad! wrong order
+ lhs' <- addHaddock lhs -- bad! wrong order
+ pure $ L l (HsFunTy noExtField mult lhs' rhs')
+
+This is somewhat bug-prone, so we could try to fix this with some Applicative
+magic. When we define (<*>) for HdkA, why not reorder the computations as
+necessary? In pseudo-code:
+
+ a1 <*> a2 | a1 `before` a2 = ... normal processing ...
+ | otherwise = a1 <**> a2
+
+While this trick could work for any two *adjacent* AST elements out of order
+(as in HsFunTy example above), it would fail in more elaborate scenarios (e.g.
+processing a list of declarations out of order).
+
+If it's not obvious why this trick doesn't work, ponder this: it's a bit like trying to get
+a sorted list by defining a 'smart' concatenation operator in the following manner:
+
+ a ?++ b | a <= b = a ++ b
+ | otherwise = b ++ a
+
+At first glance it seems to work:
+
+ ghci> [1] ?++ [2] ?++ [3]
+ [1,2,3]
+
+ ghci> [2] ?++ [1] ?++ [3]
+ [1,2,3] -- wow, sorted!
+
+But it actually doesn't:
+
+ ghci> [3] ?++ [1] ?++ [2]
+ [1,3,2] -- not sorted...
+-}
+
+-- Run a HdkA computation in an unrestricted LocRange. This is only used at the
+-- top level to run the final computation for the entire module.
+runHdkA :: HdkA a -> HdkSt -> (a, HdkSt)
+runHdkA (HdkA _ m) = unHdkM m mempty
+
+-- Let the neighbours know about an item at this location.
+--
+-- Consider this example:
+--
+-- class -- | peculiarly placed comment
+-- MyClass a where
+-- my_method :: a -> a
+--
+-- How do we know to reject the "peculiarly placed comment" instead of
+-- associating it with my_method? Its indentation level matches.
+--
+-- But clearly, there's "MyClass a where" separating the comment and my_method.
+-- To take it into account, we must register its location using registerLocHdkA
+-- or registerHdkA.
+--
+-- See Note [Register keyword location].
+-- See Note [Adding Haddock comments to the syntax tree].
+registerLocHdkA :: SrcSpan -> HdkA ()
+registerLocHdkA l = HdkA (getBufSpan l) (pure ())
+
+-- Let the neighbours know about an item at this location.
+-- A small wrapper over registerLocHdkA.
+--
+-- See Note [Adding Haddock comments to the syntax tree].
+registerHdkA :: Located a -> HdkA ()
+registerHdkA a = registerLocHdkA (getLoc a)
+
+-- Modify the action of a HdkA computation.
+hoistHdkA :: (HdkM a -> HdkM b) -> HdkA a -> HdkA b
+hoistHdkA f (HdkA l m) = HdkA l (f m)
+
+-- Lift a HdkM computation to HdkA.
+liftHdkA :: HdkM a -> HdkA a
+liftHdkA = HdkA mempty
+
+-- Extend the declared location span of a 'HdkA' computation:
+--
+-- left_neighbour <*> extendHdkA l x <*> right_neighbour
+--
+-- The declared location of 'x' now includes 'l', so that the surrounding
+-- computations 'left_neighbour' and 'right_neighbour' will not look for
+-- Haddock comments inside the 'l' location span.
+extendHdkA :: SrcSpan -> HdkA a -> HdkA a
+extendHdkA l' (HdkA l m) = HdkA (getBufSpan l' <> l) m
+
+
+{- *********************************************************************
+* *
+* HdkM: a stateful computation to associate *
+* accumulated documentation comments with AST nodes *
+* *
+********************************************************************* -}
+
+-- The state of 'HdkM' contains a list of pending Haddock comments. We go
+-- over the AST, looking up these comments using 'takeHdkComments' and removing
+-- them from the state. The remaining, un-removed ones are ignored with a
+-- warning (-Winvalid-haddock). Also, using a state means we never use the same
+-- Haddock twice.
+--
+-- See Note [Adding Haddock comments to the syntax tree].
+newtype HdkM a = HdkM (ReaderT LocRange (State HdkSt) a)
+ deriving (Functor, Applicative, Monad)
+
+-- | The state of HdkM.
+data HdkSt =
+ HdkSt
+ { hdk_st_pending :: [PsLocated HdkComment]
+ -- a list of pending (unassociated with an AST node)
+ -- Haddock comments, sorted by location: in ascending order of the starting 'BufPos'
+ , hdk_st_warnings :: [HdkWarn]
+ -- accumulated warnings (order doesn't matter)
+ }
+
+-- | Warnings accumulated in HdkM.
+data HdkWarn
+ = HdkWarnInvalidComment (PsLocated HdkComment)
+ | HdkWarnExtraComment LHsDocString
+
+-- 'HdkM' without newtype wrapping/unwrapping.
+type InlineHdkM a = LocRange -> HdkSt -> (a, HdkSt)
+
+mkHdkM :: InlineHdkM a -> HdkM a
+unHdkM :: HdkM a -> InlineHdkM a
+mkHdkM = coerce
+unHdkM = coerce
+
+-- Restrict the range in which a HdkM computation will look up comments:
+--
+-- inLocRange r1 $
+-- inLocRange r2 $
+-- takeHdkComments ... -- Only takes comments in the (r1 <> r2) location range.
+--
+-- Note that it does not blindly override the range but tightens it using (<>).
+-- At many use sites, you will see something along the lines of:
+--
+-- inLocRange (locRangeTo end_pos) $ ...
+--
+-- And 'locRangeTo' defines a location range from the start of the file to
+-- 'end_pos'. This does not mean that we now search for every comment from the
+-- start of the file, as this restriction will be combined with other
+-- restrictions. Somewhere up the callstack we might have:
+--
+-- inLocRange (locRangeFrom start_pos) $ ...
+--
+-- The net result is that the location range is delimited by 'start_pos' on
+-- one side and by 'end_pos' on the other side.
+--
+-- In 'HdkA', every (<*>) may restrict the location range of its
+-- subcomputations.
+inLocRange :: LocRange -> HdkM a -> HdkM a
+inLocRange r (HdkM m) = HdkM (local (mappend r) m)
+
+-- Take the Haddock comments that satisfy the matching function,
+-- leaving the rest pending.
+takeHdkComments :: forall a. (PsLocated HdkComment -> Maybe a) -> HdkM [a]
+takeHdkComments f =
+ mkHdkM $
+ \(LocRange hdk_from hdk_to hdk_col) ->
+ \hdk_st ->
+ let
+ comments = hdk_st_pending hdk_st
+ (comments_before_range, comments') = break (is_after hdk_from) comments
+ (comments_in_range, comments_after_range) = span (is_before hdk_to <&&> is_indented hdk_col) comments'
+ (items, other_comments) = foldr add_comment ([], []) comments_in_range
+ remaining_comments = comments_before_range ++ other_comments ++ comments_after_range
+ hdk_st' = hdk_st{ hdk_st_pending = remaining_comments }
+ in
+ (items, hdk_st')
+ where
+ is_after StartOfFile _ = True
+ is_after (StartLoc l) (L l_comment _) = bufSpanStart (psBufSpan l_comment) >= l
+ is_before EndOfFile _ = True
+ is_before (EndLoc l) (L l_comment _) = bufSpanStart (psBufSpan l_comment) <= l
+ is_indented (ColumnFrom n) (L l_comment _) = srcSpanStartCol (psRealSpan l_comment) >= n
+
+ add_comment
+ :: PsLocated HdkComment
+ -> ([a], [PsLocated HdkComment])
+ -> ([a], [PsLocated HdkComment])
+ add_comment hdk_comment (items, other_hdk_comments) =
+ case f hdk_comment of
+ Just item -> (item : items, other_hdk_comments)
+ Nothing -> (items, hdk_comment : other_hdk_comments)
+
+-- Get the docnext or docprev comment for an AST node at the given source span.
+getPrevNextDoc :: SrcSpan -> HdkM (Maybe LHsDocString)
+getPrevNextDoc l = do
+ let (l_start, l_end) = (srcSpanStart l, srcSpanEnd l)
+ before_t = locRangeTo (getBufPos l_start)
+ after_t = locRangeFrom (getBufPos l_end)
+ nextDocs <- inLocRange before_t $ takeHdkComments mkDocNext
+ prevDocs <- inLocRange after_t $ takeHdkComments mkDocPrev
+ selectDocString (nextDocs ++ prevDocs)
+
+appendHdkWarning :: HdkWarn -> HdkM ()
+appendHdkWarning e = HdkM (ReaderT (\_ -> modify append_warn))
+ where
+ append_warn hdk_st = hdk_st { hdk_st_warnings = e : hdk_st_warnings hdk_st }
+
+selectDocString :: [LHsDocString] -> HdkM (Maybe LHsDocString)
+selectDocString = select . filterOut (isEmptyDocString . unLoc)
+ where
+ select [] = return Nothing
+ select [doc] = return (Just doc)
+ select (doc : extra_docs) = do
+ reportExtraDocs extra_docs
+ return (Just doc)
+
+reportExtraDocs :: [LHsDocString] -> HdkM ()
+reportExtraDocs =
+ traverse_ (\extra_doc -> appendHdkWarning (HdkWarnExtraComment extra_doc))
+
+{- *********************************************************************
+* *
+* Matching functions for extracting documentation comments *
+* *
+********************************************************************* -}
+
+mkDocHsDecl :: LayoutInfo -> PsLocated HdkComment -> Maybe (LHsDecl GhcPs)
+mkDocHsDecl layout_info a = mapLoc (DocD noExtField) <$> mkDocDecl layout_info a
+
+mkDocDecl :: LayoutInfo -> PsLocated HdkComment -> Maybe LDocDecl
+mkDocDecl layout_info (L l_comment hdk_comment)
+ | indent_mismatch = Nothing
+ | otherwise =
+ Just $ L (mkSrcSpanPs l_comment) $
+ case hdk_comment of
+ HdkCommentNext doc -> DocCommentNext doc
+ HdkCommentPrev doc -> DocCommentPrev doc
+ HdkCommentNamed s doc -> DocCommentNamed s doc
+ HdkCommentSection n doc -> DocGroup n doc
+ where
+ -- 'indent_mismatch' checks if the documentation comment has the exact
+ -- indentation level expected by the parent node.
+ --
+ -- For example, when extracting documentation comments between class
+ -- method declarations, there are three cases to consider:
+ --
+ -- 1. Indent matches (indent_mismatch=False):
+ -- class C a where
+ -- f :: a -> a
+ -- -- ^ doc on f
+ --
+ -- 2. Indented too much (indent_mismatch=True):
+ -- class C a where
+ -- f :: a -> a
+ -- -- ^ indent mismatch
+ --
+ -- 3. Indented too little (indent_mismatch=True):
+ -- class C a where
+ -- f :: a -> a
+ -- -- ^ indent mismatch
+ indent_mismatch = case layout_info of
+ NoLayoutInfo -> False
+ ExplicitBraces -> False
+ VirtualBraces n -> n /= srcSpanStartCol (psRealSpan l_comment)
+
+mkDocIE :: PsLocated HdkComment -> Maybe (LIE GhcPs)
+mkDocIE (L l_comment hdk_comment) =
+ case hdk_comment of
+ HdkCommentSection n doc -> Just $ L l (IEGroup noExtField n doc)
+ HdkCommentNamed s _doc -> Just $ L l (IEDocNamed noExtField s)
+ HdkCommentNext doc -> Just $ L l (IEDoc noExtField doc)
+ _ -> Nothing
+ where l = mkSrcSpanPs l_comment
+
+mkDocNext :: PsLocated HdkComment -> Maybe LHsDocString
+mkDocNext (L l (HdkCommentNext doc)) = Just $ L (mkSrcSpanPs l) doc
+mkDocNext _ = Nothing
+
+mkDocPrev :: PsLocated HdkComment -> Maybe LHsDocString
+mkDocPrev (L l (HdkCommentPrev doc)) = Just $ L (mkSrcSpanPs l) doc
+mkDocPrev _ = Nothing
+
+
+{- *********************************************************************
+* *
+* LocRange: a location range *
+* *
+********************************************************************* -}
+
+-- A location range for extracting documentation comments.
+data LocRange =
+ LocRange
+ { loc_range_from :: !LowerLocBound,
+ loc_range_to :: !UpperLocBound,
+ loc_range_col :: !ColumnBound }
+
+instance Semigroup LocRange where
+ LocRange from1 to1 col1 <> LocRange from2 to2 col2 =
+ LocRange (from1 <> from2) (to1 <> to2) (col1 <> col2)
+
+instance Monoid LocRange where
+ mempty = LocRange mempty mempty mempty
+
+-- The location range from the specified position to the end of the file.
+locRangeFrom :: Maybe BufPos -> LocRange
+locRangeFrom (Just l) = mempty { loc_range_from = StartLoc l }
+locRangeFrom Nothing = mempty
+
+-- The location range from the start of the file to the specified position.
+locRangeTo :: Maybe BufPos -> LocRange
+locRangeTo (Just l) = mempty { loc_range_to = EndLoc l }
+locRangeTo Nothing = mempty
+
+-- Represents a predicate on BufPos:
+--
+-- LowerLocBound | BufPos -> Bool
+-- --------------+-----------------
+-- StartOfFile | const True
+-- StartLoc p | (>= p)
+--
+-- The semigroup instance corresponds to (&&).
+--
+-- We don't use the BufPos -> Bool representation
+-- as it would lead to redundant checks.
+--
+-- That is, instead of
+--
+-- (pos >= 20) && (pos >= 30) && (pos >= 40)
+--
+-- We'd rather only do the (>=40) check. So we reify the predicate to make
+-- sure we only check for the most restrictive bound.
+data LowerLocBound = StartOfFile | StartLoc !BufPos
+
+instance Semigroup LowerLocBound where
+ StartOfFile <> l = l
+ l <> StartOfFile = l
+ StartLoc l1 <> StartLoc l2 = StartLoc (max l1 l2)
+
+instance Monoid LowerLocBound where
+ mempty = StartOfFile
+
+-- Represents a predicate on BufPos:
+--
+-- UpperLocBound | BufPos -> Bool
+-- --------------+-----------------
+-- EndOfFile | const True
+-- EndLoc p | (<= p)
+--
+-- The semigroup instance corresponds to (&&).
+--
+-- We don't use the BufPos -> Bool representation
+-- as it would lead to redundant checks.
+--
+-- That is, instead of
+--
+-- (pos <= 40) && (pos <= 30) && (pos <= 20)
+--
+-- We'd rather only do the (<=20) check. So we reify the predicate to make
+-- sure we only check for the most restrictive bound.
+data UpperLocBound = EndOfFile | EndLoc !BufPos
+
+instance Semigroup UpperLocBound where
+ EndOfFile <> l = l
+ l <> EndOfFile = l
+ EndLoc l1 <> EndLoc l2 = EndLoc (min l1 l2)
+
+instance Monoid UpperLocBound where
+ mempty = EndOfFile
+
+-- | Represents a predicate on the column number.
+--
+-- ColumnBound | Int -> Bool
+-- --------------+-----------------
+-- ColumnFrom n | (>=n)
+--
+-- The semigroup instance corresponds to (&&).
+--
+newtype ColumnBound = ColumnFrom Int -- n >= GHC.Types.SrcLoc.leftmostColumn
+
+instance Semigroup ColumnBound where
+ ColumnFrom n <> ColumnFrom m = ColumnFrom (max n m)
+
+instance Monoid ColumnBound where
+ mempty = ColumnFrom leftmostColumn
+
+
+{- *********************************************************************
+* *
+* AST manipulation utilities *
+* *
+********************************************************************* -}
+
+mkLHsDocTy :: LHsType GhcPs -> Maybe LHsDocString -> LHsType GhcPs
+mkLHsDocTy t Nothing = t
+mkLHsDocTy t (Just doc) = L (getLoc t) (HsDocTy noExtField t doc)
+
+getForAllTeleLoc :: HsForAllTelescope GhcPs -> SrcSpan
+getForAllTeleLoc tele =
+ foldr combineSrcSpans noSrcSpan $
+ case tele of
+ HsForAllVis{ hsf_vis_bndrs } -> map getLoc hsf_vis_bndrs
+ HsForAllInvis { hsf_invis_bndrs } -> map getLoc hsf_invis_bndrs
+
+-- | The inverse of 'partitionBindsAndSigs' that merges partitioned items back
+-- into a flat list. Elements are put back into the order in which they
+-- appeared in the original program before partitioning, using BufPos to order
+-- them.
+--
+-- Precondition (unchecked): the input lists are already sorted.
+flattenBindsAndSigs
+ :: (LHsBinds GhcPs, [LSig GhcPs], [LFamilyDecl GhcPs],
+ [LTyFamInstDecl GhcPs], [LDataFamInstDecl GhcPs], [LDocDecl])
+ -> [LHsDecl GhcPs]
+flattenBindsAndSigs (all_bs, all_ss, all_ts, all_tfis, all_dfis, all_docs) =
+ -- 'cmpBufSpan' is safe here with the following assumptions:
+ --
+ -- * 'LHsDecl' produced by 'decl_cls' in Parser.y always have a 'BufSpan'
+ -- * 'partitionBindsAndSigs' does not discard this 'BufSpan'
+ mergeListsBy cmpBufSpan [
+ mapLL (\b -> ValD noExtField b) (bagToList all_bs),
+ mapLL (\s -> SigD noExtField s) all_ss,
+ mapLL (\t -> TyClD noExtField (FamDecl noExtField t)) all_ts,
+ mapLL (\tfi -> InstD noExtField (TyFamInstD noExtField tfi)) all_tfis,
+ mapLL (\dfi -> InstD noExtField (DataFamInstD noExtField dfi)) all_dfis,
+ mapLL (\d -> DocD noExtField d) all_docs
+ ]
+
+{- *********************************************************************
+* *
+* General purpose utilities *
+* *
+********************************************************************* -}
+
+-- Cons an element to a list, if exists.
+mcons :: Maybe a -> [a] -> [a]
+mcons = maybe id (:)
+
+-- Map a function over a list of located items.
+mapLL :: (a -> b) -> [Located a] -> [Located b]
+mapLL f = map (mapLoc f)
+
+{- Note [Old solution: Haddock in the grammar]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+In the past, Haddock comments were incorporated into the grammar (Parser.y).
+This led to excessive complexity and duplication.
+
+For example, here's the grammar production for types without documentation:
+
+ type : btype
+ | btype '->' ctype
+
+To support Haddock, we had to also maintain an additional grammar production
+for types with documentation on function arguments and function result:
+
+ typedoc : btype
+ | btype docprev
+ | docnext btype
+ | btype '->' ctypedoc
+ | btype docprev '->' ctypedoc
+ | docnext btype '->' ctypedoc
+
+Sometimes handling documentation comments during parsing led to bugs (#17561),
+and sometimes it simply made it hard to modify and extend the grammar.
+
+Another issue was that sometimes Haddock would fail to parse code
+that GHC could parse succesfully:
--- -----------------------------------------------------------------------------
--- Adding documentation to record fields (used in parsing).
+ class BadIndent where
+ f :: a -> Int
+ -- ^ comment
+ g :: a -> Int
-addFieldDoc :: LConDeclField GhcPs -> Maybe LHsDocString -> LConDeclField GhcPs
-addFieldDoc (L l fld) doc
- = L l (fld { cd_fld_doc = cd_fld_doc fld `mplus` doc })
+This declaration was accepted by ghc but rejected by ghc -haddock.
+-}
-addFieldDocs :: [LConDeclField GhcPs] -> Maybe LHsDocString -> [LConDeclField GhcPs]
-addFieldDocs [] _ = []
-addFieldDocs (x:xs) doc = addFieldDoc x doc : xs
+{- Note [Register keyword location]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+At the moment, 'addHaddock' erroneously associates some comments with
+constructs that are separated by a keyword. For example:
+ data Foo -- | Comment for MkFoo
+ where MkFoo :: Foo
-addConDoc :: LConDecl GhcPs -> Maybe LHsDocString -> LConDecl GhcPs
-addConDoc decl Nothing = decl
-addConDoc (L p c) doc = L p $ case c of
- ConDeclH98 { con_doc = old_doc } -> c { con_doc = old_doc `mplus` doc }
- ConDeclGADT { con_doc = old_doc } -> c { con_doc = old_doc `mplus` doc }
- XConDecl x@(ConDeclGADTPrefixPs { con_gp_doc = old_doc }) ->
- XConDecl (x { con_gp_doc = old_doc `mplus` doc })
+The issue stems from the lack of location information for keywords. We could
+utilize API Annotations for this purpose, but not without modification. For
+example, API Annotations operate on RealSrcSpan, whereas we need BufSpan.
-addConDocs :: [LConDecl GhcPs] -> Maybe LHsDocString -> [LConDecl GhcPs]
-addConDocs [] _ = []
-addConDocs [x] doc = [addConDoc x doc]
-addConDocs (x:xs) doc = x : addConDocs xs doc
+Also, there's work towards making API Annotations available in-tree (not in
+a separate Map), see #17638. This change should make the fix very easy (it
+is not as easy with the current design).
-addConDocFirst :: [LConDecl GhcPs] -> Maybe LHsDocString -> [LConDecl GhcPs]
-addConDocFirst [] _ = []
-addConDocFirst (x:xs) doc = addConDoc x doc : xs
+See also testsuite/tests/haddock/should_compile_flag_haddock/T17544_kw.hs
+-}