diff options
author | Vladislav Zavialov <vlad.z.4096@gmail.com> | 2019-12-05 03:06:40 +0300 |
---|---|---|
committer | Ben Gamari <ben@smart-cactus.org> | 2020-07-21 14:50:01 -0400 |
commit | 19e80b9af252eee760dc047765a9930ef00067ec (patch) | |
tree | cb45fce4b1e74e1a82c5bd926fda0e92de1964c1 /compiler/GHC/Parser/Lexer.x | |
parent | 58235d46bd4e9fbf69bd82969b29cd9c6ab051e1 (diff) | |
download | haskell-19e80b9af252eee760dc047765a9930ef00067ec.tar.gz |
Accumulate Haddock comments in P (#17544, #17561, #8944)
Haddock comments are, first and foremost, comments. It's very annoying
to incorporate them into the grammar. We can take advantage of an
important property: adding a Haddock comment does not change the parse
tree in any way other than wrapping some nodes in HsDocTy and the like
(and if it does, that's a bug).
This patch implements the following:
* Accumulate Haddock comments with their locations in the P monad.
This is handled in the lexer.
* After parsing, do a pass over the AST to associate Haddock comments
with AST nodes using location info.
* Report the leftover comments to the user as a warning (-Winvalid-haddock).
Diffstat (limited to 'compiler/GHC/Parser/Lexer.x')
-rw-r--r-- | compiler/GHC/Parser/Lexer.x | 106 |
1 files changed, 72 insertions, 34 deletions
diff --git a/compiler/GHC/Parser/Lexer.x b/compiler/GHC/Parser/Lexer.x index ef9f1803bf..7265e1dffb 100644 --- a/compiler/GHC/Parser/Lexer.x +++ b/compiler/GHC/Parser/Lexer.x @@ -66,7 +66,8 @@ module GHC.Parser.Lexer ( lexTokenStream, AddAnn(..),mkParensApiAnn, addAnnsAt, - commentToAnnotation + commentToAnnotation, + HdkComment(..), ) where import GHC.Prelude @@ -97,6 +98,8 @@ import GHC.Utils.Outputable import GHC.Data.StringBuffer import GHC.Data.FastString import GHC.Types.Unique.FM +import GHC.Data.Maybe +import GHC.Data.OrdList import GHC.Utils.Misc ( readRational, readHexRational ) -- compiler/main @@ -109,6 +112,7 @@ import GHC.Unit import GHC.Types.Basic ( InlineSpec(..), RuleMatchInfo(..), IntegralLit(..), FractionalLit(..), SourceText(..) ) +import GHC.Hs.Doc -- compiler/parser import GHC.Parser.CharClass @@ -363,10 +367,8 @@ $tab { warnTab } -- Haddock comments -<0,option_prags> { - "-- " $docsym / { ifExtension HaddockBit } { multiline_doc_comment } - "{-" \ ? $docsym / { ifExtension HaddockBit } { nested_doc_comment } -} +"-- " $docsym / { ifExtension HaddockBit } { multiline_doc_comment } +"{-" \ ? $docsym / { ifExtension HaddockBit } { nested_doc_comment } -- "special" symbols @@ -1271,11 +1273,8 @@ nested_comment cont span buf len = do go (reverse $ lexemeToString buf len) (1::Int) input where go commentAcc 0 input = do - setInput input - b <- getBit RawTokenStreamBit - if b - then docCommentEnd input commentAcc ITblockComment buf span - else cont + let finalizeComment str = (Nothing, ITblockComment str) + commentEnd cont input commentAcc finalizeComment buf span go commentAcc n input = case alexGetChar' input of Nothing -> errBrace input (psRealSpan span) Just ('-',input) -> case alexGetChar' input of @@ -1365,24 +1364,37 @@ return control to parseNestedPragma by returning the ITcomment_line_prag token. See #314 for more background on the bug this fixes. -} -withLexedDocType :: (AlexInput -> (String -> Token) -> Bool -> P (PsLocated Token)) +withLexedDocType :: (AlexInput -> (String -> (HdkComment, Token)) -> Bool -> P (PsLocated Token)) -> P (PsLocated Token) withLexedDocType lexDocComment = do input@(AI _ buf) <- getInput case prevChar buf ' ' of -- The `Bool` argument to lexDocComment signals whether or not the next -- line of input might also belong to this doc comment. - '|' -> lexDocComment input ITdocCommentNext True - '^' -> lexDocComment input ITdocCommentPrev True - '$' -> lexDocComment input ITdocCommentNamed True + '|' -> lexDocComment input mkHdkCommentNext True + '^' -> lexDocComment input mkHdkCommentPrev True + '$' -> lexDocComment input mkHdkCommentNamed True '*' -> lexDocSection 1 input _ -> panic "withLexedDocType: Bad doc type" where lexDocSection n input = case alexGetChar' input of Just ('*', input) -> lexDocSection (n+1) input - Just (_, _) -> lexDocComment input (ITdocSection n) False + Just (_, _) -> lexDocComment input (mkHdkCommentSection n) False Nothing -> do setInput input; lexToken -- eof reached, lex it normally +mkHdkCommentNext, mkHdkCommentPrev :: String -> (HdkComment, Token) +mkHdkCommentNext str = (HdkCommentNext (mkHsDocString str), ITdocCommentNext str) +mkHdkCommentPrev str = (HdkCommentPrev (mkHsDocString str), ITdocCommentPrev str) + +mkHdkCommentNamed :: String -> (HdkComment, Token) +mkHdkCommentNamed str = + let (name, rest) = break isSpace str + in (HdkCommentNamed name (mkHsDocString rest), ITdocCommentNamed str) + +mkHdkCommentSection :: Int -> String -> (HdkComment, Token) +mkHdkCommentSection n str = + (HdkCommentSection n (mkHsDocString str), ITdocSection n str) + -- RULES pragmas turn on the forall and '.' keywords, and we turn them -- off again at the end of the pragma. rulePrag :: Action @@ -1425,17 +1437,34 @@ endPrag span _buf _len = do -- it writes the wrong token length to the parser state. This function is -- called afterwards, so it can just update the state. -docCommentEnd :: AlexInput -> String -> (String -> Token) -> StringBuffer -> - PsSpan -> P (PsLocated Token) -docCommentEnd input commentAcc docType buf span = do +commentEnd :: P (PsLocated Token) + -> AlexInput + -> String + -> (String -> (Maybe HdkComment, Token)) + -> StringBuffer + -> PsSpan + -> P (PsLocated Token) +commentEnd cont input commentAcc finalizeComment buf span = do setInput input let (AI loc nextBuf) = input comment = reverse commentAcc span' = mkPsSpan (psSpanStart span) loc last_len = byteDiff buf nextBuf - span `seq` setLastToken span' last_len - return (L span' (docType comment)) + let (m_hdk_comment, hdk_token) = finalizeComment comment + whenIsJust m_hdk_comment $ \hdk_comment -> + P $ \s -> POk (s {hdk_comments = hdk_comments s `snocOL` L span' hdk_comment}) () + b <- getBit RawTokenStreamBit + if b then return (L span' hdk_token) + else cont + +docCommentEnd :: AlexInput -> String -> (String -> (HdkComment, Token)) -> StringBuffer -> + PsSpan -> P (PsLocated Token) +docCommentEnd input commentAcc docType buf span = do + let finalizeComment str = + let (hdk_comment, token) = docType str + in (Just hdk_comment, token) + commentEnd lexToken input commentAcc finalizeComment buf span errBrace :: AlexInput -> RealSrcSpan -> P a errBrace (AI end _) span = failLocMsgP (realSrcSpanStart span) (psRealLoc end) "unterminated `{-'" @@ -2170,6 +2199,15 @@ data ParserFlags = ParserFlags { , pExtsBitmap :: !ExtsBitmap -- ^ bitmap of permitted extensions } +-- | Haddock comment as produced by the lexer. These are accumulated in +-- 'PState' and then processed in "GHC.Parser.PostProcess.Haddock". +data HdkComment + = HdkCommentNext HsDocString + | HdkCommentPrev HsDocString + | HdkCommentNamed String HsDocString + | HdkCommentSection Int HsDocString + deriving Show + data PState = PState { buffer :: StringBuffer, options :: ParserFlags, @@ -2211,7 +2249,13 @@ data PState = PState { annotations :: [(ApiAnnKey,[RealSrcSpan])], eof_pos :: Maybe RealSrcSpan, comment_q :: [RealLocated AnnotationComment], - annotations_comments :: [(RealSrcSpan,[RealLocated AnnotationComment])] + annotations_comments :: [(RealSrcSpan,[RealLocated AnnotationComment])], + + -- Haddock comments accumulated in ascending order of their location + -- (BufPos). We use OrdList to get O(1) snoc. + -- + -- See Note [Adding Haddock comments to the syntax tree] in GHC.Parser.PostProcess.Haddock + hdk_comments :: OrdList (PsLocated HdkComment) } -- last_loc and last_len are used when generating error messages, -- and in pushCurrentContext only. Sigh, if only Happy passed the @@ -2698,7 +2742,8 @@ mkPStatePure options buf loc = annotations = [], eof_pos = Nothing, comment_q = [], - annotations_comments = [] + annotations_comments = [], + hdk_comments = nilOL } where init_loc = PsLoc loc (BufPos 0) @@ -2917,10 +2962,6 @@ lexer queueComments cont = do (L span tok) <- lexTokenFun --trace ("token: " ++ show tok) $ do - if (queueComments && isDocComment tok) - then queueComment (L (psRealSpan span) tok) - else return () - if (queueComments && isComment tok) then queueComment (L (psRealSpan span) tok) >> lexer queueComments cont else cont (L (mkSrcSpanPs span) tok) @@ -3372,13 +3413,10 @@ commentToAnnotation _ = panic "commentToAnnotation" isComment :: Token -> Bool isComment (ITlineComment _) = True isComment (ITblockComment _) = True +isComment (ITdocCommentNext _) = True +isComment (ITdocCommentPrev _) = True +isComment (ITdocCommentNamed _) = True +isComment (ITdocSection _ _) = True +isComment (ITdocOptions _) = True isComment _ = False - -isDocComment :: Token -> Bool -isDocComment (ITdocCommentNext _) = True -isDocComment (ITdocCommentPrev _) = True -isDocComment (ITdocCommentNamed _) = True -isDocComment (ITdocSection _ _) = True -isDocComment (ITdocOptions _) = True -isDocComment _ = False } |