summaryrefslogtreecommitdiff
path: root/compiler/GHC/Parser/Lexer.x
diff options
context:
space:
mode:
authorVladislav Zavialov <vlad.z.4096@gmail.com>2019-12-05 03:06:40 +0300
committerBen Gamari <ben@smart-cactus.org>2020-07-21 14:50:01 -0400
commit19e80b9af252eee760dc047765a9930ef00067ec (patch)
treecb45fce4b1e74e1a82c5bd926fda0e92de1964c1 /compiler/GHC/Parser/Lexer.x
parent58235d46bd4e9fbf69bd82969b29cd9c6ab051e1 (diff)
downloadhaskell-19e80b9af252eee760dc047765a9930ef00067ec.tar.gz
Accumulate Haddock comments in P (#17544, #17561, #8944)
Haddock comments are, first and foremost, comments. It's very annoying to incorporate them into the grammar. We can take advantage of an important property: adding a Haddock comment does not change the parse tree in any way other than wrapping some nodes in HsDocTy and the like (and if it does, that's a bug). This patch implements the following: * Accumulate Haddock comments with their locations in the P monad. This is handled in the lexer. * After parsing, do a pass over the AST to associate Haddock comments with AST nodes using location info. * Report the leftover comments to the user as a warning (-Winvalid-haddock).
Diffstat (limited to 'compiler/GHC/Parser/Lexer.x')
-rw-r--r--compiler/GHC/Parser/Lexer.x106
1 files changed, 72 insertions, 34 deletions
diff --git a/compiler/GHC/Parser/Lexer.x b/compiler/GHC/Parser/Lexer.x
index ef9f1803bf..7265e1dffb 100644
--- a/compiler/GHC/Parser/Lexer.x
+++ b/compiler/GHC/Parser/Lexer.x
@@ -66,7 +66,8 @@ module GHC.Parser.Lexer (
lexTokenStream,
AddAnn(..),mkParensApiAnn,
addAnnsAt,
- commentToAnnotation
+ commentToAnnotation,
+ HdkComment(..),
) where
import GHC.Prelude
@@ -97,6 +98,8 @@ import GHC.Utils.Outputable
import GHC.Data.StringBuffer
import GHC.Data.FastString
import GHC.Types.Unique.FM
+import GHC.Data.Maybe
+import GHC.Data.OrdList
import GHC.Utils.Misc ( readRational, readHexRational )
-- compiler/main
@@ -109,6 +112,7 @@ import GHC.Unit
import GHC.Types.Basic ( InlineSpec(..), RuleMatchInfo(..),
IntegralLit(..), FractionalLit(..),
SourceText(..) )
+import GHC.Hs.Doc
-- compiler/parser
import GHC.Parser.CharClass
@@ -363,10 +367,8 @@ $tab { warnTab }
-- Haddock comments
-<0,option_prags> {
- "-- " $docsym / { ifExtension HaddockBit } { multiline_doc_comment }
- "{-" \ ? $docsym / { ifExtension HaddockBit } { nested_doc_comment }
-}
+"-- " $docsym / { ifExtension HaddockBit } { multiline_doc_comment }
+"{-" \ ? $docsym / { ifExtension HaddockBit } { nested_doc_comment }
-- "special" symbols
@@ -1271,11 +1273,8 @@ nested_comment cont span buf len = do
go (reverse $ lexemeToString buf len) (1::Int) input
where
go commentAcc 0 input = do
- setInput input
- b <- getBit RawTokenStreamBit
- if b
- then docCommentEnd input commentAcc ITblockComment buf span
- else cont
+ let finalizeComment str = (Nothing, ITblockComment str)
+ commentEnd cont input commentAcc finalizeComment buf span
go commentAcc n input = case alexGetChar' input of
Nothing -> errBrace input (psRealSpan span)
Just ('-',input) -> case alexGetChar' input of
@@ -1365,24 +1364,37 @@ return control to parseNestedPragma by returning the ITcomment_line_prag token.
See #314 for more background on the bug this fixes.
-}
-withLexedDocType :: (AlexInput -> (String -> Token) -> Bool -> P (PsLocated Token))
+withLexedDocType :: (AlexInput -> (String -> (HdkComment, Token)) -> Bool -> P (PsLocated Token))
-> P (PsLocated Token)
withLexedDocType lexDocComment = do
input@(AI _ buf) <- getInput
case prevChar buf ' ' of
-- The `Bool` argument to lexDocComment signals whether or not the next
-- line of input might also belong to this doc comment.
- '|' -> lexDocComment input ITdocCommentNext True
- '^' -> lexDocComment input ITdocCommentPrev True
- '$' -> lexDocComment input ITdocCommentNamed True
+ '|' -> lexDocComment input mkHdkCommentNext True
+ '^' -> lexDocComment input mkHdkCommentPrev True
+ '$' -> lexDocComment input mkHdkCommentNamed True
'*' -> lexDocSection 1 input
_ -> panic "withLexedDocType: Bad doc type"
where
lexDocSection n input = case alexGetChar' input of
Just ('*', input) -> lexDocSection (n+1) input
- Just (_, _) -> lexDocComment input (ITdocSection n) False
+ Just (_, _) -> lexDocComment input (mkHdkCommentSection n) False
Nothing -> do setInput input; lexToken -- eof reached, lex it normally
+mkHdkCommentNext, mkHdkCommentPrev :: String -> (HdkComment, Token)
+mkHdkCommentNext str = (HdkCommentNext (mkHsDocString str), ITdocCommentNext str)
+mkHdkCommentPrev str = (HdkCommentPrev (mkHsDocString str), ITdocCommentPrev str)
+
+mkHdkCommentNamed :: String -> (HdkComment, Token)
+mkHdkCommentNamed str =
+ let (name, rest) = break isSpace str
+ in (HdkCommentNamed name (mkHsDocString rest), ITdocCommentNamed str)
+
+mkHdkCommentSection :: Int -> String -> (HdkComment, Token)
+mkHdkCommentSection n str =
+ (HdkCommentSection n (mkHsDocString str), ITdocSection n str)
+
-- RULES pragmas turn on the forall and '.' keywords, and we turn them
-- off again at the end of the pragma.
rulePrag :: Action
@@ -1425,17 +1437,34 @@ endPrag span _buf _len = do
-- it writes the wrong token length to the parser state. This function is
-- called afterwards, so it can just update the state.
-docCommentEnd :: AlexInput -> String -> (String -> Token) -> StringBuffer ->
- PsSpan -> P (PsLocated Token)
-docCommentEnd input commentAcc docType buf span = do
+commentEnd :: P (PsLocated Token)
+ -> AlexInput
+ -> String
+ -> (String -> (Maybe HdkComment, Token))
+ -> StringBuffer
+ -> PsSpan
+ -> P (PsLocated Token)
+commentEnd cont input commentAcc finalizeComment buf span = do
setInput input
let (AI loc nextBuf) = input
comment = reverse commentAcc
span' = mkPsSpan (psSpanStart span) loc
last_len = byteDiff buf nextBuf
-
span `seq` setLastToken span' last_len
- return (L span' (docType comment))
+ let (m_hdk_comment, hdk_token) = finalizeComment comment
+ whenIsJust m_hdk_comment $ \hdk_comment ->
+ P $ \s -> POk (s {hdk_comments = hdk_comments s `snocOL` L span' hdk_comment}) ()
+ b <- getBit RawTokenStreamBit
+ if b then return (L span' hdk_token)
+ else cont
+
+docCommentEnd :: AlexInput -> String -> (String -> (HdkComment, Token)) -> StringBuffer ->
+ PsSpan -> P (PsLocated Token)
+docCommentEnd input commentAcc docType buf span = do
+ let finalizeComment str =
+ let (hdk_comment, token) = docType str
+ in (Just hdk_comment, token)
+ commentEnd lexToken input commentAcc finalizeComment buf span
errBrace :: AlexInput -> RealSrcSpan -> P a
errBrace (AI end _) span = failLocMsgP (realSrcSpanStart span) (psRealLoc end) "unterminated `{-'"
@@ -2170,6 +2199,15 @@ data ParserFlags = ParserFlags {
, pExtsBitmap :: !ExtsBitmap -- ^ bitmap of permitted extensions
}
+-- | Haddock comment as produced by the lexer. These are accumulated in
+-- 'PState' and then processed in "GHC.Parser.PostProcess.Haddock".
+data HdkComment
+ = HdkCommentNext HsDocString
+ | HdkCommentPrev HsDocString
+ | HdkCommentNamed String HsDocString
+ | HdkCommentSection Int HsDocString
+ deriving Show
+
data PState = PState {
buffer :: StringBuffer,
options :: ParserFlags,
@@ -2211,7 +2249,13 @@ data PState = PState {
annotations :: [(ApiAnnKey,[RealSrcSpan])],
eof_pos :: Maybe RealSrcSpan,
comment_q :: [RealLocated AnnotationComment],
- annotations_comments :: [(RealSrcSpan,[RealLocated AnnotationComment])]
+ annotations_comments :: [(RealSrcSpan,[RealLocated AnnotationComment])],
+
+ -- Haddock comments accumulated in ascending order of their location
+ -- (BufPos). We use OrdList to get O(1) snoc.
+ --
+ -- See Note [Adding Haddock comments to the syntax tree] in GHC.Parser.PostProcess.Haddock
+ hdk_comments :: OrdList (PsLocated HdkComment)
}
-- last_loc and last_len are used when generating error messages,
-- and in pushCurrentContext only. Sigh, if only Happy passed the
@@ -2698,7 +2742,8 @@ mkPStatePure options buf loc =
annotations = [],
eof_pos = Nothing,
comment_q = [],
- annotations_comments = []
+ annotations_comments = [],
+ hdk_comments = nilOL
}
where init_loc = PsLoc loc (BufPos 0)
@@ -2917,10 +2962,6 @@ lexer queueComments cont = do
(L span tok) <- lexTokenFun
--trace ("token: " ++ show tok) $ do
- if (queueComments && isDocComment tok)
- then queueComment (L (psRealSpan span) tok)
- else return ()
-
if (queueComments && isComment tok)
then queueComment (L (psRealSpan span) tok) >> lexer queueComments cont
else cont (L (mkSrcSpanPs span) tok)
@@ -3372,13 +3413,10 @@ commentToAnnotation _ = panic "commentToAnnotation"
isComment :: Token -> Bool
isComment (ITlineComment _) = True
isComment (ITblockComment _) = True
+isComment (ITdocCommentNext _) = True
+isComment (ITdocCommentPrev _) = True
+isComment (ITdocCommentNamed _) = True
+isComment (ITdocSection _ _) = True
+isComment (ITdocOptions _) = True
isComment _ = False
-
-isDocComment :: Token -> Bool
-isDocComment (ITdocCommentNext _) = True
-isDocComment (ITdocCommentPrev _) = True
-isDocComment (ITdocCommentNamed _) = True
-isDocComment (ITdocSection _ _) = True
-isDocComment (ITdocOptions _) = True
-isDocComment _ = False
}