diff options
author | Zubin Duggal <zubin@cmi.ac.in> | 2022-03-12 00:07:56 +0000 |
---|---|---|
committer | Matthew Pickering <matthewtpickering@gmail.com> | 2022-03-22 22:16:48 +0000 |
commit | 26819793f836f29f7c04ac0ac9c43d363eb5beb8 (patch) | |
tree | 406809b6a923bb84d1370874500017e69c6681d9 /compiler/GHC/Parser | |
parent | d45bb70178e044bc8b6e8215da7bc8ed0c95f2cb (diff) | |
download | haskell-wip/hi-haddock2021.tar.gz |
hi haddock: Lex and store haddock docs in interface files (wip/hi-haddock2021)
Names appearing in Haddock docstrings are lexed and renamed like any other names
appearing in the AST. We currently rename names irrespective of the namespace,
so both type and constructor names corresponding to an identifier will appear in
the docstring. Haddock will select a given name as the link destination based on
its own heuristics.
This patch also removes the limitation that `-haddock` was incompatible with
`Opt_KeepRawTokenStream`.
The export and documentation structure is now computed in GHC and serialised in
.hi files. This can be used by haddock to directly generate doc pages without
reparsing or renaming the source. At the moment the operation of haddock
is not modified; that is left to a future patch.
Updates the haddock submodule with the minimum changes needed.
Diffstat (limited to 'compiler/GHC/Parser')
-rw-r--r-- | compiler/GHC/Parser/Annotation.hs | 19 | ||||
-rw-r--r-- | compiler/GHC/Parser/HaddockLex.x | 201 | ||||
-rw-r--r-- | compiler/GHC/Parser/Header.hs | 12 | ||||
-rw-r--r-- | compiler/GHC/Parser/Lexer.x | 218 | ||||
-rw-r--r-- | compiler/GHC/Parser/PostProcess/Haddock.hs | 80 |
5 files changed, 380 insertions, 150 deletions
diff --git a/compiler/GHC/Parser/Annotation.hs b/compiler/GHC/Parser/Annotation.hs index 1f48615aec..dd0cdd3123 100644 --- a/compiler/GHC/Parser/Annotation.hs +++ b/compiler/GHC/Parser/Annotation.hs @@ -93,6 +93,7 @@ import Data.Semigroup import GHC.Data.FastString import GHC.Types.Name import GHC.Types.SrcLoc +import GHC.Hs.DocString import GHC.Utils.Binary import GHC.Utils.Outputable hiding ( (<>) ) import GHC.Utils.Panic @@ -358,14 +359,11 @@ data EpaComment = -- and the start of this location is used for the spacing when -- exact printing the comment. } - deriving (Eq, Ord, Data, Show) + deriving (Eq, Data, Show) data EpaCommentTok = -- Documentation annotations - EpaDocCommentNext String -- ^ something beginning '-- |' - | EpaDocCommentPrev String -- ^ something beginning '-- ^' - | EpaDocCommentNamed String -- ^ something beginning '-- $' - | EpaDocSection Int String -- ^ a section heading + EpaDocComment HsDocString -- ^ a docstring that can be pretty printed using pprHsDocString | EpaDocOptions String -- ^ doc options (prune, ignore-exports, etc) | EpaLineComment String -- ^ comment starting by "--" | EpaBlockComment String -- ^ comment in {- -} @@ -376,7 +374,7 @@ data EpaCommentTok = -- should be removed in favour of capturing it in the location for -- 'Located HsModule' in the parser. - deriving (Eq, Ord, Data, Show) + deriving (Eq, Data, Show) -- Note: these are based on the Token versions, but the Token type is -- defined in GHC.Parser.Lexer and bringing it in here would create a loop @@ -407,12 +405,12 @@ data AddEpAnn = AddEpAnn AnnKeywordId EpaLocation deriving (Data,Eq) -- sort the relative order. data EpaLocation = EpaSpan !RealSrcSpan | EpaDelta !DeltaPos ![LEpaComment] - deriving (Data,Eq,Ord) + deriving (Data,Eq) -- | Tokens embedded in the AST have an EpaLocation, unless they come from -- generated code (e.g. by TH). 
data TokenLocation = NoTokenLoc | TokenLoc !EpaLocation - deriving (Data,Eq,Ord) + deriving (Data,Eq) -- | Spacing between output items when exact printing. It captures -- the spacing from the current print position on the page to the @@ -460,9 +458,6 @@ instance Outputable EpaLocation where instance Outputable AddEpAnn where ppr (AddEpAnn kw ss) = text "AddEpAnn" <+> ppr kw <+> ppr ss -instance Ord AddEpAnn where - compare (AddEpAnn kw1 loc1) (AddEpAnn kw2 loc2) = compare (loc1, kw1) (loc2,kw2) - -- --------------------------------------------------------------------- -- | The exact print annotations (EPAs) are kept in the HsSyn AST for @@ -640,7 +635,7 @@ data TrailingAnn = AddSemiAnn EpaLocation -- ^ Trailing ';' | AddCommaAnn EpaLocation -- ^ Trailing ',' | AddVbarAnn EpaLocation -- ^ Trailing '|' - deriving (Data, Eq, Ord) + deriving (Data, Eq) instance Outputable TrailingAnn where ppr (AddSemiAnn ss) = text "AddSemiAnn" <+> ppr ss diff --git a/compiler/GHC/Parser/HaddockLex.x b/compiler/GHC/Parser/HaddockLex.x new file mode 100644 index 0000000000..e215769f9e --- /dev/null +++ b/compiler/GHC/Parser/HaddockLex.x @@ -0,0 +1,201 @@ +{ +{-# LANGUAGE BangPatterns #-} +{-# LANGUAGE GeneralizedNewtypeDeriving #-} +{-# OPTIONS_GHC -funbox-strict-fields #-} + +module GHC.Parser.HaddockLex (lexHsDoc, lexStringLiteral) where + +import GHC.Prelude + +import GHC.Data.FastString +import GHC.Hs.Doc +import GHC.Parser.Lexer +import GHC.Parser.Annotation +import GHC.Types.SrcLoc +import GHC.Types.SourceText +import GHC.Data.StringBuffer +import qualified GHC.Data.Strict as Strict +import GHC.Types.Name.Reader +import GHC.Utils.Outputable +import GHC.Utils.Error +import GHC.Utils.Encoding +import GHC.Hs.Extension + +import qualified GHC.Data.EnumSet as EnumSet + +import Data.Maybe +import Data.Word + +import Data.ByteString ( ByteString ) +import qualified Data.ByteString as BS + +import qualified GHC.LanguageExtensions as LangExt +} + +-- 
----------------------------------------------------------------------------- +-- Alex "Character set macros" +-- Copied from GHC/Parser/Lexer.x + +-- NB: The logic behind these definitions is also reflected in "GHC.Utils.Lexeme" +-- Any changes here should likely be reflected there. +$unispace = \x05 -- Trick Alex into handling Unicode. See Note [Unicode in Alex]. +$nl = [\n\r\f] +$whitechar = [$nl\v\ $unispace] +$white_no_nl = $whitechar # \n -- TODO #8424 +$tab = \t + +$ascdigit = 0-9 +$unidigit = \x03 -- Trick Alex into handling Unicode. See Note [Unicode in Alex]. +$decdigit = $ascdigit -- exactly $ascdigit, no more no less. +$digit = [$ascdigit $unidigit] + +$special = [\(\)\,\;\[\]\`\{\}] +$ascsymbol = [\!\#\$\%\&\*\+\.\/\<\=\>\?\@\\\^\|\-\~\:] +$unisymbol = \x04 -- Trick Alex into handling Unicode. See Note [Unicode in Alex]. +$symbol = [$ascsymbol $unisymbol] # [$special \_\"\'] + +$unilarge = \x01 -- Trick Alex into handling Unicode. See Note [Unicode in Alex]. +$asclarge = [A-Z] +$large = [$asclarge $unilarge] + +$unismall = \x02 -- Trick Alex into handling Unicode. See Note [Unicode in Alex]. +$ascsmall = [a-z] +$small = [$ascsmall $unismall \_] + +$uniidchar = \x07 -- Trick Alex into handling Unicode. See Note [Unicode in Alex]. +$idchar = [$small $large $digit $uniidchar \'] + +$unigraphic = \x06 -- Trick Alex into handling Unicode. See Note [Unicode in Alex]. +$graphic = [$small $large $symbol $digit $idchar $special $unigraphic \"\'] + +$alpha = [$small $large] + +-- The character sets marked "TODO" are mostly overly inclusive +-- and should be defined more precisely once alex has better +-- support for unicode character sets (see +-- https://github.com/simonmar/alex/issues/126). + +@id = $alpha $idchar* \#* | $symbol+ +@modname = $large $idchar* +@qualid = (@modname \.)* @id + +:- + \' @qualid \' | \` @qualid \` { getIdentifier 1 } + \'\` @qualid \`\' | \'\( @qualid \)\' | \`\( @qualid \)\` { getIdentifier 2 } + [. 
\n] ; + +{ +data AlexInput = AlexInput + { alexInput_position :: !RealSrcLoc + , alexInput_string :: !ByteString + } + +-- NB: As long as we don't use a left-context we don't need to track the +-- previous input character. +alexInputPrevChar :: AlexInput -> Word8 +alexInputPrevChar = error "Left-context not supported" + +alexGetByte :: AlexInput -> Maybe (Word8, AlexInput) +alexGetByte (AlexInput p s) = case utf8UnconsByteString s of + Nothing -> Nothing + Just (c,bs) -> Just (adjustChar c, AlexInput (advanceSrcLoc p c) bs) + +alexScanTokens :: RealSrcLoc -> ByteString -> [(RealSrcSpan, ByteString)] +alexScanTokens start str0 = go (AlexInput start str0) + where go inp@(AlexInput pos str) = + case alexScan inp 0 of + AlexSkip inp' _ln -> go inp' + AlexToken inp'@(AlexInput _ str') _ act -> act pos (BS.length str - BS.length str') str : go inp' + AlexEOF -> [] + AlexError (AlexInput p _) -> error $ "lexical error at " ++ show p + +-------------------------------------------------------------------------------- + +-- | Extract identifier from Alex state. 
+getIdentifier :: Int -- ^ adornment length + -> RealSrcLoc + -> Int + -- ^ Token length + -> ByteString + -- ^ The remaining input beginning with the found token + -> (RealSrcSpan, ByteString) +getIdentifier !i !loc0 !len0 !s0 = + (mkRealSrcSpan loc1 loc2, ident) + where + (adornment, s1) = BS.splitAt i s0 + ident = BS.take (len0 - 2*i) s1 + loc1 = advanceSrcLocBS loc0 adornment + loc2 = advanceSrcLocBS loc1 ident + +advanceSrcLocBS :: RealSrcLoc -> ByteString -> RealSrcLoc +advanceSrcLocBS !loc bs = case utf8UnconsByteString bs of + Nothing -> loc + Just (c, bs') -> advanceSrcLocBS (advanceSrcLoc loc c) bs' + +-- | Lex 'StringLiteral' for warning messages +lexStringLiteral :: P (LocatedN RdrName) -- ^ A precise identifier parser + -> Located StringLiteral + -> Located (WithHsDocIdentifiers StringLiteral GhcPs) +lexStringLiteral identParser (L l sl@(StringLiteral _ fs _)) + = L l (WithHsDocIdentifiers sl idents) + where + bs = bytesFS fs + + idents = mapMaybe (uncurry (validateIdentWith identParser)) plausibleIdents + + plausibleIdents :: [(SrcSpan,ByteString)] + plausibleIdents = case l of + RealSrcSpan span _ -> [(RealSrcSpan span' Strict.Nothing, tok) | (span', tok) <- alexScanTokens (realSrcSpanStart span) bs] + UnhelpfulSpan reason -> [(UnhelpfulSpan reason, tok) | (_, tok) <- alexScanTokens fakeLoc bs] + + fakeLoc = mkRealSrcLoc (mkFastString "") 0 0 + +-- | Lex identifiers from a docstring. 
+lexHsDoc :: P (LocatedN RdrName) -- ^ A precise identifier parser + -> HsDocString + -> HsDoc GhcPs +lexHsDoc identParser doc = + WithHsDocIdentifiers doc idents + where + docStrings = docStringChunks doc + idents = concat [mapMaybe maybeDocIdentifier (plausibleIdents doc) | doc <- docStrings] + + maybeDocIdentifier :: (SrcSpan, ByteString) -> Maybe (Located RdrName) + maybeDocIdentifier = uncurry (validateIdentWith identParser) + + plausibleIdents :: LHsDocStringChunk -> [(SrcSpan,ByteString)] + plausibleIdents (L (RealSrcSpan span _) (HsDocStringChunk s)) + = [(RealSrcSpan span' Strict.Nothing, tok) | (span', tok) <- alexScanTokens (realSrcSpanStart span) s] + plausibleIdents (L (UnhelpfulSpan reason) (HsDocStringChunk s)) + = [(UnhelpfulSpan reason, tok) | (_, tok) <- alexScanTokens fakeLoc s] -- preserve the original reason + + fakeLoc = mkRealSrcLoc (mkFastString "") 0 0 + +validateIdentWith :: P (LocatedN RdrName) -> SrcSpan -> ByteString -> Maybe (Located RdrName) +validateIdentWith identParser mloc str0 = + let -- These ParserFlags should be as "inclusive" as possible, allowing + -- identifiers defined with any language extension. 
+ pflags = mkParserOpts + (EnumSet.fromList [LangExt.MagicHash]) + dopts + [] + False False False False + dopts = DiagOpts + { diag_warning_flags = EnumSet.empty + , diag_fatal_warning_flags = EnumSet.empty + , diag_warn_is_error = False + , diag_reverse_errors = False + , diag_max_errors = Nothing + , diag_ppr_ctx = defaultSDocContext + } + buffer = stringBufferFromByteString str0 + realSrcLc = case mloc of + RealSrcSpan loc _ -> realSrcSpanStart loc + UnhelpfulSpan _ -> mkRealSrcLoc (mkFastString "") 0 0 + pstate = initParserState pflags buffer realSrcLc + in case unP identParser pstate of + POk _ name -> Just $ case mloc of + RealSrcSpan _ _ -> reLoc name + UnhelpfulSpan _ -> L mloc (unLoc name) -- Preserve the original reason + _ -> Nothing +} diff --git a/compiler/GHC/Parser/Header.hs b/compiler/GHC/Parser/Header.hs index cb8a5c334e..87f20b5c9c 100644 --- a/compiler/GHC/Parser/Header.hs +++ b/compiler/GHC/Parser/Header.hs @@ -1,4 +1,3 @@ - {-# LANGUAGE TypeFamilies #-} ----------------------------------------------------------------------------- @@ -301,13 +300,10 @@ getOptions' opts toks isComment :: Token -> Bool isComment c = case c of - (ITlineComment {}) -> True - (ITblockComment {}) -> True - (ITdocCommentNext {}) -> True - (ITdocCommentPrev {}) -> True - (ITdocCommentNamed {}) -> True - (ITdocSection {}) -> True - _ -> False + (ITlineComment {}) -> True + (ITblockComment {}) -> True + (ITdocComment {}) -> True + _ -> False toArgs :: RealSrcLoc -> String -> Either String -- Error diff --git a/compiler/GHC/Parser/Lexer.x b/compiler/GHC/Parser/Lexer.x index 7d7d157d2b..02717c7dae 100644 --- a/compiler/GHC/Parser/Lexer.x +++ b/compiler/GHC/Parser/Lexer.x @@ -76,6 +76,7 @@ module GHC.Parser.Lexer ( commentToAnnotation, HdkComment(..), warnopt, + adjustChar, addPsMessage ) where @@ -87,6 +88,8 @@ import Control.Monad import Control.Applicative import Data.Char import Data.List (stripPrefix, isInfixOf, partition) +import Data.List.NonEmpty ( NonEmpty(..) 
) +import qualified Data.List.NonEmpty as NE import Data.Maybe import Data.Word import Debug.Trace (trace) @@ -134,34 +137,34 @@ import GHC.Parser.Errors.Ppr () -- NB: The logic behind these definitions is also reflected in "GHC.Utils.Lexeme" -- Any changes here should likely be reflected there. -$unispace = \x05 -- Trick Alex into handling Unicode. See [Unicode in Alex]. +$unispace = \x05 -- Trick Alex into handling Unicode. See Note [Unicode in Alex]. $nl = [\n\r\f] $whitechar = [$nl\v\ $unispace] $white_no_nl = $whitechar # \n -- TODO #8424 $tab = \t $ascdigit = 0-9 -$unidigit = \x03 -- Trick Alex into handling Unicode. See [Unicode in Alex]. +$unidigit = \x03 -- Trick Alex into handling Unicode. See Note [Unicode in Alex]. $decdigit = $ascdigit -- exactly $ascdigit, no more no less. $digit = [$ascdigit $unidigit] $special = [\(\)\,\;\[\]\`\{\}] $ascsymbol = [\!\#\$\%\&\*\+\.\/\<\=\>\?\@\\\^\|\-\~\:] -$unisymbol = \x04 -- Trick Alex into handling Unicode. See [Unicode in Alex]. +$unisymbol = \x04 -- Trick Alex into handling Unicode. See Note [Unicode in Alex]. $symbol = [$ascsymbol $unisymbol] # [$special \_\"\'] -$unilarge = \x01 -- Trick Alex into handling Unicode. See [Unicode in Alex]. +$unilarge = \x01 -- Trick Alex into handling Unicode. See Note [Unicode in Alex]. $asclarge = [A-Z] $large = [$asclarge $unilarge] -$unismall = \x02 -- Trick Alex into handling Unicode. See [Unicode in Alex]. +$unismall = \x02 -- Trick Alex into handling Unicode. See Note [Unicode in Alex]. $ascsmall = [a-z] $small = [$ascsmall $unismall \_] -$uniidchar = \x07 -- Trick Alex into handling Unicode. See [Unicode in Alex]. +$uniidchar = \x07 -- Trick Alex into handling Unicode. See Note [Unicode in Alex]. $idchar = [$small $large $digit $uniidchar \'] -$unigraphic = \x06 -- Trick Alex into handling Unicode. See [Unicode in Alex]. +$unigraphic = \x06 -- Trick Alex into handling Unicode. See Note [Unicode in Alex]. 
$graphic = [$small $large $symbol $digit $idchar $special $unigraphic \"\'] $binit = 0-1 @@ -230,7 +233,7 @@ $tab { warnTab } -- are). We also rule out nested Haddock comments, if the -haddock flag is -- set. -"{-" / { isNormalComment } { nested_comment lexToken } +"{-" / { isNormalComment } { nested_comment } -- Single-line comments are a bit tricky. Haskell 98 says that two or -- more dashes followed by a symbol should be parsed as a varsym, so we @@ -364,12 +367,12 @@ $tab { warnTab } <0> { -- In the "0" mode we ignore these pragmas "{-#" $whitechar* $pragmachar+ / { known_pragma fileHeaderPrags } - { nested_comment lexToken } + { nested_comment } } <0,option_prags> { "{-#" { warnThen PsWarnUnrecognisedPragma - (nested_comment lexToken) } + (nested_comment ) } } -- '0' state: ordinary lexemes @@ -883,13 +886,11 @@ data Token | ITeof -- ^ end of file token -- Documentation annotations. See Note [PsSpan in Comments] - | ITdocCommentNext String PsSpan -- ^ something beginning @-- |@ - | ITdocCommentPrev String PsSpan -- ^ something beginning @-- ^@ - | ITdocCommentNamed String PsSpan -- ^ something beginning @-- $@ - | ITdocSection Int String PsSpan -- ^ a section heading - | ITdocOptions String PsSpan -- ^ doc options (prune, ignore-exports, etc) - | ITlineComment String PsSpan -- ^ comment starting by "--" - | ITblockComment String PsSpan -- ^ comment in {- -} + | ITdocComment HsDocString PsSpan -- ^ The HsDocString contains more details about what + -- this is and how to pretty print it + | ITdocOptions String PsSpan -- ^ doc options (prune, ignore-exports, etc) + | ITlineComment String PsSpan -- ^ comment starting by "--" + | ITblockComment String PsSpan -- ^ comment in {- -} deriving Show @@ -1280,16 +1281,23 @@ alexOrPred p1 p2 userState in1 len in2 = p1 userState in1 len in2 || p2 userState in1 len in2 multiline_doc_comment :: Action -multiline_doc_comment span buf _len = withLexedDocType (worker "") +multiline_doc_comment span buf _len = {-# SCC 
"multiline_doc_comment" #-} withLexedDocType worker where - worker commentAcc input docType checkNextLine = case alexGetChar' input of - Just ('\n', input') - | checkNextLine -> case checkIfCommentLine input' of - Just input -> worker ('\n':commentAcc) input docType checkNextLine - Nothing -> docCommentEnd input commentAcc docType buf span - | otherwise -> docCommentEnd input commentAcc docType buf span - Just (c, input) -> worker (c:commentAcc) input docType checkNextLine - Nothing -> docCommentEnd input commentAcc docType buf span + worker input@(AI start_loc _) docType checkNextLine = go start_loc "" [] input + where + go start_loc curLine prevLines input@(AI end_loc _) = case alexGetChar' input of + Just ('\n', input') + | checkNextLine -> case checkIfCommentLine input' of + Just input@(AI next_start _) -> go next_start "" (locatedLine : prevLines) input -- Start a new line + Nothing -> endComment + | otherwise -> endComment + Just (c, input) -> go start_loc (c:curLine) prevLines input + Nothing -> endComment + where + lineSpan = mkSrcSpanPs $ mkPsSpan start_loc end_loc + locatedLine = L lineSpan (mkHsDocStringChunk $ reverse curLine) + commentLines = NE.reverse $ locatedLine :| prevLines + endComment = docCommentEnd input (docType (\dec -> MultiLineDocString dec commentLines)) buf span -- Check if the next line of input belongs to this doc comment as well. -- A doc comment continues onto the next line when the following @@ -1331,15 +1339,43 @@ lineCommentToken span buf len = do nested comments require traversing by hand, they can't be parsed using regular expressions. 
-} -nested_comment :: P (PsLocated Token) -> Action -nested_comment cont span buf len = do +nested_comment :: Action +nested_comment span buf len = {-# SCC "nested_comment" #-} do + l <- getLastLocComment + let endComment input (L _ comment) = commentEnd lexToken input (Nothing, ITblockComment comment l) buf span input <- getInput - go (reverse $ lexemeToString buf len) (1::Int) input + -- Include decorator in comment + let start_decorator = reverse $ lexemeToString buf len + nested_comment_logic endComment start_decorator input span + +nested_doc_comment :: Action +nested_doc_comment span buf _len = {-# SCC "nested_doc_comment" #-} withLexedDocType worker + where + worker input docType _checkNextLine = nested_comment_logic endComment "" input span + where + endComment input lcomment + = docCommentEnd input (docType (\d -> NestedDocString d (mkHsDocStringChunk . dropTrailingDec <$> lcomment))) buf span + + dropTrailingDec [] = [] + dropTrailingDec "-}" = "" + dropTrailingDec (x:xs) = x:dropTrailingDec xs + +{-# INLINE nested_comment_logic #-} +-- | Includes the trailing '-}' decorators +-- drop the last two elements with the callback if you don't want them to be included +nested_comment_logic + :: (AlexInput -> Located String -> P (PsLocated Token)) -- ^ Continuation that gets the rest of the input and the lexed comment + -> String -- ^ starting value for accumulator (reversed) - When we want to include a decorator '{-' in the comment + -> AlexInput + -> PsSpan + -> P (PsLocated Token) +nested_comment_logic endComment commentAcc input span = go commentAcc (1::Int) input where - go commentAcc 0 input = do - l <- getLastLocComment - let finalizeComment str = (Nothing, ITblockComment str l) - commentEnd cont input commentAcc finalizeComment buf span + go commentAcc 0 input@(AI end_loc _) = do + let comment = reverse commentAcc + cspan = mkSrcSpanPs $ mkPsSpan (psSpanStart span) end_loc + lcomment = L cspan comment + endComment input lcomment go commentAcc n input = 
case alexGetChar' input of Nothing -> errBrace input (psRealSpan span) Just ('-',input) -> case alexGetChar' input of @@ -1358,31 +1394,6 @@ nested_comment cont span buf len = do Just (_,_) -> go ('\n':commentAcc) n input Just (c,input) -> go (c:commentAcc) n input -nested_doc_comment :: Action -nested_doc_comment span buf _len = withLexedDocType (go "") - where - go commentAcc input docType _ = case alexGetChar' input of - Nothing -> errBrace input (psRealSpan span) - Just ('-',input) -> case alexGetChar' input of - Nothing -> errBrace input (psRealSpan span) - Just ('\125',input) -> - docCommentEnd input commentAcc docType buf span - Just (_,_) -> go ('-':commentAcc) input docType False - Just ('\123', input) -> case alexGetChar' input of - Nothing -> errBrace input (psRealSpan span) - Just ('-',input) -> do - setInput input - let cont = do input <- getInput; go commentAcc input docType False - nested_comment cont span buf _len - Just (_,_) -> go ('\123':commentAcc) input docType False - -- See Note [Nested comment line pragmas] - Just ('\n',input) -> case alexGetChar' input of - Nothing -> errBrace input (psRealSpan span) - Just ('#',_) -> do (parsedAcc,input) <- parseNestedPragma input - go (parsedAcc ++ '\n':commentAcc) input docType False - Just (_,_) -> go ('\n':commentAcc) input docType False - Just (c,input) -> go (c:commentAcc) input docType False - -- See Note [Nested comment line pragmas] parseNestedPragma :: AlexInput -> P (String,AlexInput) parseNestedPragma input@(AI _ buf) = do @@ -1429,7 +1440,8 @@ return control to parseNestedPragma by returning the ITcomment_line_prag token. See #314 for more background on the bug this fixes. 
-} -withLexedDocType :: (AlexInput -> (String -> (HdkComment, Token)) -> Bool -> P (PsLocated Token)) +{-# INLINE withLexedDocType #-} +withLexedDocType :: (AlexInput -> ((HsDocStringDecorator -> HsDocString) -> (HdkComment, Token)) -> Bool -> P (PsLocated Token)) -> P (PsLocated Token) withLexedDocType lexDocComment = do input@(AI _ buf) <- getInput @@ -1439,7 +1451,9 @@ withLexedDocType lexDocComment = do -- line of input might also belong to this doc comment. '|' -> lexDocComment input (mkHdkCommentNext l) True '^' -> lexDocComment input (mkHdkCommentPrev l) True - '$' -> lexDocComment input (mkHdkCommentNamed l) True + '$' -> case lexDocName input of + Nothing -> do setInput input; lexToken -- eof reached, lex it normally + Just (name, input) -> lexDocComment input (mkHdkCommentNamed l name) True '*' -> lexDocSection l 1 input _ -> panic "withLexedDocType: Bad doc type" where @@ -1448,18 +1462,28 @@ withLexedDocType lexDocComment = do Just (_, _) -> lexDocComment input (mkHdkCommentSection l n) False Nothing -> do setInput input; lexToken -- eof reached, lex it normally -mkHdkCommentNext, mkHdkCommentPrev :: PsSpan -> String -> (HdkComment, Token) -mkHdkCommentNext loc str = (HdkCommentNext (mkHsDocString str), ITdocCommentNext str loc) -mkHdkCommentPrev loc str = (HdkCommentPrev (mkHsDocString str), ITdocCommentPrev str loc) + lexDocName :: AlexInput -> Maybe (String, AlexInput) + lexDocName = go "" + where + go acc input = case alexGetChar' input of + Just (c, input') + | isSpace c -> Just (reverse acc, input) + | otherwise -> go (c:acc) input' + Nothing -> Nothing + +mkHdkCommentNext, mkHdkCommentPrev :: PsSpan -> (HsDocStringDecorator -> HsDocString) -> (HdkComment, Token) +mkHdkCommentNext loc mkDS = (HdkCommentNext ds,ITdocComment ds loc) + where ds = mkDS HsDocStringNext +mkHdkCommentPrev loc mkDS = (HdkCommentPrev ds,ITdocComment ds loc) + where ds = mkDS HsDocStringPrevious -mkHdkCommentNamed :: PsSpan -> String -> (HdkComment, Token) 
-mkHdkCommentNamed loc str = - let (name, rest) = break isSpace str - in (HdkCommentNamed name (mkHsDocString rest), ITdocCommentNamed str loc) +mkHdkCommentNamed :: PsSpan -> String -> (HsDocStringDecorator -> HsDocString) -> (HdkComment, Token) +mkHdkCommentNamed loc name mkDS = (HdkCommentNamed name ds, ITdocComment ds loc) + where ds = mkDS (HsDocStringNamed name) -mkHdkCommentSection :: PsSpan -> Int -> String -> (HdkComment, Token) -mkHdkCommentSection loc n str = - (HdkCommentSection n (mkHsDocString str), ITdocSection n str loc) +mkHdkCommentSection :: PsSpan -> Int -> (HsDocStringDecorator -> HsDocString) -> (HdkComment, Token) +mkHdkCommentSection loc n mkDS = (HdkCommentSection n ds, ITdocComment ds loc) + where ds = mkDS (HsDocStringGroup n) -- RULES pragmas turn on the forall and '.' keywords, and we turn them -- off again at the end of the pragma. @@ -1503,34 +1527,30 @@ endPrag span _buf _len = do -- it writes the wrong token length to the parser state. This function is -- called afterwards, so it can just update the state. 
+{-# INLINE commentEnd #-} commentEnd :: P (PsLocated Token) -> AlexInput - -> String - -> (String -> (Maybe HdkComment, Token)) + -> (Maybe HdkComment, Token) -> StringBuffer -> PsSpan -> P (PsLocated Token) -commentEnd cont input commentAcc finalizeComment buf span = do +commentEnd cont input (m_hdk_comment, hdk_token) buf span = do setInput input let (AI loc nextBuf) = input - comment = reverse commentAcc span' = mkPsSpan (psSpanStart span) loc last_len = byteDiff buf nextBuf span `seq` setLastToken span' last_len - let (m_hdk_comment, hdk_token) = finalizeComment comment whenIsJust m_hdk_comment $ \hdk_comment -> P $ \s -> POk (s {hdk_comments = hdk_comments s `snocOL` L span' hdk_comment}) () b <- getBit RawTokenStreamBit if b then return (L span' hdk_token) else cont -docCommentEnd :: AlexInput -> String -> (String -> (HdkComment, Token)) -> StringBuffer -> +{-# INLINE docCommentEnd #-} +docCommentEnd :: AlexInput -> (HdkComment, Token) -> StringBuffer -> PsSpan -> P (PsLocated Token) -docCommentEnd input commentAcc docType buf span = do - let finalizeComment str = - let (hdk_comment, token) = docType str - in (Just hdk_comment, token) - commentEnd lexToken input commentAcc finalizeComment buf span +docCommentEnd input (hdk_comment, tok) buf span + = commentEnd lexToken input (Just hdk_comment, tok) buf span errBrace :: AlexInput -> RealSrcSpan -> P a errBrace (AI end _) span = @@ -2331,8 +2351,10 @@ data ParserOpts = ParserOpts pWarningFlags :: ParserOpts -> EnumSet WarningFlag pWarningFlags opts = diag_warning_flags (pDiagOpts opts) --- | Haddock comment as produced by the lexer. These are accumulated in --- 'PState' and then processed in "GHC.Parser.PostProcess.Haddock". +-- | Haddock comment as produced by the lexer. These are accumulated in 'PState' +-- and then processed in "GHC.Parser.PostProcess.Haddock". The location of the +-- 'HsDocString's spans over the contents of the docstring - i.e. it does not +-- include the decorator ("-- |", "{-|" etc.) 
data HdkComment = HdkCommentNext HsDocString | HdkCommentPrev HsDocString @@ -2596,6 +2618,7 @@ alexGetByte (AI loc s) loc' = advancePsLoc loc c byte = adjustChar c +{-# INLINE alexGetChar' #-} -- This version does not squash unicode characters, it is used when -- lexing strings. alexGetChar' :: AlexInput -> Maybe (Char,AlexInput) @@ -3386,8 +3409,7 @@ reportLexError loc1 loc2 buf f lexTokenStream :: ParserOpts -> StringBuffer -> RealSrcLoc -> ParseResult [Located Token] lexTokenStream opts buf loc = unP go initState{ options = opts' } where - new_exts = xunset HaddockBit -- disable Haddock - $ xunset UsePosPragsBit -- parse LINE/COLUMN pragmas as tokens + new_exts = xunset UsePosPragsBit -- parse LINE/COLUMN pragmas as tokens $ xset RawTokenStreamBit -- include comments $ pExtsBitmap opts opts' = opts { pExtsBitmap = new_exts } @@ -3407,7 +3429,7 @@ fileHeaderPrags = Map.fromList([("options", lex_string_prag IToptions_prag), ("include", lex_string_prag ITinclude_prag)]) ignoredPrags = Map.fromList (map ignored pragmas) - where ignored opt = (opt, nested_comment lexToken) + where ignored opt = (opt, nested_comment) impls = ["hugs", "nhc98", "jhc", "yhc", "catch", "derive"] options_pragmas = map ("options_" ++) impls -- CFILES is a hugs-only thing. 
@@ -3553,13 +3575,10 @@ allocateFinalComments ss comment_q mheader_comments = Strict.Just _ -> (mheader_comments, [], comment_q' ++ newAnns) commentToAnnotation :: RealLocated Token -> LEpaComment -commentToAnnotation (L l (ITdocCommentNext s ll)) = mkLEpaComment l ll (EpaDocCommentNext s) -commentToAnnotation (L l (ITdocCommentPrev s ll)) = mkLEpaComment l ll (EpaDocCommentPrev s) -commentToAnnotation (L l (ITdocCommentNamed s ll)) = mkLEpaComment l ll (EpaDocCommentNamed s) -commentToAnnotation (L l (ITdocSection n s ll)) = mkLEpaComment l ll (EpaDocSection n s) -commentToAnnotation (L l (ITdocOptions s ll)) = mkLEpaComment l ll (EpaDocOptions s) -commentToAnnotation (L l (ITlineComment s ll)) = mkLEpaComment l ll (EpaLineComment s) -commentToAnnotation (L l (ITblockComment s ll)) = mkLEpaComment l ll (EpaBlockComment s) +commentToAnnotation (L l (ITdocComment s ll)) = mkLEpaComment l ll (EpaDocComment s) +commentToAnnotation (L l (ITdocOptions s ll)) = mkLEpaComment l ll (EpaDocOptions s) +commentToAnnotation (L l (ITlineComment s ll)) = mkLEpaComment l ll (EpaLineComment s) +commentToAnnotation (L l (ITblockComment s ll)) = mkLEpaComment l ll (EpaBlockComment s) commentToAnnotation _ = panic "commentToAnnotation" -- see Note [PsSpan in Comments] @@ -3569,12 +3588,9 @@ mkLEpaComment l ll tok = L (realSpanAsAnchor l) (EpaComment tok (psRealSpan ll)) -- --------------------------------------------------------------------- isComment :: Token -> Bool -isComment (ITlineComment _ _) = True -isComment (ITblockComment _ _) = True -isComment (ITdocCommentNext _ _) = True -isComment (ITdocCommentPrev _ _) = True -isComment (ITdocCommentNamed _ _) = True -isComment (ITdocSection _ _ _) = True -isComment (ITdocOptions _ _) = True -isComment _ = False +isComment (ITlineComment _ _) = True +isComment (ITblockComment _ _) = True +isComment (ITdocComment _ _) = True +isComment (ITdocOptions _ _) = True +isComment _ = False } diff --git 
a/compiler/GHC/Parser/PostProcess/Haddock.hs b/compiler/GHC/Parser/PostProcess/Haddock.hs index 08bebc4683..271d9db30f 100644 --- a/compiler/GHC/Parser/PostProcess/Haddock.hs +++ b/compiler/GHC/Parser/PostProcess/Haddock.hs @@ -67,7 +67,9 @@ import Control.Monad.Trans.Writer import Data.Functor.Identity import qualified Data.Monoid +import {-# SOURCE #-} GHC.Parser (parseIdentifier) import GHC.Parser.Lexer +import GHC.Parser.HaddockLex import GHC.Parser.Errors.Types import GHC.Utils.Misc (mergeListsBy, filterOut, mapLastM, (<&&>)) import qualified GHC.Data.Strict as Strict @@ -252,7 +254,8 @@ instance HasHaddock (Located HsModule) where docs <- inLocRange (locRangeTo (getBufPos (srcSpanStart (locA l_name)))) $ takeHdkComments mkDocNext - selectDocString docs + dc <- selectDocString docs + pure $ lexLHsDocString <$> dc -- Step 2, process documentation comments in the export list: -- @@ -292,6 +295,12 @@ instance HasHaddock (Located HsModule) where , hsmodDecls = hsmodDecls' , hsmodHaddockModHeader = join @Maybe headerDocs } +lexHsDocString :: HsDocString -> HsDoc GhcPs +lexHsDocString = lexHsDoc parseIdentifier + +lexLHsDocString :: Located HsDocString -> LHsDoc GhcPs +lexLHsDocString = fmap lexHsDocString + -- Only for module exports, not module imports. 
-- -- module M (a, b, c) where -- use on this [LIE GhcPs] @@ -700,7 +709,7 @@ instance HasHaddock (LocatedA (ConDecl GhcPs)) where con_res_ty' <- addHaddock con_res_ty pure $ L l_con_decl $ ConDeclGADT { con_g_ext, con_names, con_bndrs, con_mb_cxt, - con_doc = con_doc', + con_doc = lexLHsDocString <$> con_doc', con_g_args = con_g_args', con_res_ty = con_res_ty' } ConDeclH98 { con_ext, con_name, con_forall, con_ex_tvs, con_mb_cxt, con_args } -> @@ -711,7 +720,7 @@ instance HasHaddock (LocatedA (ConDecl GhcPs)) where ts' <- traverse addHaddockConDeclFieldTy ts pure $ L l_con_decl $ ConDeclH98 { con_ext, con_name, con_forall, con_ex_tvs, con_mb_cxt, - con_doc = con_doc', + con_doc = lexLHsDocString <$> con_doc', con_args = PrefixCon noTypeArgs ts' } InfixCon t1 t2 -> do t1' <- addHaddockConDeclFieldTy t1 @@ -719,14 +728,14 @@ instance HasHaddock (LocatedA (ConDecl GhcPs)) where t2' <- addHaddockConDeclFieldTy t2 pure $ L l_con_decl $ ConDeclH98 { con_ext, con_name, con_forall, con_ex_tvs, con_mb_cxt, - con_doc = con_doc', + con_doc = lexLHsDocString <$> con_doc', con_args = InfixCon t1' t2' } RecCon (L l_rec flds) -> do con_doc' <- getConDoc (getLocA con_name) flds' <- traverse addHaddockConDeclField flds pure $ L l_con_decl $ ConDeclH98 { con_ext, con_name, con_forall, con_ex_tvs, con_mb_cxt, - con_doc = con_doc', + con_doc = lexLHsDocString <$> con_doc', con_args = RecCon (L l_rec flds') } -- Keep track of documentation comments on the data constructor or any of its @@ -768,7 +777,7 @@ discardHasInnerDocs = fmap fst . runWriterT -- data/newtype declaration. 
getConDoc :: SrcSpan -- Location of the data constructor - -> ConHdkA (Maybe LHsDocString) + -> ConHdkA (Maybe (Located HsDocString)) getConDoc l = WriterT $ extendHdkA l $ liftHdkA $ do mDoc <- getPrevNextDoc l @@ -792,7 +801,7 @@ addHaddockConDeclField -> ConHdkA (LConDeclField GhcPs) addHaddockConDeclField (L l_fld fld) = WriterT $ extendHdkA (locA l_fld) $ liftHdkA $ do - cd_fld_doc <- getPrevNextDoc (locA l_fld) + cd_fld_doc <- fmap lexLHsDocString <$> getPrevNextDoc (locA l_fld) return (L l_fld (fld { cd_fld_doc }), HasInnerDocs (isJust cd_fld_doc)) @@ -861,7 +870,7 @@ addConTrailingDoc l_sep = x <$ reportExtraDocs trailingDocs mk_doc_fld (L l' con_fld) = do doc <- selectDocString trailingDocs - return $ L l' (con_fld { cd_fld_doc = doc }) + return $ L l' (con_fld { cd_fld_doc = fmap lexLHsDocString doc }) con_args' <- case con_args con_decl of x@(PrefixCon _ []) -> x <$ reportExtraDocs trailingDocs x@(RecCon (L _ [])) -> x <$ reportExtraDocs trailingDocs @@ -872,7 +881,7 @@ addConTrailingDoc l_sep = return (RecCon (L l_rec flds')) return $ L l (con_decl{ con_args = con_args' }) else do - con_doc' <- selectDocString (con_doc con_decl `mcons` trailingDocs) + con_doc' <- selectDoc (con_doc con_decl `mcons` (map lexLHsDocString trailingDocs)) return $ L l (con_decl{ con_doc = con_doc' }) _ -> panic "addConTrailingDoc: non-H98 ConDecl" @@ -1196,7 +1205,7 @@ data HdkSt = -- | Warnings accumulated in HdkM. 
data HdkWarn = HdkWarnInvalidComment (PsLocated HdkComment) - | HdkWarnExtraComment LHsDocString + | HdkWarnExtraComment (Located HsDocString) -- Restrict the range in which a HdkM computation will look up comments: -- @@ -1238,8 +1247,7 @@ takeHdkComments f = (items, other_comments) = foldr add_comment ([], []) comments_in_range remaining_comments = comments_before_range ++ other_comments ++ comments_after_range hdk_st' = hdk_st{ hdk_st_pending = remaining_comments } - in - (items, hdk_st') + in (items, hdk_st') where is_after StartOfFile _ = True is_after (StartLoc l) (L l_comment _) = bufSpanStart (psBufSpan l_comment) >= l @@ -1257,7 +1265,7 @@ takeHdkComments f = Nothing -> (items, hdk_comment : other_hdk_comments) -- Get the docnext or docprev comment for an AST node at the given source span. -getPrevNextDoc :: SrcSpan -> HdkM (Maybe LHsDocString) +getPrevNextDoc :: SrcSpan -> HdkM (Maybe (Located HsDocString)) getPrevNextDoc l = do let (l_start, l_end) = (srcSpanStart l, srcSpanEnd l) before_t = locRangeTo (getBufPos l_start) @@ -1271,7 +1279,7 @@ appendHdkWarning e = HdkM $ \_ hdk_st -> let hdk_st' = hdk_st { hdk_st_warnings = e : hdk_st_warnings hdk_st } in ((), hdk_st') -selectDocString :: [LHsDocString] -> HdkM (Maybe LHsDocString) +selectDocString :: [Located HsDocString] -> HdkM (Maybe (Located HsDocString)) selectDocString = select . filterOut (isEmptyDocString . unLoc) where select [] = return Nothing @@ -1280,7 +1288,16 @@ selectDocString = select . filterOut (isEmptyDocString . unLoc) reportExtraDocs extra_docs return (Just doc) -reportExtraDocs :: [LHsDocString] -> HdkM () +selectDoc :: forall a. [LHsDoc a] -> HdkM (Maybe (LHsDoc a)) +selectDoc = select . filterOut (isEmptyDocString . hsDocString . 
unLoc) + where + select [] = return Nothing + select [doc] = return (Just doc) + select (doc : extra_docs) = do + reportExtraDocs $ map (\(L l d) -> L l $ hsDocString d) extra_docs + return (Just doc) + +reportExtraDocs :: [Located HsDocString] -> HdkM () reportExtraDocs = traverse_ (\extra_doc -> appendHdkWarning (HdkWarnExtraComment extra_doc)) @@ -1297,13 +1314,14 @@ mkDocDecl :: LayoutInfo -> PsLocated HdkComment -> Maybe (LDocDecl GhcPs) mkDocDecl layout_info (L l_comment hdk_comment) | indent_mismatch = Nothing | otherwise = - Just $ L (noAnnSrcSpan $ mkSrcSpanPs l_comment) $ + Just $ L (noAnnSrcSpan span) $ case hdk_comment of - HdkCommentNext doc -> DocCommentNext doc - HdkCommentPrev doc -> DocCommentPrev doc - HdkCommentNamed s doc -> DocCommentNamed s doc - HdkCommentSection n doc -> DocGroup n doc + HdkCommentNext doc -> DocCommentNext (L span $ lexHsDocString doc) + HdkCommentPrev doc -> DocCommentPrev (L span $ lexHsDocString doc) + HdkCommentNamed s doc -> DocCommentNamed s (L span $ lexHsDocString doc) + HdkCommentSection n doc -> DocGroup n (L span $ lexHsDocString doc) where + span = mkSrcSpanPs l_comment -- 'indent_mismatch' checks if the documentation comment has the exact -- indentation level expected by the parent node. 
-- @@ -1332,18 +1350,19 @@ mkDocDecl layout_info (L l_comment hdk_comment) mkDocIE :: PsLocated HdkComment -> Maybe (LIE GhcPs) mkDocIE (L l_comment hdk_comment) = case hdk_comment of - HdkCommentSection n doc -> Just $ L l (IEGroup noExtField n doc) + HdkCommentSection n doc -> Just $ L l (IEGroup noExtField n $ L span $ lexHsDocString doc) HdkCommentNamed s _doc -> Just $ L l (IEDocNamed noExtField s) - HdkCommentNext doc -> Just $ L l (IEDoc noExtField doc) + HdkCommentNext doc -> Just $ L l (IEDoc noExtField $ L span $ lexHsDocString doc) _ -> Nothing - where l = noAnnSrcSpan $ mkSrcSpanPs l_comment + where l = noAnnSrcSpan span + span = mkSrcSpanPs l_comment -mkDocNext :: PsLocated HdkComment -> Maybe LHsDocString -mkDocNext (L l (HdkCommentNext doc)) = Just $ L (mkSrcSpanPs l) doc +mkDocNext :: PsLocated HdkComment -> Maybe (Located HsDocString) +mkDocNext (L l (HdkCommentNext doc)) = Just (L (mkSrcSpanPs l) doc) mkDocNext _ = Nothing -mkDocPrev :: PsLocated HdkComment -> Maybe LHsDocString -mkDocPrev (L l (HdkCommentPrev doc)) = Just $ L (mkSrcSpanPs l) doc +mkDocPrev :: PsLocated HdkComment -> Maybe (Located HsDocString) +mkDocPrev (L l (HdkCommentPrev doc)) = Just (L (mkSrcSpanPs l) doc) mkDocPrev _ = Nothing @@ -1396,6 +1415,7 @@ locRangeTo Strict.Nothing = mempty -- We'd rather only do the (>=40) check. So we reify the predicate to make -- sure we only check for the most restrictive bound. data LowerLocBound = StartOfFile | StartLoc !BufPos + deriving Show instance Semigroup LowerLocBound where StartOfFile <> l = l @@ -1424,6 +1444,7 @@ instance Monoid LowerLocBound where -- We'd rather only do the (<=20) check. So we reify the predicate to make -- sure we only check for the most restrictive bound. data UpperLocBound = EndOfFile | EndLoc !BufPos + deriving Show instance Semigroup UpperLocBound where EndOfFile <> l = l @@ -1442,6 +1463,7 @@ instance Monoid UpperLocBound where -- The semigroup instance corresponds to (&&). 
-- newtype ColumnBound = ColumnFrom Int -- n >= GHC.Types.SrcLoc.leftmostColumn + deriving Show instance Semigroup ColumnBound where ColumnFrom n <> ColumnFrom m = ColumnFrom (max n m) @@ -1456,9 +1478,9 @@ instance Monoid ColumnBound where * * ********************************************************************* -} -mkLHsDocTy :: LHsType GhcPs -> Maybe LHsDocString -> LHsType GhcPs +mkLHsDocTy :: LHsType GhcPs -> Maybe (Located HsDocString) -> LHsType GhcPs mkLHsDocTy t Nothing = t -mkLHsDocTy t (Just doc) = L (getLoc t) (HsDocTy noAnn t doc) +mkLHsDocTy t (Just doc) = L (getLoc t) (HsDocTy noAnn t $ lexLHsDocString doc) getForAllTeleLoc :: HsForAllTelescope GhcPs -> SrcSpan getForAllTeleLoc tele = |