summaryrefslogtreecommitdiff
path: root/compiler/GHC/Parser/Lexer.x
diff options
context:
space:
mode:
Diffstat (limited to 'compiler/GHC/Parser/Lexer.x')
-rw-r--r--compiler/GHC/Parser/Lexer.x218
1 files changed, 117 insertions, 101 deletions
diff --git a/compiler/GHC/Parser/Lexer.x b/compiler/GHC/Parser/Lexer.x
index 7d7d157d2b..02717c7dae 100644
--- a/compiler/GHC/Parser/Lexer.x
+++ b/compiler/GHC/Parser/Lexer.x
@@ -76,6 +76,7 @@ module GHC.Parser.Lexer (
commentToAnnotation,
HdkComment(..),
warnopt,
+ adjustChar,
addPsMessage
) where
@@ -87,6 +88,8 @@ import Control.Monad
import Control.Applicative
import Data.Char
import Data.List (stripPrefix, isInfixOf, partition)
+import Data.List.NonEmpty ( NonEmpty(..) )
+import qualified Data.List.NonEmpty as NE
import Data.Maybe
import Data.Word
import Debug.Trace (trace)
@@ -134,34 +137,34 @@ import GHC.Parser.Errors.Ppr ()
-- NB: The logic behind these definitions is also reflected in "GHC.Utils.Lexeme"
-- Any changes here should likely be reflected there.
-$unispace = \x05 -- Trick Alex into handling Unicode. See [Unicode in Alex].
+$unispace = \x05 -- Trick Alex into handling Unicode. See Note [Unicode in Alex].
$nl = [\n\r\f]
$whitechar = [$nl\v\ $unispace]
$white_no_nl = $whitechar # \n -- TODO #8424
$tab = \t
$ascdigit = 0-9
-$unidigit = \x03 -- Trick Alex into handling Unicode. See [Unicode in Alex].
+$unidigit = \x03 -- Trick Alex into handling Unicode. See Note [Unicode in Alex].
$decdigit = $ascdigit -- exactly $ascdigit, no more no less.
$digit = [$ascdigit $unidigit]
$special = [\(\)\,\;\[\]\`\{\}]
$ascsymbol = [\!\#\$\%\&\*\+\.\/\<\=\>\?\@\\\^\|\-\~\:]
-$unisymbol = \x04 -- Trick Alex into handling Unicode. See [Unicode in Alex].
+$unisymbol = \x04 -- Trick Alex into handling Unicode. See Note [Unicode in Alex].
$symbol = [$ascsymbol $unisymbol] # [$special \_\"\']
-$unilarge = \x01 -- Trick Alex into handling Unicode. See [Unicode in Alex].
+$unilarge = \x01 -- Trick Alex into handling Unicode. See Note [Unicode in Alex].
$asclarge = [A-Z]
$large = [$asclarge $unilarge]
-$unismall = \x02 -- Trick Alex into handling Unicode. See [Unicode in Alex].
+$unismall = \x02 -- Trick Alex into handling Unicode. See Note [Unicode in Alex].
$ascsmall = [a-z]
$small = [$ascsmall $unismall \_]
-$uniidchar = \x07 -- Trick Alex into handling Unicode. See [Unicode in Alex].
+$uniidchar = \x07 -- Trick Alex into handling Unicode. See Note [Unicode in Alex].
$idchar = [$small $large $digit $uniidchar \']
-$unigraphic = \x06 -- Trick Alex into handling Unicode. See [Unicode in Alex].
+$unigraphic = \x06 -- Trick Alex into handling Unicode. See Note [Unicode in Alex].
$graphic = [$small $large $symbol $digit $idchar $special $unigraphic \"\']
$binit = 0-1
@@ -230,7 +233,7 @@ $tab { warnTab }
-- are). We also rule out nested Haddock comments, if the -haddock flag is
-- set.
-"{-" / { isNormalComment } { nested_comment lexToken }
+"{-" / { isNormalComment } { nested_comment }
-- Single-line comments are a bit tricky. Haskell 98 says that two or
-- more dashes followed by a symbol should be parsed as a varsym, so we
@@ -364,12 +367,12 @@ $tab { warnTab }
<0> {
-- In the "0" mode we ignore these pragmas
"{-#" $whitechar* $pragmachar+ / { known_pragma fileHeaderPrags }
- { nested_comment lexToken }
+ { nested_comment }
}
<0,option_prags> {
"{-#" { warnThen PsWarnUnrecognisedPragma
- (nested_comment lexToken) }
+ (nested_comment ) }
}
-- '0' state: ordinary lexemes
@@ -883,13 +886,11 @@ data Token
| ITeof -- ^ end of file token
-- Documentation annotations. See Note [PsSpan in Comments]
- | ITdocCommentNext String PsSpan -- ^ something beginning @-- |@
- | ITdocCommentPrev String PsSpan -- ^ something beginning @-- ^@
- | ITdocCommentNamed String PsSpan -- ^ something beginning @-- $@
- | ITdocSection Int String PsSpan -- ^ a section heading
- | ITdocOptions String PsSpan -- ^ doc options (prune, ignore-exports, etc)
- | ITlineComment String PsSpan -- ^ comment starting by "--"
- | ITblockComment String PsSpan -- ^ comment in {- -}
+ | ITdocComment HsDocString PsSpan -- ^ The HsDocString contains more details about what
+ -- this is and how to pretty print it
+ | ITdocOptions String PsSpan -- ^ doc options (prune, ignore-exports, etc)
+ | ITlineComment String PsSpan -- ^ comment starting by "--"
+ | ITblockComment String PsSpan -- ^ comment in {- -}
deriving Show
@@ -1280,16 +1281,23 @@ alexOrPred p1 p2 userState in1 len in2
= p1 userState in1 len in2 || p2 userState in1 len in2
multiline_doc_comment :: Action
-multiline_doc_comment span buf _len = withLexedDocType (worker "")
+multiline_doc_comment span buf _len = {-# SCC "multiline_doc_comment" #-} withLexedDocType worker
where
- worker commentAcc input docType checkNextLine = case alexGetChar' input of
- Just ('\n', input')
- | checkNextLine -> case checkIfCommentLine input' of
- Just input -> worker ('\n':commentAcc) input docType checkNextLine
- Nothing -> docCommentEnd input commentAcc docType buf span
- | otherwise -> docCommentEnd input commentAcc docType buf span
- Just (c, input) -> worker (c:commentAcc) input docType checkNextLine
- Nothing -> docCommentEnd input commentAcc docType buf span
+ worker input@(AI start_loc _) docType checkNextLine = go start_loc "" [] input
+ where
+ go start_loc curLine prevLines input@(AI end_loc _) = case alexGetChar' input of
+ Just ('\n', input')
+ | checkNextLine -> case checkIfCommentLine input' of
+ Just input@(AI next_start _) -> go next_start "" (locatedLine : prevLines) input -- Start a new line
+ Nothing -> endComment
+ | otherwise -> endComment
+ Just (c, input) -> go start_loc (c:curLine) prevLines input
+ Nothing -> endComment
+ where
+ lineSpan = mkSrcSpanPs $ mkPsSpan start_loc end_loc
+ locatedLine = L lineSpan (mkHsDocStringChunk $ reverse curLine)
+ commentLines = NE.reverse $ locatedLine :| prevLines
+ endComment = docCommentEnd input (docType (\dec -> MultiLineDocString dec commentLines)) buf span
-- Check if the next line of input belongs to this doc comment as well.
-- A doc comment continues onto the next line when the following
@@ -1331,15 +1339,43 @@ lineCommentToken span buf len = do
nested comments require traversing by hand, they can't be parsed
using regular expressions.
-}
-nested_comment :: P (PsLocated Token) -> Action
-nested_comment cont span buf len = do
+nested_comment :: Action
+nested_comment span buf len = {-# SCC "nested_comment" #-} do
+ l <- getLastLocComment
+ let endComment input (L _ comment) = commentEnd lexToken input (Nothing, ITblockComment comment l) buf span
input <- getInput
- go (reverse $ lexemeToString buf len) (1::Int) input
+ -- Include decorator in comment
+ let start_decorator = reverse $ lexemeToString buf len
+ nested_comment_logic endComment start_decorator input span
+
+nested_doc_comment :: Action
+nested_doc_comment span buf _len = {-# SCC "nested_doc_comment" #-} withLexedDocType worker
+ where
+ worker input docType _checkNextLine = nested_comment_logic endComment "" input span
+ where
+ endComment input lcomment
+ = docCommentEnd input (docType (\d -> NestedDocString d (mkHsDocStringChunk . dropTrailingDec <$> lcomment))) buf span
+
+ dropTrailingDec [] = []
+ dropTrailingDec "-}" = ""
+ dropTrailingDec (x:xs) = x:dropTrailingDec xs
+
+{-# INLINE nested_comment_logic #-}
+-- | Includes the trailing '-}' decorators
+-- drop the last two elements with the callback if you don't want them to be included
+nested_comment_logic
+ :: (AlexInput -> Located String -> P (PsLocated Token)) -- ^ Continuation that gets the rest of the input and the lexed comment
+ -> String -- ^ starting value for accumulator (reversed) - When we want to include a decorator '{-' in the comment
+ -> AlexInput
+ -> PsSpan
+ -> P (PsLocated Token)
+nested_comment_logic endComment commentAcc input span = go commentAcc (1::Int) input
where
- go commentAcc 0 input = do
- l <- getLastLocComment
- let finalizeComment str = (Nothing, ITblockComment str l)
- commentEnd cont input commentAcc finalizeComment buf span
+ go commentAcc 0 input@(AI end_loc _) = do
+ let comment = reverse commentAcc
+ cspan = mkSrcSpanPs $ mkPsSpan (psSpanStart span) end_loc
+ lcomment = L cspan comment
+ endComment input lcomment
go commentAcc n input = case alexGetChar' input of
Nothing -> errBrace input (psRealSpan span)
Just ('-',input) -> case alexGetChar' input of
@@ -1358,31 +1394,6 @@ nested_comment cont span buf len = do
Just (_,_) -> go ('\n':commentAcc) n input
Just (c,input) -> go (c:commentAcc) n input
-nested_doc_comment :: Action
-nested_doc_comment span buf _len = withLexedDocType (go "")
- where
- go commentAcc input docType _ = case alexGetChar' input of
- Nothing -> errBrace input (psRealSpan span)
- Just ('-',input) -> case alexGetChar' input of
- Nothing -> errBrace input (psRealSpan span)
- Just ('\125',input) ->
- docCommentEnd input commentAcc docType buf span
- Just (_,_) -> go ('-':commentAcc) input docType False
- Just ('\123', input) -> case alexGetChar' input of
- Nothing -> errBrace input (psRealSpan span)
- Just ('-',input) -> do
- setInput input
- let cont = do input <- getInput; go commentAcc input docType False
- nested_comment cont span buf _len
- Just (_,_) -> go ('\123':commentAcc) input docType False
- -- See Note [Nested comment line pragmas]
- Just ('\n',input) -> case alexGetChar' input of
- Nothing -> errBrace input (psRealSpan span)
- Just ('#',_) -> do (parsedAcc,input) <- parseNestedPragma input
- go (parsedAcc ++ '\n':commentAcc) input docType False
- Just (_,_) -> go ('\n':commentAcc) input docType False
- Just (c,input) -> go (c:commentAcc) input docType False
-
-- See Note [Nested comment line pragmas]
parseNestedPragma :: AlexInput -> P (String,AlexInput)
parseNestedPragma input@(AI _ buf) = do
@@ -1429,7 +1440,8 @@ return control to parseNestedPragma by returning the ITcomment_line_prag token.
See #314 for more background on the bug this fixes.
-}
-withLexedDocType :: (AlexInput -> (String -> (HdkComment, Token)) -> Bool -> P (PsLocated Token))
+{-# INLINE withLexedDocType #-}
+withLexedDocType :: (AlexInput -> ((HsDocStringDecorator -> HsDocString) -> (HdkComment, Token)) -> Bool -> P (PsLocated Token))
-> P (PsLocated Token)
withLexedDocType lexDocComment = do
input@(AI _ buf) <- getInput
@@ -1439,7 +1451,9 @@ withLexedDocType lexDocComment = do
-- line of input might also belong to this doc comment.
'|' -> lexDocComment input (mkHdkCommentNext l) True
'^' -> lexDocComment input (mkHdkCommentPrev l) True
- '$' -> lexDocComment input (mkHdkCommentNamed l) True
+ '$' -> case lexDocName input of
+ Nothing -> do setInput input; lexToken -- eof reached, lex it normally
+ Just (name, input) -> lexDocComment input (mkHdkCommentNamed l name) True
'*' -> lexDocSection l 1 input
_ -> panic "withLexedDocType: Bad doc type"
where
@@ -1448,18 +1462,28 @@ withLexedDocType lexDocComment = do
Just (_, _) -> lexDocComment input (mkHdkCommentSection l n) False
Nothing -> do setInput input; lexToken -- eof reached, lex it normally
-mkHdkCommentNext, mkHdkCommentPrev :: PsSpan -> String -> (HdkComment, Token)
-mkHdkCommentNext loc str = (HdkCommentNext (mkHsDocString str), ITdocCommentNext str loc)
-mkHdkCommentPrev loc str = (HdkCommentPrev (mkHsDocString str), ITdocCommentPrev str loc)
+ lexDocName :: AlexInput -> Maybe (String, AlexInput)
+ lexDocName = go ""
+ where
+ go acc input = case alexGetChar' input of
+ Just (c, input')
+ | isSpace c -> Just (reverse acc, input)
+ | otherwise -> go (c:acc) input'
+ Nothing -> Nothing
+
+mkHdkCommentNext, mkHdkCommentPrev :: PsSpan -> (HsDocStringDecorator -> HsDocString) -> (HdkComment, Token)
+mkHdkCommentNext loc mkDS = (HdkCommentNext ds,ITdocComment ds loc)
+ where ds = mkDS HsDocStringNext
+mkHdkCommentPrev loc mkDS = (HdkCommentPrev ds,ITdocComment ds loc)
+ where ds = mkDS HsDocStringPrevious
-mkHdkCommentNamed :: PsSpan -> String -> (HdkComment, Token)
-mkHdkCommentNamed loc str =
- let (name, rest) = break isSpace str
- in (HdkCommentNamed name (mkHsDocString rest), ITdocCommentNamed str loc)
+mkHdkCommentNamed :: PsSpan -> String -> (HsDocStringDecorator -> HsDocString) -> (HdkComment, Token)
+mkHdkCommentNamed loc name mkDS = (HdkCommentNamed name ds, ITdocComment ds loc)
+ where ds = mkDS (HsDocStringNamed name)
-mkHdkCommentSection :: PsSpan -> Int -> String -> (HdkComment, Token)
-mkHdkCommentSection loc n str =
- (HdkCommentSection n (mkHsDocString str), ITdocSection n str loc)
+mkHdkCommentSection :: PsSpan -> Int -> (HsDocStringDecorator -> HsDocString) -> (HdkComment, Token)
+mkHdkCommentSection loc n mkDS = (HdkCommentSection n ds, ITdocComment ds loc)
+ where ds = mkDS (HsDocStringGroup n)
-- RULES pragmas turn on the forall and '.' keywords, and we turn them
-- off again at the end of the pragma.
@@ -1503,34 +1527,30 @@ endPrag span _buf _len = do
-- it writes the wrong token length to the parser state. This function is
-- called afterwards, so it can just update the state.
+{-# INLINE commentEnd #-}
commentEnd :: P (PsLocated Token)
-> AlexInput
- -> String
- -> (String -> (Maybe HdkComment, Token))
+ -> (Maybe HdkComment, Token)
-> StringBuffer
-> PsSpan
-> P (PsLocated Token)
-commentEnd cont input commentAcc finalizeComment buf span = do
+commentEnd cont input (m_hdk_comment, hdk_token) buf span = do
setInput input
let (AI loc nextBuf) = input
- comment = reverse commentAcc
span' = mkPsSpan (psSpanStart span) loc
last_len = byteDiff buf nextBuf
span `seq` setLastToken span' last_len
- let (m_hdk_comment, hdk_token) = finalizeComment comment
whenIsJust m_hdk_comment $ \hdk_comment ->
P $ \s -> POk (s {hdk_comments = hdk_comments s `snocOL` L span' hdk_comment}) ()
b <- getBit RawTokenStreamBit
if b then return (L span' hdk_token)
else cont
-docCommentEnd :: AlexInput -> String -> (String -> (HdkComment, Token)) -> StringBuffer ->
+{-# INLINE docCommentEnd #-}
+docCommentEnd :: AlexInput -> (HdkComment, Token) -> StringBuffer ->
PsSpan -> P (PsLocated Token)
-docCommentEnd input commentAcc docType buf span = do
- let finalizeComment str =
- let (hdk_comment, token) = docType str
- in (Just hdk_comment, token)
- commentEnd lexToken input commentAcc finalizeComment buf span
+docCommentEnd input (hdk_comment, tok) buf span
+ = commentEnd lexToken input (Just hdk_comment, tok) buf span
errBrace :: AlexInput -> RealSrcSpan -> P a
errBrace (AI end _) span =
@@ -2331,8 +2351,10 @@ data ParserOpts = ParserOpts
pWarningFlags :: ParserOpts -> EnumSet WarningFlag
pWarningFlags opts = diag_warning_flags (pDiagOpts opts)
--- | Haddock comment as produced by the lexer. These are accumulated in
--- 'PState' and then processed in "GHC.Parser.PostProcess.Haddock".
+-- | Haddock comment as produced by the lexer. These are accumulated in 'PState'
+-- and then processed in "GHC.Parser.PostProcess.Haddock". The location of the
+-- 'HsDocString's spans over the contents of the docstring - i.e. it does not
+-- include the decorator ("-- |", "{-|" etc.)
data HdkComment
= HdkCommentNext HsDocString
| HdkCommentPrev HsDocString
@@ -2596,6 +2618,7 @@ alexGetByte (AI loc s)
loc' = advancePsLoc loc c
byte = adjustChar c
+{-# INLINE alexGetChar' #-}
-- This version does not squash unicode characters, it is used when
-- lexing strings.
alexGetChar' :: AlexInput -> Maybe (Char,AlexInput)
@@ -3386,8 +3409,7 @@ reportLexError loc1 loc2 buf f
lexTokenStream :: ParserOpts -> StringBuffer -> RealSrcLoc -> ParseResult [Located Token]
lexTokenStream opts buf loc = unP go initState{ options = opts' }
where
- new_exts = xunset HaddockBit -- disable Haddock
- $ xunset UsePosPragsBit -- parse LINE/COLUMN pragmas as tokens
+ new_exts = xunset UsePosPragsBit -- parse LINE/COLUMN pragmas as tokens
$ xset RawTokenStreamBit -- include comments
$ pExtsBitmap opts
opts' = opts { pExtsBitmap = new_exts }
@@ -3407,7 +3429,7 @@ fileHeaderPrags = Map.fromList([("options", lex_string_prag IToptions_prag),
("include", lex_string_prag ITinclude_prag)])
ignoredPrags = Map.fromList (map ignored pragmas)
- where ignored opt = (opt, nested_comment lexToken)
+ where ignored opt = (opt, nested_comment)
impls = ["hugs", "nhc98", "jhc", "yhc", "catch", "derive"]
options_pragmas = map ("options_" ++) impls
-- CFILES is a hugs-only thing.
@@ -3553,13 +3575,10 @@ allocateFinalComments ss comment_q mheader_comments =
Strict.Just _ -> (mheader_comments, [], comment_q' ++ newAnns)
commentToAnnotation :: RealLocated Token -> LEpaComment
-commentToAnnotation (L l (ITdocCommentNext s ll)) = mkLEpaComment l ll (EpaDocCommentNext s)
-commentToAnnotation (L l (ITdocCommentPrev s ll)) = mkLEpaComment l ll (EpaDocCommentPrev s)
-commentToAnnotation (L l (ITdocCommentNamed s ll)) = mkLEpaComment l ll (EpaDocCommentNamed s)
-commentToAnnotation (L l (ITdocSection n s ll)) = mkLEpaComment l ll (EpaDocSection n s)
-commentToAnnotation (L l (ITdocOptions s ll)) = mkLEpaComment l ll (EpaDocOptions s)
-commentToAnnotation (L l (ITlineComment s ll)) = mkLEpaComment l ll (EpaLineComment s)
-commentToAnnotation (L l (ITblockComment s ll)) = mkLEpaComment l ll (EpaBlockComment s)
+commentToAnnotation (L l (ITdocComment s ll)) = mkLEpaComment l ll (EpaDocComment s)
+commentToAnnotation (L l (ITdocOptions s ll)) = mkLEpaComment l ll (EpaDocOptions s)
+commentToAnnotation (L l (ITlineComment s ll)) = mkLEpaComment l ll (EpaLineComment s)
+commentToAnnotation (L l (ITblockComment s ll)) = mkLEpaComment l ll (EpaBlockComment s)
commentToAnnotation _ = panic "commentToAnnotation"
-- see Note [PsSpan in Comments]
@@ -3569,12 +3588,9 @@ mkLEpaComment l ll tok = L (realSpanAsAnchor l) (EpaComment tok (psRealSpan ll))
-- ---------------------------------------------------------------------
isComment :: Token -> Bool
-isComment (ITlineComment _ _) = True
-isComment (ITblockComment _ _) = True
-isComment (ITdocCommentNext _ _) = True
-isComment (ITdocCommentPrev _ _) = True
-isComment (ITdocCommentNamed _ _) = True
-isComment (ITdocSection _ _ _) = True
-isComment (ITdocOptions _ _) = True
-isComment _ = False
+isComment (ITlineComment _ _) = True
+isComment (ITblockComment _ _) = True
+isComment (ITdocComment _ _) = True
+isComment (ITdocOptions _ _) = True
+isComment _ = False
}