1 files changed, 117 insertions, 101 deletions
diff --git a/compiler/GHC/Parser/Lexer.x b/compiler/GHC/Parser/Lexer.x
index 7d7d157d2b..02717c7dae 100644
--- a/compiler/GHC/Parser/Lexer.x
+++ b/compiler/GHC/Parser/Lexer.x
@@ -76,6 +76,7 @@ module GHC.Parser.Lexer (
    commentToAnnotation,
    HdkComment(..),
    warnopt,
+   adjustChar,
    addPsMessage
   ) where
 
@@ -87,6 +88,8 @@ import Control.Monad
 import Control.Applicative
 import Data.Char
 import Data.List (stripPrefix, isInfixOf, partition)
+import Data.List.NonEmpty ( NonEmpty(..) )
+import qualified Data.List.NonEmpty as NE
 import Data.Maybe
 import Data.Word
 import Debug.Trace (trace)
@@ -134,34 +137,34 @@ import GHC.Parser.Errors.Ppr ()
 
 -- NB: The logic behind these definitions is also reflected in "GHC.Utils.Lexeme"
 -- Any changes here should likely be reflected there.
-$unispace    = \x05 -- Trick Alex into handling Unicode. See [Unicode in Alex].
+$unispace    = \x05 -- Trick Alex into handling Unicode. See Note [Unicode in Alex].
 $nl          = [\n\r\f]
 $whitechar   = [$nl\v\ $unispace]
 $white_no_nl = $whitechar # \n -- TODO #8424
 $tab         = \t
 
 $ascdigit  = 0-9
-$unidigit  = \x03 -- Trick Alex into handling Unicode. See [Unicode in Alex].
+$unidigit  = \x03 -- Trick Alex into handling Unicode. See Note [Unicode in Alex].
 $decdigit  = $ascdigit -- exactly $ascdigit, no more no less.
 $digit     = [$ascdigit $unidigit]
 
 $special   = [\(\)\,\;\[\]\`\{\}]
 $ascsymbol = [\!\#\$\%\&\*\+\.\/\<\=\>\?\@\\\^\|\-\~\:]
-$unisymbol = \x04 -- Trick Alex into handling Unicode. See [Unicode in Alex].
+$unisymbol = \x04 -- Trick Alex into handling Unicode. See Note [Unicode in Alex].
 $symbol    = [$ascsymbol $unisymbol] # [$special \_\"\']
 
-$unilarge  = \x01 -- Trick Alex into handling Unicode. See [Unicode in Alex].
+$unilarge  = \x01 -- Trick Alex into handling Unicode. See Note [Unicode in Alex].
 $asclarge  = [A-Z]
 $large     = [$asclarge $unilarge]
 
-$unismall  = \x02 -- Trick Alex into handling Unicode. See [Unicode in Alex].
+$unismall  = \x02 -- Trick Alex into handling Unicode. See Note [Unicode in Alex].
 $ascsmall  = [a-z]
 $small     = [$ascsmall $unismall \_]
 
-$uniidchar = \x07 -- Trick Alex into handling Unicode. See [Unicode in Alex].
+$uniidchar = \x07 -- Trick Alex into handling Unicode. See Note [Unicode in Alex].
 $idchar    = [$small $large $digit $uniidchar \']
 
-$unigraphic = \x06 -- Trick Alex into handling Unicode. See [Unicode in Alex].
+$unigraphic = \x06 -- Trick Alex into handling Unicode. See Note [Unicode in Alex].
 $graphic   = [$small $large $symbol $digit $idchar $special $unigraphic \"\']
 
 $binit     = 0-1
@@ -230,7 +233,7 @@ $tab          { warnTab }
 -- are). We also rule out nested Haddock comments, if the -haddock flag is
 -- set.
 
-"{-" / { isNormalComment } { nested_comment lexToken }
+"{-" / { isNormalComment } { nested_comment }
 
 -- Single-line comments are a bit tricky.  Haskell 98 says that two or
 -- more dashes followed by a symbol should be parsed as a varsym, so we
@@ -364,12 +367,12 @@ $tab          { warnTab }
 <0> {
   -- In the "0" mode we ignore these pragmas
   "{-#"  $whitechar* $pragmachar+ / { known_pragma fileHeaderPrags }
-                     { nested_comment lexToken }
+                     { nested_comment }
 }
 
 <0,option_prags> {
   "{-#"  { warnThen PsWarnUnrecognisedPragma
-                    (nested_comment lexToken) }
+                    (nested_comment ) }
 }
 
 -- '0' state: ordinary lexemes
@@ -883,13 +886,11 @@ data Token
   | ITeof                        -- ^ end of file token
 
   -- Documentation annotations. See Note [PsSpan in Comments]
-  | ITdocCommentNext  String     PsSpan -- ^ something beginning @-- |@
-  | ITdocCommentPrev  String     PsSpan -- ^ something beginning @-- ^@
-  | ITdocCommentNamed String     PsSpan -- ^ something beginning @-- $@
-  | ITdocSection      Int String PsSpan -- ^ a section heading
-  | ITdocOptions      String     PsSpan -- ^ doc options (prune, ignore-exports, etc)
-  | ITlineComment     String     PsSpan -- ^ comment starting by "--"
-  | ITblockComment    String     PsSpan -- ^ comment in {- -}
+  | ITdocComment   HsDocString PsSpan -- ^ The HsDocString contains more details about what
+                                      -- this is and how to pretty print it
+  | ITdocOptions   String      PsSpan -- ^ doc options (prune, ignore-exports, etc)
+  | ITlineComment  String      PsSpan -- ^ comment starting by "--"
+  | ITblockComment String      PsSpan -- ^ comment in {- -}
 
   deriving Show
 
@@ -1280,16 +1281,23 @@ alexOrPred p1 p2 userState in1 len in2
   = p1 userState in1 len in2 || p2 userState in1 len in2
 
 multiline_doc_comment :: Action
-multiline_doc_comment span buf _len = withLexedDocType (worker "")
+multiline_doc_comment span buf _len = {-# SCC "multiline_doc_comment" #-} withLexedDocType worker
   where
-    worker commentAcc input docType checkNextLine = case alexGetChar' input of
-      Just ('\n', input')
-        | checkNextLine -> case checkIfCommentLine input' of
-          Just input -> worker ('\n':commentAcc) input docType checkNextLine
-          Nothing -> docCommentEnd input commentAcc docType buf span
-        | otherwise -> docCommentEnd input commentAcc docType buf span
-      Just (c, input) -> worker (c:commentAcc) input docType checkNextLine
-      Nothing -> docCommentEnd input commentAcc docType buf span
+    worker input@(AI start_loc _) docType checkNextLine = go start_loc "" [] input
+      where
+        go start_loc curLine prevLines input@(AI end_loc _) = case alexGetChar' input of
+            Just ('\n', input')
+              | checkNextLine -> case checkIfCommentLine input' of
+                Just input@(AI next_start _) ->  go next_start "" (locatedLine : prevLines) input -- Start a new line
+                Nothing -> endComment
+              | otherwise -> endComment
+            Just (c, input) -> go start_loc (c:curLine) prevLines input
+            Nothing -> endComment
+          where
+            lineSpan = mkSrcSpanPs $ mkPsSpan start_loc end_loc
+            locatedLine = L lineSpan (mkHsDocStringChunk $ reverse curLine)
+            commentLines = NE.reverse $ locatedLine :| prevLines
+            endComment = docCommentEnd input (docType (\dec -> MultiLineDocString dec commentLines)) buf span
 
     -- Check if the next line of input belongs to this doc comment as well.
     -- A doc comment continues onto the next line when the following
@@ -1331,15 +1339,43 @@ lineCommentToken span buf len = do
   nested comments require traversing by hand, they can't be parsed
   using regular expressions.
 -}
-nested_comment :: P (PsLocated Token) -> Action
-nested_comment cont span buf len = do
+nested_comment :: Action
+nested_comment span buf len = {-# SCC "nested_comment" #-} do
+  l <- getLastLocComment
+  let endComment input (L _ comment) = commentEnd lexToken input (Nothing, ITblockComment comment l) buf span
   input <- getInput
-  go (reverse $ lexemeToString buf len) (1::Int) input
+  -- Include decorator in comment
+  let start_decorator = reverse $ lexemeToString buf len
+  nested_comment_logic endComment start_decorator input span
+
+nested_doc_comment :: Action
+nested_doc_comment span buf _len = {-# SCC "nested_doc_comment" #-} withLexedDocType worker
+  where
+    worker input docType _checkNextLine = nested_comment_logic endComment "" input span
+      where
+        endComment input lcomment
+          = docCommentEnd input (docType (\d -> NestedDocString d (mkHsDocStringChunk . dropTrailingDec <$> lcomment))) buf span
+
+        dropTrailingDec [] = []
+        dropTrailingDec "-}" = ""
+        dropTrailingDec (x:xs) = x:dropTrailingDec xs
+
+{-# INLINE nested_comment_logic #-}
+-- | Includes the trailing '-}' decorators
+-- drop the last two elements with the callback if you don't want them to be included
+nested_comment_logic
+  :: (AlexInput -> Located String -> P (PsLocated Token))  -- ^ Continuation that gets the rest of the input and the lexed comment
+  -> String -- ^ starting value for accumulator (reversed) - When we want to include a decorator '{-' in the comment
+  -> AlexInput
+  -> PsSpan
+  -> P (PsLocated Token)
+nested_comment_logic endComment commentAcc input span = go commentAcc (1::Int) input
   where
-    go commentAcc 0 input = do
-      l <- getLastLocComment
-      let finalizeComment str = (Nothing, ITblockComment str l)
-      commentEnd cont input commentAcc finalizeComment buf span
+    go commentAcc 0 input@(AI end_loc _) = do
+      let comment = reverse commentAcc
+          cspan = mkSrcSpanPs $ mkPsSpan (psSpanStart span) end_loc
+          lcomment = L cspan comment
+      endComment input lcomment
     go commentAcc n input = case alexGetChar' input of
       Nothing -> errBrace input (psRealSpan span)
       Just ('-',input) -> case alexGetChar' input of
@@ -1358,31 +1394,6 @@ nested_comment cont span buf len = do
         Just (_,_)   -> go ('\n':commentAcc) n input
       Just (c,input) -> go (c:commentAcc) n input
 
-nested_doc_comment :: Action
-nested_doc_comment span buf _len = withLexedDocType (go "")
-  where
-    go commentAcc input docType _ = case alexGetChar' input of
-      Nothing -> errBrace input (psRealSpan span)
-      Just ('-',input) -> case alexGetChar' input of
-        Nothing -> errBrace input (psRealSpan span)
-        Just ('\125',input) ->
-          docCommentEnd input commentAcc docType buf span
-        Just (_,_) -> go ('-':commentAcc) input docType False
-      Just ('\123', input) -> case alexGetChar' input of
-        Nothing  -> errBrace input (psRealSpan span)
-        Just ('-',input) -> do
-          setInput input
-          let cont = do input <- getInput; go commentAcc input docType False
-          nested_comment cont span buf _len
-        Just (_,_) -> go ('\123':commentAcc) input docType False
-      -- See Note [Nested comment line pragmas]
-      Just ('\n',input) -> case alexGetChar' input of
-        Nothing  -> errBrace input (psRealSpan span)
-        Just ('#',_) -> do (parsedAcc,input) <- parseNestedPragma input
-                           go (parsedAcc ++ '\n':commentAcc) input docType False
-        Just (_,_)   -> go ('\n':commentAcc) input docType False
-      Just (c,input) -> go (c:commentAcc) input docType False
-
 -- See Note [Nested comment line pragmas]
 parseNestedPragma :: AlexInput -> P (String,AlexInput)
 parseNestedPragma input@(AI _ buf) = do
@@ -1429,7 +1440,8 @@ return control to parseNestedPragma by returning the ITcomment_line_prag token.
 See #314 for more background on the bug this fixes.
 -}
 
-withLexedDocType :: (AlexInput -> (String -> (HdkComment, Token)) -> Bool -> P (PsLocated Token))
+{-# INLINE withLexedDocType #-}
+withLexedDocType :: (AlexInput -> ((HsDocStringDecorator -> HsDocString) -> (HdkComment, Token)) -> Bool -> P (PsLocated Token))
                  -> P (PsLocated Token)
 withLexedDocType lexDocComment = do
   input@(AI _ buf) <- getInput
@@ -1439,7 +1451,9 @@ withLexedDocType lexDocComment = do
     -- line of input might also belong to this doc comment.
     '|' -> lexDocComment input (mkHdkCommentNext l) True
     '^' -> lexDocComment input (mkHdkCommentPrev l) True
-    '$' -> lexDocComment input (mkHdkCommentNamed l) True
+    '$' -> case lexDocName input of
+       Nothing -> do setInput input; lexToken -- eof reached, lex it normally
+       Just (name, input) -> lexDocComment input (mkHdkCommentNamed l name) True
     '*' -> lexDocSection l 1 input
     _ -> panic "withLexedDocType: Bad doc type"
  where
@@ -1448,18 +1462,28 @@ withLexedDocType lexDocComment = do
       Just (_,   _)     -> lexDocComment input (mkHdkCommentSection l n) False
       Nothing -> do setInput input; lexToken -- eof reached, lex it normally
 
-mkHdkCommentNext, mkHdkCommentPrev :: PsSpan -> String -> (HdkComment, Token)
-mkHdkCommentNext loc str = (HdkCommentNext (mkHsDocString str), ITdocCommentNext str loc)
-mkHdkCommentPrev loc str = (HdkCommentPrev (mkHsDocString str), ITdocCommentPrev str loc)
+    lexDocName :: AlexInput -> Maybe (String, AlexInput)
+    lexDocName = go ""
+      where
+        go acc input = case alexGetChar' input of
+          Just (c, input')
+            | isSpace c -> Just (reverse acc, input)
+            | otherwise -> go (c:acc) input'
+          Nothing -> Nothing
+
+mkHdkCommentNext, mkHdkCommentPrev  :: PsSpan -> (HsDocStringDecorator -> HsDocString) -> (HdkComment, Token)
+mkHdkCommentNext loc mkDS =  (HdkCommentNext ds,ITdocComment ds loc)
+  where ds = mkDS HsDocStringNext
+mkHdkCommentPrev loc mkDS =  (HdkCommentPrev ds,ITdocComment ds loc)
+  where ds = mkDS HsDocStringPrevious
 
-mkHdkCommentNamed :: PsSpan -> String -> (HdkComment, Token)
-mkHdkCommentNamed loc str =
-  let (name, rest) = break isSpace str
-  in (HdkCommentNamed name (mkHsDocString rest), ITdocCommentNamed str loc)
+mkHdkCommentNamed :: PsSpan -> String -> (HsDocStringDecorator -> HsDocString) -> (HdkComment, Token)
+mkHdkCommentNamed loc name mkDS = (HdkCommentNamed name ds, ITdocComment ds loc)
+  where ds = mkDS (HsDocStringNamed name)
 
-mkHdkCommentSection :: PsSpan -> Int -> String -> (HdkComment, Token)
-mkHdkCommentSection loc n str =
-  (HdkCommentSection n (mkHsDocString str), ITdocSection n str loc)
+mkHdkCommentSection :: PsSpan -> Int -> (HsDocStringDecorator -> HsDocString) -> (HdkComment, Token)
+mkHdkCommentSection loc n mkDS = (HdkCommentSection n ds, ITdocComment ds loc)
+  where ds = mkDS (HsDocStringGroup n)
 
 -- RULES pragmas turn on the forall and '.' keywords, and we turn them
 -- off again at the end of the pragma.
@@ -1503,34 +1527,30 @@ endPrag span _buf _len = do
 -- it writes the wrong token length to the parser state. This function is
 -- called afterwards, so it can just update the state.
 
+{-# INLINE commentEnd #-}
 commentEnd :: P (PsLocated Token)
            -> AlexInput
-           -> String
-           -> (String -> (Maybe HdkComment, Token))
+           -> (Maybe HdkComment, Token)
            -> StringBuffer
            -> PsSpan
            -> P (PsLocated Token)
-commentEnd cont input commentAcc finalizeComment buf span = do
+commentEnd cont input (m_hdk_comment, hdk_token) buf span = do
   setInput input
   let (AI loc nextBuf) = input
-      comment = reverse commentAcc
       span' = mkPsSpan (psSpanStart span) loc
       last_len = byteDiff buf nextBuf
   span `seq` setLastToken span' last_len
-  let (m_hdk_comment, hdk_token) = finalizeComment comment
   whenIsJust m_hdk_comment $ \hdk_comment ->
     P $ \s -> POk (s {hdk_comments = hdk_comments s `snocOL` L span' hdk_comment}) ()
   b <- getBit RawTokenStreamBit
   if b then return (L span' hdk_token)
        else cont
 
-docCommentEnd :: AlexInput -> String -> (String -> (HdkComment, Token)) -> StringBuffer ->
+{-# INLINE docCommentEnd #-}
+docCommentEnd :: AlexInput -> (HdkComment, Token) -> StringBuffer ->
                  PsSpan -> P (PsLocated Token)
-docCommentEnd input commentAcc docType buf span = do
-  let finalizeComment str =
-        let (hdk_comment, token) = docType str
-        in (Just hdk_comment, token)
-  commentEnd lexToken input commentAcc finalizeComment buf span
+docCommentEnd input (hdk_comment, tok) buf span
+  = commentEnd lexToken input (Just hdk_comment, tok) buf span
 
 errBrace :: AlexInput -> RealSrcSpan -> P a
 errBrace (AI end _) span =
@@ -2331,8 +2351,10 @@ data ParserOpts = ParserOpts
 pWarningFlags :: ParserOpts -> EnumSet WarningFlag
 pWarningFlags opts = diag_warning_flags (pDiagOpts opts)
 
--- | Haddock comment as produced by the lexer. These are accumulated in
--- 'PState' and then processed in "GHC.Parser.PostProcess.Haddock".
+-- | Haddock comment as produced by the lexer. These are accumulated in 'PState'
+-- and then processed in "GHC.Parser.PostProcess.Haddock". The location of the
+-- 'HsDocString's spans over the contents of the docstring - i.e. it does not
+-- include the decorator ("-- |", "{-|" etc.)
 data HdkComment
   = HdkCommentNext HsDocString
   | HdkCommentPrev HsDocString
@@ -2596,6 +2618,7 @@ alexGetByte (AI loc s)
         loc'   = advancePsLoc loc c
         byte   = adjustChar c
 
+{-# INLINE alexGetChar' #-}
 -- This version does not squash unicode characters, it is used when
 -- lexing strings.
 alexGetChar' :: AlexInput -> Maybe (Char,AlexInput)
@@ -3386,8 +3409,7 @@ reportLexError loc1 loc2 buf f
 lexTokenStream :: ParserOpts -> StringBuffer -> RealSrcLoc -> ParseResult [Located Token]
 lexTokenStream opts buf loc = unP go initState{ options = opts' }
     where
-    new_exts  = xunset HaddockBit        -- disable Haddock
-                $ xunset UsePosPragsBit  -- parse LINE/COLUMN pragmas as tokens
+    new_exts  =   xunset UsePosPragsBit  -- parse LINE/COLUMN pragmas as tokens
                 $ xset RawTokenStreamBit -- include comments
                 $ pExtsBitmap opts
     opts'     = opts { pExtsBitmap = new_exts }
@@ -3407,7 +3429,7 @@ fileHeaderPrags = Map.fromList([("options", lex_string_prag IToptions_prag),
                                  ("include", lex_string_prag ITinclude_prag)])
 
 ignoredPrags = Map.fromList (map ignored pragmas)
-               where ignored opt = (opt, nested_comment lexToken)
+               where ignored opt = (opt, nested_comment)
                      impls = ["hugs", "nhc98", "jhc", "yhc", "catch", "derive"]
                      options_pragmas = map ("options_" ++) impls
                      -- CFILES is a hugs-only thing.
@@ -3553,13 +3575,10 @@ allocateFinalComments ss comment_q mheader_comments =
       Strict.Just _ -> (mheader_comments, [], comment_q' ++ newAnns)
 
 commentToAnnotation :: RealLocated Token -> LEpaComment
-commentToAnnotation (L l (ITdocCommentNext s ll))  = mkLEpaComment l ll (EpaDocCommentNext s)
-commentToAnnotation (L l (ITdocCommentPrev s ll))  = mkLEpaComment l ll (EpaDocCommentPrev s)
-commentToAnnotation (L l (ITdocCommentNamed s ll)) = mkLEpaComment l ll (EpaDocCommentNamed s)
-commentToAnnotation (L l (ITdocSection n s ll))    = mkLEpaComment l ll (EpaDocSection n s)
-commentToAnnotation (L l (ITdocOptions s ll))      = mkLEpaComment l ll (EpaDocOptions s)
-commentToAnnotation (L l (ITlineComment s ll))     = mkLEpaComment l ll (EpaLineComment s)
-commentToAnnotation (L l (ITblockComment s ll))    = mkLEpaComment l ll (EpaBlockComment s)
+commentToAnnotation (L l (ITdocComment s ll))   = mkLEpaComment l ll (EpaDocComment s)
+commentToAnnotation (L l (ITdocOptions s ll))   = mkLEpaComment l ll (EpaDocOptions s)
+commentToAnnotation (L l (ITlineComment s ll))  = mkLEpaComment l ll (EpaLineComment s)
+commentToAnnotation (L l (ITblockComment s ll)) = mkLEpaComment l ll (EpaBlockComment s)
 commentToAnnotation _                           = panic "commentToAnnotation"
 
 -- see Note [PsSpan in Comments]
@@ -3569,12 +3588,9 @@ mkLEpaComment l ll tok = L (realSpanAsAnchor l) (EpaComment tok (psRealSpan ll))
 -- ---------------------------------------------------------------------
 
 isComment :: Token -> Bool
-isComment (ITlineComment     _ _)   = True
-isComment (ITblockComment    _ _)   = True
-isComment (ITdocCommentNext  _ _)   = True
-isComment (ITdocCommentPrev  _ _)   = True
-isComment (ITdocCommentNamed _ _)   = True
-isComment (ITdocSection      _ _ _) = True
-isComment (ITdocOptions      _ _)   = True
-isComment _ = False
+isComment (ITlineComment  _ _) = True
+isComment (ITblockComment _ _) = True
+isComment (ITdocComment   _ _) = True
+isComment (ITdocOptions   _ _) = True
+isComment _                    = False
 }