1 files changed, 358 insertions, 187 deletions
diff --git a/compiler/parser/Lexer.x b/compiler/parser/Lexer.x
index 936948b40f..bceb48bf48 100644
--- a/compiler/parser/Lexer.x
+++ b/compiler/parser/Lexer.x
@@ -68,6 +68,7 @@ module Lexer (
    explicitNamespacesEnabled,
    patternSynonymsEnabled,
    sccProfilingOn, hpcEnabled,
+   starIsTypeEnabled,
    addWarning,
    lexTokenStream,
    addAnnotation,AddAnn,addAnnsAt,mkParensApiAnn,
@@ -75,11 +76,11 @@ module Lexer (
    moveAnnotations
   ) where
 
+import GhcPrelude
+
 -- base
 import Control.Monad
-#if __GLASGOW_HASKELL__ > 710
-import Control.Monad.Fail
-#endif
+import Control.Monad.Fail as MonadFail
 import Data.Bits
 import Data.Char
 import Data.List
@@ -105,7 +106,7 @@ import Outputable
 import StringBuffer
 import FastString
 import UniqFM
-import Util             ( readRational )
+import Util             ( readRational, readHexRational )
 
 -- compiler/main
 import ErrUtils
@@ -129,38 +130,38 @@ import ApiAnnotation
 
 -- NB: The logic behind these definitions is also reflected in basicTypes/Lexeme.hs
 -- Any changes here should likely be reflected there.
-$unispace    = \x05 -- Trick Alex into handling Unicode. See alexGetByte.
+$unispace    = \x05 -- Trick Alex into handling Unicode. See [Unicode in Alex].
 $nl          = [\n\r\f]
 $whitechar   = [$nl\v\ $unispace]
 $white_no_nl = $whitechar # \n -- TODO #8424
 $tab         = \t
 
 $ascdigit  = 0-9
-$unidigit  = \x03 -- Trick Alex into handling Unicode. See alexGetByte.
+$unidigit  = \x03 -- Trick Alex into handling Unicode. See [Unicode in Alex].
 $decdigit  = $ascdigit -- for now, should really be $digit (ToDo)
 $digit     = [$ascdigit $unidigit]
 
 $special   = [\(\)\,\;\[\]\`\{\}]
 $ascsymbol = [\!\#\$\%\&\*\+\.\/\<\=\>\?\@\\\^\|\-\~\:]
-$unisymbol = \x04 -- Trick Alex into handling Unicode. See alexGetByte.
+$unisymbol = \x04 -- Trick Alex into handling Unicode. See [Unicode in Alex].
 $symbol    = [$ascsymbol $unisymbol] # [$special \_\"\']
 
-$unilarge  = \x01 -- Trick Alex into handling Unicode. See alexGetByte.
+$unilarge  = \x01 -- Trick Alex into handling Unicode. See [Unicode in Alex].
 $asclarge  = [A-Z]
 $large     = [$asclarge $unilarge]
 
-$unismall  = \x02 -- Trick Alex into handling Unicode. See alexGetByte.
+$unismall  = \x02 -- Trick Alex into handling Unicode. See [Unicode in Alex].
 $ascsmall  = [a-z]
 $small     = [$ascsmall $unismall \_]
 
-$unigraphic = \x06 -- Trick Alex into handling Unicode. See alexGetByte.
+$unigraphic = \x06 -- Trick Alex into handling Unicode. See [Unicode in Alex].
 $graphic   = [$small $large $symbol $digit $special $unigraphic \"\']
 
 $binit     = 0-1
 $octit     = 0-7
 $hexit     = [$decdigit A-F a-f]
 
-$uniidchar = \x07 -- Trick Alex into handling Unicode. See alexGetByte.
+$uniidchar = \x07 -- Trick Alex into handling Unicode. See [Unicode in Alex].
 $idchar    = [$small $large $digit $uniidchar \']
 
 $pragmachar = [$small $large $digit]
@@ -177,11 +178,14 @@ $docsym    = [\| \^ \* \$]
 @varsym    = ($symbol # \:) $symbol*  -- variable (operator) symbol
 @consym    = \: $symbol*              -- constructor (operator) symbol
 
-@decimal     = $decdigit+
-@binary      = $binit+
-@octal       = $octit+
-@hexadecimal = $hexit+
-@exponent    = [eE] [\-\+]? @decimal
+-- See Note [Lexing NumericUnderscores extension] and #14473
+@numspc       = _*                   -- numeric spacer (#14473)
+@decimal      = $decdigit(@numspc $decdigit)*
+@binary       = $binit(@numspc $binit)*
+@octal        = $octit(@numspc $octit)*
+@hexadecimal  = $hexit(@numspc $hexit)*
+@exponent     = @numspc [eE] [\-\+]? @decimal
+@bin_exponent = @numspc [pP] [\-\+]? @decimal
 
 @qual = (@conid \.)+
 @qvarid = @qual @varid
@@ -189,7 +193,8 @@ $docsym    = [\| \^ \* \$]
 @qvarsym = @qual @varsym
 @qconsym = @qual @consym
 
-@floating_point = @decimal \. @decimal @exponent? | @decimal @exponent
+@floating_point = @numspc @decimal \. @decimal @exponent? | @numspc @decimal @exponent
+@hex_floating_point = @numspc @hexadecimal \. @hexadecimal @bin_exponent? | @numspc @hexadecimal @bin_exponent
 
 -- normal signed numerical literals can only be explicitly negative,
 -- not explicitly positive (contrast @exponent)
@@ -307,15 +312,18 @@ $tab          { warnTab }
 
 -- single-line line pragmas, of the form
 --    # <line> "<file>" <extra-stuff> \n
-<line_prag1> @decimal                   { setLine line_prag1a }
-<line_prag1a> \" [$graphic \ ]* \"      { setFile line_prag1b }
-<line_prag1b> .*                        { pop }
+<line_prag1> {
+  @decimal $white_no_nl+ \" [$graphic \ ]* \"  { setLineAndFile line_prag1a }
+  ()                                           { failLinePrag1 }
+}
+<line_prag1a> .*                               { popLinePrag1 }
 
 -- Haskell-style line pragmas, of the form
 --    {-# LINE <line> "<file>" #-}
-<line_prag2> @decimal                   { setLine line_prag2a }
-<line_prag2a> \" [$graphic \ ]* \"      { setFile line_prag2b }
-<line_prag2b> "#-}"|"-}"                { pop }
+<line_prag2> {
+  @decimal $white_no_nl+ \" [$graphic \ ]* \"  { setLineAndFile line_prag2a }
+}
+<line_prag2a> "#-}"|"-}"                       { pop }
    -- NOTE: accept -} at the end of a LINE pragma, for compatibility
    -- with older versions of GHC which generated these.
 
@@ -367,11 +375,6 @@ $tab          { warnTab }
 -- "special" symbols
 
 <0> {
-  "[:" / { ifExtension parrEnabled }    { token ITopabrack }
-  ":]" / { ifExtension parrEnabled }    { token ITcpabrack }
-}
-
-<0> {
   "[|"        / { ifExtension thQuotesEnabled } { token (ITopenExpQuote NoE
                                                                 NormalSyntax) }
   "[||"       / { ifExtension thQuotesEnabled } { token (ITopenTExpQuote NoE) }
@@ -483,21 +486,34 @@ $tab          { warnTab }
 
 -- For the normal boxed literals we need to be careful
 -- when trying to be close to Haskell98
+
+-- Note [Lexing NumericUnderscores extension] (#14473)
+--
+-- NumericUnderscores extension allows underscores in numeric literals.
+-- Multiple underscores are represented with @numspc macro.
+-- To be simpler, we have only the definitions with underscores.
+-- And then we have a separate function (tok_integral and tok_frac)
+-- that validates the literals.
+-- If extensions are not enabled, check that there are no underscores.
+--
 <0> {
   -- Normal integral literals (:: Num a => a, from Integer)
   @decimal                                                               { tok_num positive 0 0 decimal }
-  0[bB] @binary                / { ifExtension binaryLiteralsEnabled }   { tok_num positive 2 2 binary }
-  0[oO] @octal                                                           { tok_num positive 2 2 octal }
-  0[xX] @hexadecimal                                                     { tok_num positive 2 2 hexadecimal }
+  0[bB] @numspc @binary        / { ifExtension binaryLiteralsEnabled }   { tok_num positive 2 2 binary }
+  0[oO] @numspc @octal                                                   { tok_num positive 2 2 octal }
+  0[xX] @numspc @hexadecimal                                             { tok_num positive 2 2 hexadecimal }
   @negative @decimal           / { ifExtension negativeLiteralsEnabled } { tok_num negative 1 1 decimal }
-  @negative 0[bB] @binary      / { ifExtension negativeLiteralsEnabled `alexAndPred`
-                                   ifExtension binaryLiteralsEnabled }   { tok_num negative 3 3 binary }
-  @negative 0[oO] @octal       / { ifExtension negativeLiteralsEnabled } { tok_num negative 3 3 octal }
-  @negative 0[xX] @hexadecimal / { ifExtension negativeLiteralsEnabled } { tok_num negative 3 3 hexadecimal }
+  @negative 0[bB] @numspc @binary  / { ifExtension negativeLiteralsEnabled `alexAndPred`
+                                       ifExtension binaryLiteralsEnabled }   { tok_num negative 3 3 binary }
+  @negative 0[oO] @numspc @octal   / { ifExtension negativeLiteralsEnabled } { tok_num negative 3 3 octal }
+  @negative 0[xX] @numspc @hexadecimal / { ifExtension negativeLiteralsEnabled } { tok_num negative 3 3 hexadecimal }
 
   -- Normal rational literals (:: Fractional a => a, from Rational)
-  @floating_point                                                        { strtoken tok_float }
-  @negative @floating_point    / { ifExtension negativeLiteralsEnabled } { strtoken tok_float }
+  @floating_point                                                        { tok_frac 0 tok_float }
+  @negative @floating_point    / { ifExtension negativeLiteralsEnabled } { tok_frac 0 tok_float }
+  0[xX] @numspc @hex_floating_point     / { ifExtension hexFloatLiteralsEnabled } { tok_frac 0 tok_hex_float }
+  @negative 0[xX] @numspc @hex_floating_point / { ifExtension hexFloatLiteralsEnabled `alexAndPred`
+                                                  ifExtension negativeLiteralsEnabled } { tok_frac 0 tok_hex_float }
 }
 
 <0> {
@@ -505,26 +521,26 @@ $tab          { warnTab }
   -- It's simpler (and faster?) to give separate cases to the negatives,
   -- especially considering octal/hexadecimal prefixes.
   @decimal                     \# / { ifExtension magicHashEnabled } { tok_primint positive 0 1 decimal }
-  0[bB] @binary                \# / { ifExtension magicHashEnabled `alexAndPred`
+  0[bB] @numspc @binary        \# / { ifExtension magicHashEnabled `alexAndPred`
                                       ifExtension binaryLiteralsEnabled } { tok_primint positive 2 3 binary }
-  0[oO] @octal                 \# / { ifExtension magicHashEnabled } { tok_primint positive 2 3 octal }
-  0[xX] @hexadecimal           \# / { ifExtension magicHashEnabled } { tok_primint positive 2 3 hexadecimal }
+  0[oO] @numspc @octal         \# / { ifExtension magicHashEnabled } { tok_primint positive 2 3 octal }
+  0[xX] @numspc @hexadecimal   \# / { ifExtension magicHashEnabled } { tok_primint positive 2 3 hexadecimal }
   @negative @decimal           \# / { ifExtension magicHashEnabled } { tok_primint negative 1 2 decimal }
-  @negative 0[bB] @binary      \# / { ifExtension magicHashEnabled `alexAndPred`
-                                      ifExtension binaryLiteralsEnabled } { tok_primint negative 3 4 binary }
-  @negative 0[oO] @octal       \# / { ifExtension magicHashEnabled } { tok_primint negative 3 4 octal }
-  @negative 0[xX] @hexadecimal \# / { ifExtension magicHashEnabled } { tok_primint negative 3 4 hexadecimal }
+  @negative 0[bB] @numspc @binary  \# / { ifExtension magicHashEnabled `alexAndPred`
+                                          ifExtension binaryLiteralsEnabled } { tok_primint negative 3 4 binary }
+  @negative 0[oO] @numspc @octal   \# / { ifExtension magicHashEnabled } { tok_primint negative 3 4 octal }
+  @negative 0[xX] @numspc @hexadecimal \# / { ifExtension magicHashEnabled } { tok_primint negative 3 4 hexadecimal }
 
   @decimal                     \# \# / { ifExtension magicHashEnabled } { tok_primword 0 2 decimal }
-  0[bB] @binary                \# \# / { ifExtension magicHashEnabled `alexAndPred`
+  0[bB] @numspc @binary        \# \# / { ifExtension magicHashEnabled `alexAndPred`
                                          ifExtension binaryLiteralsEnabled } { tok_primword 2 4 binary }
-  0[oO] @octal                 \# \# / { ifExtension magicHashEnabled } { tok_primword 2 4 octal }
-  0[xX] @hexadecimal           \# \# / { ifExtension magicHashEnabled } { tok_primword 2 4 hexadecimal }
+  0[oO] @numspc @octal         \# \# / { ifExtension magicHashEnabled } { tok_primword 2 4 octal }
+  0[xX] @numspc @hexadecimal   \# \# / { ifExtension magicHashEnabled } { tok_primword 2 4 hexadecimal }
 
   -- Unboxed floats and doubles (:: Float#, :: Double#)
   -- prim_{float,double} work with signed literals
-  @signed @floating_point \# / { ifExtension magicHashEnabled } { init_strtoken 1 tok_primfloat }
-  @signed @floating_point \# \# / { ifExtension magicHashEnabled } { init_strtoken 2 tok_primdouble }
+  @signed @floating_point \# / { ifExtension magicHashEnabled } { tok_frac 1 tok_primfloat }
+  @signed @floating_point \# \# / { ifExtension magicHashEnabled } { tok_frac 2 tok_primdouble }
 }
 
 -- Strings and chars are lexed by hand-written code.  The reason is
@@ -620,6 +636,7 @@ data Token
   | ITstatic
   | ITstock
   | ITanyclass
+  | ITvia
 
   -- Backpack tokens
   | ITunit
@@ -635,7 +652,8 @@ data Token
   | ITrules_prag        SourceText
   | ITwarning_prag      SourceText
   | ITdeprecated_prag   SourceText
-  | ITline_prag
+  | ITline_prag         SourceText  -- not usually produced, see 'use_pos_prags'
+  | ITcolumn_prag       SourceText  -- not usually produced, see 'use_pos_prags'
   | ITscc_prag          SourceText
   | ITgenerated_prag    SourceText
   | ITcore_prag         SourceText         -- hdaume: core annotations
@@ -647,15 +665,13 @@ data Token
   | IToptions_prag String
   | ITinclude_prag String
   | ITlanguage_prag
-  | ITvect_prag         SourceText
-  | ITvect_scalar_prag  SourceText
-  | ITnovect_prag       SourceText
   | ITminimal_prag      SourceText
   | IToverlappable_prag SourceText  -- instance overlap mode
   | IToverlapping_prag  SourceText  -- instance overlap mode
   | IToverlaps_prag     SourceText  -- instance overlap mode
   | ITincoherent_prag   SourceText  -- instance overlap mode
   | ITctype             SourceText
+  | ITcomment_line_prag         -- See Note [Nested comment line pragmas]
 
   | ITdotdot                    -- reserved symbols
   | ITcolon
@@ -668,10 +684,10 @@ data Token
   | ITrarrow            IsUnicodeSyntax
   | ITat
   | ITtilde
-  | ITtildehsh
   | ITdarrow            IsUnicodeSyntax
   | ITminus
   | ITbang
+  | ITstar              IsUnicodeSyntax
   | ITdot
 
   | ITbiglam                    -- GHC-extension symbols
@@ -820,6 +836,7 @@ reservedWordsFM = listToUFM $
          ( "static",         ITstatic,        0 ),
          ( "stock",          ITstock,         0 ),
          ( "anyclass",       ITanyclass,      0 ),
+         ( "via",            ITvia,           0 ),
          ( "group",          ITgroup,         xbit TransformComprehensionsBit),
          ( "by",             ITby,            xbit TransformComprehensionsBit),
          ( "using",          ITusing,         xbit TransformComprehensionsBit),
@@ -878,11 +895,12 @@ reservedSymsFM = listToUFM $
        ,("->",  ITrarrow NormalSyntax, always)
        ,("@",   ITat,                  always)
        ,("~",   ITtilde,               always)
-       ,("~#",  ITtildehsh,            magicHashEnabled)
        ,("=>",  ITdarrow NormalSyntax, always)
        ,("-",   ITminus,               always)
        ,("!",   ITbang,                always)
 
+       ,("*", ITstar NormalSyntax, starIsTypeEnabled)
+
         -- For 'forall a . t'
        ,(".", ITdot,  always) -- \i -> explicitForallEnabled i || inRulePrag i)
 
@@ -905,6 +923,8 @@ reservedSymsFM = listToUFM $
                                 \i -> unicodeSyntaxEnabled i && arrowsEnabled i)
        ,("⤜",   ITRarrowtail UnicodeSyntax,
                                 \i -> unicodeSyntaxEnabled i && arrowsEnabled i)
+       ,("★",   ITstar UnicodeSyntax,
+                  \i -> unicodeSyntaxEnabled i && starIsTypeEnabled i)
 
         -- ToDo: ideally, → and ∷ should be "specials", so that they cannot
         -- form part of a large operator.  This would let us have a better
@@ -938,17 +958,26 @@ strtoken :: (String -> Token) -> Action
 strtoken f span buf len =
   return (L span $! (f $! lexemeToString buf len))
 
-init_strtoken :: Int -> (String -> Token) -> Action
--- like strtoken, but drops the last N character(s)
-init_strtoken drop f span buf len =
-  return (L span $! (f $! lexemeToString buf (len-drop)))
-
 begin :: Int -> Action
 begin code _span _str _len = do pushLexState code; lexToken
 
 pop :: Action
 pop _span _buf _len = do _ <- popLexState
                          lexToken
+-- See Note [Nested comment line pragmas]
+failLinePrag1 :: Action
+failLinePrag1 span _buf _len = do
+  b <- extension inNestedComment
+  if b then return (L span ITcomment_line_prag)
+       else lexError "lexical error in pragma"
+
+-- See Note [Nested comment line pragmas]
+popLinePrag1 :: Action
+popLinePrag1 span _buf _len = do
+  b <- extension inNestedComment
+  if b then return (L span ITcomment_line_prag) else do
+    _ <- popLexState
+    lexToken
 
 hopefully_open_brace :: Action
 hopefully_open_brace span buf len
@@ -1088,6 +1117,12 @@ nested_comment cont span buf len = do
         Nothing  -> errBrace input span
         Just ('-',input) -> go ('-':'\123':commentAcc) (n+1) input
         Just (_,_)       -> go ('\123':commentAcc) n input
+      -- See Note [Nested comment line pragmas]
+      Just ('\n',input) -> case alexGetChar' input of
+        Nothing  -> errBrace input span
+        Just ('#',_) -> do (parsedAcc,input) <- parseNestedPragma input
+                           go (parsedAcc ++ '\n':commentAcc) n input
+        Just (_,_)   -> go ('\n':commentAcc) n input
       Just (c,input) -> go (c:commentAcc) n input
 
 nested_doc_comment :: Action
@@ -1107,8 +1142,60 @@ nested_doc_comment span buf _len = withLexedDocType (go "")
           let cont = do input <- getInput; go commentAcc input docType False
           nested_comment cont span buf _len
         Just (_,_) -> go ('\123':commentAcc) input docType False
+      -- See Note [Nested comment line pragmas]
+      Just ('\n',input) -> case alexGetChar' input of
+        Nothing  -> errBrace input span
+        Just ('#',_) -> do (parsedAcc,input) <- parseNestedPragma input
+                           go (parsedAcc ++ '\n':commentAcc) input docType False
+        Just (_,_)   -> go ('\n':commentAcc) input docType False
       Just (c,input) -> go (c:commentAcc) input docType False
 
+-- See Note [Nested comment line pragmas]
+parseNestedPragma :: AlexInput -> P (String,AlexInput)
+parseNestedPragma input@(AI _ buf) = do
+  origInput <- getInput
+  setInput input
+  setExts (.|. xbit InNestedCommentBit)
+  pushLexState bol
+  lt <- lexToken
+  _ <- popLexState
+  setExts (.&. complement (xbit InNestedCommentBit))
+  postInput@(AI _ postBuf) <- getInput
+  setInput origInput
+  case unLoc lt of
+    ITcomment_line_prag -> do
+      let bytes = byteDiff buf postBuf
+          diff  = lexemeToString buf bytes
+      return (reverse diff, postInput)
+    lt' -> panic ("parseNestedPragma: unexpected token" ++ (show lt'))
+
+{-
+Note [Nested comment line pragmas]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+We used to ignore cpp-preprocessor-generated #line pragmas if they were inside
+nested comments.
+
+Now, when parsing a nested comment, if we encounter a line starting with '#' we
+call parseNestedPragma, which executes the following:
+1. Save the current lexer input (loc, buf) for later
+2. Set the current lexer input to the beginning of the line starting with '#'
+3. Turn the 'InNestedComment' extension on
+4. Push the 'bol' lexer state
+5. Lex a token. Due to (2), (3), and (4), this should always lex a single line
+   or less and return the ITcomment_line_prag token. This may set source line
+   and file location if a #line pragma is successfully parsed
+6. Restore lexer input and state to what they were before we did all this
+7. Return control to the function parsing a nested comment, informing it of
+   what the lexer parsed
+
+Regarding (5) above:
+Every exit from the 'bol' lexer state (do_bol, popLinePrag1, failLinePrag1)
+checks if the 'InNestedComment' extension is set. If it is, that function will
+return control to parseNestedPragma by returning the ITcomment_line_prag token.
+
+See #314 for more background on the bug this fixes.
+-}
+
 withLexedDocType :: (AlexInput -> (String -> Token) -> Bool -> P (RealLocated Token))
                  -> P (RealLocated Token)
 withLexedDocType lexDocComment = do
@@ -1135,6 +1222,27 @@ rulePrag span buf len = do
   let !src = lexemeToString buf len
   return (L span (ITrules_prag (SourceText src)))
 
+-- When 'use_pos_prags' is not set, it is expected that we emit a token instead
+-- of updating the position in 'PState'
+linePrag :: Action
+linePrag span buf len = do
+  ps <- getPState
+  if use_pos_prags ps
+    then begin line_prag2 span buf len
+    else let !src = lexemeToString buf len
+         in return (L span (ITline_prag (SourceText src)))
+
+-- When 'use_pos_prags' is not set, it is expected that we emit a token instead
+-- of updating the position in 'PState'
+columnPrag :: Action
+columnPrag span buf len = do
+  ps <- getPState
+  let !src = lexemeToString buf len
+  if use_pos_prags ps
+    then begin column_prag span buf len
+    else let !src = lexemeToString buf len
+         in return (L span (ITcolumn_prag (SourceText src)))
+
 endPrag :: Action
 endPrag span _buf _len = do
   setExts (.&. complement (xbit InRulePragBit))
@@ -1210,15 +1318,14 @@ varid :: Action
 varid span buf len =
   case lookupUFM reservedWordsFM fs of
     Just (ITcase, _) -> do
-      lambdaCase <- extension lambdaCaseEnabled
-      keyword <- if lambdaCase
-                 then do
-                   lastTk <- getLastTk
-                   return $ case lastTk of
-                     Just ITlam -> ITlcase
-                     _          -> ITcase
-                 else
-                   return ITcase
+      lastTk <- getLastTk
+      keyword <- case lastTk of
+        Just ITlam -> do
+          lambdaCase <- extension lambdaCaseEnabled
+          if lambdaCase
+            then return ITlcase
+            else failMsgP "Illegal lambda-case (use -XLambdaCase)"
+        _ -> return ITcase
       maybe_layout keyword
       return $ L span keyword
     Just (ITstatic, _) -> do
@@ -1272,8 +1379,12 @@ tok_integral :: (SourceText -> Integer -> Token)
              -> Int -> Int
              -> (Integer, (Char -> Int))
              -> Action
-tok_integral itint transint transbuf translen (radix,char_to_int) span buf len
- = return $ L span $ itint (SourceText $ lexemeToString buf len)
+tok_integral itint transint transbuf translen (radix,char_to_int) span buf len = do
+  numericUnderscores <- extension numericUnderscoresEnabled  -- #14473
+  let src = lexemeToString buf len
+  if (not numericUnderscores) && ('_' `elem` src)
+    then failMsgP "Use NumericUnderscores to allow underscores in integer literals"
+    else return $ L span $ itint (SourceText src)
        $! transint $ parseUnsignedInteger
        (offsetBytes transbuf buf) (subtract translen len) radix char_to_int
 
@@ -1305,15 +1416,32 @@ octal = (8,octDecDigit)
 hexadecimal = (16,hexDigit)
 
 -- readRational can understand negative rationals, exponents, everything.
+tok_frac :: Int -> (String -> Token) -> Action
+tok_frac drop f span buf len = do
+  numericUnderscores <- extension numericUnderscoresEnabled  -- #14473
+  let src = lexemeToString buf (len-drop)
+  if (not numericUnderscores) && ('_' `elem` src)
+    then failMsgP "Use NumericUnderscores to allow underscores in floating literals"
+    else return (L span $! (f $! src))
+
 tok_float, tok_primfloat, tok_primdouble :: String -> Token
-tok_float      str  = ITrational   $! readFractionalLit str
-tok_primfloat  str  = ITprimfloat  $! readFractionalLit str
-tok_primdouble str  = ITprimdouble $! readFractionalLit str
+tok_float        str = ITrational   $! readFractionalLit str
+tok_hex_float    str = ITrational   $! readHexFractionalLit str
+tok_primfloat    str = ITprimfloat  $! readFractionalLit str
+tok_primdouble   str = ITprimdouble $! readFractionalLit str
 
 readFractionalLit :: String -> FractionalLit
 readFractionalLit str = ((FL $! (SourceText str)) $! is_neg) $! readRational str
                         where is_neg = case str of ('-':_) -> True
                                                    _       -> False
+readHexFractionalLit :: String -> FractionalLit
+readHexFractionalLit str =
+  FL { fl_text  = SourceText str
+     , fl_neg   = case str of
+                    '-' : _ -> True
+                    _       -> False
+     , fl_value = readHexRational str
+     }
 
 -- -----------------------------------------------------------------------------
 -- Layout processing
@@ -1321,20 +1449,23 @@ readFractionalLit str = ((FL $! (SourceText str)) $! is_neg) $! readRational str
 -- we're at the first token on a line, insert layout tokens if necessary
 do_bol :: Action
 do_bol span _str _len = do
-        (pos, gen_semic) <- getOffside
-        case pos of
-            LT -> do
-                --trace "layout: inserting '}'" $ do
-                popContext
-                -- do NOT pop the lex state, we might have a ';' to insert
-                return (L span ITvccurly)
-            EQ | gen_semic -> do
-                --trace "layout: inserting ';'" $ do
-                _ <- popLexState
-                return (L span ITsemi)
-            _ -> do
-                _ <- popLexState
-                lexToken
+        -- See Note [Nested comment line pragmas]
+        b <- extension inNestedComment
+        if b then return (L span ITcomment_line_prag) else do
+          (pos, gen_semic) <- getOffside
+          case pos of
+              LT -> do
+                  --trace "layout: inserting '}'" $ do
+                  popContext
+                  -- do NOT pop the lex state, we might have a ';' to insert
+                  return (L span ITvccurly)
+              EQ | gen_semic -> do
+                  --trace "layout: inserting ';'" $ do
+                  _ <- popLexState
+                  return (L span ITsemi)
+              _ -> do
+                  _ <- popLexState
+                  lexToken
 
 -- certain keywords put us in the "layout" state, where we might
 -- add an opening curly brace.
@@ -1394,29 +1525,13 @@ do_layout_left span _buf _len = do
 -- -----------------------------------------------------------------------------
 -- LINE pragmas
 
-setLine :: Int -> Action
-setLine code span buf len = do
-  let line = parseUnsignedInteger buf len 10 octDecDigit
-  setSrcLoc (mkRealSrcLoc (srcSpanFile span) (fromIntegral line - 1) 1)
-        -- subtract one: the line number refers to the *following* line
-  _ <- popLexState
-  pushLexState code
-  lexToken
-
-setColumn :: Action
-setColumn span buf len = do
-  let column =
-        case reads (lexemeToString buf len) of
-          [(column, _)] -> column
-          _ -> error "setColumn: expected integer" -- shouldn't happen
-  setSrcLoc (mkRealSrcLoc (srcSpanFile span) (srcSpanEndLine span)
-                          (fromIntegral (column :: Integer)))
-  _ <- popLexState
-  lexToken
-
-setFile :: Int -> Action
-setFile code span buf len = do
-  let file = mkFastString (go (lexemeToString (stepOn buf) (len-2)))
+setLineAndFile :: Int -> Action
+setLineAndFile code span buf len = do
+  let src = lexemeToString buf (len - 1)  -- drop trailing quotation mark
+      linenumLen = length $ head $ words src
+      linenum = parseUnsignedInteger buf linenumLen 10 octDecDigit
+      file = mkFastString $ go $ drop 1 $ dropWhile (/= '"') src
+          -- skip everything through first quotation mark to get to the filename
         where go ('\\':c:cs) = c : go cs
               go (c:cs)      = c : go cs
               go []          = []
@@ -1430,12 +1545,24 @@ setFile code span buf len = do
               -- filenames and it does not remove duplicate
               -- backslashes after the drive letter (should it?).
   setAlrLastLoc $ alrInitialLoc file
-  setSrcLoc (mkRealSrcLoc file (srcSpanEndLine span) (srcSpanEndCol span))
+  setSrcLoc (mkRealSrcLoc file (fromIntegral linenum - 1) (srcSpanEndCol span))
+      -- subtract one: the line number refers to the *following* line
   addSrcFile file
   _ <- popLexState
   pushLexState code
   lexToken
 
+setColumn :: Action
+setColumn span buf len = do
+  let column =
+        case reads (lexemeToString buf len) of
+          [(column, _)] -> column
+          _ -> error "setColumn: expected integer" -- shouldn't happen
+  setSrcLoc (mkRealSrcLoc (srcSpanFile span) (srcSpanEndLine span)
+                          (fromIntegral (column :: Integer)))
+  _ <- popLexState
+  lexToken
+
 alrInitialLoc :: FastString -> RealSrcSpan
 alrInitialLoc file = mkRealSrcSpan loc loc
     where -- This is a hack to ensure that the first line in a file
@@ -1859,6 +1986,10 @@ data PState = PState {
         -- token doesn't need to close anything:
         alr_justClosedExplicitLetBlock :: Bool,
 
+        -- If this is enabled, '{-# LINE ... -#}' and '{-# COLUMN ... #-}'
+        -- update the 'loc' field. Otherwise, those pragmas are lexed as tokens.
+        use_pos_prags :: Bool,
+
         -- The next three are used to implement Annotations giving the
         -- locations of 'noise' tokens in the source, so that users of
         -- the GHC API can do source to source conversions.
@@ -1892,12 +2023,10 @@ instance Applicative P where
 
 instance Monad P where
   (>>=) = thenP
-  fail = failP
+  fail = MonadFail.fail
 
-#if __GLASGOW_HASKELL__ > 710
 instance MonadFail P where
   fail = failP
-#endif
 
 returnP :: a -> P a
 returnP a = a `seq` (P $ \s -> POk s a)
@@ -1970,27 +2099,29 @@ getLastTk = P $ \s@(PState { last_tk = last_tk }) -> POk s last_tk
 
 data AlexInput = AI RealSrcLoc StringBuffer
 
-alexInputPrevChar :: AlexInput -> Char
-alexInputPrevChar (AI _ buf) = prevChar buf '\n'
+{-
+Note [Unicode in Alex]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Although newer versions of Alex support unicode, this grammar is processed with
+the old style '--latin1' behaviour. This means that when implementing the
+functions
 
--- backwards compatibility for Alex 2.x
-alexGetChar :: AlexInput -> Maybe (Char,AlexInput)
-alexGetChar inp = case alexGetByte inp of
-                    Nothing    -> Nothing
-                    Just (b,i) -> c `seq` Just (c,i)
-                       where c = chr $ fromIntegral b
+    alexGetByte       :: AlexInput -> Maybe (Word8,AlexInput)
+    alexInputPrevChar :: AlexInput -> Char
 
-alexGetByte :: AlexInput -> Maybe (Word8,AlexInput)
-alexGetByte (AI loc s)
-  | atEnd s   = Nothing
-  | otherwise = byte `seq` loc' `seq` s' `seq`
-                --trace (show (ord c)) $
-                Just (byte, (AI loc' s'))
-  where (c,s') = nextChar s
-        loc'   = advanceSrcLoc loc c
-        byte   = fromIntegral $ ord adj_c
+which Alex uses to take apart our 'AlexInput', we must
 
-        non_graphic     = '\x00'
+  * return a latin1 character in the 'Word8' that 'alexGetByte' expects
+  * return a latin1 character in 'alexInputPrevChar'.
+
+We handle this in 'adjustChar' by squishing entire classes of unicode
+characters into single bytes.
+-}
+
+{-# INLINE adjustChar #-}
+adjustChar :: Char -> Word8
+adjustChar c = fromIntegral $ ord adj_c
+  where non_graphic     = '\x00'
         upper           = '\x01'
         lower           = '\x02'
         digit           = '\x03'
@@ -2036,6 +2167,32 @@ alexGetByte (AI loc s)
                   Space                 -> space
                   _other                -> non_graphic
 
+-- Getting the previous 'Char' isn't enough here - we need to convert it into
+-- the same format that 'alexGetByte' would have produced.
+--
+-- See Note [Unicode in Alex] and #13986.
+alexInputPrevChar :: AlexInput -> Char
+alexInputPrevChar (AI _ buf) = chr (fromIntegral (adjustChar pc))
+  where pc = prevChar buf '\n'
+
+-- backwards compatibility for Alex 2.x
+alexGetChar :: AlexInput -> Maybe (Char,AlexInput)
+alexGetChar inp = case alexGetByte inp of
+                    Nothing    -> Nothing
+                    Just (b,i) -> c `seq` Just (c,i)
+                       where c = chr $ fromIntegral b
+
+-- See Note [Unicode in Alex]
+alexGetByte :: AlexInput -> Maybe (Word8,AlexInput)
+alexGetByte (AI loc s)
+  | atEnd s   = Nothing
+  | otherwise = byte `seq` loc' `seq` s' `seq`
+                --trace (show (ord c)) $
+                Just (byte, (AI loc' s'))
+  where (c,s') = nextChar s
+        loc'   = advanceSrcLoc loc c
+        byte   = adjustChar c
+
 -- This version does not squash unicode characters, it is used when
 -- lexing strings.
 alexGetChar' :: AlexInput -> Maybe (Char,AlexInput)
@@ -2146,7 +2303,6 @@ data ExtBits
   = FfiBit
   | InterruptibleFfiBit
   | CApiFfiBit
-  | ParrBit
   | ArrowsBit
   | ThBit
   | ThQuotesBit
@@ -2166,6 +2322,7 @@ data ExtBits
   | TransformComprehensionsBit
   | QqBit -- enable quasiquoting
   | InRulePragBit
+  | InNestedCommentBit -- See Note [Nested comment line pragmas]
   | RawTokenStreamBit -- producing a token stream with all comments included
   | SccProfilingOnBit
   | HpcBit
@@ -2178,15 +2335,16 @@ data ExtBits
   | LambdaCaseBit
   | BinaryLiteralsBit
   | NegativeLiteralsBit
+  | HexFloatLiteralsBit
   | TypeApplicationsBit
   | StaticPointersBit
+  | NumericUnderscoresBit
+  | StarIsTypeBit
   deriving Enum
 
 
 always :: ExtsBitmap -> Bool
 always           _     = True
-parrEnabled :: ExtsBitmap -> Bool
-parrEnabled = xtest ParrBit
 arrowsEnabled :: ExtsBitmap -> Bool
 arrowsEnabled = xtest ArrowsBit
 thEnabled :: ExtsBitmap -> Bool
@@ -2217,6 +2375,8 @@ qqEnabled :: ExtsBitmap -> Bool
 qqEnabled = xtest QqBit
 inRulePrag :: ExtsBitmap -> Bool
 inRulePrag = xtest InRulePragBit
+inNestedComment :: ExtsBitmap -> Bool
+inNestedComment = xtest InNestedCommentBit
 rawTokenStreamEnabled :: ExtsBitmap -> Bool
 rawTokenStreamEnabled = xtest RawTokenStreamBit
 alternativeLayoutRule :: ExtsBitmap -> Bool
@@ -2240,12 +2400,18 @@ binaryLiteralsEnabled :: ExtsBitmap -> Bool
 binaryLiteralsEnabled = xtest BinaryLiteralsBit
 negativeLiteralsEnabled :: ExtsBitmap -> Bool
 negativeLiteralsEnabled = xtest NegativeLiteralsBit
+hexFloatLiteralsEnabled :: ExtsBitmap -> Bool
+hexFloatLiteralsEnabled = xtest HexFloatLiteralsBit
 patternSynonymsEnabled :: ExtsBitmap -> Bool
 patternSynonymsEnabled = xtest PatternSynonymsBit
 typeApplicationEnabled :: ExtsBitmap -> Bool
 typeApplicationEnabled = xtest TypeApplicationsBit
 staticPointersEnabled :: ExtsBitmap -> Bool
 staticPointersEnabled = xtest StaticPointersBit
+numericUnderscoresEnabled :: ExtsBitmap -> Bool
+numericUnderscoresEnabled = xtest NumericUnderscoresBit
+starIsTypeEnabled :: ExtsBitmap -> Bool
+starIsTypeEnabled = xtest StarIsTypeBit
 
 -- PState for parsing options pragmas
 --
@@ -2264,46 +2430,55 @@ mkParserFlags flags =
     , pExtsBitmap = bitmap
     }
   where
-      bitmap =     FfiBit                      `setBitIf` xopt LangExt.ForeignFunctionInterface flags
-               .|. InterruptibleFfiBit         `setBitIf` xopt LangExt.InterruptibleFFI         flags
-               .|. CApiFfiBit                  `setBitIf` xopt LangExt.CApiFFI                  flags
-               .|. ParrBit                     `setBitIf` xopt LangExt.ParallelArrays           flags
-               .|. ArrowsBit                   `setBitIf` xopt LangExt.Arrows                   flags
-               .|. ThBit                       `setBitIf` xopt LangExt.TemplateHaskell          flags
-               .|. ThQuotesBit                 `setBitIf` xopt LangExt.TemplateHaskellQuotes    flags
-               .|. QqBit                       `setBitIf` xopt LangExt.QuasiQuotes              flags
-               .|. IpBit                       `setBitIf` xopt LangExt.ImplicitParams           flags
-               .|. OverloadedLabelsBit         `setBitIf` xopt LangExt.OverloadedLabels         flags
-               .|. ExplicitForallBit           `setBitIf` xopt LangExt.ExplicitForAll           flags
-               .|. BangPatBit                  `setBitIf` xopt LangExt.BangPatterns             flags
-               .|. HaddockBit                  `setBitIf` gopt Opt_Haddock                      flags
-               .|. MagicHashBit                `setBitIf` xopt LangExt.MagicHash                flags
-               .|. RecursiveDoBit              `setBitIf` xopt LangExt.RecursiveDo              flags
-               .|. UnicodeSyntaxBit            `setBitIf` xopt LangExt.UnicodeSyntax            flags
-               .|. UnboxedTuplesBit            `setBitIf` xopt LangExt.UnboxedTuples            flags
-               .|. UnboxedSumsBit              `setBitIf` xopt LangExt.UnboxedSums              flags
-               .|. DatatypeContextsBit         `setBitIf` xopt LangExt.DatatypeContexts         flags
-               .|. TransformComprehensionsBit  `setBitIf` xopt LangExt.TransformListComp        flags
-               .|. TransformComprehensionsBit  `setBitIf` xopt LangExt.MonadComprehensions      flags
-               .|. RawTokenStreamBit           `setBitIf` gopt Opt_KeepRawTokenStream           flags
-               .|. HpcBit                      `setBitIf` gopt Opt_Hpc                          flags
-               .|. AlternativeLayoutRuleBit    `setBitIf` xopt LangExt.AlternativeLayoutRule    flags
-               .|. RelaxedLayoutBit            `setBitIf` xopt LangExt.RelaxedLayout            flags
-               .|. SccProfilingOnBit           `setBitIf` gopt Opt_SccProfilingOn               flags
-               .|. NondecreasingIndentationBit `setBitIf` xopt LangExt.NondecreasingIndentation flags
-               .|. SafeHaskellBit              `setBitIf` safeImportsOn                         flags
-               .|. TraditionalRecordSyntaxBit  `setBitIf` xopt LangExt.TraditionalRecordSyntax  flags
-               .|. ExplicitNamespacesBit       `setBitIf` xopt LangExt.ExplicitNamespaces flags
-               .|. LambdaCaseBit               `setBitIf` xopt LangExt.LambdaCase               flags
-               .|. BinaryLiteralsBit           `setBitIf` xopt LangExt.BinaryLiterals           flags
-               .|. NegativeLiteralsBit         `setBitIf` xopt LangExt.NegativeLiterals         flags
-               .|. PatternSynonymsBit          `setBitIf` xopt LangExt.PatternSynonyms          flags
-               .|. TypeApplicationsBit         `setBitIf` xopt LangExt.TypeApplications         flags
-               .|. StaticPointersBit           `setBitIf` xopt LangExt.StaticPointers           flags
-
-      setBitIf :: ExtBits -> Bool -> ExtsBitmap
-      b `setBitIf` cond | cond      = xbit b
-                        | otherwise = 0
+    bitmap = safeHaskellBit .|. langExtBits .|. optBits
+    safeHaskellBit =
+          SafeHaskellBit `setBitIf` safeImportsOn flags
+    langExtBits =
+          FfiBit                      `xoptBit` LangExt.ForeignFunctionInterface
+      .|. InterruptibleFfiBit         `xoptBit` LangExt.InterruptibleFFI
+      .|. CApiFfiBit                  `xoptBit` LangExt.CApiFFI
+      .|. ArrowsBit                   `xoptBit` LangExt.Arrows
+      .|. ThBit                       `xoptBit` LangExt.TemplateHaskell
+      .|. ThQuotesBit                 `xoptBit` LangExt.TemplateHaskellQuotes
+      .|. QqBit                       `xoptBit` LangExt.QuasiQuotes
+      .|. IpBit                       `xoptBit` LangExt.ImplicitParams
+      .|. OverloadedLabelsBit         `xoptBit` LangExt.OverloadedLabels
+      .|. ExplicitForallBit           `xoptBit` LangExt.ExplicitForAll
+      .|. BangPatBit                  `xoptBit` LangExt.BangPatterns
+      .|. MagicHashBit                `xoptBit` LangExt.MagicHash
+      .|. RecursiveDoBit              `xoptBit` LangExt.RecursiveDo
+      .|. UnicodeSyntaxBit            `xoptBit` LangExt.UnicodeSyntax
+      .|. UnboxedTuplesBit            `xoptBit` LangExt.UnboxedTuples
+      .|. UnboxedSumsBit              `xoptBit` LangExt.UnboxedSums
+      .|. DatatypeContextsBit         `xoptBit` LangExt.DatatypeContexts
+      .|. TransformComprehensionsBit  `xoptBit` LangExt.TransformListComp
+      .|. TransformComprehensionsBit  `xoptBit` LangExt.MonadComprehensions
+      .|. AlternativeLayoutRuleBit    `xoptBit` LangExt.AlternativeLayoutRule
+      .|. RelaxedLayoutBit            `xoptBit` LangExt.RelaxedLayout
+      .|. NondecreasingIndentationBit `xoptBit` LangExt.NondecreasingIndentation
+      .|. TraditionalRecordSyntaxBit  `xoptBit` LangExt.TraditionalRecordSyntax
+      .|. ExplicitNamespacesBit       `xoptBit` LangExt.ExplicitNamespaces
+      .|. LambdaCaseBit               `xoptBit` LangExt.LambdaCase
+      .|. BinaryLiteralsBit           `xoptBit` LangExt.BinaryLiterals
+      .|. NegativeLiteralsBit         `xoptBit` LangExt.NegativeLiterals
+      .|. HexFloatLiteralsBit         `xoptBit` LangExt.HexFloatLiterals
+      .|. PatternSynonymsBit          `xoptBit` LangExt.PatternSynonyms
+      .|. TypeApplicationsBit         `xoptBit` LangExt.TypeApplications
+      .|. StaticPointersBit           `xoptBit` LangExt.StaticPointers
+      .|. NumericUnderscoresBit       `xoptBit` LangExt.NumericUnderscores
+      .|. StarIsTypeBit               `xoptBit` LangExt.StarIsType
+    optBits =
+          HaddockBit        `goptBit` Opt_Haddock
+      .|. RawTokenStreamBit `goptBit` Opt_KeepRawTokenStream
+      .|. HpcBit            `goptBit` Opt_Hpc
+      .|. SccProfilingOnBit `goptBit` Opt_SccProfilingOn
+
+    xoptBit bit ext = bit `setBitIf` xopt ext flags
+    goptBit bit opt = bit `setBitIf` gopt opt flags
+
+    setBitIf :: ExtBits -> Bool -> ExtsBitmap
+    b `setBitIf` cond | cond      = xbit b
+                      | otherwise = 0
 
 -- | Creates a parse state from a 'DynFlags' value
 mkPState :: DynFlags -> StringBuffer -> RealSrcLoc -> PState
@@ -2331,6 +2506,7 @@ mkPStatePure options buf loc =
       alr_context = [],
       alr_expecting_ocurly = Nothing,
       alr_justClosedExplicitLetBlock = False,
+      use_pos_prags = True,
       annotations = [],
       comment_q = [],
       annotations_comments = []
@@ -2742,14 +2918,14 @@ reportLexError loc1 loc2 buf str
 lexTokenStream :: StringBuffer -> RealSrcLoc -> DynFlags -> ParseResult [Located Token]
 lexTokenStream buf loc dflags = unP go initState
     where dflags' = gopt_set (gopt_unset dflags Opt_Haddock) Opt_KeepRawTokenStream
-          initState = mkPState dflags' buf loc
+          initState = (mkPState dflags' buf loc) { use_pos_prags = False }
           go = do
             ltok <- lexer False return
             case ltok of
               L _ ITeof -> return []
               _ -> liftM (ltok:) go
 
-linePrags = Map.singleton "line" (begin line_prag2)
+linePrags = Map.singleton "line" linePrag
 
 fileHeaderPrags = Map.fromList([("options", lex_string_prag IToptions_prag),
                                  ("options_ghc", lex_string_prag IToptions_prag),
@@ -2785,8 +2961,6 @@ oneWordPrags = Map.fromList [
      ("unpack", strtoken (\s -> ITunpack_prag (SourceText s))),
      ("nounpack", strtoken (\s -> ITnounpack_prag (SourceText s))),
      ("ann", strtoken (\s -> ITann_prag (SourceText s))),
-     ("vectorize", strtoken (\s -> ITvect_prag (SourceText s))),
-     ("novectorize", strtoken (\s -> ITnovect_prag (SourceText s))),
      ("minimal", strtoken (\s -> ITminimal_prag (SourceText s))),
      ("overlaps", strtoken (\s -> IToverlaps_prag (SourceText s))),
      ("overlappable", strtoken (\s -> IToverlappable_prag (SourceText s))),
@@ -2794,10 +2968,10 @@ oneWordPrags = Map.fromList [
      ("incoherent", strtoken (\s -> ITincoherent_prag (SourceText s))),
      ("ctype", strtoken (\s -> ITctype (SourceText s))),
      ("complete", strtoken (\s -> ITcomplete_prag (SourceText s))),
-     ("column", begin column_prag)
+     ("column", columnPrag)
      ]
 
-twoWordPrags = Map.fromList([
+twoWordPrags = Map.fromList [
      ("inline conlike",
          strtoken (\s -> (ITinline_prag (SourceText s) Inline ConLike))),
      ("notinline conlike",
@@ -2805,9 +2979,8 @@ twoWordPrags = Map.fromList([
      ("specialize inline",
          strtoken (\s -> (ITspec_inline_prag (SourceText s) True))),
      ("specialize notinline",
-         strtoken (\s -> (ITspec_inline_prag (SourceText s) False))),
-     ("vectorize scalar",
-         strtoken (\s -> ITvect_scalar_prag (SourceText s)))])
+         strtoken (\s -> (ITspec_inline_prag (SourceText s) False)))
+     ]
 
 dispatch_pragmas :: Map String Action -> Action
 dispatch_pragmas prags span buf len = case Map.lookup (clean_pragma (lexemeToString buf len)) prags of
@@ -2829,8 +3002,6 @@ clean_pragma prag = canon_ws (map toLower (unprefix prag))
                           canonical prag' = case prag' of
                                               "noinline" -> "notinline"
                                               "specialise" -> "specialize"
-                                              "vectorise" -> "vectorize"
-                                              "novectorise" -> "novectorize"
                                               "constructorlike" -> "conlike"
                                               _ -> prag'
                           canon_ws s = unwords (map canonical (words s))