1 files changed, 187 insertions, 66 deletions
diff --git a/compiler/parser/Lexer.x b/compiler/parser/Lexer.x
index 2ada289db4..fc6779a359 100644
--- a/compiler/parser/Lexer.x
+++ b/compiler/parser/Lexer.x
@@ -44,6 +44,7 @@
 {-# LANGUAGE CPP #-}
 {-# LANGUAGE BangPatterns #-}
 {-# LANGUAGE LambdaCase #-}
+{-# LANGUAGE MultiWayIf #-}
 
 {-# OPTIONS_GHC -funbox-strict-fields #-}
 
@@ -376,10 +377,6 @@ $tab          { warnTab }
   "[t|"       / { ifExtension ThQuotesBit } { token ITopenTypQuote }
   "|]"        / { ifExtension ThQuotesBit } { token (ITcloseQuote NormalSyntax) }
   "||]"       / { ifExtension ThQuotesBit } { token ITcloseTExpQuote }
-  \$ @varid   / { ifExtension ThBit }       { skip_one_varid ITidEscape }
-  "$$" @varid / { ifExtension ThBit }       { skip_two_varid ITidTyEscape }
-  "$("        / { ifExtension ThBit }       { token ITparenEscape }
-  "$$("       / { ifExtension ThBit }       { token ITparenTyEscape }
 
   "[" @varid "|"  / { ifExtension QqBit }   { lex_quasiquote_tok }
 
@@ -398,14 +395,6 @@ $tab          { warnTab }
     { token (ITcloseQuote UnicodeSyntax) }
 }
 
-  -- See Note [Lexing type applications]
-<0> {
-    [^ $idchar \) ] ^
-  "@"
-    / { ifExtension TypeApplicationsBit `alexAndPred` notFollowedBySymbol }
-    { token ITtypeApp }
-}
-
 <0> {
   "(|"
     / { ifExtension ArrowsBit `alexAndPred`
@@ -471,12 +460,20 @@ $tab          { warnTab }
   @conid "#"+       / { ifExtension MagicHashBit } { idtoken conid }
 }
 
+-- Operators classified into prefix, suffix, tight infix, and loose infix.
+-- See Note [Whitespace-sensitive operator parsing]
+<0> {
+  @varsym / { precededByClosingToken `alexAndPred` followedByOpeningToken } { varsym_tight_infix }
+  @varsym / { followedByOpeningToken }  { varsym_prefix }
+  @varsym / { precededByClosingToken }  { varsym_suffix }
+  @varsym                               { varsym_loose_infix }
+}
+
 -- ToDo: - move `var` and (sym) into lexical syntax?
 --       - remove backquote from $special?
 <0> {
   @qvarsym                                         { idtoken qvarsym }
   @qconsym                                         { idtoken qconsym }
-  @varsym                                          { varsym }
   @consym                                          { consym }
 }
 
@@ -550,32 +547,114 @@ $tab          { warnTab }
   \"                            { lex_string_tok }
 }
 
--- Note [Lexing type applications]
--- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
--- The desired syntax for type applications is to prefix the type application
--- with '@', like this:
+-- Note [Whitespace-sensitive operator parsing]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+-- In accord with GHC Proposal #229 https://github.com/ghc-proposals/ghc-proposals/blob/master/proposals/0229-whitespace-bang-patterns.rst
+-- we classify operator occurrences into four categories:
+--
+--     a ! b   -- a loose infix occurrence
+--     a!b     -- a tight infix occurrence
+--     a !b    -- a prefix occurrence
+--     a! b    -- a suffix occurrence
+--
+-- The rules are a bit more elaborate than simply checking for whitespace, in
+-- order to accommodate the following use cases:
+--
+--     f (!a) = ...    -- prefix occurrence
+--     g (a !)         -- loose infix occurrence
+--     g (! a)         -- loose infix occurrence
+--
+-- The precise rules are as follows:
+--
+--  * Identifiers, literals, and opening brackets (, (#, [, [|, [||, [p|, [e|,
+--    [t|, {, are considered "opening tokens". The function followedByOpeningToken
+--    tests whether the next token is an opening token.
+--
+--  * Identifiers, literals, and closing brackets ), #), ], |], },
+--    are considered "closing tokens". The function precededByClosingToken tests
+--    whether the previous token is a closing token.
 --
---   foo @Int @Bool baz bum
+--  * Whitespace, comments, separators, and other tokens, are considered
+--    neither opening nor closing.
 --
--- This, of course, conflicts with as-patterns. The conflict arises because
--- expressions and patterns use the same parser, and also because we want
--- to allow type patterns within expression patterns.
+--  * Any unqualified operator occurrence is classified as prefix, suffix, or
+--    tight/loose infix, based on preceding and following tokens:
 --
--- Disambiguation is accomplished by requiring *something* to appear between
--- type application and the preceding token. This something must end with
--- a character that cannot be the end of the variable bound in an as-pattern.
--- Currently (June 2015), this means that the something cannot end with a
--- $idchar or a close-paren. (The close-paren is necessary if the as-bound
--- identifier is symbolic.)
+--       precededByClosingToken | followedByOpeningToken | Occurrence
+--      ------------------------+------------------------+------------
+--       False                  | True                   | prefix
+--       True                   | False                  | suffix
+--       True                   | True                   | tight infix
+--       False                  | False                  | loose infix
+--      ------------------------+------------------------+------------
 --
--- Note that looking for whitespace before the '@' is insufficient, because
--- of this pathological case:
+-- A loose infix occurrence is always considered an operator. Other types of
+-- occurrences may be assigned a special per-operator meaning override:
 --
---   foo {- hi -}@Int
+--   Operator |  Occurrence   | Token returned
+--  ----------+---------------+------------------------------------------
+--    !       |  prefix       | ITbang
+--            |               |   strictness annotation or bang pattern,
+--            |               |   e.g.  f !x = rhs, data T = MkT !a
+--            |  not prefix   | ITvarsym "!"
+--            |               |   ordinary operator or type operator,
+--            |               |   e.g.  xs ! 3, (! x), Int ! Bool
+--  ----------+---------------+------------------------------------------
+--    ~       |  prefix       | ITtilde
+--            |               |   laziness annotation or lazy pattern,
+--            |               |   e.g.  f ~x = rhs, data T = MkT ~a
+--            |  not prefix   | ITvarsym "~"
+--            |               |   ordinary operator or type operator,
+--            |               |   e.g.  xs ~ 3, (~ x), Int ~ Bool
+--  ----------+---------------+------------------------------------------
+--    $  $$   |  prefix       | ITdollar, ITdollardollar
+--            |               |   untyped or typed Template Haskell splice,
+--            |               |   e.g.  $(f x), $$(f x), $$"str"
+--            |  not prefix   | ITvarsym "$", ITvarsym "$$"
+--            |               |   ordinary operator or type operator,
+--            |               |   e.g.  f $ g x, a $$ b
+--  ----------+---------------+------------------------------------------
+--    @       |  prefix       | ITtypeApp
+--            |               |   type application, e.g.  fmap @Maybe
+--            |  tight infix  | ITat
+--            |               |   as-pattern, e.g.  f p@(a,b) = rhs
+--            |  suffix       | parse error
+--            |               |   e.g. f p@ x = rhs
+--            |  loose infix  | ITvarsym "@"
+--            |               |   ordinary operator or type operator,
+--            |               |   e.g.  f @ g, (f @)
+--  ----------+---------------+------------------------------------------
 --
--- This design is predicated on the fact that as-patterns are generally
--- whitespace-free, and also that this whole thing is opt-in, with the
--- TypeApplications extension.
+-- Also, some of these overrides are guarded behind language extensions.
+-- According to the specification, we must determine the occurrence based on
+-- surrounding *tokens* (see the proposal for the exact rules). However, in
+-- the implementation we cheat a little and do the classification based on
+-- characters, for reasons of both simplicity and efficiency (see
+-- 'followedByOpeningToken' and 'precededByClosingToken')
+--
+-- When an operator is subject to a meaning override, it is mapped to special
+-- token: ITbang, ITtilde, ITat, ITdollar, ITdollardollar. Otherwise, it is
+-- returned as ITvarsym.
+--
+-- For example, this is how we process the (!):
+--
+--    precededByClosingToken | followedByOpeningToken | Token
+--   ------------------------+------------------------+-------------
+--    False                  | True                   | ITbang
+--    True                   | False                  | ITvarsym "!"
+--    True                   | True                   | ITvarsym "!"
+--    False                  | False                  | ITvarsym "!"
+--   ------------------------+------------------------+-------------
+--
+-- And this is how we process the (@):
+--
+--    precededByClosingToken | followedByOpeningToken | Token
+--   ------------------------+------------------------+-------------
+--    False                  | True                   | ITtypeApp
+--    True                   | False                  | parse error
+--    True                   | True                   | ITat
+--    False                  | False                  | ITvarsym "@"
+--   ------------------------+------------------------+-------------
 
 -- -----------------------------------------------------------------------------
 -- Alex "Haskell code fragment bottom"
@@ -680,11 +759,12 @@ data Token
   | ITvbar
   | ITlarrow            IsUnicodeSyntax
   | ITrarrow            IsUnicodeSyntax
-  | ITat
-  | ITtilde
   | ITdarrow            IsUnicodeSyntax
   | ITminus
-  | ITbang
+  | ITbang     -- Prefix (!) only, e.g. f !x = rhs
+  | ITtilde    -- Prefix (~) only, e.g. f ~x = rhs
+  | ITat       -- Tight infix (@) only, e.g. f x@pat = rhs
+  | ITtypeApp  -- Prefix (@) only, e.g. f @t
   | ITstar              IsUnicodeSyntax
   | ITdot
 
@@ -740,10 +820,8 @@ data Token
   | ITcloseQuote IsUnicodeSyntax        --  |]
   | ITopenTExpQuote HasE                --  [|| or [e||
   | ITcloseTExpQuote                    --  ||]
-  | ITidEscape   FastString             --  $x
-  | ITparenEscape                       --  $(
-  | ITidTyEscape   FastString           --  $$x
-  | ITparenTyEscape                     --  $$(
+  | ITdollar                            --  prefix $
+  | ITdollardollar                      --  prefix $$
   | ITtyQuote                           --  ''
   | ITquasiQuote (FastString,FastString,RealSrcSpan)
     -- ITquasiQuote(quoter, quote, loc)
@@ -764,11 +842,6 @@ data Token
   | ITLarrowtail IsUnicodeSyntax -- ^ @-<<@
   | ITRarrowtail IsUnicodeSyntax -- ^ @>>-@
 
-  -- | Type application '@' (lexed differently than as-pattern '@',
-  -- due to checking for preceding whitespace)
-  | ITtypeApp
-
-
   | ITunknown String             -- ^ Used when the lexer can't make sense of it
   | ITeof                        -- ^ end of file token
 
@@ -889,11 +962,8 @@ reservedSymsFM = listToUFM $
        ,("|",   ITvbar,                     NormalSyntax,  0 )
        ,("<-",  ITlarrow NormalSyntax,      NormalSyntax,  0 )
        ,("->",  ITrarrow NormalSyntax,      NormalSyntax,  0 )
-       ,("@",   ITat,                       NormalSyntax,  0 )
-       ,("~",   ITtilde,                    NormalSyntax,  0 )
        ,("=>",  ITdarrow NormalSyntax,      NormalSyntax,  0 )
        ,("-",   ITminus,                    NormalSyntax,  0 )
-       ,("!",   ITbang,                     NormalSyntax,  0 )
 
        ,("*",   ITstar NormalSyntax,        NormalSyntax,  xbit StarIsTypeBit)
 
@@ -988,6 +1058,32 @@ pop_and :: Action -> Action
 pop_and act span buf len = do _ <- popLexState
                               act span buf len
 
+-- See Note [Whitespace-sensitive operator parsing]
+followedByOpeningToken :: AlexAccPred ExtsBitmap
+followedByOpeningToken _ _ _ (AI _ buf)
+  | atEnd buf = False
+  | otherwise =
+      case nextChar buf of
+        ('{', buf') -> nextCharIsNot buf' (== '-')
+        ('(', _) -> True
+        ('[', _) -> True
+        ('\"', _) -> True
+        ('\'', _) -> True
+        ('_', _) -> True
+        (c, _) -> isAlphaNum c
+
+-- See Note [Whitespace-sensitive operator parsing]
+precededByClosingToken :: AlexAccPred ExtsBitmap
+precededByClosingToken _ (AI _ buf) _ _ =
+  case prevChar buf '\n' of
+    '}' -> decodePrevNChars 1 buf /= "-"
+    ')' -> True
+    ']' -> True
+    '\"' -> True
+    '\'' -> True
+    '_' -> True
+    c -> isAlphaNum c
+
 {-# INLINE nextCharIs #-}
 nextCharIs :: StringBuffer -> (Char -> Bool) -> Bool
 nextCharIs buf p = not (atEnd buf) && p (currentChar buf)
@@ -1348,11 +1444,40 @@ qvarsym, qconsym :: StringBuffer -> Int -> Token
 qvarsym buf len = ITqvarsym $! splitQualName buf len False
 qconsym buf len = ITqconsym $! splitQualName buf len False
 
-varsym, consym :: Action
-varsym = sym ITvarsym
-consym = sym ITconsym
-
-sym :: (FastString -> Token) -> Action
+-- See Note [Whitespace-sensitive operator parsing]
+varsym_prefix :: Action
+varsym_prefix = sym $ \exts s ->
+  if | TypeApplicationsBit `xtest` exts, s == fsLit "@"
+     -> return ITtypeApp
+     | ThBit `xtest` exts, s == fsLit "$"
+     -> return ITdollar
+     | ThBit `xtest` exts, s == fsLit "$$"
+     -> return ITdollardollar
+     | s == fsLit "!" -> return ITbang
+     | s == fsLit "~" -> return ITtilde
+     | otherwise -> return (ITvarsym s)
+
+-- See Note [Whitespace-sensitive operator parsing]
+varsym_suffix :: Action
+varsym_suffix = sym $ \_ s ->
+  if | s == fsLit "@"
+     -> failMsgP "Suffix occurrence of @. For an as-pattern, remove the leading whitespace."
+     | otherwise -> return (ITvarsym s)
+
+-- See Note [Whitespace-sensitive operator parsing]
+varsym_tight_infix :: Action
+varsym_tight_infix = sym $ \_ s ->
+  if | s == fsLit "@" -> return ITat
+     | otherwise -> return (ITvarsym s)
+
+-- See Note [Whitespace-sensitive operator parsing]
+varsym_loose_infix :: Action
+varsym_loose_infix = sym (\_ s -> return $ ITvarsym s)
+
+consym :: Action
+consym = sym (\_exts s -> return $ ITconsym s)
+
+sym :: (ExtsBitmap -> FastString -> P Token) -> Action
 sym con span buf len =
   case lookupUFM reservedSymsFM fs of
     Just (keyword, NormalSyntax, 0) ->
@@ -1361,19 +1486,20 @@ sym con span buf len =
       exts <- getExts
       if exts .&. i /= 0
         then return $ L span keyword
-        else return $ L span (con fs)
+        else L span <$!> con exts fs
     Just (keyword, UnicodeSyntax, 0) -> do
       exts <- getExts
       if xtest UnicodeSyntaxBit exts
         then return $ L span keyword
-        else return $ L span (con fs)
+        else L span <$!> con exts fs
     Just (keyword, UnicodeSyntax, i) -> do
       exts <- getExts
       if exts .&. i /= 0 && xtest UnicodeSyntaxBit exts
         then return $ L span keyword
-        else return $ L span (con fs)
-    Nothing ->
-      return $ L span $! con fs
+        else L span <$!> con exts fs
+    Nothing -> do
+      exts <- getExts
+      L span <$!> con exts fs
   where
     !fs = lexemeToFastString buf len
 
@@ -2889,8 +3015,6 @@ isALRopen ITobrack        = True
 isALRopen ITocurly        = True
 -- GHC Extensions:
 isALRopen IToubxparen     = True
-isALRopen ITparenEscape   = True
-isALRopen ITparenTyEscape = True
 isALRopen _               = False
 
 isALRclose :: Token -> Bool
@@ -2945,12 +3069,9 @@ lexToken = do
         let bytes = byteDiff buf buf2
         span `seq` setLastToken span bytes
         lt <- t span buf bytes
-        case unRealSrcSpan lt of
-          ITlineComment _  -> return lt
-          ITblockComment _ -> return lt
-          lt' -> do
-            setLastTk lt'
-            return lt
+        let lt' = unRealSrcSpan lt
+        unless (isComment lt') (setLastTk lt')
+        return lt
 
 reportLexError :: RealSrcLoc -> RealSrcLoc -> StringBuffer -> [Char] -> P a
 reportLexError loc1 loc2 buf str