diff options
Diffstat (limited to 'compiler/parser')
-rw-r--r-- | compiler/parser/Ctype.lhs | 341 | ||||
-rw-r--r-- | compiler/parser/LexCore.hs | 130 | ||||
-rw-r--r-- | compiler/parser/Lexer.x | 1457 | ||||
-rw-r--r-- | compiler/parser/Parser.y.pp | 1607 | ||||
-rw-r--r-- | compiler/parser/ParserCore.y | 339 | ||||
-rw-r--r-- | compiler/parser/ParserCoreUtils.hs | 72 | ||||
-rw-r--r-- | compiler/parser/RdrHsSyn.lhs | 869 | ||||
-rw-r--r-- | compiler/parser/cutils.c | 70 | ||||
-rw-r--r-- | compiler/parser/cutils.h | 16 | ||||
-rw-r--r-- | compiler/parser/hschooks.c | 55 | ||||
-rw-r--r-- | compiler/parser/hschooks.h | 9 |
11 files changed, 4965 insertions, 0 deletions
diff --git a/compiler/parser/Ctype.lhs b/compiler/parser/Ctype.lhs new file mode 100644 index 0000000000..dbe4e9f1b0 --- /dev/null +++ b/compiler/parser/Ctype.lhs @@ -0,0 +1,341 @@ +Character classification + +\begin{code} +module Ctype + ( is_ident -- Char# -> Bool + , is_symbol -- Char# -> Bool + , is_any -- Char# -> Bool + , is_space -- Char# -> Bool + , is_lower -- Char# -> Bool + , is_upper -- Char# -> Bool + , is_digit -- Char# -> Bool + , is_alphanum -- Char# -> Bool + + , is_hexdigit, is_octdigit + , hexDigit, octDecDigit + ) where + +#include "HsVersions.h" + +import DATA_INT ( Int32 ) +import DATA_BITS ( Bits((.&.)) ) +import Char ( ord, chr ) +\end{code} + +Bit masks + +\begin{code} +cIdent, cSymbol, cAny, cSpace, cLower, cUpper, cDigit :: Int +cIdent = 1 +cSymbol = 2 +cAny = 4 +cSpace = 8 +cLower = 16 +cUpper = 32 +cDigit = 64 +\end{code} + +The predicates below look costly, but aren't, GHC+GCC do a great job +at the big case below. + +\begin{code} +{-# INLINE is_ctype #-} +is_ctype :: Int -> Char -> Bool +is_ctype mask c = (fromIntegral (charType c) .&. fromIntegral mask) /= (0::Int32) + +is_ident, is_symbol, is_any, is_space, is_lower, is_upper, is_digit :: Char -> Bool +is_ident = is_ctype cIdent +is_symbol = is_ctype cSymbol +is_any = is_ctype cAny +is_space = is_ctype cSpace +is_lower = is_ctype cLower +is_upper = is_ctype cUpper +is_digit = is_ctype cDigit +is_alphanum = is_ctype (cLower+cUpper+cDigit) +\end{code} + +Utils + +\begin{code} +hexDigit :: Char -> Int +hexDigit c | is_digit c = ord c - ord '0' + | otherwise = ord (to_lower c) - ord 'a' + 10 + +octDecDigit :: Char -> Int +octDecDigit c = ord c - ord '0' + +is_hexdigit c + = is_digit c + || (c >= 'a' && c <= 'f') + || (c >= 'A' && c <= 'F') + +is_octdigit c = c >= '0' && c <= '7' + +to_lower c + | c >= 'A' && c <= 'Z' = chr (ord c - (ord 'A' - ord 'a')) + | otherwise = c +\end{code} + +We really mean .|. instead of + below, but GHC currently doesn't do +any constant folding with bitops. *sigh* + +\begin{code} +charType :: Char -> Int +charType c = case c of + '\0' -> 0 -- \000 + '\1' -> 0 -- \001 + '\2' -> 0 -- \002 + '\3' -> 0 -- \003 + '\4' -> 0 -- \004 + '\5' -> 0 -- \005 + '\6' -> 0 -- \006 + '\7' -> 0 -- \007 + '\8' -> 0 -- \010 + '\9' -> cAny + cSpace -- \t + '\10' -> cSpace -- \n (not allowed in strings, so !cAny) + '\11' -> cAny + cSpace -- \v + '\12' -> cAny + cSpace -- \f + '\13' -> cAny + cSpace -- ^M + '\14' -> 0 -- \016 + '\15' -> 0 -- \017 + '\16' -> 0 -- \020 + '\17' -> 0 -- \021 + '\18' -> 0 -- \022 + '\19' -> 0 -- \023 + '\20' -> 0 -- \024 + '\21' -> 0 -- \025 + '\22' -> 0 -- \026 + '\23' -> 0 -- \027 + '\24' -> 0 -- \030 + '\25' -> 0 -- \031 + '\26' -> 0 -- \032 + '\27' -> 0 -- \033 + '\28' -> 0 -- \034 + '\29' -> 0 -- \035 + '\30' -> 0 -- \036 + '\31' -> 0 -- \037 + '\32' -> cAny + cSpace -- + '\33' -> cAny + cSymbol -- ! + '\34' -> cAny -- " + '\35' -> cAny + cSymbol -- # + '\36' -> cAny + cSymbol -- $ + '\37' -> cAny + cSymbol -- % + '\38' -> cAny + cSymbol -- & + '\39' -> cAny + cIdent -- ' + '\40' -> cAny -- ( + '\41' -> cAny -- ) + '\42' -> cAny + cSymbol -- * + '\43' -> cAny + cSymbol -- + + '\44' -> cAny -- , + '\45' -> cAny + cSymbol -- - + '\46' -> cAny + cSymbol -- . + '\47' -> cAny + cSymbol -- / + '\48' -> cAny + cIdent + cDigit -- 0 + '\49' -> cAny + cIdent + cDigit -- 1 + '\50' -> cAny + cIdent + cDigit -- 2 + '\51' -> cAny + cIdent + cDigit -- 3 + '\52' -> cAny + cIdent + cDigit -- 4 + '\53' -> cAny + cIdent + cDigit -- 5 + '\54' -> cAny + cIdent + cDigit -- 6 + '\55' -> cAny + cIdent + cDigit -- 7 + '\56' -> cAny + cIdent + cDigit -- 8 + '\57' -> cAny + cIdent + cDigit -- 9 + '\58' -> cAny + cSymbol -- : + '\59' -> cAny -- ; + '\60' -> cAny + cSymbol -- < + '\61' -> cAny + cSymbol -- = + '\62' -> cAny + cSymbol -- > + '\63' -> cAny + cSymbol -- ? + '\64' -> cAny + cSymbol -- @ + '\65' -> cAny + cIdent + cUpper -- A + '\66' -> cAny + cIdent + cUpper -- B + '\67' -> cAny + cIdent + cUpper -- C + '\68' -> cAny + cIdent + cUpper -- D + '\69' -> cAny + cIdent + cUpper -- E + '\70' -> cAny + cIdent + cUpper -- F + '\71' -> cAny + cIdent + cUpper -- G + '\72' -> cAny + cIdent + cUpper -- H + '\73' -> cAny + cIdent + cUpper -- I + '\74' -> cAny + cIdent + cUpper -- J + '\75' -> cAny + cIdent + cUpper -- K + '\76' -> cAny + cIdent + cUpper -- L + '\77' -> cAny + cIdent + cUpper -- M + '\78' -> cAny + cIdent + cUpper -- N + '\79' -> cAny + cIdent + cUpper -- O + '\80' -> cAny + cIdent + cUpper -- P + '\81' -> cAny + cIdent + cUpper -- Q + '\82' -> cAny + cIdent + cUpper -- R + '\83' -> cAny + cIdent + cUpper -- S + '\84' -> cAny + cIdent + cUpper -- T + '\85' -> cAny + cIdent + cUpper -- U + '\86' -> cAny + cIdent + cUpper -- V + '\87' -> cAny + cIdent + cUpper -- W + '\88' -> cAny + cIdent + cUpper -- X + '\89' -> cAny + cIdent + cUpper -- Y + '\90' -> cAny + cIdent + cUpper -- Z + '\91' -> cAny -- [ + '\92' -> cAny + cSymbol -- backslash + '\93' -> cAny -- ] + '\94' -> cAny + cSymbol -- ^ + '\95' -> cAny + cIdent + cLower -- _ + '\96' -> cAny -- ` + '\97' -> cAny + cIdent + cLower -- a + '\98' -> cAny + cIdent + cLower -- b + '\99' -> cAny + cIdent + cLower -- c + '\100' -> cAny + cIdent + cLower -- d + '\101' -> cAny + cIdent + cLower -- e + '\102' -> cAny + cIdent + cLower -- f + '\103' -> cAny + cIdent + cLower -- g + '\104' -> cAny + cIdent + cLower -- h + '\105' -> cAny + cIdent + cLower -- i + '\106' -> cAny + cIdent + cLower -- j + '\107' -> cAny + cIdent + cLower -- k + '\108' -> cAny + cIdent + cLower -- l + '\109' -> cAny + cIdent + cLower -- m + '\110' -> cAny + cIdent + cLower -- n + '\111' -> cAny + cIdent + cLower -- o + '\112' -> cAny + cIdent + cLower -- p + '\113' -> cAny + cIdent + cLower -- q + '\114' -> cAny + cIdent + cLower -- r + '\115' -> cAny + cIdent + cLower -- s + '\116' -> cAny + cIdent + cLower -- t + '\117' -> cAny + cIdent + cLower -- u + '\118' -> cAny + cIdent + cLower -- v + '\119' -> cAny + cIdent + cLower -- w + '\120' -> cAny + cIdent + cLower -- x + '\121' -> cAny + cIdent + cLower -- y + '\122' -> cAny + cIdent + cLower -- z + '\123' -> cAny -- { + '\124' -> cAny + cSymbol -- | + '\125' -> cAny -- } + '\126' -> cAny + cSymbol -- ~ + '\127' -> 0 -- \177 + '\128' -> 0 -- \200 + '\129' -> 0 -- \201 + '\130' -> 0 -- \202 + '\131' -> 0 -- \203 + '\132' -> 0 -- \204 + '\133' -> 0 -- \205 + '\134' -> 0 -- \206 + '\135' -> 0 -- \207 + '\136' -> 0 -- \210 + '\137' -> 0 -- \211 + '\138' -> 0 -- \212 + '\139' -> 0 -- \213 + '\140' -> 0 -- \214 + '\141' -> 0 -- \215 + '\142' -> 0 -- \216 + '\143' -> 0 -- \217 + '\144' -> 0 -- \220 + '\145' -> 0 -- \221 + '\146' -> 0 -- \222 + '\147' -> 0 -- \223 + '\148' -> 0 -- \224 + '\149' -> 0 -- \225 + '\150' -> 0 -- \226 + '\151' -> 0 -- \227 + '\152' -> 0 -- \230 + '\153' -> 0 -- \231 + '\154' -> 0 -- \232 + '\155' -> 0 -- \233 + '\156' -> 0 -- \234 + '\157' -> 0 -- \235 + '\158' -> 0 -- \236 + '\159' -> 0 -- \237 + '\160' -> cSpace -- + '\161' -> cAny + cSymbol -- ¡ + '\162' -> cAny + cSymbol -- ¢ + '\163' -> cAny + cSymbol -- £ + '\164' -> cAny + cSymbol -- ¤ + '\165' -> cAny + cSymbol -- ¥ + '\166' -> cAny + cSymbol -- ¦ + '\167' -> cAny + cSymbol -- § + '\168' -> cAny + cSymbol -- ¨ + '\169' -> cAny + cSymbol -- © + '\170' -> cAny + cSymbol -- ª + '\171' -> cAny + cSymbol -- « + '\172' -> cAny + cSymbol -- ¬ + '\173' -> cAny + cSymbol -- + '\174' -> cAny + cSymbol -- ® + '\175' -> cAny + cSymbol -- ¯ + '\176' -> cAny + cSymbol -- ° + '\177' -> cAny + cSymbol -- ± + '\178' -> cAny + cSymbol -- ² + '\179' -> cAny + cSymbol -- ³ + '\180' -> cAny + cSymbol -- ´ + '\181' -> cAny + cSymbol -- µ + '\182' -> cAny + cSymbol -- ¶ + '\183' -> cAny + cSymbol -- · + '\184' -> cAny + cSymbol -- ¸ + '\185' -> cAny + cSymbol -- ¹ + '\186' -> cAny + cSymbol -- º + '\187' -> cAny + cSymbol -- » + '\188' -> cAny + cSymbol -- ¼ + '\189' -> cAny + cSymbol -- ½ + '\190' -> cAny + cSymbol -- ¾ + '\191' -> cAny + cSymbol -- ¿ + '\192' -> cAny + cIdent + cUpper -- À + '\193' -> cAny + cIdent + cUpper -- Á + '\194' -> cAny + cIdent + cUpper -- Â + '\195' -> cAny + cIdent + cUpper -- Ã + '\196' -> cAny + cIdent + cUpper -- Ä + '\197' -> cAny + cIdent + cUpper -- Å + '\198' -> cAny + cIdent + cUpper -- Æ + '\199' -> cAny + cIdent + cUpper -- Ç + '\200' -> cAny + cIdent + cUpper -- È + '\201' -> cAny + cIdent + cUpper -- É + '\202' -> cAny + cIdent + cUpper -- Ê + '\203' -> cAny + cIdent + cUpper -- Ë + '\204' -> cAny + cIdent + cUpper -- Ì + '\205' -> cAny + cIdent + cUpper -- Í + '\206' -> cAny + cIdent + cUpper -- Î + '\207' -> cAny + cIdent + cUpper -- Ï + '\208' -> cAny + cIdent + cUpper -- Ð + '\209' -> cAny + cIdent + cUpper -- Ñ + '\210' -> cAny + cIdent + cUpper -- Ò + '\211' -> cAny + cIdent + cUpper -- Ó + '\212' -> cAny + cIdent + cUpper -- Ô + '\213' -> cAny + cIdent + cUpper -- Õ + '\214' -> cAny + cIdent + cUpper -- Ö + '\215' -> cAny + cSymbol + cLower -- × + '\216' -> cAny + cIdent + cUpper -- Ø + '\217' -> cAny + cIdent + cUpper -- Ù + '\218' -> cAny + cIdent + cUpper -- Ú + '\219' -> cAny + cIdent + cUpper -- Û + '\220' -> cAny + cIdent + cUpper -- Ü + '\221' -> cAny + cIdent + cUpper -- Ý + '\222' -> cAny + cIdent + cUpper -- Þ + '\223' -> cAny + cIdent -- ß + '\224' -> cAny + cIdent + cLower -- à + '\225' -> cAny + cIdent + cLower -- á + '\226' -> cAny + cIdent + cLower -- â + '\227' -> cAny + cIdent + cLower -- ã + '\228' -> cAny + cIdent + cLower -- ä + '\229' -> cAny + cIdent + cLower -- å + '\230' -> cAny + cIdent + cLower -- æ + '\231' -> cAny + cIdent + cLower -- ç + '\232' -> cAny + cIdent + cLower -- è + '\233' -> cAny + cIdent + cLower -- é + '\234' -> cAny + cIdent + cLower -- ê + '\235' -> cAny + cIdent + cLower -- ë + '\236' -> cAny + cIdent + cLower -- ì + '\237' -> cAny + cIdent + cLower -- í + '\238' -> cAny + cIdent + cLower -- î + '\239' -> cAny + cIdent + cLower -- ï + '\240' -> cAny + cIdent + cLower -- ð + '\241' -> cAny + cIdent + cLower -- ñ + '\242' -> cAny + cIdent + cLower -- ò + '\243' -> cAny + cIdent + cLower -- ó + '\244' -> cAny + cIdent + cLower -- ô + '\245' -> cAny + cIdent + cLower -- õ + '\246' -> cAny + cIdent + cLower -- ö + '\247' -> cAny + cSymbol -- ÷ + '\248' -> cAny + cIdent -- ø + '\249' -> cAny + cIdent + cLower -- ù + '\250' -> cAny + cIdent + cLower -- ú + '\251' -> cAny + cIdent + cLower -- û + '\252' -> cAny + cIdent + cLower -- ü + '\253' -> cAny + cIdent + cLower -- ý + '\254' -> cAny + cIdent + cLower -- þ + '\255' -> cAny + cIdent + cLower -- ÿ +\end{code} diff --git a/compiler/parser/LexCore.hs b/compiler/parser/LexCore.hs new file mode 100644 index 0000000000..1a545a3e43 --- /dev/null +++ b/compiler/parser/LexCore.hs @@ -0,0 +1,130 @@ +module LexCore where + +import ParserCoreUtils +import Ratio +import Char +import qualified Numeric( readFloat, readDec ) + +isNameChar c = isAlpha c || isDigit c || (c == '_') || (c == '\'') +isKeywordChar c = isAlpha c || (c == '_') + +lexer :: (Token -> P a) -> P a +lexer cont [] = cont TKEOF [] +lexer cont ('\n':cs) = \line -> lexer cont cs (line+1) +lexer cont ('-':'>':cs) = cont TKrarrow cs + +lexer cont (c:cs) + | isSpace c = lexer cont cs + | isLower c || (c == '_') = lexName cont TKname (c:cs) + | isUpper c = lexName cont TKcname (c:cs) + | isDigit c || (c == '-') = lexNum cont (c:cs) + +lexer cont ('%':cs) = lexKeyword cont cs +lexer cont ('\'':cs) = lexChar cont cs +lexer cont ('\"':cs) = lexString [] cont cs +lexer cont ('#':cs) = cont TKhash cs +lexer cont ('(':cs) = cont TKoparen cs +lexer cont (')':cs) = cont TKcparen cs +lexer cont ('{':cs) = cont TKobrace cs +lexer cont ('}':cs) = cont TKcbrace cs +lexer cont ('=':cs) = cont TKeq cs +lexer cont (':':':':cs) = cont TKcoloncolon cs +lexer cont ('*':cs) = cont TKstar cs +lexer cont ('.':cs) = cont TKdot cs +lexer cont ('\\':cs) = cont TKlambda cs +lexer cont ('@':cs) = cont TKat cs +lexer cont ('?':cs) = cont TKquestion cs +lexer cont (';':cs) = cont TKsemicolon cs +lexer cont (c:cs) = failP "invalid character" [c] + + + +lexChar cont ('\\':'x':h1:h0:'\'':cs) + | isHexEscape [h1,h0] = cont (TKchar (hexToChar h1 h0)) cs +lexChar cont ('\\':cs) = failP "invalid char character" ('\\':(take 10 cs)) +lexChar cont ('\'':cs) = failP "invalid char character" ['\''] +lexChar cont ('\"':cs) = failP "invalid char character" ['\"'] +lexChar cont (c:'\'':cs) = cont (TKchar c) cs + + +lexString s cont ('\\':'x':h1:h0:cs) + | isHexEscape [h1,h0] = lexString (s++[hexToChar h1 h0]) cont cs +lexString s cont ('\\':cs) = failP "invalid string character" ['\\'] +lexString s cont ('\'':cs) = failP "invalid string character" ['\''] +lexString s cont ('\"':cs) = cont (TKstring s) cs +lexString s cont (c:cs) = lexString (s++[c]) cont cs + +isHexEscape = all (\c -> isHexDigit c && (isDigit c || isLower c)) + +hexToChar h1 h0 = chr (digitToInt h1 * 16 + digitToInt h0) + + +lexNum cont cs = + case cs of + ('-':cs) -> f (-1) cs + _ -> f 1 cs + where f sgn cs = + case span isDigit cs of + (digits,'.':c:rest) + | isDigit c -> cont (TKrational (fromInteger sgn * r)) rest' + where ((r,rest'):_) = readFloat (digits ++ ('.':c:rest)) + -- When reading a floating-point number, which is + -- a bit complicated, use the Haskell 98 library function + (digits,rest) -> cont (TKinteger (sgn * (read digits))) rest + +lexName cont cstr cs = cont (cstr name) rest + where (name,rest) = span isNameChar cs + +lexKeyword cont cs = + case span isKeywordChar cs of + ("module",rest) -> cont TKmodule rest + ("data",rest) -> cont TKdata rest + ("newtype",rest) -> cont TKnewtype rest + ("forall",rest) -> cont TKforall rest + ("rec",rest) -> cont TKrec rest + ("let",rest) -> cont TKlet rest + ("in",rest) -> cont TKin rest + ("case",rest) -> cont TKcase rest + ("of",rest) -> cont TKof rest + ("coerce",rest) -> cont TKcoerce rest + ("note",rest) -> cont TKnote rest + ("external",rest) -> cont TKexternal rest + ("_",rest) -> cont TKwild rest + _ -> failP "invalid keyword" ('%':cs) + + +#if __GLASGOW_HASKELL__ >= 504 +-- The readFloat in the Numeric library will do the job + +readFloat :: (RealFrac a) => ReadS a +readFloat = Numeric.readFloat + +#else +-- Haskell 98's Numeric.readFloat used to have a bogusly restricted signature +-- so it was incapable of reading a rational. +-- So for GHCs that have that old bogus library, here is the code, written out longhand. + +readFloat r = [(fromRational ((n%1)*10^^(k-d)),t) | (n,d,s) <- readFix r, + (k,t) <- readExp s] ++ + [ (0/0, t) | ("NaN",t) <- lex r] ++ + [ (1/0, t) | ("Infinity",t) <- lex r] + where + readFix r = [(read (ds++ds'), length ds', t) + | (ds,d) <- lexDigits r, + (ds',t) <- lexFrac d ] + + lexFrac ('.':ds) = lexDigits ds + lexFrac s = [("",s)] + + readExp (e:s) | e `elem` "eE" = readExp' s + readExp s = [(0,s)] + + readExp' ('-':s) = [(-k,t) | (k,t) <- Numeric.readDec s] + readExp' ('+':s) = Numeric.readDec s + readExp' s = Numeric.readDec s + +lexDigits :: ReadS String +lexDigits s = case span isDigit s of + (cs,s') | not (null cs) -> [(cs,s')] + otherwise -> [] +#endif diff --git a/compiler/parser/Lexer.x b/compiler/parser/Lexer.x new file mode 100644 index 0000000000..4c1b48efc0 --- /dev/null +++ b/compiler/parser/Lexer.x @@ -0,0 +1,1457 @@ +----------------------------------------------------------------------------- +-- (c) The University of Glasgow, 2006 +-- +-- GHC's lexer. +-- +-- This is a combination of an Alex-generated lexer from a regex +-- definition, with some hand-coded bits. +-- +-- Completely accurate information about token-spans within the source +-- file is maintained. Every token has a start and end SrcLoc attached to it. +-- +----------------------------------------------------------------------------- + +-- ToDo / known bugs: +-- - Unicode +-- - parsing integers is a bit slow +-- - readRational is a bit slow +-- +-- Known bugs, that were also in the previous version: +-- - M... should be 3 tokens, not 1. +-- - pragma-end should be only valid in a pragma + +{ +module Lexer ( + Token(..), lexer, pragState, mkPState, PState(..), + P(..), ParseResult(..), getSrcLoc, + failLocMsgP, failSpanMsgP, srcParseFail, + popContext, pushCurrentContext, setLastToken, setSrcLoc, + getLexState, popLexState, pushLexState, + extension, bangPatEnabled + ) where + +#include "HsVersions.h" + +import ErrUtils ( Message ) +import Outputable +import StringBuffer +import FastString +import FastTypes +import SrcLoc +import UniqFM +import DynFlags +import Ctype +import Util ( maybePrefixMatch, readRational ) + +import DATA_BITS +import Data.Char ( chr ) +import Ratio +--import TRACE + +#if __GLASGOW_HASKELL__ >= 605 +import Data.Char ( GeneralCategory(..), generalCategory, isPrint, isUpper ) +#else +import Compat.Unicode ( GeneralCategory(..), generalCategory, isPrint, isUpper ) +#endif +} + +$unispace = \x05 +$whitechar = [\ \t\n\r\f\v\xa0 $unispace] +$white_no_nl = $whitechar # \n + +$ascdigit = 0-9 +$unidigit = \x03 +$decdigit = $ascdigit -- for now, should really be $digit (ToDo) +$digit = [$ascdigit $unidigit] + +$special = [\(\)\,\;\[\]\`\{\}] +$ascsymbol = [\!\#\$\%\&\*\+\.\/\<\=\>\?\@\\\^\|\-\~] +$unisymbol = \x04 +$symbol = [$ascsymbol $unisymbol] # [$special \_\:\"\'] + +$unilarge = \x01 +$asclarge = [A-Z \xc0-\xd6 \xd8-\xde] +$large = [$asclarge $unilarge] + +$unismall = \x02 +$ascsmall = [a-z \xdf-\xf6 \xf8-\xff] +$small = [$ascsmall $unismall \_] + +$unigraphic = \x06 +$graphic = [$small $large $symbol $digit $special $unigraphic \:\"\'] + +$octit = 0-7 +$hexit = [$decdigit A-F a-f] +$symchar = [$symbol \:] +$nl = [\n\r] +$idchar = [$small $large $digit \'] + +@varid = $small $idchar* +@conid = $large $idchar* + +@varsym = $symbol $symchar* +@consym = \: $symchar* + +@decimal = $decdigit+ +@octal = $octit+ +@hexadecimal = $hexit+ +@exponent = [eE] [\-\+]? @decimal + +-- we support the hierarchical module name extension: +@qual = (@conid \.)+ + +@floating_point = @decimal \. @decimal @exponent? | @decimal @exponent + +haskell :- + +-- everywhere: skip whitespace and comments +$white_no_nl+ ; + +-- Everywhere: deal with nested comments. We explicitly rule out +-- pragmas, "{-#", so that we don't accidentally treat them as comments. +-- (this can happen even though pragmas will normally take precedence due to +-- longest-match, because pragmas aren't valid in every state, but comments +-- are). +"{-" / { notFollowedBy '#' } { nested_comment } + +-- Single-line comments are a bit tricky. Haskell 98 says that two or +-- more dashes followed by a symbol should be parsed as a varsym, so we +-- have to exclude those. +-- The regex says: "munch all the characters after the dashes, as long as +-- the first one is not a symbol". +"--"\-* [^$symbol :] .* ; +"--"\-* / { atEOL } ; + +-- 'bol' state: beginning of a line. Slurp up all the whitespace (including +-- blank lines) until we find a non-whitespace character, then do layout +-- processing. +-- +-- One slight wibble here: what if the line begins with {-#? In +-- theory, we have to lex the pragma to see if it's one we recognise, +-- and if it is, then we backtrack and do_bol, otherwise we treat it +-- as a nested comment. We don't bother with this: if the line begins +-- with {-#, then we'll assume it's a pragma we know about and go for do_bol. +<bol> { + \n ; + ^\# (line)? { begin line_prag1 } + ^\# pragma .* \n ; -- GCC 3.3 CPP generated, apparently + ^\# \! .* \n ; -- #!, for scripts + () { do_bol } +} + +-- after a layout keyword (let, where, do, of), we begin a new layout +-- context if the curly brace is missing. +-- Careful! This stuff is quite delicate. +<layout, layout_do> { + \{ / { notFollowedBy '-' } { pop_and open_brace } + -- we might encounter {-# here, but {- has been handled already + \n ; + ^\# (line)? { begin line_prag1 } +} + +-- do is treated in a subtly different way, see new_layout_context +<layout> () { new_layout_context True } +<layout_do> () { new_layout_context False } + +-- after a new layout context which was found to be to the left of the +-- previous context, we have generated a '{' token, and we now need to +-- generate a matching '}' token. +<layout_left> () { do_layout_left } + +<0,option_prags,glaexts> \n { begin bol } + +"{-#" $whitechar* (line|LINE) { begin line_prag2 } + +-- single-line line pragmas, of the form +-- # <line> "<file>" <extra-stuff> \n +<line_prag1> $decdigit+ { setLine line_prag1a } +<line_prag1a> \" [$graphic \ ]* \" { setFile line_prag1b } +<line_prag1b> .* { pop } + +-- Haskell-style line pragmas, of the form +-- {-# LINE <line> "<file>" #-} +<line_prag2> $decdigit+ { setLine line_prag2a } +<line_prag2a> \" [$graphic \ ]* \" { setFile line_prag2b } +<line_prag2b> "#-}"|"-}" { pop } + -- NOTE: accept -} at the end of a LINE pragma, for compatibility + -- with older versions of GHC which generated these. + +-- We only want RULES pragmas to be picked up when -fglasgow-exts +-- is on, because the contents of the pragma is always written using +-- glasgow-exts syntax (using forall etc.), so if glasgow exts are not +-- enabled, we're sure to get a parse error. +-- (ToDo: we should really emit a warning when ignoring pragmas) +<glaexts> + "{-#" $whitechar* (RULES|rules) { token ITrules_prag } + +<0,option_prags,glaexts> { + "{-#" $whitechar* (INLINE|inline) { token (ITinline_prag True) } + "{-#" $whitechar* (NO(T?)INLINE|no(t?)inline) + { token (ITinline_prag False) } + "{-#" $whitechar* (SPECIALI[SZ]E|speciali[sz]e) + { token ITspec_prag } + "{-#" $whitechar* (SPECIALI[SZ]E|speciali[sz]e) + $whitechar* (INLINE|inline) { token (ITspec_inline_prag True) } + "{-#" $whitechar* (SPECIALI[SZ]E|speciali[sz]e) + $whitechar* (NO(T?)INLINE|no(t?)inline) + { token (ITspec_inline_prag False) } + "{-#" $whitechar* (SOURCE|source) { token ITsource_prag } + "{-#" $whitechar* (DEPRECATED|deprecated) + { token ITdeprecated_prag } + "{-#" $whitechar* (SCC|scc) { token ITscc_prag } + "{-#" $whitechar* (CORE|core) { token ITcore_prag } + "{-#" $whitechar* (UNPACK|unpack) { token ITunpack_prag } + + "{-#" { nested_comment } + + -- ToDo: should only be valid inside a pragma: + "#-}" { token ITclose_prag} +} + +<option_prags> { + "{-#" $whitechar* (OPTIONS|options) { lex_string_prag IToptions_prag } + "{-#" $whitechar* (OPTIONS_GHC|options_ghc) + { lex_string_prag IToptions_prag } + "{-#" $whitechar* (LANGUAGE|language) { token ITlanguage_prag } + "{-#" $whitechar* (INCLUDE|include) { lex_string_prag ITinclude_prag } +} + +-- '0' state: ordinary lexemes +-- 'glaexts' state: glasgow extensions (postfix '#', etc.) + +-- "special" symbols + +<0,glaexts> { + "[:" / { ifExtension parrEnabled } { token ITopabrack } + ":]" / { ifExtension parrEnabled } { token ITcpabrack } +} + +<0,glaexts> { + "[|" / { ifExtension thEnabled } { token ITopenExpQuote } + "[e|" / { ifExtension thEnabled } { token ITopenExpQuote } + "[p|" / { ifExtension thEnabled } { token ITopenPatQuote } + "[d|" / { ifExtension thEnabled } { layout_token ITopenDecQuote } + "[t|" / { ifExtension thEnabled } { token ITopenTypQuote } + "|]" / { ifExtension thEnabled } { token ITcloseQuote } + \$ @varid / { ifExtension thEnabled } { skip_one_varid ITidEscape } + "$(" / { ifExtension thEnabled } { token ITparenEscape } +} + +<0,glaexts> { + "(|" / { ifExtension arrowsEnabled `alexAndPred` notFollowedBySymbol } + { special IToparenbar } + "|)" / { ifExtension arrowsEnabled } { special ITcparenbar } +} + +<0,glaexts> { + \? @varid / { ifExtension ipEnabled } { skip_one_varid ITdupipvarid } + \% @varid / { ifExtension ipEnabled } { skip_one_varid ITsplitipvarid } +} + +<glaexts> { + "(#" / { notFollowedBySymbol } { token IToubxparen } + "#)" { token ITcubxparen } + "{|" { token ITocurlybar } + "|}" { token ITccurlybar } +} + +<0,option_prags,glaexts> { + \( { special IToparen } + \) { special ITcparen } + \[ { special ITobrack } + \] { special ITcbrack } + \, { special ITcomma } + \; { special ITsemi } + \` { special ITbackquote } + + \{ { open_brace } + \} { close_brace } +} + +<0,option_prags,glaexts> { + @qual @varid { check_qvarid } + @qual @conid { idtoken qconid } + @varid { varid } + @conid { idtoken conid } +} + +-- after an illegal qvarid, such as 'M.let', +-- we back up and try again in the bad_qvarid state: +<bad_qvarid> { + @conid { pop_and (idtoken conid) } + @qual @conid { pop_and (idtoken qconid) } +} + +<glaexts> { + @qual @varid "#"+ { idtoken qvarid } + @qual @conid "#"+ { idtoken qconid } + @varid "#"+ { varid } + @conid "#"+ { idtoken conid } +} + +-- ToDo: M.(,,,) + +<0,glaexts> { + @qual @varsym { idtoken qvarsym } + @qual @consym { idtoken qconsym } + @varsym { varsym } + @consym { consym } +} + +<0,glaexts> { + @decimal { tok_decimal } + 0[oO] @octal { tok_octal } + 0[xX] @hexadecimal { tok_hexadecimal } +} + +<glaexts> { + @decimal \# { prim_decimal } + 0[oO] @octal \# { prim_octal } + 0[xX] @hexadecimal \# { prim_hexadecimal } +} + +<0,glaexts> @floating_point { strtoken tok_float } +<glaexts> @floating_point \# { init_strtoken 1 prim_float } +<glaexts> @floating_point \# \# { init_strtoken 2 prim_double } + +-- Strings and chars are lexed by hand-written code. The reason is +-- that even if we recognise the string or char here in the regex +-- lexer, we would still have to parse the string afterward in order +-- to convert it to a String. +<0,glaexts> { + \' { lex_char_tok } + \" { lex_string_tok } +} + +{ +-- work around bug in Alex 2.0 +#if __GLASGOW_HASKELL__ < 503 +unsafeAt arr i = arr ! i +#endif + +-- ----------------------------------------------------------------------------- +-- The token type + +data Token + = ITas -- Haskell keywords + | ITcase + | ITclass + | ITdata + | ITdefault + | ITderiving + | ITdo + | ITelse + | IThiding + | ITif + | ITimport + | ITin + | ITinfix + | ITinfixl + | ITinfixr + | ITinstance + | ITlet + | ITmodule + | ITnewtype + | ITof + | ITqualified + | ITthen + | ITtype + | ITwhere + | ITscc -- ToDo: remove (we use {-# SCC "..." #-} now) + + | ITforall -- GHC extension keywords + | ITforeign + | ITexport + | ITlabel + | ITdynamic + | ITsafe + | ITthreadsafe + | ITunsafe + | ITstdcallconv + | ITccallconv + | ITdotnet + | ITmdo + + -- Pragmas + | ITinline_prag Bool -- True <=> INLINE, False <=> NOINLINE + | ITspec_prag -- SPECIALISE + | ITspec_inline_prag Bool -- SPECIALISE INLINE (or NOINLINE) + | ITsource_prag + | ITrules_prag + | ITdeprecated_prag + | ITline_prag + | ITscc_prag + | ITcore_prag -- hdaume: core annotations + | ITunpack_prag + | ITclose_prag + | IToptions_prag String + | ITinclude_prag String + | ITlanguage_prag + + | ITdotdot -- reserved symbols + | ITcolon + | ITdcolon + | ITequal + | ITlam + | ITvbar + | ITlarrow + | ITrarrow + | ITat + | ITtilde + | ITdarrow + | ITminus + | ITbang + | ITstar + | ITdot + + | ITbiglam -- GHC-extension symbols + + | ITocurly -- special symbols + | ITccurly + | ITocurlybar -- {|, for type applications + | ITccurlybar -- |}, for type applications + | ITvocurly + | ITvccurly + | ITobrack + | ITopabrack -- [:, for parallel arrays with -fparr + | ITcpabrack -- :], for parallel arrays with -fparr + | ITcbrack + | IToparen + | ITcparen + | IToubxparen + | ITcubxparen + | ITsemi + | ITcomma + | ITunderscore + | ITbackquote + + | ITvarid FastString -- identifiers + | ITconid FastString + | ITvarsym FastString + | ITconsym FastString + | ITqvarid (FastString,FastString) + | ITqconid (FastString,FastString) + | ITqvarsym (FastString,FastString) + | ITqconsym (FastString,FastString) + + | ITdupipvarid FastString -- GHC extension: implicit param: ?x + | ITsplitipvarid FastString -- GHC extension: implicit param: %x + + | ITpragma StringBuffer + + | ITchar Char + | ITstring FastString + | ITinteger Integer + | ITrational Rational + + | ITprimchar Char + | ITprimstring FastString + | ITprimint Integer + | ITprimfloat Rational + | ITprimdouble Rational + + -- MetaHaskell extension tokens + | ITopenExpQuote -- [| or [e| + | ITopenPatQuote -- [p| + | ITopenDecQuote -- [d| + | ITopenTypQuote -- [t| + | ITcloseQuote -- |] + | ITidEscape FastString -- $x + | ITparenEscape -- $( + | ITvarQuote -- ' + | ITtyQuote -- '' + + -- Arrow notation extension + | ITproc + | ITrec + | IToparenbar -- (| + | ITcparenbar -- |) + | ITlarrowtail -- -< + | ITrarrowtail -- >- + | ITLarrowtail -- -<< + | ITRarrowtail -- >>- + + | ITunknown String -- Used when the lexer can't make sense of it + | ITeof -- end of file token +#ifdef DEBUG + deriving Show -- debugging +#endif + +isSpecial :: Token -> Bool +-- If we see M.x, where x is a keyword, but +-- is special, we treat is as just plain M.x, +-- not as a keyword. +isSpecial ITas = True +isSpecial IThiding = True +isSpecial ITqualified = True +isSpecial ITforall = True +isSpecial ITexport = True +isSpecial ITlabel = True +isSpecial ITdynamic = True +isSpecial ITsafe = True +isSpecial ITthreadsafe = True +isSpecial ITunsafe = True +isSpecial ITccallconv = True +isSpecial ITstdcallconv = True +isSpecial ITmdo = True +isSpecial _ = False + +-- the bitmap provided as the third component indicates whether the +-- corresponding extension keyword is valid under the extension options +-- provided to the compiler; if the extension corresponding to *any* of the +-- bits set in the bitmap is enabled, the keyword is valid (this setup +-- facilitates using a keyword in two different extensions that can be +-- activated independently) +-- +reservedWordsFM = listToUFM $ + map (\(x, y, z) -> (mkFastString x, (y, z))) + [( "_", ITunderscore, 0 ), + ( "as", ITas, 0 ), + ( "case", ITcase, 0 ), + ( "class", ITclass, 0 ), + ( "data", ITdata, 0 ), + ( "default", ITdefault, 0 ), + ( "deriving", ITderiving, 0 ), + ( "do", ITdo, 0 ), + ( "else", ITelse, 0 ), + ( "hiding", IThiding, 0 ), + ( "if", ITif, 0 ), + ( "import", ITimport, 0 ), + ( "in", ITin, 0 ), + ( "infix", ITinfix, 0 ), + ( "infixl", ITinfixl, 0 ), + ( "infixr", ITinfixr, 0 ), + ( "instance", ITinstance, 0 ), + ( "let", ITlet, 0 ), + ( "module", ITmodule, 0 ), + ( "newtype", ITnewtype, 0 ), + ( "of", ITof, 0 ), + ( "qualified", ITqualified, 0 ), + ( "then", ITthen, 0 ), + ( "type", ITtype, 0 ), + ( "where", ITwhere, 0 ), + ( "_scc_", ITscc, 0 ), -- ToDo: remove + + ( "forall", ITforall, bit tvBit), + ( "mdo", ITmdo, bit glaExtsBit), + + ( "foreign", ITforeign, bit ffiBit), + ( "export", ITexport, bit ffiBit), + ( "label", ITlabel, bit ffiBit), + ( "dynamic", ITdynamic, bit ffiBit), + ( "safe", ITsafe, bit ffiBit), + ( "threadsafe", ITthreadsafe, bit ffiBit), + ( "unsafe", ITunsafe, bit ffiBit), + ( "stdcall", ITstdcallconv, bit ffiBit), + ( "ccall", ITccallconv, bit ffiBit), + ( "dotnet", ITdotnet, bit ffiBit), + + ( "rec", ITrec, bit arrowsBit), + ( "proc", ITproc, bit arrowsBit) + ] + +reservedSymsFM = listToUFM $ + map (\ (x,y,z) -> (mkFastString x,(y,z))) + [ ("..", ITdotdot, 0) + ,(":", ITcolon, 0) -- (:) is a reserved op, + -- meaning only list cons + ,("::", ITdcolon, 0) + ,("=", ITequal, 0) + ,("\\", ITlam, 0) + ,("|", ITvbar, 0) + ,("<-", ITlarrow, 0) + ,("->", ITrarrow, 0) + ,("@", ITat, 0) + ,("~", ITtilde, 0) + ,("=>", ITdarrow, 0) + ,("-", ITminus, 0) + ,("!", ITbang, 0) + + ,("*", ITstar, bit glaExtsBit) -- For data T (a::*) = MkT + ,(".", ITdot, bit tvBit) -- For 'forall a . t' + + ,("-<", ITlarrowtail, bit arrowsBit) + ,(">-", ITrarrowtail, bit arrowsBit) + ,("-<<", ITLarrowtail, bit arrowsBit) + ,(">>-", ITRarrowtail, bit arrowsBit) + +#if __GLASGOW_HASKELL__ >= 605 + ,("λ", ITlam, bit glaExtsBit) + ,("∷", ITdcolon, bit glaExtsBit) + ,("⇒", ITdarrow, bit glaExtsBit) + ,("∀", ITforall, bit glaExtsBit) + ,("→", ITrarrow, bit glaExtsBit) + ,("←", ITlarrow, bit glaExtsBit) + ,("⋯", ITdotdot, bit glaExtsBit) +#endif + ] + +-- ----------------------------------------------------------------------------- +-- Lexer actions + +type Action = SrcSpan -> StringBuffer -> Int -> P (Located Token) + +special :: Token -> Action +special tok span _buf len = return (L span tok) + +token, layout_token :: Token -> Action +token t span buf len = return (L span t) +layout_token t span buf len = pushLexState layout >> return (L span t) + +idtoken :: (StringBuffer -> Int -> Token) -> Action +idtoken f span buf len = return (L span $! (f buf len)) + +skip_one_varid :: (FastString -> Token) -> Action +skip_one_varid f span buf len + = return (L span $! f (lexemeToFastString (stepOn buf) (len-1))) + +strtoken :: (String -> Token) -> Action +strtoken f span buf len = + return (L span $! (f $! lexemeToString buf len)) + +init_strtoken :: Int -> (String -> Token) -> Action +-- like strtoken, but drops the last N character(s) +init_strtoken drop f span buf len = + return (L span $! (f $! lexemeToString buf (len-drop))) + +begin :: Int -> Action +begin code _span _str _len = do pushLexState code; lexToken + +pop :: Action +pop _span _buf _len = do popLexState; lexToken + +pop_and :: Action -> Action +pop_and act span buf len = do popLexState; act span buf len + +notFollowedBy char _ _ _ (AI _ _ buf) = atEnd buf || currentChar buf /= char + +notFollowedBySymbol _ _ _ (AI _ _ buf) + = atEnd buf || currentChar buf `notElem` "!#$%&*+./<=>?@\\^|-~" + +atEOL _ _ _ (AI _ _ buf) = atEnd buf || currentChar buf == '\n' + +ifExtension pred bits _ _ _ = pred bits + +{- + nested comments require traversing by hand, they can't be parsed + using regular expressions. +-} +nested_comment :: Action +nested_comment span _str _len = do + input <- getInput + go 1 input + where go 0 input = do setInput input; lexToken + go n input = do + case alexGetChar input of + Nothing -> err input + Just (c,input) -> do + case c of + '-' -> do + case alexGetChar input of + Nothing -> err input + Just ('\125',input) -> go (n-1) input + Just (c,_) -> go n input + '\123' -> do + case alexGetChar input of + Nothing -> err input + Just ('-',input') -> go (n+1) input' + Just (c,input) -> go n input + c -> go n input + + err (AI end _ _) = failLocMsgP (srcSpanStart span) end "unterminated `{-'" + +open_brace, close_brace :: Action +open_brace span _str _len = do + ctx <- getContext + setContext (NoLayout:ctx) + return (L span ITocurly) +close_brace span _str _len = do + popContext + return (L span ITccurly) + +-- We have to be careful not to count M.<varid> as a qualified name +-- when <varid> is a keyword. We hack around this by catching +-- the offending tokens afterward, and re-lexing in a different state. +check_qvarid span buf len = do + case lookupUFM reservedWordsFM var of + Just (keyword,exts) + | not (isSpecial keyword) -> + if exts == 0 + then try_again + else do + b <- extension (\i -> exts .&. i /= 0) + if b then try_again + else return token + _other -> return token + where + (mod,var) = splitQualName buf len + token = L span (ITqvarid (mod,var)) + + try_again = do + (AI _ offs _) <- getInput + setInput (AI (srcSpanStart span) (offs-len) buf) + pushLexState bad_qvarid + lexToken + +qvarid buf len = ITqvarid $! splitQualName buf len +qconid buf len = ITqconid $! splitQualName buf len + +splitQualName :: StringBuffer -> Int -> (FastString,FastString) +-- takes a StringBuffer and a length, and returns the module name +-- and identifier parts of a qualified name. Splits at the *last* dot, +-- because of hierarchical module names. +splitQualName orig_buf len = split orig_buf orig_buf + where + split buf dot_buf + | orig_buf `byteDiff` buf >= len = done dot_buf + | c == '.' = found_dot buf' + | otherwise = split buf' dot_buf + where + (c,buf') = nextChar buf + + -- careful, we might get names like M.... + -- so, if the character after the dot is not upper-case, this is + -- the end of the qualifier part. + found_dot buf -- buf points after the '.' + | isUpper c = split buf' buf + | otherwise = done buf + where + (c,buf') = nextChar buf + + done dot_buf = + (lexemeToFastString orig_buf (qual_size - 1), + lexemeToFastString dot_buf (len - qual_size)) + where + qual_size = orig_buf `byteDiff` dot_buf + +varid span buf len = + case lookupUFM reservedWordsFM fs of + Just (keyword,0) -> do + maybe_layout keyword + return (L span keyword) + Just (keyword,exts) -> do + b <- extension (\i -> exts .&. i /= 0) + if b then do maybe_layout keyword + return (L span keyword) + else return (L span (ITvarid fs)) + _other -> return (L span (ITvarid fs)) + where + fs = lexemeToFastString buf len + +conid buf len = ITconid fs + where fs = lexemeToFastString buf len + +qvarsym buf len = ITqvarsym $! splitQualName buf len +qconsym buf len = ITqconsym $! splitQualName buf len + +varsym = sym ITvarsym +consym = sym ITconsym + +sym con span buf len = + case lookupUFM reservedSymsFM fs of + Just (keyword,0) -> return (L span keyword) + Just (keyword,exts) -> do + b <- extension (\i -> exts .&. i /= 0) + if b then return (L span keyword) + else return (L span $! con fs) + _other -> return (L span $! con fs) + where + fs = lexemeToFastString buf len + +tok_decimal span buf len + = return (L span (ITinteger $! parseInteger buf len 10 octDecDigit)) + +tok_octal span buf len + = return (L span (ITinteger $! parseInteger (offsetBytes 2 buf) (len-2) 8 octDecDigit)) + +tok_hexadecimal span buf len + = return (L span (ITinteger $! parseInteger (offsetBytes 2 buf) (len-2) 16 hexDigit)) + +prim_decimal span buf len + = return (L span (ITprimint $! parseInteger buf (len-1) 10 octDecDigit)) + +prim_octal span buf len + = return (L span (ITprimint $! parseInteger (offsetBytes 2 buf) (len-3) 8 octDecDigit)) + +prim_hexadecimal span buf len + = return (L span (ITprimint $! parseInteger (offsetBytes 2 buf) (len-3) 16 hexDigit)) + +tok_float str = ITrational $! readRational str +prim_float str = ITprimfloat $! readRational str +prim_double str = ITprimdouble $! readRational str + +-- ----------------------------------------------------------------------------- +-- Layout processing + +-- we're at the first token on a line, insert layout tokens if necessary +do_bol :: Action +do_bol span _str _len = do + pos <- getOffside + case pos of + LT -> do + --trace "layout: inserting '}'" $ do + popContext + -- do NOT pop the lex state, we might have a ';' to insert + return (L span ITvccurly) + EQ -> do + --trace "layout: inserting ';'" $ do + popLexState + return (L span ITsemi) + GT -> do + popLexState + lexToken + +-- certain keywords put us in the "layout" state, where we might +-- add an opening curly brace. +maybe_layout ITdo = pushLexState layout_do +maybe_layout ITmdo = pushLexState layout_do +maybe_layout ITof = pushLexState layout +maybe_layout ITlet = pushLexState layout +maybe_layout ITwhere = pushLexState layout +maybe_layout ITrec = pushLexState layout +maybe_layout _ = return () + +-- Pushing a new implicit layout context. If the indentation of the +-- next token is not greater than the previous layout context, then +-- Haskell 98 says that the new layout context should be empty; that is +-- the lexer must generate {}. +-- +-- We are slightly more lenient than this: when the new context is started +-- by a 'do', then we allow the new context to be at the same indentation as +-- the previous context. This is what the 'strict' argument is for. +-- +new_layout_context strict span _buf _len = do + popLexState + (AI _ offset _) <- getInput + ctx <- getContext + case ctx of + Layout prev_off : _ | + (strict && prev_off >= offset || + not strict && prev_off > offset) -> do + -- token is indented to the left of the previous context. + -- we must generate a {} sequence now. + pushLexState layout_left + return (L span ITvocurly) + other -> do + setContext (Layout offset : ctx) + return (L span ITvocurly) + +do_layout_left span _buf _len = do + popLexState + pushLexState bol -- we must be at the start of a line + return (L span ITvccurly) + +-- ----------------------------------------------------------------------------- +-- LINE pragmas + +setLine :: Int -> Action +setLine code span buf len = do + let line = parseInteger buf len 10 octDecDigit + setSrcLoc (mkSrcLoc (srcSpanFile span) (fromIntegral line - 1) 0) + -- subtract one: the line number refers to the *following* line + popLexState + pushLexState code + lexToken + +setFile :: Int -> Action +setFile code span buf len = do + let file = lexemeToFastString (stepOn buf) (len-2) + setSrcLoc (mkSrcLoc file (srcSpanEndLine span) (srcSpanEndCol span)) + popLexState + pushLexState code + lexToken + + +-- ----------------------------------------------------------------------------- +-- Options, includes and language pragmas. + +lex_string_prag :: (String -> Token) -> Action +lex_string_prag mkTok span buf len + = do input <- getInput + start <- getSrcLoc + tok <- go [] input + end <- getSrcLoc + return (L (mkSrcSpan start end) tok) + where go acc input + = if isString input "#-}" + then do setInput input + return (mkTok (reverse acc)) + else case alexGetChar input of + Just (c,i) -> go (c:acc) i + Nothing -> err input + isString i [] = True + isString i (x:xs) + = case alexGetChar i of + Just (c,i') | c == x -> isString i' xs + _other -> False + err (AI end _ _) = failLocMsgP (srcSpanStart span) end "unterminated options pragma" + + +-- ----------------------------------------------------------------------------- +-- Strings & Chars + +-- This stuff is horrible. I hates it. + +lex_string_tok :: Action +lex_string_tok span buf len = do + tok <- lex_string "" + end <- getSrcLoc + return (L (mkSrcSpan (srcSpanStart span) end) tok) + +lex_string :: String -> P Token +lex_string s = do + i <- getInput + case alexGetChar' i of + Nothing -> lit_error + + Just ('"',i) -> do + setInput i + glaexts <- extension glaExtsEnabled + if glaexts + then do + i <- getInput + case alexGetChar' i of + Just ('#',i) -> do + setInput i + if any (> '\xFF') s + then failMsgP "primitive string literal must contain only characters <= \'\\xFF\'" + else let s' = mkZFastString (reverse s) in + return (ITprimstring s') + -- mkZFastString is a hack to avoid encoding the + -- string in UTF-8. We just want the exact bytes. + _other -> + return (ITstring (mkFastString (reverse s))) + else + return (ITstring (mkFastString (reverse s))) + + Just ('\\',i) + | Just ('&',i) <- next -> do + setInput i; lex_string s + | Just (c,i) <- next, is_space c -> do + setInput i; lex_stringgap s + where next = alexGetChar' i + + Just (c, i) -> do + c' <- lex_char c i + lex_string (c':s) + +lex_stringgap s = do + c <- getCharOrFail + case c of + '\\' -> lex_string s + c | is_space c -> lex_stringgap s + _other -> lit_error + + +lex_char_tok :: Action +-- Here we are basically parsing character literals, such as 'x' or '\n' +-- but, when Template Haskell is on, we additionally spot +-- 'x and ''T, returning ITvarQuote and ITtyQuote respectively, +-- but WIHTOUT CONSUMING the x or T part (the parser does that). +-- So we have to do two characters of lookahead: when we see 'x we need to +-- see if there's a trailing quote +lex_char_tok span buf len = do -- We've seen ' + i1 <- getInput -- Look ahead to first character + let loc = srcSpanStart span + case alexGetChar' i1 of + Nothing -> lit_error + + Just ('\'', i2@(AI end2 _ _)) -> do -- We've seen '' + th_exts <- extension thEnabled + if th_exts then do + setInput i2 + return (L (mkSrcSpan loc end2) ITtyQuote) + else lit_error + + Just ('\\', i2@(AI end2 _ _)) -> do -- We've seen 'backslash + setInput i2 + lit_ch <- lex_escape + mc <- getCharOrFail -- Trailing quote + if mc == '\'' then finish_char_tok loc lit_ch + else do setInput i2; lit_error + + Just (c, i2@(AI end2 _ _)) + | not (isAny c) -> lit_error + | otherwise -> + + -- We've seen 'x, where x is a valid character + -- (i.e. not newline etc) but not a quote or backslash + case alexGetChar' i2 of -- Look ahead one more character + Nothing -> lit_error + Just ('\'', i3) -> do -- We've seen 'x' + setInput i3 + finish_char_tok loc c + _other -> do -- We've seen 'x not followed by quote + -- If TH is on, just parse the quote only + th_exts <- extension thEnabled + let (AI end _ _) = i1 + if th_exts then return (L (mkSrcSpan loc end) ITvarQuote) + else do setInput i2; lit_error + +finish_char_tok :: SrcLoc -> Char -> P (Located Token) +finish_char_tok loc ch -- We've already seen the closing quote + -- Just need to check for trailing # + = do glaexts <- extension glaExtsEnabled + i@(AI end _ _) <- getInput + if glaexts then do + case alexGetChar' i of + Just ('#',i@(AI end _ _)) -> do + setInput i + return (L (mkSrcSpan loc end) (ITprimchar ch)) + _other -> + return (L (mkSrcSpan loc end) (ITchar ch)) + else do + return (L (mkSrcSpan loc end) (ITchar ch)) + +lex_char :: Char -> AlexInput -> P Char +lex_char c inp = do + case c of + '\\' -> do setInput inp; lex_escape + c | isAny c -> do setInput inp; return c + _other -> lit_error + +isAny c | c > '\xff' = isPrint c + | otherwise = is_any c + +lex_escape :: P Char +lex_escape = do + c <- getCharOrFail + case c of + 'a' -> return '\a' + 'b' -> return '\b' + 'f' -> return '\f' + 'n' -> return '\n' + 'r' -> return '\r' + 't' -> return '\t' + 'v' -> return '\v' + '\\' -> return '\\' + '"' -> return '\"' + '\'' -> return '\'' + '^' -> do c <- getCharOrFail + if c >= '@' && c <= '_' + then return (chr (ord c - ord '@')) + else lit_error + + 'x' -> readNum is_hexdigit 16 hexDigit + 'o' -> readNum is_octdigit 8 octDecDigit + x | is_digit x -> readNum2 is_digit 10 octDecDigit (octDecDigit x) + + c1 -> do + i <- getInput + case alexGetChar' i of + Nothing -> lit_error + Just (c2,i2) -> + case alexGetChar' i2 of + Nothing -> do setInput i2; lit_error + Just (c3,i3) -> + let str = [c1,c2,c3] in + case [ (c,rest) | (p,c) <- silly_escape_chars, + Just rest <- [maybePrefixMatch p str] ] of + (escape_char,[]):_ -> do + setInput i3 + return escape_char + (escape_char,_:_):_ -> do + setInput i2 + return escape_char + [] -> lit_error + +readNum :: (Char -> Bool) -> Int -> (Char -> Int) -> P Char +readNum is_digit base conv = do + i <- getInput + c <- getCharOrFail + if is_digit c + then readNum2 is_digit base conv (conv c) + else do setInput i; lit_error + +readNum2 is_digit base conv i = do + input <- getInput + read i input + where read i input = do + case alexGetChar' input of + Just (c,input') | is_digit c -> do + read (i*base + conv c) input' + _other -> do + if i >= 0 && i <= 0x10FFFF + then do setInput input; return (chr i) + else lit_error + +silly_escape_chars = [ + ("NUL", '\NUL'), + ("SOH", '\SOH'), + ("STX", '\STX'), + ("ETX", '\ETX'), + ("EOT", '\EOT'), + ("ENQ", '\ENQ'), + ("ACK", '\ACK'), + ("BEL", '\BEL'), + ("BS", '\BS'), + ("HT", '\HT'), + ("LF", '\LF'), + ("VT", '\VT'), + ("FF", '\FF'), + ("CR", '\CR'), + ("SO", '\SO'), + ("SI", '\SI'), + ("DLE", '\DLE'), + ("DC1", '\DC1'), + ("DC2", '\DC2'), + ("DC3", '\DC3'), + ("DC4", '\DC4'), + ("NAK", '\NAK'), + ("SYN", '\SYN'), + ("ETB", '\ETB'), + ("CAN", '\CAN'), + ("EM", '\EM'), + ("SUB", '\SUB'), + ("ESC", '\ESC'), + ("FS", '\FS'), + ("GS", '\GS'), + ("RS", '\RS'), + ("US", '\US'), + ("SP", '\SP'), + ("DEL", '\DEL') + ] + +-- before calling lit_error, ensure that the current input is pointing to +-- the position of the error in the buffer. This is so that we can report +-- a correct location to the user, but also so we can detect UTF-8 decoding +-- errors if they occur. +lit_error = lexError "lexical error in string/character literal" + +getCharOrFail :: P Char +getCharOrFail = do + i <- getInput + case alexGetChar' i of + Nothing -> lexError "unexpected end-of-file in string/character literal" + Just (c,i) -> do setInput i; return c + +-- ----------------------------------------------------------------------------- +-- The Parse Monad + +data LayoutContext + = NoLayout + | Layout !Int + +data ParseResult a + = POk PState a + | PFailed + SrcSpan -- The start and end of the text span related to + -- the error. Might be used in environments which can + -- show this span, e.g. by highlighting it. + Message -- The error message + +data PState = PState { + buffer :: StringBuffer, + last_loc :: SrcSpan, -- pos of previous token + last_offs :: !Int, -- offset of the previous token from the + -- beginning of the current line. + -- \t is equal to 8 spaces. + last_len :: !Int, -- len of previous token + loc :: SrcLoc, -- current loc (end of prev token + 1) + extsBitmap :: !Int, -- bitmap that determines permitted extensions + context :: [LayoutContext], + lex_state :: [Int] + } + -- last_loc and last_len are used when generating error messages, + -- and in pushCurrentContext only. Sigh, if only Happy passed the + -- current token to happyError, we could at least get rid of last_len. + -- Getting rid of last_loc would require finding another way to + -- implement pushCurrentContext (which is only called from one place). + +newtype P a = P { unP :: PState -> ParseResult a } + +instance Monad P where + return = returnP + (>>=) = thenP + fail = failP + +returnP :: a -> P a +returnP a = P $ \s -> POk s a + +thenP :: P a -> (a -> P b) -> P b +(P m) `thenP` k = P $ \ s -> + case m s of + POk s1 a -> (unP (k a)) s1 + PFailed span err -> PFailed span err + +failP :: String -> P a +failP msg = P $ \s -> PFailed (last_loc s) (text msg) + +failMsgP :: String -> P a +failMsgP msg = P $ \s -> PFailed (last_loc s) (text msg) + +failLocMsgP :: SrcLoc -> SrcLoc -> String -> P a +failLocMsgP loc1 loc2 str = P $ \s -> PFailed (mkSrcSpan loc1 loc2) (text str) + +failSpanMsgP :: SrcSpan -> String -> P a +failSpanMsgP span msg = P $ \s -> PFailed span (text msg) + +extension :: (Int -> Bool) -> P Bool +extension p = P $ \s -> POk s (p $! extsBitmap s) + +getExts :: P Int +getExts = P $ \s -> POk s (extsBitmap s) + +setSrcLoc :: SrcLoc -> P () +setSrcLoc new_loc = P $ \s -> POk s{loc=new_loc} () + +getSrcLoc :: P SrcLoc +getSrcLoc = P $ \s@(PState{ loc=loc }) -> POk s loc + +setLastToken :: SrcSpan -> Int -> P () +setLastToken loc len = P $ \s -> POk s{ last_loc=loc, last_len=len } () + +data AlexInput = AI SrcLoc {-#UNPACK#-}!Int StringBuffer + +alexInputPrevChar :: AlexInput -> Char +alexInputPrevChar (AI _ _ buf) = prevChar buf '\n' + +alexGetChar :: AlexInput -> Maybe (Char,AlexInput) +alexGetChar (AI loc ofs s) + | atEnd s = Nothing + | otherwise = adj_c `seq` loc' `seq` ofs' `seq` s' `seq` + Just (adj_c, (AI loc' ofs' s')) + where (c,s') = nextChar s + loc' = advanceSrcLoc loc c + ofs' = advanceOffs c ofs + + non_graphic = '\x0' + upper = '\x1' + lower = '\x2' + digit = '\x3' + symbol = '\x4' + space = '\x5' + other_graphic = '\x6' + + adj_c + | c <= '\x06' = non_graphic + | c <= '\xff' = c + | otherwise = + case generalCategory c of + UppercaseLetter -> upper + LowercaseLetter -> lower + TitlecaseLetter -> upper + ModifierLetter -> other_graphic + OtherLetter -> other_graphic + NonSpacingMark -> other_graphic + SpacingCombiningMark -> other_graphic + EnclosingMark -> other_graphic + DecimalNumber -> digit + LetterNumber -> other_graphic + OtherNumber -> other_graphic + ConnectorPunctuation -> other_graphic + DashPunctuation -> other_graphic + OpenPunctuation -> other_graphic + ClosePunctuation -> other_graphic + InitialQuote -> other_graphic + FinalQuote -> other_graphic + OtherPunctuation -> other_graphic + MathSymbol -> symbol + CurrencySymbol -> symbol + ModifierSymbol -> symbol + OtherSymbol -> symbol + Space -> space + _other -> non_graphic + +-- This version does not squash unicode characters, it is used when +-- lexing strings. +alexGetChar' :: AlexInput -> Maybe (Char,AlexInput) +alexGetChar' (AI loc ofs s) + | atEnd s = Nothing + | otherwise = c `seq` loc' `seq` ofs' `seq` s' `seq` + Just (c, (AI loc' ofs' s')) + where (c,s') = nextChar s + loc' = advanceSrcLoc loc c + ofs' = advanceOffs c ofs + +advanceOffs :: Char -> Int -> Int +advanceOffs '\n' offs = 0 +advanceOffs '\t' offs = (offs `quot` 8 + 1) * 8 +advanceOffs _ offs = offs + 1 + +getInput :: P AlexInput +getInput = P $ \s@PState{ loc=l, last_offs=o, buffer=b } -> POk s (AI l o b) + +setInput :: AlexInput -> P () +setInput (AI l o b) = P $ \s -> POk s{ loc=l, last_offs=o, buffer=b } () + +pushLexState :: Int -> P () +pushLexState ls = P $ \s@PState{ lex_state=l } -> POk s{lex_state=ls:l} () + +popLexState :: P Int +popLexState = P $ \s@PState{ lex_state=ls:l } -> POk s{ lex_state=l } ls + +getLexState :: P Int +getLexState = P $ \s@PState{ lex_state=ls:l } -> POk s ls + +-- for reasons of efficiency, flags indicating language extensions (eg, +-- -fglasgow-exts or -fparr) are represented by a bitmap stored in an unboxed +-- integer + +glaExtsBit, ffiBit, parrBit :: Int +glaExtsBit = 0 +ffiBit = 1 +parrBit = 2 +arrowsBit = 4 +thBit = 5 +ipBit = 6 +tvBit = 7 -- Scoped type variables enables 'forall' keyword +bangPatBit = 8 -- Tells the parser to understand bang-patterns + -- (doesn't affect the lexer) + +glaExtsEnabled, ffiEnabled, parrEnabled :: Int -> Bool +glaExtsEnabled flags = testBit flags glaExtsBit +ffiEnabled flags = testBit flags ffiBit +parrEnabled flags = testBit flags parrBit +arrowsEnabled flags = testBit flags arrowsBit +thEnabled flags = testBit flags thBit +ipEnabled flags = testBit flags ipBit +tvEnabled flags = testBit flags tvBit +bangPatEnabled flags = testBit flags bangPatBit + +-- PState for parsing options pragmas +-- +pragState :: StringBuffer -> SrcLoc -> PState +pragState buf loc = + PState { + buffer = buf, + last_loc = mkSrcSpan loc loc, + last_offs = 0, + last_len = 0, + loc = loc, + extsBitmap = 0, + context = [], + lex_state = [bol, option_prags, 0] + } + + +-- create a parse state +-- +mkPState :: StringBuffer -> SrcLoc -> DynFlags -> PState +mkPState buf loc flags = + PState { + buffer = buf, + last_loc = mkSrcSpan loc loc, + last_offs = 0, + last_len = 0, + loc = loc, + extsBitmap = fromIntegral bitmap, + context = [], + lex_state = [bol, if glaExtsEnabled bitmap then glaexts else 0] + -- we begin in the layout state if toplev_layout is set + } + where + bitmap = glaExtsBit `setBitIf` dopt Opt_GlasgowExts flags + .|. ffiBit `setBitIf` dopt Opt_FFI flags + .|. parrBit `setBitIf` dopt Opt_PArr flags + .|. arrowsBit `setBitIf` dopt Opt_Arrows flags + .|. thBit `setBitIf` dopt Opt_TH flags + .|. ipBit `setBitIf` dopt Opt_ImplicitParams flags + .|. tvBit `setBitIf` dopt Opt_ScopedTypeVariables flags + .|. bangPatBit `setBitIf` dopt Opt_BangPatterns flags + -- + setBitIf :: Int -> Bool -> Int + b `setBitIf` cond | cond = bit b + | otherwise = 0 + +getContext :: P [LayoutContext] +getContext = P $ \s@PState{context=ctx} -> POk s ctx + +setContext :: [LayoutContext] -> P () +setContext ctx = P $ \s -> POk s{context=ctx} () + +popContext :: P () +popContext = P $ \ s@(PState{ buffer = buf, context = ctx, + loc = loc, last_len = len, last_loc = last_loc }) -> + case ctx of + (_:tl) -> POk s{ context = tl } () + [] -> PFailed last_loc (srcParseErr buf len) + +-- Push a new layout context at the indentation of the last token read. +-- This is only used at the outer level of a module when the 'module' +-- keyword is missing. +pushCurrentContext :: P () +pushCurrentContext = P $ \ s@PState{ last_offs=offs, last_len=len, context=ctx } -> + POk s{context = Layout (offs-len) : ctx} () + +getOffside :: P Ordering +getOffside = P $ \s@PState{last_offs=offs, context=stk} -> + let ord = case stk of + (Layout n:_) -> compare offs n + _ -> GT + in POk s ord + +-- --------------------------------------------------------------------------- +-- Construct a parse error + +srcParseErr + :: StringBuffer -- current buffer (placed just after the last token) + -> Int -- length of the previous token + -> Message +srcParseErr buf len + = hcat [ if null token + then ptext SLIT("parse error (possibly incorrect indentation)") + else hcat [ptext SLIT("parse error on input "), + char '`', text token, char '\''] + ] + where token = lexemeToString (offsetBytes (-len) buf) len + +-- Report a parse failure, giving the span of the previous token as +-- the location of the error. This is the entry point for errors +-- detected during parsing. +srcParseFail :: P a +srcParseFail = P $ \PState{ buffer = buf, last_len = len, + last_loc = last_loc } -> + PFailed last_loc (srcParseErr buf len) + +-- A lexical error is reported at a particular position in the source file, +-- not over a token range. +lexError :: String -> P a +lexError str = do + loc <- getSrcLoc + i@(AI end _ buf) <- getInput + reportLexError loc end buf str + +-- ----------------------------------------------------------------------------- +-- This is the top-level function: called from the parser each time a +-- new token is to be read from the input. + +lexer :: (Located Token -> P a) -> P a +lexer cont = do + tok@(L _ tok__) <- lexToken + --trace ("token: " ++ show tok__) $ do + cont tok + +lexToken :: P (Located Token) +lexToken = do + inp@(AI loc1 _ buf) <- getInput + sc <- getLexState + exts <- getExts + case alexScanUser exts inp sc of + AlexEOF -> do let span = mkSrcSpan loc1 loc1 + setLastToken span 0 + return (L span ITeof) + AlexError (AI loc2 _ buf) -> do + reportLexError loc1 loc2 buf "lexical error" + AlexSkip inp2 _ -> do + setInput inp2 + lexToken + AlexToken inp2@(AI end _ buf2) len t -> do + setInput inp2 + let span = mkSrcSpan loc1 end + let bytes = byteDiff buf buf2 + span `seq` setLastToken span bytes + t span buf bytes + +-- ToDo: Alex reports the buffer at the start of the erroneous lexeme, +-- but it would be more informative to report the location where the +-- error was actually discovered, especially if this is a decoding +-- error. +reportLexError loc1 loc2 buf str = + let + c = fst (nextChar buf) + in + if c == '\0' -- decoding errors are mapped to '\0', see utf8DecodeChar# + then failLocMsgP loc2 loc2 "UTF-8 decoding error" + else failLocMsgP loc1 loc2 (str ++ " at character " ++ show c) +} diff --git a/compiler/parser/Parser.y.pp b/compiler/parser/Parser.y.pp new file mode 100644 index 0000000000..3066a0f876 --- /dev/null +++ b/compiler/parser/Parser.y.pp @@ -0,0 +1,1607 @@ +-- -*-haskell-*- +-- --------------------------------------------------------------------------- +-- (c) The University of Glasgow 1997-2003 +--- +-- The GHC grammar. +-- +-- Author(s): Simon Marlow, Sven Panne 1997, 1998, 1999 +-- --------------------------------------------------------------------------- + +{ +module Parser ( parseModule, parseStmt, parseIdentifier, parseType, + parseHeader ) where + +#define INCLUDE #include +INCLUDE "HsVersions.h" + +import HsSyn +import RdrHsSyn +import HscTypes ( IsBootInterface, DeprecTxt ) +import Lexer +import RdrName +import TysWiredIn ( unitTyCon, unitDataCon, tupleTyCon, tupleCon, nilDataCon, + listTyCon_RDR, parrTyCon_RDR, consDataCon_RDR ) +import Type ( funTyCon ) +import ForeignCall ( Safety(..), CExportSpec(..), CLabelString, + CCallConv(..), CCallTarget(..), defaultCCallConv + ) +import OccName ( varName, dataName, tcClsName, tvName ) +import DataCon ( DataCon, dataConName ) +import SrcLoc ( Located(..), unLoc, getLoc, noLoc, combineSrcSpans, + SrcSpan, combineLocs, srcLocFile, + mkSrcLoc, mkSrcSpan ) +import Module +import StaticFlags ( opt_SccProfilingOn ) +import Type ( Kind, mkArrowKind, liftedTypeKind ) +import BasicTypes ( Boxity(..), Fixity(..), FixityDirection(..), IPName(..), + Activation(..), defaultInlineSpec ) +import OrdList + +import FastString +import Maybes ( orElse ) +import Outputable +import GLAEXTS +} + +{- +----------------------------------------------------------------------------- +Conflicts: 36 shift/reduce (1.25) + +10 for abiguity in 'if x then y else z + 1' [State 178] + (shift parses as 'if x then y else (z + 1)', as per longest-parse rule) + 10 because op might be: : - ! * . `x` VARSYM CONSYM QVARSYM QCONSYM + +1 for ambiguity in 'if x then y else z :: T' [State 178] + (shift parses as 'if x then y else (z :: T)', as per longest-parse rule) + +4 for ambiguity in 'if x then y else z -< e' [State 178] + (shift parses as 'if x then y else (z -< T)', as per longest-parse rule) + There are four such operators: -<, >-, -<<, >>- + + +2 for ambiguity in 'case v of { x :: T -> T ... } ' [States 11, 253] + Which of these two is intended? + case v of + (x::T) -> T -- Rhs is T + or + case v of + (x::T -> T) -> .. -- Rhs is ... + +10 for ambiguity in 'e :: a `b` c'. Does this mean [States 11, 253] + (e::a) `b` c, or + (e :: (a `b` c)) + As well as `b` we can have !, VARSYM, QCONSYM, and CONSYM, hence 5 cases + Same duplication between states 11 and 253 as the previous case + +1 for ambiguity in 'let ?x ...' [State 329] + the parser can't tell whether the ?x is the lhs of a normal binding or + an implicit binding. Fortunately resolving as shift gives it the only + sensible meaning, namely the lhs of an implicit binding. + +1 for ambiguity in '{-# RULES "name" [ ... #-} [State 382] + we don't know whether the '[' starts the activation or not: it + might be the start of the declaration with the activation being + empty. --SDM 1/4/2002 + +6 for conflicts between `fdecl' and `fdeclDEPRECATED', [States 393,394] + which are resolved correctly, and moreover, + should go away when `fdeclDEPRECATED' is removed. + +1 for ambiguity in '{-# RULES "name" forall = ... #-}' [State 474] + since 'forall' is a valid variable name, we don't know whether + to treat a forall on the input as the beginning of a quantifier + or the beginning of the rule itself. Resolving to shift means + it's always treated as a quantifier, hence the above is disallowed. + This saves explicitly defining a grammar for the rule lhs that + doesn't include 'forall'. + +-- --------------------------------------------------------------------------- +-- Adding location info + +This is done in a stylised way using the three macros below, L0, L1 +and LL. Each of these macros can be thought of as having type + + L0, L1, LL :: a -> Located a + +They each add a SrcSpan to their argument. + + L0 adds 'noSrcSpan', used for empty productions + + L1 for a production with a single token on the lhs. Grabs the SrcSpan + from that token. + + LL for a production with >1 token on the lhs. Makes up a SrcSpan from + the first and last tokens. + +These suffice for the majority of cases. However, we must be +especially careful with empty productions: LL won't work if the first +or last token on the lhs can represent an empty span. In these cases, +we have to calculate the span using more of the tokens from the lhs, eg. + + | 'newtype' tycl_hdr '=' newconstr deriving + { L (comb3 $1 $4 $5) + (mkTyData NewType (unLoc $2) [$4] (unLoc $5)) } + +We provide comb3 and comb4 functions which are useful in such cases. + +Be careful: there's no checking that you actually got this right, the +only symptom will be that the SrcSpans of your syntax will be +incorrect. + +/* + * We must expand these macros *before* running Happy, which is why this file is + * Parser.y.pp rather than just Parser.y - we run the C pre-processor first. + */ +#define L0 L noSrcSpan +#define L1 sL (getLoc $1) +#define LL sL (comb2 $1 $>) + +-- ----------------------------------------------------------------------------- + +-} + +%token + '_' { L _ ITunderscore } -- Haskell keywords + 'as' { L _ ITas } + 'case' { L _ ITcase } + 'class' { L _ ITclass } + 'data' { L _ ITdata } + 'default' { L _ ITdefault } + 'deriving' { L _ ITderiving } + 'do' { L _ ITdo } + 'else' { L _ ITelse } + 'hiding' { L _ IThiding } + 'if' { L _ ITif } + 'import' { L _ ITimport } + 'in' { L _ ITin } + 'infix' { L _ ITinfix } + 'infixl' { L _ ITinfixl } + 'infixr' { L _ ITinfixr } + 'instance' { L _ ITinstance } + 'let' { L _ ITlet } + 'module' { L _ ITmodule } + 'newtype' { L _ ITnewtype } + 'of' { L _ ITof } + 'qualified' { L _ ITqualified } + 'then' { L _ ITthen } + 'type' { L _ ITtype } + 'where' { L _ ITwhere } + '_scc_' { L _ ITscc } -- ToDo: remove + + 'forall' { L _ ITforall } -- GHC extension keywords + 'foreign' { L _ ITforeign } + 'export' { L _ ITexport } + 'label' { L _ ITlabel } + 'dynamic' { L _ ITdynamic } + 'safe' { L _ ITsafe } + 'threadsafe' { L _ ITthreadsafe } + 'unsafe' { L _ ITunsafe } + 'mdo' { L _ ITmdo } + 'stdcall' { L _ ITstdcallconv } + 'ccall' { L _ ITccallconv } + 'dotnet' { L _ ITdotnet } + 'proc' { L _ ITproc } -- for arrow notation extension + 'rec' { L _ ITrec } -- for arrow notation extension + + '{-# INLINE' { L _ (ITinline_prag _) } + '{-# SPECIALISE' { L _ ITspec_prag } + '{-# SPECIALISE_INLINE' { L _ (ITspec_inline_prag _) } + '{-# SOURCE' { L _ ITsource_prag } + '{-# RULES' { L _ ITrules_prag } + '{-# CORE' { L _ ITcore_prag } -- hdaume: annotated core + '{-# SCC' { L _ ITscc_prag } + '{-# DEPRECATED' { L _ ITdeprecated_prag } + '{-# UNPACK' { L _ ITunpack_prag } + '#-}' { L _ ITclose_prag } + + '..' { L _ ITdotdot } -- reserved symbols + ':' { L _ ITcolon } + '::' { L _ ITdcolon } + '=' { L _ ITequal } + '\\' { L _ ITlam } + '|' { L _ ITvbar } + '<-' { L _ ITlarrow } + '->' { L _ ITrarrow } + '@' { L _ ITat } + '~' { L _ ITtilde } + '=>' { L _ ITdarrow } + '-' { L _ ITminus } + '!' { L _ ITbang } + '*' { L _ ITstar } + '-<' { L _ ITlarrowtail } -- for arrow notation + '>-' { L _ ITrarrowtail } -- for arrow notation + '-<<' { L _ ITLarrowtail } -- for arrow notation + '>>-' { L _ ITRarrowtail } -- for arrow notation + '.' { L _ ITdot } + + '{' { L _ ITocurly } -- special symbols + '}' { L _ ITccurly } + '{|' { L _ ITocurlybar } + '|}' { L _ ITccurlybar } + vocurly { L _ ITvocurly } -- virtual open curly (from layout) + vccurly { L _ ITvccurly } -- virtual close curly (from layout) + '[' { L _ ITobrack } + ']' { L _ ITcbrack } + '[:' { L _ ITopabrack } + ':]' { L _ ITcpabrack } + '(' { L _ IToparen } + ')' { L _ ITcparen } + '(#' { L _ IToubxparen } + '#)' { L _ ITcubxparen } + '(|' { L _ IToparenbar } + '|)' { L _ ITcparenbar } + ';' { L _ ITsemi } + ',' { L _ ITcomma } + '`' { L _ ITbackquote } + + VARID { L _ (ITvarid _) } -- identifiers + CONID { L _ (ITconid _) } + VARSYM { L _ (ITvarsym _) } + CONSYM { L _ (ITconsym _) } + QVARID { L _ (ITqvarid _) } + QCONID { L _ (ITqconid _) } + QVARSYM { L _ (ITqvarsym _) } + QCONSYM { L _ (ITqconsym _) } + + IPDUPVARID { L _ (ITdupipvarid _) } -- GHC extension + IPSPLITVARID { L _ (ITsplitipvarid _) } -- GHC extension + + CHAR { L _ (ITchar _) } + STRING { L _ (ITstring _) } + INTEGER { L _ (ITinteger _) } + RATIONAL { L _ (ITrational _) } + + PRIMCHAR { L _ (ITprimchar _) } + PRIMSTRING { L _ (ITprimstring _) } + PRIMINTEGER { L _ (ITprimint _) } + PRIMFLOAT { L _ (ITprimfloat _) } + PRIMDOUBLE { L _ (ITprimdouble _) } + +-- Template Haskell +'[|' { L _ ITopenExpQuote } +'[p|' { L _ ITopenPatQuote } +'[t|' { L _ ITopenTypQuote } +'[d|' { L _ ITopenDecQuote } +'|]' { L _ ITcloseQuote } +TH_ID_SPLICE { L _ (ITidEscape _) } -- $x +'$(' { L _ ITparenEscape } -- $( exp ) +TH_VAR_QUOTE { L _ ITvarQuote } -- 'x +TH_TY_QUOTE { L _ ITtyQuote } -- ''T + +%monad { P } { >>= } { return } +%lexer { lexer } { L _ ITeof } +%name parseModule module +%name parseStmt maybe_stmt +%name parseIdentifier identifier +%name parseType ctype +%partial parseHeader header +%tokentype { (Located Token) } +%% + +----------------------------------------------------------------------------- +-- Identifiers; one of the entry points +identifier :: { Located RdrName } + : qvar { $1 } + | qcon { $1 } + | qvarop { $1 } + | qconop { $1 } + +----------------------------------------------------------------------------- +-- Module Header + +-- The place for module deprecation is really too restrictive, but if it +-- was allowed at its natural place just before 'module', we get an ugly +-- s/r conflict with the second alternative. Another solution would be the +-- introduction of a new pragma DEPRECATED_MODULE, but this is not very nice, +-- either, and DEPRECATED is only expected to be used by people who really +-- know what they are doing. :-) + +module :: { Located (HsModule RdrName) } + : 'module' modid maybemoddeprec maybeexports 'where' body + {% fileSrcSpan >>= \ loc -> + return (L loc (HsModule (Just $2) $4 (fst $6) (snd $6) $3)) } + | missing_module_keyword top close + {% fileSrcSpan >>= \ loc -> + return (L loc (HsModule Nothing Nothing + (fst $2) (snd $2) Nothing)) } + +missing_module_keyword :: { () } + : {- empty -} {% pushCurrentContext } + +maybemoddeprec :: { Maybe DeprecTxt } + : '{-# DEPRECATED' STRING '#-}' { Just (getSTRING $2) } + | {- empty -} { Nothing } + +body :: { ([LImportDecl RdrName], [LHsDecl RdrName]) } + : '{' top '}' { $2 } + | vocurly top close { $2 } + +top :: { ([LImportDecl RdrName], [LHsDecl RdrName]) } + : importdecls { (reverse $1,[]) } + | importdecls ';' cvtopdecls { (reverse $1,$3) } + | cvtopdecls { ([],$1) } + +cvtopdecls :: { [LHsDecl RdrName] } + : topdecls { cvTopDecls $1 } + +----------------------------------------------------------------------------- +-- Module declaration & imports only + +header :: { Located (HsModule RdrName) } + : 'module' modid maybemoddeprec maybeexports 'where' header_body + {% fileSrcSpan >>= \ loc -> + return (L loc (HsModule (Just $2) $4 $6 [] $3)) } + | missing_module_keyword importdecls + {% fileSrcSpan >>= \ loc -> + return (L loc (HsModule Nothing Nothing $2 [] Nothing)) } + +header_body :: { [LImportDecl RdrName] } + : '{' importdecls { $2 } + | vocurly importdecls { $2 } + +----------------------------------------------------------------------------- +-- The Export List + +maybeexports :: { Maybe [LIE RdrName] } + : '(' exportlist ')' { Just $2 } + | {- empty -} { Nothing } + +exportlist :: { [LIE RdrName] } + : exportlist ',' export { $3 : $1 } + | exportlist ',' { $1 } + | export { [$1] } + | {- empty -} { [] } + + -- No longer allow things like [] and (,,,) to be exported + -- They are built in syntax, always available +export :: { LIE RdrName } + : qvar { L1 (IEVar (unLoc $1)) } + | oqtycon { L1 (IEThingAbs (unLoc $1)) } + | oqtycon '(' '..' ')' { LL (IEThingAll (unLoc $1)) } + | oqtycon '(' ')' { LL (IEThingWith (unLoc $1) []) } + | oqtycon '(' qcnames ')' { LL (IEThingWith (unLoc $1) (reverse $3)) } + | 'module' modid { LL (IEModuleContents (unLoc $2)) } + +qcnames :: { [RdrName] } + : qcnames ',' qcname { unLoc $3 : $1 } + | qcname { [unLoc $1] } + +qcname :: { Located RdrName } -- Variable or data constructor + : qvar { $1 } + | qcon { $1 } + +----------------------------------------------------------------------------- +-- Import Declarations + +-- import decls can be *empty*, or even just a string of semicolons +-- whereas topdecls must contain at least one topdecl. + +importdecls :: { [LImportDecl RdrName] } + : importdecls ';' importdecl { $3 : $1 } + | importdecls ';' { $1 } + | importdecl { [ $1 ] } + | {- empty -} { [] } + +importdecl :: { LImportDecl RdrName } + : 'import' maybe_src optqualified modid maybeas maybeimpspec + { L (comb4 $1 $4 $5 $6) (ImportDecl $4 $2 $3 (unLoc $5) (unLoc $6)) } + +maybe_src :: { IsBootInterface } + : '{-# SOURCE' '#-}' { True } + | {- empty -} { False } + +optqualified :: { Bool } + : 'qualified' { True } + | {- empty -} { False } + +maybeas :: { Located (Maybe Module) } + : 'as' modid { LL (Just (unLoc $2)) } + | {- empty -} { noLoc Nothing } + +maybeimpspec :: { Located (Maybe (Bool, [LIE RdrName])) } + : impspec { L1 (Just (unLoc $1)) } + | {- empty -} { noLoc Nothing } + +impspec :: { Located (Bool, [LIE RdrName]) } + : '(' exportlist ')' { LL (False, reverse $2) } + | 'hiding' '(' exportlist ')' { LL (True, reverse $3) } + +----------------------------------------------------------------------------- +-- Fixity Declarations + +prec :: { Int } + : {- empty -} { 9 } + | INTEGER {% checkPrecP (L1 (fromInteger (getINTEGER $1))) } + +infix :: { Located FixityDirection } + : 'infix' { L1 InfixN } + | 'infixl' { L1 InfixL } + | 'infixr' { L1 InfixR } + +ops :: { Located [Located RdrName] } + : ops ',' op { LL ($3 : unLoc $1) } + | op { L1 [$1] } + +----------------------------------------------------------------------------- +-- Top-Level Declarations + +topdecls :: { OrdList (LHsDecl RdrName) } -- Reversed + : topdecls ';' topdecl { $1 `appOL` $3 } + | topdecls ';' { $1 } + | topdecl { $1 } + +topdecl :: { OrdList (LHsDecl RdrName) } + : tycl_decl { unitOL (L1 (TyClD (unLoc $1))) } + | 'instance' inst_type where + { let (binds,sigs) = cvBindsAndSigs (unLoc $3) + in unitOL (L (comb3 $1 $2 $3) (InstD (InstDecl $2 binds sigs))) } + | 'default' '(' comma_types0 ')' { unitOL (LL $ DefD (DefaultDecl $3)) } + | 'foreign' fdecl { unitOL (LL (unLoc $2)) } + | '{-# DEPRECATED' deprecations '#-}' { $2 } + | '{-# RULES' rules '#-}' { $2 } + | '$(' exp ')' { unitOL (LL $ SpliceD (SpliceDecl $2)) } + | decl { unLoc $1 } + +tycl_decl :: { LTyClDecl RdrName } + : 'type' type '=' ctype + -- Note type on the left of the '='; this allows + -- infix type constructors to be declared + -- + -- Note ctype, not sigtype, on the right + -- We allow an explicit for-all but we don't insert one + -- in type Foo a = (b,b) + -- Instead we just say b is out of scope + {% do { (tc,tvs) <- checkSynHdr $2 + ; return (LL (TySynonym tc tvs $4)) } } + + | data_or_newtype tycl_hdr constrs deriving + { L (comb4 $1 $2 $3 $4) -- We need the location on tycl_hdr + -- in case constrs and deriving are both empty + (mkTyData (unLoc $1) (unLoc $2) Nothing (reverse (unLoc $3)) (unLoc $4)) } + + | data_or_newtype tycl_hdr opt_kind_sig + 'where' gadt_constrlist + deriving + { L (comb4 $1 $2 $4 $5) + (mkTyData (unLoc $1) (unLoc $2) $3 (reverse (unLoc $5)) (unLoc $6)) } + + | 'class' tycl_hdr fds where + { let + (binds,sigs) = cvBindsAndSigs (unLoc $4) + in + L (comb4 $1 $2 $3 $4) (mkClassDecl (unLoc $2) (unLoc $3) sigs + binds) } + +data_or_newtype :: { Located NewOrData } + : 'data' { L1 DataType } + | 'newtype' { L1 NewType } + +opt_kind_sig :: { Maybe Kind } + : { Nothing } + | '::' kind { Just $2 } + +-- tycl_hdr parses the header of a type or class decl, +-- which takes the form +-- T a b +-- Eq a => T a +-- (Eq a, Ord b) => T a b +-- Rather a lot of inlining here, else we get reduce/reduce errors +tycl_hdr :: { Located (LHsContext RdrName, Located RdrName, [LHsTyVarBndr RdrName]) } + : context '=>' type {% checkTyClHdr $1 $3 >>= return.LL } + | type {% checkTyClHdr (noLoc []) $1 >>= return.L1 } + +----------------------------------------------------------------------------- +-- Nested declarations + +decls :: { Located (OrdList (LHsDecl RdrName)) } -- Reversed + : decls ';' decl { LL (unLoc $1 `appOL` unLoc $3) } + | decls ';' { LL (unLoc $1) } + | decl { $1 } + | {- empty -} { noLoc nilOL } + + +decllist :: { Located (OrdList (LHsDecl RdrName)) } -- Reversed + : '{' decls '}' { LL (unLoc $2) } + | vocurly decls close { $2 } + +where :: { Located (OrdList (LHsDecl RdrName)) } -- Reversed + -- No implicit parameters + : 'where' decllist { LL (unLoc $2) } + | {- empty -} { noLoc nilOL } + +binds :: { Located (HsLocalBinds RdrName) } -- May have implicit parameters + : decllist { L1 (HsValBinds (cvBindGroup (unLoc $1))) } + | '{' dbinds '}' { LL (HsIPBinds (IPBinds (unLoc $2) emptyLHsBinds)) } + | vocurly dbinds close { L (getLoc $2) (HsIPBinds (IPBinds (unLoc $2) emptyLHsBinds)) } + +wherebinds :: { Located (HsLocalBinds RdrName) } -- May have implicit parameters + : 'where' binds { LL (unLoc $2) } + | {- empty -} { noLoc emptyLocalBinds } + + +----------------------------------------------------------------------------- +-- Transformation Rules + +rules :: { OrdList (LHsDecl RdrName) } -- Reversed + : rules ';' rule { $1 `snocOL` $3 } + | rules ';' { $1 } + | rule { unitOL $1 } + | {- empty -} { nilOL } + +rule :: { LHsDecl RdrName } + : STRING activation rule_forall infixexp '=' exp + { LL $ RuleD (HsRule (getSTRING $1) + ($2 `orElse` AlwaysActive) + $3 $4 placeHolderNames $6 placeHolderNames) } + +activation :: { Maybe Activation } + : {- empty -} { Nothing } + | explicit_activation { Just $1 } + +explicit_activation :: { Activation } -- In brackets + : '[' INTEGER ']' { ActiveAfter (fromInteger (getINTEGER $2)) } + | '[' '~' INTEGER ']' { ActiveBefore (fromInteger (getINTEGER $3)) } + +rule_forall :: { [RuleBndr RdrName] } + : 'forall' rule_var_list '.' { $2 } + | {- empty -} { [] } + +rule_var_list :: { [RuleBndr RdrName] } + : rule_var { [$1] } + | rule_var rule_var_list { $1 : $2 } + +rule_var :: { RuleBndr RdrName } + : varid { RuleBndr $1 } + | '(' varid '::' ctype ')' { RuleBndrSig $2 $4 } + +----------------------------------------------------------------------------- +-- Deprecations (c.f. rules) + +deprecations :: { OrdList (LHsDecl RdrName) } -- Reversed + : deprecations ';' deprecation { $1 `appOL` $3 } + | deprecations ';' { $1 } + | deprecation { $1 } + | {- empty -} { nilOL } + +-- SUP: TEMPORARY HACK, not checking for `module Foo' +deprecation :: { OrdList (LHsDecl RdrName) } + : depreclist STRING + { toOL [ LL $ DeprecD (Deprecation n (getSTRING $2)) + | n <- unLoc $1 ] } + + +----------------------------------------------------------------------------- +-- Foreign import and export declarations + +-- for the time being, the following accepts foreign declarations conforming +-- to the FFI Addendum, Version 1.0 as well as pre-standard declarations +-- +-- * a flag indicates whether pre-standard declarations have been used and +-- triggers a deprecation warning further down the road +-- +-- NB: The first two rules could be combined into one by replacing `safety1' +-- with `safety'. However, the combined rule conflicts with the +-- DEPRECATED rules. +-- +fdecl :: { LHsDecl RdrName } +fdecl : 'import' callconv safety1 fspec + {% mkImport $2 $3 (unLoc $4) >>= return.LL } + | 'import' callconv fspec + {% do { d <- mkImport $2 (PlaySafe False) (unLoc $3); + return (LL d) } } + | 'export' callconv fspec + {% mkExport $2 (unLoc $3) >>= return.LL } + -- the following syntax is DEPRECATED + | fdecl1DEPRECATED { L1 (ForD (unLoc $1)) } + | fdecl2DEPRECATED { L1 (unLoc $1) } + +fdecl1DEPRECATED :: { LForeignDecl RdrName } +fdecl1DEPRECATED + ----------- DEPRECATED label decls ------------ + : 'label' ext_name varid '::' sigtype + { LL $ ForeignImport $3 $5 (CImport defaultCCallConv (PlaySafe False) nilFS nilFS + (CLabel ($2 `orElse` mkExtName (unLoc $3)))) True } + + ----------- DEPRECATED ccall/stdcall decls ------------ + -- + -- NB: This business with the case expression below may seem overly + -- complicated, but it is necessary to avoid some conflicts. + + -- DEPRECATED variant #1: lack of a calling convention specification + -- (import) + | 'import' {-no callconv-} ext_name safety varid_no_unsafe '::' sigtype + { let + target = StaticTarget ($2 `orElse` mkExtName (unLoc $4)) + in + LL $ ForeignImport $4 $6 (CImport defaultCCallConv $3 nilFS nilFS + (CFunction target)) True } + + -- DEPRECATED variant #2: external name consists of two separate strings + -- (module name and function name) (import) + | 'import' callconv STRING STRING safety varid_no_unsafe '::' sigtype + {% case $2 of + DNCall -> parseError (comb2 $1 $>) "Illegal format of .NET foreign import" + CCall cconv -> return $ + let + imp = CFunction (StaticTarget (getSTRING $4)) + in + LL $ ForeignImport $6 $8 (CImport cconv $5 nilFS nilFS imp) True } + + -- DEPRECATED variant #3: `unsafe' after entity + | 'import' callconv STRING 'unsafe' varid_no_unsafe '::' sigtype + {% case $2 of + DNCall -> parseError (comb2 $1 $>) "Illegal format of .NET foreign import" + CCall cconv -> return $ + let + imp = CFunction (StaticTarget (getSTRING $3)) + in + LL $ ForeignImport $5 $7 (CImport cconv PlayRisky nilFS nilFS imp) True } + + -- DEPRECATED variant #4: use of the special identifier `dynamic' without + -- an explicit calling convention (import) + | 'import' {-no callconv-} 'dynamic' safety varid_no_unsafe '::' sigtype + { LL $ ForeignImport $4 $6 (CImport defaultCCallConv $3 nilFS nilFS + (CFunction DynamicTarget)) True } + + -- DEPRECATED variant #5: use of the special identifier `dynamic' (import) + | 'import' callconv 'dynamic' safety varid_no_unsafe '::' sigtype + {% case $2 of + DNCall -> parseError (comb2 $1 $>) "Illegal format of .NET foreign import" + CCall cconv -> return $ + LL $ ForeignImport $5 $7 (CImport cconv $4 nilFS nilFS + (CFunction DynamicTarget)) True } + + -- DEPRECATED variant #6: lack of a calling convention specification + -- (export) + | 'export' {-no callconv-} ext_name varid '::' sigtype + { LL $ ForeignExport $3 $5 (CExport (CExportStatic ($2 `orElse` mkExtName (unLoc $3)) + defaultCCallConv)) True } + + -- DEPRECATED variant #7: external name consists of two separate strings + -- (module name and function name) (export) + | 'export' callconv STRING STRING varid '::' sigtype + {% case $2 of + DNCall -> parseError (comb2 $1 $>) "Illegal format of .NET foreign import" + CCall cconv -> return $ + LL $ ForeignExport $5 $7 + (CExport (CExportStatic (getSTRING $4) cconv)) True } + + -- DEPRECATED variant #8: use of the special identifier `dynamic' without + -- an explicit calling convention (export) + | 'export' {-no callconv-} 'dynamic' varid '::' sigtype + { LL $ ForeignImport $3 $5 (CImport defaultCCallConv (PlaySafe False) nilFS nilFS + CWrapper) True } + + -- DEPRECATED variant #9: use of the special identifier `dynamic' (export) + | 'export' callconv 'dynamic' varid '::' sigtype + {% case $2 of + DNCall -> parseError (comb2 $1 $>) "Illegal format of .NET foreign import" + CCall cconv -> return $ + LL $ ForeignImport $4 $6 + (CImport cconv (PlaySafe False) nilFS nilFS CWrapper) True } + + ----------- DEPRECATED .NET decls ------------ + -- NB: removed the .NET call declaration, as it is entirely subsumed + -- by the new standard FFI declarations + +fdecl2DEPRECATED :: { LHsDecl RdrName } +fdecl2DEPRECATED + : 'import' 'dotnet' 'type' ext_name tycon { LL $ TyClD (ForeignType $5 $4 DNType) } + -- left this one unchanged for the moment as type imports are not + -- covered currently by the FFI standard -=chak + + +callconv :: { CallConv } + : 'stdcall' { CCall StdCallConv } + | 'ccall' { CCall CCallConv } + | 'dotnet' { DNCall } + +safety :: { Safety } + : 'unsafe' { PlayRisky } + | 'safe' { PlaySafe False } + | 'threadsafe' { PlaySafe True } + | {- empty -} { PlaySafe False } + +safety1 :: { Safety } + : 'unsafe' { PlayRisky } + | 'safe' { PlaySafe False } + | 'threadsafe' { PlaySafe True } + -- only needed to avoid conflicts with the DEPRECATED rules + +fspec :: { Located (Located FastString, Located RdrName, LHsType RdrName) } + : STRING var '::' sigtype { LL (L (getLoc $1) (getSTRING $1), $2, $4) } + | var '::' sigtype { LL (noLoc nilFS, $1, $3) } + -- if the entity string is missing, it defaults to the empty string; + -- the meaning of an empty entity string depends on the calling + -- convention + +-- DEPRECATED syntax +ext_name :: { Maybe CLabelString } + : STRING { Just (getSTRING $1) } + | STRING STRING { Just (getSTRING $2) } -- Ignore "module name" for now + | {- empty -} { Nothing } + + +----------------------------------------------------------------------------- +-- Type signatures + +opt_sig :: { Maybe (LHsType RdrName) } + : {- empty -} { Nothing } + | '::' sigtype { Just $2 } + +opt_asig :: { Maybe (LHsType RdrName) } + : {- empty -} { Nothing } + | '::' atype { Just $2 } + +sigtypes1 :: { [LHsType RdrName] } + : sigtype { [ $1 ] } + | sigtype ',' sigtypes1 { $1 : $3 } + +sigtype :: { LHsType RdrName } + : ctype { L1 (mkImplicitHsForAllTy (noLoc []) $1) } + -- Wrap an Implicit forall if there isn't one there already + +sig_vars :: { Located [Located RdrName] } + : sig_vars ',' var { LL ($3 : unLoc $1) } + | var { L1 [$1] } + +----------------------------------------------------------------------------- +-- Types + +strict_mark :: { Located HsBang } + : '!' { L1 HsStrict } + | '{-# UNPACK' '#-}' '!' { LL HsUnbox } + +-- A ctype is a for-all type +ctype :: { LHsType RdrName } + : 'forall' tv_bndrs '.' ctype { LL $ mkExplicitHsForAllTy $2 (noLoc []) $4 } + | context '=>' type { LL $ mkImplicitHsForAllTy $1 $3 } + -- A type of form (context => type) is an *implicit* HsForAllTy + | type { $1 } + +-- We parse a context as a btype so that we don't get reduce/reduce +-- errors in ctype. The basic problem is that +-- (Eq a, Ord a) +-- looks so much like a tuple type. We can't tell until we find the => +context :: { LHsContext RdrName } + : btype {% checkContext $1 } + +type :: { LHsType RdrName } + : ipvar '::' gentype { LL (HsPredTy (HsIParam (unLoc $1) $3)) } + | gentype { $1 } + +gentype :: { LHsType RdrName } + : btype { $1 } + | btype qtyconop gentype { LL $ HsOpTy $1 $2 $3 } + | btype tyvarop gentype { LL $ HsOpTy $1 $2 $3 } + | btype '->' ctype { LL $ HsFunTy $1 $3 } + +btype :: { LHsType RdrName } + : btype atype { LL $ HsAppTy $1 $2 } + | atype { $1 } + +atype :: { LHsType RdrName } + : gtycon { L1 (HsTyVar (unLoc $1)) } + | tyvar { L1 (HsTyVar (unLoc $1)) } + | strict_mark atype { LL (HsBangTy (unLoc $1) $2) } + | '(' ctype ',' comma_types1 ')' { LL $ HsTupleTy Boxed ($2:$4) } + | '(#' comma_types1 '#)' { LL $ HsTupleTy Unboxed $2 } + | '[' ctype ']' { LL $ HsListTy $2 } + | '[:' ctype ':]' { LL $ HsPArrTy $2 } + | '(' ctype ')' { LL $ HsParTy $2 } + | '(' ctype '::' kind ')' { LL $ HsKindSig $2 $4 } +-- Generics + | INTEGER { L1 (HsNumTy (getINTEGER $1)) } + +-- An inst_type is what occurs in the head of an instance decl +-- e.g. (Foo a, Gaz b) => Wibble a b +-- It's kept as a single type, with a MonoDictTy at the right +-- hand corner, for convenience. +inst_type :: { LHsType RdrName } + : sigtype {% checkInstType $1 } + +inst_types1 :: { [LHsType RdrName] } + : inst_type { [$1] } + | inst_type ',' inst_types1 { $1 : $3 } + +comma_types0 :: { [LHsType RdrName] } + : comma_types1 { $1 } + | {- empty -} { [] } + +comma_types1 :: { [LHsType RdrName] } + : ctype { [$1] } + | ctype ',' comma_types1 { $1 : $3 } + +tv_bndrs :: { [LHsTyVarBndr RdrName] } + : tv_bndr tv_bndrs { $1 : $2 } + | {- empty -} { [] } + +tv_bndr :: { LHsTyVarBndr RdrName } + : tyvar { L1 (UserTyVar (unLoc $1)) } + | '(' tyvar '::' kind ')' { LL (KindedTyVar (unLoc $2) $4) } + +fds :: { Located [Located ([RdrName], [RdrName])] } + : {- empty -} { noLoc [] } + | '|' fds1 { LL (reverse (unLoc $2)) } + +fds1 :: { Located [Located ([RdrName], [RdrName])] } + : fds1 ',' fd { LL ($3 : unLoc $1) } + | fd { L1 [$1] } + +fd :: { Located ([RdrName], [RdrName]) } + : varids0 '->' varids0 { L (comb3 $1 $2 $3) + (reverse (unLoc $1), reverse (unLoc $3)) } + +varids0 :: { Located [RdrName] } + : {- empty -} { noLoc [] } + | varids0 tyvar { LL (unLoc $2 : unLoc $1) } + +----------------------------------------------------------------------------- +-- Kinds + +kind :: { Kind } + : akind { $1 } + | akind '->' kind { mkArrowKind $1 $3 } + +akind :: { Kind } + : '*' { liftedTypeKind } + | '(' kind ')' { $2 } + + +----------------------------------------------------------------------------- +-- Datatype declarations + +gadt_constrlist :: { Located [LConDecl RdrName] } + : '{' gadt_constrs '}' { LL (unLoc $2) } + | vocurly gadt_constrs close { $2 } + +gadt_constrs :: { Located [LConDecl RdrName] } + : gadt_constrs ';' gadt_constr { LL ($3 : unLoc $1) } + | gadt_constrs ';' { $1 } + | gadt_constr { L1 [$1] } + +-- We allow the following forms: +-- C :: Eq a => a -> T a +-- C :: forall a. Eq a => !a -> T a +-- D { x,y :: a } :: T a +-- forall a. Eq a => D { x,y :: a } :: T a + +gadt_constr :: { LConDecl RdrName } + : con '::' sigtype + { LL (mkGadtDecl $1 $3) } + -- Syntax: Maybe merge the record stuff with the single-case above? + -- (to kill the mostly harmless reduce/reduce error) + -- XXX revisit autrijus + | constr_stuff_record '::' sigtype + { let (con,details) = unLoc $1 in + LL (ConDecl con Implicit [] (noLoc []) details (ResTyGADT $3)) } +{- + | forall context '=>' constr_stuff_record '::' sigtype + { let (con,details) = unLoc $4 in + LL (ConDecl con Implicit (unLoc $1) $2 details (ResTyGADT $6)) } + | forall constr_stuff_record '::' sigtype + { let (con,details) = unLoc $2 in + LL (ConDecl con Implicit (unLoc $1) (noLoc []) details (ResTyGADT $4)) } +-} + + +constrs :: { Located [LConDecl RdrName] } + : {- empty; a GHC extension -} { noLoc [] } + | '=' constrs1 { LL (unLoc $2) } + +constrs1 :: { Located [LConDecl RdrName] } + : constrs1 '|' constr { LL ($3 : unLoc $1) } + | constr { L1 [$1] } + +constr :: { LConDecl RdrName } + : forall context '=>' constr_stuff + { let (con,details) = unLoc $4 in + LL (ConDecl con Explicit (unLoc $1) $2 details ResTyH98) } + | forall constr_stuff + { let (con,details) = unLoc $2 in + LL (ConDecl con Explicit (unLoc $1) (noLoc []) details ResTyH98) } + +forall :: { Located [LHsTyVarBndr RdrName] } + : 'forall' tv_bndrs '.' { LL $2 } + | {- empty -} { noLoc [] } + +constr_stuff :: { Located (Located RdrName, HsConDetails RdrName (LBangType RdrName)) } +-- We parse the constructor declaration +-- C t1 t2 +-- as a btype (treating C as a type constructor) and then convert C to be +-- a data constructor. Reason: it might continue like this: +-- C t1 t2 %: D Int +-- in which case C really would be a type constructor. We can't resolve this +-- ambiguity till we come across the constructor oprerator :% (or not, more usually) + : btype {% mkPrefixCon $1 [] >>= return.LL } + | oqtycon '{' '}' {% mkRecCon $1 [] >>= return.LL } + | oqtycon '{' fielddecls '}' {% mkRecCon $1 $3 >>= return.LL } + | btype conop btype { LL ($2, InfixCon $1 $3) } + +constr_stuff_record :: { Located (Located RdrName, HsConDetails RdrName (LBangType RdrName)) } + : oqtycon '{' '}' {% mkRecCon $1 [] >>= return.sL (comb2 $1 $>) } + | oqtycon '{' fielddecls '}' {% mkRecCon $1 $3 >>= return.sL (comb2 $1 $>) } + +fielddecls :: { [([Located RdrName], LBangType RdrName)] } + : fielddecl ',' fielddecls { unLoc $1 : $3 } + | fielddecl { [unLoc $1] } + +fielddecl :: { Located ([Located RdrName], LBangType RdrName) } + : sig_vars '::' ctype { LL (reverse (unLoc $1), $3) } + +-- We allow the odd-looking 'inst_type' in a deriving clause, so that +-- we can do deriving( forall a. C [a] ) in a newtype (GHC extension). +-- The 'C [a]' part is converted to an HsPredTy by checkInstType +-- We don't allow a context, but that's sorted out by the type checker. +deriving :: { Located (Maybe [LHsType RdrName]) } + : {- empty -} { noLoc Nothing } + | 'deriving' qtycon {% do { let { L loc tv = $2 } + ; p <- checkInstType (L loc (HsTyVar tv)) + ; return (LL (Just [p])) } } + | 'deriving' '(' ')' { LL (Just []) } + | 'deriving' '(' inst_types1 ')' { LL (Just $3) } + -- Glasgow extension: allow partial + -- applications in derivings + +----------------------------------------------------------------------------- +-- Value definitions + +{- There's an awkward overlap with a type signature. Consider + f :: Int -> Int = ...rhs... + Then we can't tell whether it's a type signature or a value + definition with a result signature until we see the '='. + So we have to inline enough to postpone reductions until we know. +-} + +{- + ATTENTION: Dirty Hackery Ahead! If the second alternative of vars is var + instead of qvar, we get another shift/reduce-conflict. Consider the + following programs: + + { (^^) :: Int->Int ; } Type signature; only var allowed + + { (^^) :: Int->Int = ... ; } Value defn with result signature; + qvar allowed (because of instance decls) + + We can't tell whether to reduce var to qvar until after we've read the signatures. +-} + +decl :: { Located (OrdList (LHsDecl RdrName)) } + : sigdecl { $1 } + | '!' infixexp rhs {% do { pat <- checkPattern $2; + return (LL $ unitOL $ LL $ ValD $ + PatBind (LL $ BangPat pat) (unLoc $3) + placeHolderType placeHolderNames) } } + | infixexp opt_sig rhs {% do { r <- checkValDef $1 $2 $3; + return (LL $ unitOL (LL $ ValD r)) } } + +rhs :: { Located (GRHSs RdrName) } + : '=' exp wherebinds { L (comb3 $1 $2 $3) $ GRHSs (unguardedRHS $2) (unLoc $3) } + | gdrhs wherebinds { LL $ GRHSs (reverse (unLoc $1)) (unLoc $2) } + +gdrhs :: { Located [LGRHS RdrName] } + : gdrhs gdrh { LL ($2 : unLoc $1) } + | gdrh { L1 [$1] } + +gdrh :: { LGRHS RdrName } + : '|' quals '=' exp { sL (comb2 $1 $>) $ GRHS (reverse (unLoc $2)) $4 } + +sigdecl :: { Located (OrdList (LHsDecl RdrName)) } + : infixexp '::' sigtype + {% do s <- checkValSig $1 $3; + return (LL $ unitOL (LL $ SigD s)) } + -- See the above notes for why we need infixexp here + | var ',' sig_vars '::' sigtype + { LL $ toOL [ LL $ SigD (TypeSig n $5) | n <- $1 : unLoc $3 ] } + | infix prec ops { LL $ toOL [ LL $ SigD (FixSig (FixitySig n (Fixity $2 (unLoc $1)))) + | n <- unLoc $3 ] } + | '{-# INLINE' activation qvar '#-}' + { LL $ unitOL (LL $ SigD (InlineSig $3 (mkInlineSpec $2 (getINLINE $1)))) } + | '{-# SPECIALISE' qvar '::' sigtypes1 '#-}' + { LL $ toOL [ LL $ SigD (SpecSig $2 t defaultInlineSpec) + | t <- $4] } + | '{-# SPECIALISE_INLINE' activation qvar '::' sigtypes1 '#-}' + { LL $ toOL [ LL $ SigD (SpecSig $3 t (mkInlineSpec $2 (getSPEC_INLINE $1))) + | t <- $5] } + | '{-# SPECIALISE' 'instance' inst_type '#-}' + { LL $ unitOL (LL $ SigD (SpecInstSig $3)) } + +----------------------------------------------------------------------------- +-- Expressions + +exp :: { LHsExpr RdrName } + : infixexp '::' sigtype { LL $ ExprWithTySig $1 $3 } + | infixexp '-<' exp { LL $ HsArrApp $1 $3 placeHolderType HsFirstOrderApp True } + | infixexp '>-' exp { LL $ HsArrApp $3 $1 placeHolderType HsFirstOrderApp False } + | infixexp '-<<' exp { LL $ HsArrApp $1 $3 placeHolderType HsHigherOrderApp True } + | infixexp '>>-' exp { LL $ HsArrApp $3 $1 placeHolderType HsHigherOrderApp False} + | infixexp { $1 } + +infixexp :: { LHsExpr RdrName } + : exp10 { $1 } + | infixexp qop exp10 { LL (OpApp $1 $2 (panic "fixity") $3) } + +exp10 :: { LHsExpr RdrName } + : '\\' aexp aexps opt_asig '->' exp + {% checkPatterns ($2 : reverse $3) >>= \ ps -> + return (LL $ HsLam (mkMatchGroup [LL $ Match ps $4 + (GRHSs (unguardedRHS $6) emptyLocalBinds + )])) } + | 'let' binds 'in' exp { LL $ HsLet (unLoc $2) $4 } + | 'if' exp 'then' exp 'else' exp { LL $ HsIf $2 $4 $6 } + | 'case' exp 'of' altslist { LL $ HsCase $2 (mkMatchGroup (unLoc $4)) } + | '-' fexp { LL $ mkHsNegApp $2 } + + | 'do' stmtlist {% let loc = comb2 $1 $2 in + checkDo loc (unLoc $2) >>= \ (stmts,body) -> + return (L loc (mkHsDo DoExpr stmts body)) } + | 'mdo' stmtlist {% let loc = comb2 $1 $2 in + checkDo loc (unLoc $2) >>= \ (stmts,body) -> + return (L loc (mkHsDo (MDoExpr noPostTcTable) stmts body)) } + | scc_annot exp { LL $ if opt_SccProfilingOn + then HsSCC (unLoc $1) $2 + else HsPar $2 } + + | 'proc' aexp '->' exp + {% checkPattern $2 >>= \ p -> + return (LL $ HsProc p (LL $ HsCmdTop $4 [] + placeHolderType undefined)) } + -- TODO: is LL right here? + + | '{-# CORE' STRING '#-}' exp { LL $ HsCoreAnn (getSTRING $2) $4 } + -- hdaume: core annotation + | fexp { $1 } + +scc_annot :: { Located FastString } + : '_scc_' STRING { LL $ getSTRING $2 } + | '{-# SCC' STRING '#-}' { LL $ getSTRING $2 } + +fexp :: { LHsExpr RdrName } + : fexp aexp { LL $ HsApp $1 $2 } + | aexp { $1 } + +aexps :: { [LHsExpr RdrName] } + : aexps aexp { $2 : $1 } + | {- empty -} { [] } + +aexp :: { LHsExpr RdrName } + : qvar '@' aexp { LL $ EAsPat $1 $3 } + | '~' aexp { LL $ ELazyPat $2 } +-- | '!' aexp { LL $ EBangPat $2 } + | aexp1 { $1 } + +aexp1 :: { LHsExpr RdrName } + : aexp1 '{' fbinds '}' {% do { r <- mkRecConstrOrUpdate $1 (comb2 $2 $4) + (reverse $3); + return (LL r) }} + | aexp2 { $1 } + +-- Here was the syntax for type applications that I was planning +-- but there are difficulties (e.g. what order for type args) +-- so it's not enabled yet. +-- But this case *is* used for the left hand side of a generic definition, +-- which is parsed as an expression before being munged into a pattern + | qcname '{|' gentype '|}' { LL $ HsApp (sL (getLoc $1) (HsVar (unLoc $1))) + (sL (getLoc $3) (HsType $3)) } + +aexp2 :: { LHsExpr RdrName } + : ipvar { L1 (HsIPVar $! unLoc $1) } + | qcname { L1 (HsVar $! unLoc $1) } + | literal { L1 (HsLit $! unLoc $1) } + | INTEGER { L1 (HsOverLit $! mkHsIntegral (getINTEGER $1)) } + | RATIONAL { L1 (HsOverLit $! mkHsFractional (getRATIONAL $1)) } + | '(' exp ')' { LL (HsPar $2) } + | '(' texp ',' texps ')' { LL $ ExplicitTuple ($2 : reverse $4) Boxed } + | '(#' texps '#)' { LL $ ExplicitTuple (reverse $2) Unboxed } + | '[' list ']' { LL (unLoc $2) } + | '[:' parr ':]' { LL (unLoc $2) } + | '(' infixexp qop ')' { LL $ SectionL $2 $3 } + | '(' qopm infixexp ')' { LL $ SectionR $2 $3 } + | '_' { L1 EWildPat } + + -- MetaHaskell Extension + | TH_ID_SPLICE { L1 $ HsSpliceE (mkHsSplice + (L1 $ HsVar (mkUnqual varName + (getTH_ID_SPLICE $1)))) } -- $x + | '$(' exp ')' { LL $ HsSpliceE (mkHsSplice $2) } -- $( exp ) + + | TH_VAR_QUOTE qvar { LL $ HsBracket (VarBr (unLoc $2)) } + | TH_VAR_QUOTE qcon { LL $ HsBracket (VarBr (unLoc $2)) } + | TH_TY_QUOTE tyvar { LL $ HsBracket (VarBr (unLoc $2)) } + | TH_TY_QUOTE gtycon { LL $ HsBracket (VarBr (unLoc $2)) } + | '[|' exp '|]' { LL $ HsBracket (ExpBr $2) } + | '[t|' ctype '|]' { LL $ HsBracket (TypBr $2) } + | '[p|' infixexp '|]' {% checkPattern $2 >>= \p -> + return (LL $ HsBracket (PatBr p)) } + | '[d|' cvtopbody '|]' { LL $ HsBracket (DecBr (mkGroup $2)) } + + -- arrow notation extension + | '(|' aexp2 cmdargs '|)' { LL $ HsArrForm $2 Nothing (reverse $3) } + +cmdargs :: { [LHsCmdTop RdrName] } + : cmdargs acmd { $2 : $1 } + | {- empty -} { [] } + +acmd :: { LHsCmdTop RdrName } + : aexp2 { L1 $ HsCmdTop $1 [] placeHolderType undefined } + +cvtopbody :: { [LHsDecl RdrName] } + : '{' cvtopdecls0 '}' { $2 } + | vocurly cvtopdecls0 close { $2 } + +cvtopdecls0 :: { [LHsDecl RdrName] } + : {- empty -} { [] } + | cvtopdecls { $1 } + +texp :: { LHsExpr RdrName } + : exp { $1 } + | qopm infixexp { LL $ SectionR $1 $2 } + -- The second production is really here only for bang patterns + -- but + +texps :: { [LHsExpr RdrName] } + : texps ',' texp { $3 : $1 } + | texp { [$1] } + + +----------------------------------------------------------------------------- +-- List expressions + +-- The rules below are little bit contorted to keep lexps left-recursive while +-- avoiding another shift/reduce-conflict. + +list :: { LHsExpr RdrName } + : texp { L1 $ ExplicitList placeHolderType [$1] } + | lexps { L1 $ ExplicitList placeHolderType (reverse (unLoc $1)) } + | texp '..' { LL $ ArithSeq noPostTcExpr (From $1) } + | texp ',' exp '..' { LL $ ArithSeq noPostTcExpr (FromThen $1 $3) } + | texp '..' exp { LL $ ArithSeq noPostTcExpr (FromTo $1 $3) } + | texp ',' exp '..' exp { LL $ ArithSeq noPostTcExpr (FromThenTo $1 $3 $5) } + | texp pquals { sL (comb2 $1 $>) $ mkHsDo ListComp (reverse (unLoc $2)) $1 } + +lexps :: { Located [LHsExpr RdrName] } + : lexps ',' texp { LL ($3 : unLoc $1) } + | texp ',' texp { LL [$3,$1] } + +----------------------------------------------------------------------------- +-- List Comprehensions + +pquals :: { Located [LStmt RdrName] } -- Either a singleton ParStmt, + -- or a reversed list of Stmts + : pquals1 { case unLoc $1 of + [qs] -> L1 qs + qss -> L1 [L1 (ParStmt stmtss)] + where + stmtss = [ (reverse qs, undefined) + | qs <- qss ] + } + +pquals1 :: { Located [[LStmt RdrName]] } + : pquals1 '|' quals { LL (unLoc $3 : unLoc $1) } + | '|' quals { L (getLoc $2) [unLoc $2] } + +quals :: { Located [LStmt RdrName] } + : quals ',' qual { LL ($3 : unLoc $1) } + | qual { L1 [$1] } + +----------------------------------------------------------------------------- +-- Parallel array expressions + +-- The rules below are little bit contorted; see the list case for details. +-- Note that, in contrast to lists, we only have finite arithmetic sequences. +-- Moreover, we allow explicit arrays with no element (represented by the nil +-- constructor in the list case). + +parr :: { LHsExpr RdrName } + : { noLoc (ExplicitPArr placeHolderType []) } + | exp { L1 $ ExplicitPArr placeHolderType [$1] } + | lexps { L1 $ ExplicitPArr placeHolderType + (reverse (unLoc $1)) } + | exp '..' exp { LL $ PArrSeq noPostTcExpr (FromTo $1 $3) } + | exp ',' exp '..' exp { LL $ PArrSeq noPostTcExpr (FromThenTo $1 $3 $5) } + | exp pquals { sL (comb2 $1 $>) $ mkHsDo PArrComp (reverse (unLoc $2)) $1 } + +-- We are reusing `lexps' and `pquals' from the list case. + +----------------------------------------------------------------------------- +-- Case alternatives + +altslist :: { Located [LMatch RdrName] } + : '{' alts '}' { LL (reverse (unLoc $2)) } + | vocurly alts close { L (getLoc $2) (reverse (unLoc $2)) } + +alts :: { Located [LMatch RdrName] } + : alts1 { L1 (unLoc $1) } + | ';' alts { LL (unLoc $2) } + +alts1 :: { Located [LMatch RdrName] } + : alts1 ';' alt { LL ($3 : unLoc $1) } + | alts1 ';' { LL (unLoc $1) } + | alt { L1 [$1] } + +alt :: { LMatch RdrName } + : infixexp opt_sig alt_rhs {% checkPattern $1 >>= \p -> + return (LL (Match [p] $2 (unLoc $3))) } + +alt_rhs :: { Located (GRHSs RdrName) } + : ralt wherebinds { LL (GRHSs (unLoc $1) (unLoc $2)) } + +ralt :: { Located [LGRHS RdrName] } + : '->' exp { LL (unguardedRHS $2) } + | gdpats { L1 (reverse (unLoc $1)) } + +gdpats :: { Located [LGRHS RdrName] } + : gdpats gdpat { LL ($2 : unLoc $1) } + | gdpat { L1 [$1] } + +gdpat :: { LGRHS RdrName } + : '|' quals '->' exp { sL (comb2 $1 $>) $ GRHS (reverse (unLoc $2)) $4 } + +----------------------------------------------------------------------------- +-- Statement sequences + +stmtlist :: { Located [LStmt RdrName] } + : '{' stmts '}' { LL (unLoc $2) } + | vocurly stmts close { $2 } + +-- do { ;; s ; s ; ; s ;; } +-- The last Stmt should be an expression, but that's hard to enforce +-- here, because we need too much lookahead if we see do { e ; } +-- So we use ExprStmts throughout, and switch the last one over +-- in ParseUtils.checkDo instead +stmts :: { Located [LStmt RdrName] } + : stmt stmts_help { LL ($1 : unLoc $2) } + | ';' stmts { LL (unLoc $2) } + | {- empty -} { noLoc [] } + +stmts_help :: { Located [LStmt RdrName] } -- might be empty + : ';' stmts { LL (unLoc $2) } + | {- empty -} { noLoc [] } + +-- For typing stmts at the GHCi prompt, where +-- the input may consist of just comments. +maybe_stmt :: { Maybe (LStmt RdrName) } + : stmt { Just $1 } + | {- nothing -} { Nothing } + +stmt :: { LStmt RdrName } + : qual { $1 } + | infixexp '->' exp {% checkPattern $3 >>= \p -> + return (LL $ mkBindStmt p $1) } + | 'rec' stmtlist { LL $ mkRecStmt (unLoc $2) } + +qual :: { LStmt RdrName } + : exp '<-' exp {% checkPattern $1 >>= \p -> + return (LL $ mkBindStmt p $3) } + | exp { L1 $ mkExprStmt $1 } + | 'let' binds { LL $ LetStmt (unLoc $2) } + +----------------------------------------------------------------------------- +-- Record Field Update/Construction + +fbinds :: { HsRecordBinds RdrName } + : fbinds1 { $1 } + | {- empty -} { [] } + +fbinds1 :: { HsRecordBinds RdrName } + : fbinds1 ',' fbind { $3 : $1 } + | fbind { [$1] } + +fbind :: { (Located RdrName, LHsExpr RdrName) } + : qvar '=' exp { ($1,$3) } + +----------------------------------------------------------------------------- +-- Implicit Parameter Bindings + +dbinds :: { Located [LIPBind RdrName] } + : dbinds ';' dbind { LL ($3 : unLoc $1) } + | dbinds ';' { LL (unLoc $1) } + | dbind { L1 [$1] } +-- | {- empty -} { [] } + +dbind :: { LIPBind RdrName } +dbind : ipvar '=' exp { LL (IPBind (unLoc $1) $3) } + +ipvar :: { Located (IPName RdrName) } + : IPDUPVARID { L1 (Dupable (mkUnqual varName (getIPDUPVARID $1))) } + | IPSPLITVARID { L1 (Linear (mkUnqual varName (getIPSPLITVARID $1))) } + +----------------------------------------------------------------------------- +-- Deprecations + +depreclist :: { Located [RdrName] } +depreclist : deprec_var { L1 [unLoc $1] } + | deprec_var ',' depreclist { LL (unLoc $1 : unLoc $3) } + +deprec_var :: { Located RdrName } +deprec_var : var { $1 } + | con { $1 } + +----------------------------------------- +-- Data constructors +qcon :: { Located RdrName } + : qconid { $1 } + | '(' qconsym ')' { LL (unLoc $2) } + | sysdcon { L1 $ nameRdrName (dataConName (unLoc $1)) } +-- The case of '[:' ':]' is part of the production `parr' + +con :: { Located RdrName } + : conid { $1 } + | '(' consym ')' { LL (unLoc $2) } + | sysdcon { L1 $ nameRdrName (dataConName (unLoc $1)) } + +sysdcon :: { Located DataCon } -- Wired in data constructors + : '(' ')' { LL unitDataCon } + | '(' commas ')' { LL $ tupleCon Boxed $2 } + | '[' ']' { LL nilDataCon } + +conop :: { Located RdrName } + : consym { $1 } + | '`' conid '`' { LL (unLoc $2) } + +qconop :: { Located RdrName } + : qconsym { $1 } + | '`' qconid '`' { LL (unLoc $2) } + +----------------------------------------------------------------------------- +-- Type constructors + +gtycon :: { Located RdrName } -- A "general" qualified tycon + : oqtycon { $1 } + | '(' ')' { LL $ getRdrName unitTyCon } + | '(' commas ')' { LL $ getRdrName (tupleTyCon Boxed $2) } + | '(' '->' ')' { LL $ getRdrName funTyCon } + | '[' ']' { LL $ listTyCon_RDR } + | '[:' ':]' { LL $ parrTyCon_RDR } + +oqtycon :: { Located RdrName } -- An "ordinary" qualified tycon + : qtycon { $1 } + | '(' qtyconsym ')' { LL (unLoc $2) } + +qtyconop :: { Located RdrName } -- Qualified or unqualified + : qtyconsym { $1 } + | '`' qtycon '`' { LL (unLoc $2) } + +qtycon :: { Located RdrName } -- Qualified or unqualified + : QCONID { L1 $! mkQual tcClsName (getQCONID $1) } + | tycon { $1 } + +tycon :: { Located RdrName } -- Unqualified + : CONID { L1 $! mkUnqual tcClsName (getCONID $1) } + +qtyconsym :: { Located RdrName } + : QCONSYM { L1 $! mkQual tcClsName (getQCONSYM $1) } + | tyconsym { $1 } + +tyconsym :: { Located RdrName } + : CONSYM { L1 $! mkUnqual tcClsName (getCONSYM $1) } + +----------------------------------------------------------------------------- +-- Operators + +op :: { Located RdrName } -- used in infix decls + : varop { $1 } + | conop { $1 } + +varop :: { Located RdrName } + : varsym { $1 } + | '`' varid '`' { LL (unLoc $2) } + +qop :: { LHsExpr RdrName } -- used in sections + : qvarop { L1 $ HsVar (unLoc $1) } + | qconop { L1 $ HsVar (unLoc $1) } + +qopm :: { LHsExpr RdrName } -- used in sections + : qvaropm { L1 $ HsVar (unLoc $1) } + | qconop { L1 $ HsVar (unLoc $1) } + +qvarop :: { Located RdrName } + : qvarsym { $1 } + | '`' qvarid '`' { LL (unLoc $2) } + +qvaropm :: { Located RdrName } + : qvarsym_no_minus { $1 } + | '`' qvarid '`' { LL (unLoc $2) } + +----------------------------------------------------------------------------- +-- Type variables + +tyvar :: { Located RdrName } +tyvar : tyvarid { $1 } + | '(' tyvarsym ')' { LL (unLoc $2) } + +tyvarop :: { Located RdrName } +tyvarop : '`' tyvarid '`' { LL (unLoc $2) } + | tyvarsym { $1 } + +tyvarid :: { Located RdrName } + : VARID { L1 $! mkUnqual tvName (getVARID $1) } + | special_id { L1 $! mkUnqual tvName (unLoc $1) } + | 'unsafe' { L1 $! mkUnqual tvName FSLIT("unsafe") } + | 'safe' { L1 $! mkUnqual tvName FSLIT("safe") } + | 'threadsafe' { L1 $! mkUnqual tvName FSLIT("threadsafe") } + +tyvarsym :: { Located RdrName } +-- Does not include "!", because that is used for strictness marks +-- or ".", because that separates the quantified type vars from the rest +-- or "*", because that's used for kinds +tyvarsym : VARSYM { L1 $! mkUnqual tvName (getVARSYM $1) } + +----------------------------------------------------------------------------- +-- Variables + +var :: { Located RdrName } + : varid { $1 } + | '(' varsym ')' { LL (unLoc $2) } + +qvar :: { Located RdrName } + : qvarid { $1 } + | '(' varsym ')' { LL (unLoc $2) } + | '(' qvarsym1 ')' { LL (unLoc $2) } +-- We've inlined qvarsym here so that the decision about +-- whether it's a qvar or a var can be postponed until +-- *after* we see the close paren. + +qvarid :: { Located RdrName } + : varid { $1 } + | QVARID { L1 $ mkQual varName (getQVARID $1) } + +varid :: { Located RdrName } + : varid_no_unsafe { $1 } + | 'unsafe' { L1 $! mkUnqual varName FSLIT("unsafe") } + | 'safe' { L1 $! mkUnqual varName FSLIT("safe") } + | 'threadsafe' { L1 $! mkUnqual varName FSLIT("threadsafe") } + +varid_no_unsafe :: { Located RdrName } + : VARID { L1 $! mkUnqual varName (getVARID $1) } + | special_id { L1 $! mkUnqual varName (unLoc $1) } + | 'forall' { L1 $! mkUnqual varName FSLIT("forall") } + +qvarsym :: { Located RdrName } + : varsym { $1 } + | qvarsym1 { $1 } + +qvarsym_no_minus :: { Located RdrName } + : varsym_no_minus { $1 } + | qvarsym1 { $1 } + +qvarsym1 :: { Located RdrName } +qvarsym1 : QVARSYM { L1 $ mkQual varName (getQVARSYM $1) } + +varsym :: { Located RdrName } + : varsym_no_minus { $1 } + | '-' { L1 $ mkUnqual varName FSLIT("-") } + +varsym_no_minus :: { Located RdrName } -- varsym not including '-' + : VARSYM { L1 $ mkUnqual varName (getVARSYM $1) } + | special_sym { L1 $ mkUnqual varName (unLoc $1) } + + +-- These special_ids are treated as keywords in various places, +-- but as ordinary ids elsewhere. 'special_id' collects all these +-- except 'unsafe' and 'forall' whose treatment differs depending on context +special_id :: { Located FastString } +special_id + : 'as' { L1 FSLIT("as") } + | 'qualified' { L1 FSLIT("qualified") } + | 'hiding' { L1 FSLIT("hiding") } + | 'export' { L1 FSLIT("export") } + | 'label' { L1 FSLIT("label") } + | 'dynamic' { L1 FSLIT("dynamic") } + | 'stdcall' { L1 FSLIT("stdcall") } + | 'ccall' { L1 FSLIT("ccall") } + +special_sym :: { Located FastString } +special_sym : '!' { L1 FSLIT("!") } + | '.' { L1 FSLIT(".") } + | '*' { L1 FSLIT("*") } + +----------------------------------------------------------------------------- +-- Data constructors + +qconid :: { Located RdrName } -- Qualified or unqualified + : conid { $1 } + | QCONID { L1 $ mkQual dataName (getQCONID $1) } + +conid :: { Located RdrName } + : CONID { L1 $ mkUnqual dataName (getCONID $1) } + +qconsym :: { Located RdrName } -- Qualified or unqualified + : consym { $1 } + | QCONSYM { L1 $ mkQual dataName (getQCONSYM $1) } + +consym :: { Located RdrName } + : CONSYM { L1 $ mkUnqual dataName (getCONSYM $1) } + + -- ':' means only list cons + | ':' { L1 $ consDataCon_RDR } + + +----------------------------------------------------------------------------- +-- Literals + +literal :: { Located HsLit } + : CHAR { L1 $ HsChar $ getCHAR $1 } + | STRING { L1 $ HsString $ getSTRING $1 } + | PRIMINTEGER { L1 $ HsIntPrim $ getPRIMINTEGER $1 } + | PRIMCHAR { L1 $ HsCharPrim $ getPRIMCHAR $1 } + | PRIMSTRING { L1 $ HsStringPrim $ getPRIMSTRING $1 } + | PRIMFLOAT { L1 $ HsFloatPrim $ getPRIMFLOAT $1 } + | PRIMDOUBLE { L1 $ HsDoublePrim $ getPRIMDOUBLE $1 } + +----------------------------------------------------------------------------- +-- Layout + +close :: { () } + : vccurly { () } -- context popped in lexer. + | error {% popContext } + +----------------------------------------------------------------------------- +-- Miscellaneous (mostly renamings) + +modid :: { Located Module } + : CONID { L1 $ mkModuleFS (getCONID $1) } + | QCONID { L1 $ let (mod,c) = getQCONID $1 in + mkModuleFS + (mkFastString + (unpackFS mod ++ '.':unpackFS c)) + } + +commas :: { Int } + : commas ',' { $1 + 1 } + | ',' { 2 } + +----------------------------------------------------------------------------- + +{ +happyError :: P a +happyError = srcParseFail + +getVARID (L _ (ITvarid x)) = x +getCONID (L _ (ITconid x)) = x +getVARSYM (L _ (ITvarsym x)) = x +getCONSYM (L _ (ITconsym x)) = x +getQVARID (L _ (ITqvarid x)) = x +getQCONID (L _ (ITqconid x)) = x +getQVARSYM (L _ (ITqvarsym x)) = x +getQCONSYM (L _ (ITqconsym x)) = x +getIPDUPVARID (L _ (ITdupipvarid x)) = x +getIPSPLITVARID (L _ (ITsplitipvarid x)) = x +getCHAR (L _ (ITchar x)) = x +getSTRING (L _ (ITstring x)) = x +getINTEGER (L _ (ITinteger x)) = x +getRATIONAL (L _ (ITrational x)) = x +getPRIMCHAR (L _ (ITprimchar x)) = x +getPRIMSTRING (L _ (ITprimstring x)) = x +getPRIMINTEGER (L _ (ITprimint x)) = x +getPRIMFLOAT (L _ (ITprimfloat x)) = x +getPRIMDOUBLE (L _ (ITprimdouble x)) = x +getTH_ID_SPLICE (L _ (ITidEscape x)) = x +getINLINE (L _ (ITinline_prag b)) = b +getSPEC_INLINE (L _ (ITspec_inline_prag b)) = b + +-- Utilities for combining source spans +comb2 :: Located a -> Located b -> SrcSpan +comb2 = combineLocs + +comb3 :: Located a -> Located b -> Located c -> SrcSpan +comb3 a b c = combineSrcSpans (getLoc a) (combineSrcSpans (getLoc b) (getLoc c)) + +comb4 :: Located a -> Located b -> Located c -> Located d -> SrcSpan +comb4 a b c d = combineSrcSpans (getLoc a) $ combineSrcSpans (getLoc b) $ + combineSrcSpans (getLoc c) (getLoc d) + +-- strict constructor version: +{-# INLINE sL #-} +sL :: SrcSpan -> a -> Located a +sL span a = span `seq` L span a + +-- Make a source location for the file. We're a bit lazy here and just +-- make a point SrcSpan at line 1, column 0. Strictly speaking we should +-- try to find the span of the whole file (ToDo). +fileSrcSpan :: P SrcSpan +fileSrcSpan = do + l <- getSrcLoc; + let loc = mkSrcLoc (srcLocFile l) 1 0; + return (mkSrcSpan loc loc) +} diff --git a/compiler/parser/ParserCore.y b/compiler/parser/ParserCore.y new file mode 100644 index 0000000000..3210583f96 --- /dev/null +++ b/compiler/parser/ParserCore.y @@ -0,0 +1,339 @@ +{ +module ParserCore ( parseCore ) where + +import IfaceSyn +import ForeignCall +import RdrHsSyn +import HsSyn +import RdrName +import OccName +import Kind( Kind(..) ) +import Name( nameOccName, nameModule ) +import Module +import ParserCoreUtils +import LexCore +import Literal +import SrcLoc +import TysPrim( wordPrimTyCon, intPrimTyCon, charPrimTyCon, + floatPrimTyCon, doublePrimTyCon, addrPrimTyCon ) +import TyCon ( TyCon, tyConName ) +import FastString +import Outputable +import Char + +#include "../HsVersions.h" + +} + +%name parseCore +%tokentype { Token } + +%token + '%module' { TKmodule } + '%data' { TKdata } + '%newtype' { TKnewtype } + '%forall' { TKforall } + '%rec' { TKrec } + '%let' { TKlet } + '%in' { TKin } + '%case' { TKcase } + '%of' { TKof } + '%coerce' { TKcoerce } + '%note' { TKnote } + '%external' { TKexternal } + '%_' { TKwild } + '(' { TKoparen } + ')' { TKcparen } + '{' { TKobrace } + '}' { TKcbrace } + '#' { TKhash} + '=' { TKeq } + '::' { TKcoloncolon } + '*' { TKstar } + '->' { TKrarrow } + '\\' { TKlambda} + '@' { TKat } + '.' { TKdot } + '?' { TKquestion} + ';' { TKsemicolon } + NAME { TKname $$ } + CNAME { TKcname $$ } + INTEGER { TKinteger $$ } + RATIONAL { TKrational $$ } + STRING { TKstring $$ } + CHAR { TKchar $$ } + +%monad { P } { thenP } { returnP } +%lexer { lexer } { TKEOF } + +%% + +module :: { HsExtCore RdrName } + : '%module' modid tdefs vdefgs { HsExtCore $2 $3 $4 } + +modid :: { Module } + : CNAME { mkModuleFS (mkFastString $1) } + +------------------------------------------------------------- +-- Type and newtype declarations are in HsSyn syntax + +tdefs :: { [TyClDecl RdrName] } + : {- empty -} {[]} + | tdef ';' tdefs {$1:$3} + +tdef :: { TyClDecl RdrName } + : '%data' q_tc_name tv_bndrs '=' '{' cons1 '}' + { mkTyData DataType (noLoc [], noLoc (ifaceExtRdrName $2), map toHsTvBndr $3) Nothing $6 Nothing } + | '%newtype' q_tc_name tv_bndrs trep + { let tc_rdr = ifaceExtRdrName $2 in + mkTyData NewType (noLoc [], noLoc tc_rdr, map toHsTvBndr $3) Nothing ($4 (rdrNameOcc tc_rdr)) Nothing } + +-- For a newtype we have to invent a fake data constructor name +-- It doesn't matter what it is, because it won't be used +trep :: { OccName -> [LConDecl RdrName] } + : {- empty -} { (\ tc_occ -> []) } + | '=' ty { (\ tc_occ -> let { dc_name = mkRdrUnqual (setOccNameSpace dataName tc_occ) ; + con_info = PrefixCon [toHsType $2] } + in [noLoc $ ConDecl (noLoc dc_name) Explicit [] + (noLoc []) con_info ResTyH98]) } + +cons1 :: { [LConDecl RdrName] } + : con { [$1] } + | con ';' cons1 { $1:$3 } + +con :: { LConDecl RdrName } + : d_pat_occ attv_bndrs hs_atys + { noLoc $ ConDecl (noLoc (mkRdrUnqual $1)) Explicit $2 (noLoc []) (PrefixCon $3) ResTyH98} + | d_pat_occ '::' ty + -- XXX - autrijus - $3 needs to be split into argument and return types! + -- also not sure whether the [] below (quantified vars) appears. + -- also the "PrefixCon []" is wrong. + -- also we want to munge $3 somehow. + -- extractWhatEver to unpack ty into the parts to ConDecl + -- XXX - define it somewhere in RdrHsSyn + { noLoc $ ConDecl (noLoc (mkRdrUnqual $1)) Explicit [] (noLoc []) (PrefixCon []) (undefined $3) } + +attv_bndrs :: { [LHsTyVarBndr RdrName] } + : {- empty -} { [] } + | '@' tv_bndr attv_bndrs { toHsTvBndr $2 : $3 } + +hs_atys :: { [LHsType RdrName] } + : atys { map toHsType $1 } + + +--------------------------------------- +-- Types +--------------------------------------- + +atys :: { [IfaceType] } + : {- empty -} { [] } + | aty atys { $1:$2 } + +aty :: { IfaceType } + : tv_occ { IfaceTyVar $1 } + | q_tc_name { IfaceTyConApp (IfaceTc $1) [] } + | '(' ty ')' { $2 } + +bty :: { IfaceType } + : tv_occ atys { foldl IfaceAppTy (IfaceTyVar $1) $2 } + | q_tc_name atys { IfaceTyConApp (IfaceTc $1) $2 } + | '(' ty ')' { $2 } + +ty :: { IfaceType } + : bty { $1 } + | bty '->' ty { IfaceFunTy $1 $3 } + | '%forall' tv_bndrs '.' ty { foldr IfaceForAllTy $4 $2 } + +---------------------------------------------- +-- Bindings are in Iface syntax + +vdefgs :: { [IfaceBinding] } + : {- empty -} { [] } + | let_bind ';' vdefgs { $1 : $3 } + +let_bind :: { IfaceBinding } + : '%rec' '{' vdefs1 '}' { IfaceRec $3 } + | vdef { let (b,r) = $1 + in IfaceNonRec b r } + +vdefs1 :: { [(IfaceIdBndr, IfaceExpr)] } + : vdef { [$1] } + | vdef ';' vdefs1 { $1:$3 } + +vdef :: { (IfaceIdBndr, IfaceExpr) } + : qd_occ '::' ty '=' exp { (($1, $3), $5) } + -- NB: qd_occ includes data constructors, because + -- we allow data-constructor wrappers at top level + -- But we discard the module name, because it must be the + -- same as the module being compiled, and Iface syntax only + -- has OccNames in binding positions + +qd_occ :: { OccName } + : var_occ { $1 } + | d_occ { $1 } + +--------------------------------------- +-- Binders +bndr :: { IfaceBndr } + : '@' tv_bndr { IfaceTvBndr $2 } + | id_bndr { IfaceIdBndr $1 } + +bndrs :: { [IfaceBndr] } + : bndr { [$1] } + | bndr bndrs { $1:$2 } + +id_bndr :: { IfaceIdBndr } + : '(' var_occ '::' ty ')' { ($2,$4) } + +id_bndrs :: { [IfaceIdBndr] } + : {-empty -} { [] } + | id_bndr id_bndrs { $1:$2 } + +tv_bndr :: { IfaceTvBndr } + : tv_occ { ($1, LiftedTypeKind) } + | '(' tv_occ '::' akind ')' { ($2, $4) } + +tv_bndrs :: { [IfaceTvBndr] } + : {- empty -} { [] } + | tv_bndr tv_bndrs { $1:$2 } + +akind :: { IfaceKind } + : '*' { LiftedTypeKind } + | '#' { UnliftedTypeKind } + | '?' { OpenTypeKind } + | '(' kind ')' { $2 } + +kind :: { IfaceKind } + : akind { $1 } + | akind '->' kind { FunKind $1 $3 } + +----------------------------------------- +-- Expressions + +aexp :: { IfaceExpr } + : var_occ { IfaceLcl $1 } + | modid '.' qd_occ { IfaceExt (ExtPkg $1 $3) } + | lit { IfaceLit $1 } + | '(' exp ')' { $2 } + +fexp :: { IfaceExpr } + : fexp aexp { IfaceApp $1 $2 } + | fexp '@' aty { IfaceApp $1 (IfaceType $3) } + | aexp { $1 } + +exp :: { IfaceExpr } + : fexp { $1 } + | '\\' bndrs '->' exp { foldr IfaceLam $4 $2 } + | '%let' let_bind '%in' exp { IfaceLet $2 $4 } +-- gaw 2004 + | '%case' '(' ty ')' aexp '%of' id_bndr + '{' alts1 '}' { IfaceCase $5 (fst $7) $3 $9 } + | '%coerce' aty exp { IfaceNote (IfaceCoerce $2) $3 } + | '%note' STRING exp + { case $2 of + --"SCC" -> IfaceNote (IfaceSCC "scc") $3 + "InlineCall" -> IfaceNote IfaceInlineCall $3 + "InlineMe" -> IfaceNote IfaceInlineMe $3 + } + | '%external' STRING aty { IfaceFCall (ForeignCall.CCall + (CCallSpec (StaticTarget (mkFastString $2)) + CCallConv (PlaySafe False))) + $3 } + +alts1 :: { [IfaceAlt] } + : alt { [$1] } + | alt ';' alts1 { $1:$3 } + +alt :: { IfaceAlt } + : modid '.' d_pat_occ bndrs '->' exp + { (IfaceDataAlt $3, map ifaceBndrName $4, $6) } + -- The external syntax currently includes the types of the + -- the args, but they aren't needed internally + -- Nor is the module qualifier + | lit '->' exp + { (IfaceLitAlt $1, [], $3) } + | '%_' '->' exp + { (IfaceDefault, [], $3) } + +lit :: { Literal } + : '(' INTEGER '::' aty ')' { convIntLit $2 $4 } + | '(' RATIONAL '::' aty ')' { convRatLit $2 $4 } + | '(' CHAR '::' aty ')' { MachChar $2 } + | '(' STRING '::' aty ')' { MachStr (mkFastString $2) } + +tv_occ :: { OccName } + : NAME { mkOccName tvName $1 } + +var_occ :: { OccName } + : NAME { mkVarOcc $1 } + + +-- Type constructor +q_tc_name :: { IfaceExtName } + : modid '.' CNAME { ExtPkg $1 (mkOccName tcName $3) } + +-- Data constructor in a pattern or data type declaration; use the dataName, +-- because that's what we expect in Core case patterns +d_pat_occ :: { OccName } + : CNAME { mkOccName dataName $1 } + +-- Data constructor occurrence in an expression; +-- use the varName because that's the worker Id +d_occ :: { OccName } + : CNAME { mkVarOcc $1 } + +{ + +ifaceBndrName (IfaceIdBndr (n,_)) = n +ifaceBndrName (IfaceTvBndr (n,_)) = n + +convIntLit :: Integer -> IfaceType -> Literal +convIntLit i (IfaceTyConApp tc []) + | tc `eqTc` intPrimTyCon = MachInt i + | tc `eqTc` wordPrimTyCon = MachWord i + | tc `eqTc` charPrimTyCon = MachChar (chr (fromInteger i)) + | tc `eqTc` addrPrimTyCon && i == 0 = MachNullAddr +convIntLit i aty + = pprPanic "Unknown integer literal type" (ppr aty) + +convRatLit :: Rational -> IfaceType -> Literal +convRatLit r (IfaceTyConApp tc []) + | tc `eqTc` floatPrimTyCon = MachFloat r + | tc `eqTc` doublePrimTyCon = MachDouble r +convRatLit i aty + = pprPanic "Unknown rational literal type" (ppr aty) + +eqTc :: IfaceTyCon -> TyCon -> Bool -- Ugh! +eqTc (IfaceTc (ExtPkg mod occ)) tycon + = mod == nameModule nm && occ == nameOccName nm + where + nm = tyConName tycon + +-- Tiresomely, we have to generate both HsTypes (in type/class decls) +-- and IfaceTypes (in Core expressions). So we parse them as IfaceTypes, +-- and convert to HsTypes here. But the IfaceTypes we can see here +-- are very limited (see the productions for 'ty', so the translation +-- isn't hard +toHsType :: IfaceType -> LHsType RdrName +toHsType (IfaceTyVar v) = noLoc $ HsTyVar (mkRdrUnqual v) +toHsType (IfaceAppTy t1 t2) = noLoc $ HsAppTy (toHsType t1) (toHsType t2) +toHsType (IfaceFunTy t1 t2) = noLoc $ HsFunTy (toHsType t1) (toHsType t2) +toHsType (IfaceTyConApp (IfaceTc tc) ts) = foldl mkHsAppTy (noLoc $ HsTyVar (ifaceExtRdrName tc)) (map toHsType ts) +toHsType (IfaceForAllTy tv t) = add_forall (toHsTvBndr tv) (toHsType t) + +toHsTvBndr :: IfaceTvBndr -> LHsTyVarBndr RdrName +toHsTvBndr (tv,k) = noLoc $ KindedTyVar (mkRdrUnqual tv) k + +ifaceExtRdrName :: IfaceExtName -> RdrName +ifaceExtRdrName (ExtPkg mod occ) = mkOrig mod occ +ifaceExtRdrName other = pprPanic "ParserCore.ifaceExtRdrName" (ppr other) + +add_forall tv (L _ (HsForAllTy exp tvs cxt t)) + = noLoc $ HsForAllTy exp (tv:tvs) cxt t +add_forall tv t + = noLoc $ HsForAllTy Explicit [tv] (noLoc []) t + +happyError :: P a +happyError s l = failP (show l ++ ": Parse error\n") (take 100 s) l +} + diff --git a/compiler/parser/ParserCoreUtils.hs b/compiler/parser/ParserCoreUtils.hs new file mode 100644 index 0000000000..a590fb5c93 --- /dev/null +++ b/compiler/parser/ParserCoreUtils.hs @@ -0,0 +1,72 @@ +module ParserCoreUtils where + +import IO + +data ParseResult a = OkP a | FailP String +type P a = String -> Int -> ParseResult a + +thenP :: P a -> (a -> P b) -> P b +m `thenP` k = \ s l -> + case m s l of + OkP a -> k a s l + FailP s -> FailP s + +returnP :: a -> P a +returnP m _ _ = OkP m + +failP :: String -> P a +failP s s' _ = FailP (s ++ ":" ++ s') + +getCoreModuleName :: FilePath -> IO String +getCoreModuleName fpath = + catch (do + h <- openFile fpath ReadMode + ls <- hGetContents h + let mo = findMod (words ls) + -- make sure we close up the file right away. + (length mo) `seq` return () + hClose h + return mo) + (\ _ -> return "Main") + where + findMod [] = "Main" + findMod ("%module":m:_) = m + findMod (_:xs) = findMod xs + + +data Token = + TKmodule + | TKdata + | TKnewtype + | TKforall + | TKrec + | TKlet + | TKin + | TKcase + | TKof + | TKcoerce + | TKnote + | TKexternal + | TKwild + | TKoparen + | TKcparen + | TKobrace + | TKcbrace + | TKhash + | TKeq + | TKcoloncolon + | TKstar + | TKrarrow + | TKlambda + | TKat + | TKdot + | TKquestion + | TKsemicolon + | TKname String + | TKcname String + | TKinteger Integer + | TKrational Rational + | TKstring String + | TKchar Char + | TKEOF + diff --git a/compiler/parser/RdrHsSyn.lhs b/compiler/parser/RdrHsSyn.lhs new file mode 100644 index 0000000000..8d59e2b22c --- /dev/null +++ b/compiler/parser/RdrHsSyn.lhs @@ -0,0 +1,869 @@ +% +% (c) The University of Glasgow, 1996-2003 + +Functions over HsSyn specialised to RdrName. + +\begin{code} +module RdrHsSyn ( + extractHsTyRdrTyVars, + extractHsRhoRdrTyVars, extractGenericPatTyVars, + + mkHsOpApp, mkClassDecl, + mkHsNegApp, mkHsIntegral, mkHsFractional, + mkHsDo, mkHsSplice, + mkTyData, mkPrefixCon, mkRecCon, mkInlineSpec, + mkRecConstrOrUpdate, -- HsExp -> [HsFieldUpdate] -> P HsExp + + cvBindGroup, + cvBindsAndSigs, + cvTopDecls, + findSplice, mkGroup, + + -- Stuff to do with Foreign declarations + CallConv(..), + mkImport, -- CallConv -> Safety + -- -> (FastString, RdrName, RdrNameHsType) + -- -> P RdrNameHsDecl + mkExport, -- CallConv + -- -> (FastString, RdrName, RdrNameHsType) + -- -> P RdrNameHsDecl + mkExtName, -- RdrName -> CLabelString + mkGadtDecl, -- Located RdrName -> LHsType RdrName -> ConDecl RdrName + + -- Bunch of functions in the parser monad for + -- checking and constructing values + checkPrecP, -- Int -> P Int + checkContext, -- HsType -> P HsContext + checkPred, -- HsType -> P HsPred + checkTyClHdr, -- LHsContext RdrName -> LHsType RdrName -> P (LHsContext RdrName, Located RdrName, [LHsTyVarBndr RdrName]) + checkSynHdr, -- LHsType RdrName -> P (Located RdrName, [LHsTyVarBndr RdrName]) + checkInstType, -- HsType -> P HsType + checkPattern, -- HsExp -> P HsPat + checkPatterns, -- SrcLoc -> [HsExp] -> P [HsPat] + checkDo, -- [Stmt] -> P [Stmt] + checkMDo, -- [Stmt] -> P [Stmt] + checkValDef, -- (SrcLoc, HsExp, HsRhs, [HsDecl]) -> P HsDecl + checkValSig, -- (SrcLoc, HsExp, HsRhs, [HsDecl]) -> P HsDecl + parseError, -- String -> Pa + ) where + +#include "HsVersions.h" + +import HsSyn -- Lots of it +import RdrName ( RdrName, isRdrTyVar, mkUnqual, rdrNameOcc, + isRdrDataCon, isUnqual, getRdrName, isQual, + setRdrNameSpace ) +import BasicTypes ( maxPrecedence, Activation, InlineSpec(..), alwaysInlineSpec, neverInlineSpec ) +import Lexer ( P, failSpanMsgP, extension, bangPatEnabled ) +import TysWiredIn ( unitTyCon ) +import ForeignCall ( CCallConv, Safety, CCallTarget(..), CExportSpec(..), + DNCallSpec(..), DNKind(..), CLabelString ) +import OccName ( srcDataName, varName, isDataOcc, isTcOcc, + occNameString ) +import SrcLoc +import OrdList ( OrdList, fromOL ) +import Bag ( Bag, emptyBag, snocBag, consBag, foldrBag ) +import Outputable +import FastString +import Panic + +import List ( isSuffixOf, nubBy ) +\end{code} + + +%************************************************************************ +%* * +\subsection{A few functions over HsSyn at RdrName} +%* * +%************************************************************************ + +extractHsTyRdrNames finds the free variables of a HsType +It's used when making the for-alls explicit. + +\begin{code} +extractHsTyRdrTyVars :: LHsType RdrName -> [Located RdrName] +extractHsTyRdrTyVars ty = nubBy eqLocated (extract_lty ty []) + +extractHsRhoRdrTyVars :: LHsContext RdrName -> LHsType RdrName -> [Located RdrName] +-- This one takes the context and tau-part of a +-- sigma type and returns their free type variables +extractHsRhoRdrTyVars ctxt ty + = nubBy eqLocated $ extract_lctxt ctxt (extract_lty ty []) + +extract_lctxt ctxt acc = foldr (extract_pred . unLoc) acc (unLoc ctxt) + +extract_pred (HsClassP cls tys) acc = foldr extract_lty acc tys +extract_pred (HsIParam n ty) acc = extract_lty ty acc + +extract_lty (L loc ty) acc + = case ty of + HsTyVar tv -> extract_tv loc tv acc + HsBangTy _ ty -> extract_lty ty acc + HsAppTy ty1 ty2 -> extract_lty ty1 (extract_lty ty2 acc) + HsListTy ty -> extract_lty ty acc + HsPArrTy ty -> extract_lty ty acc + HsTupleTy _ tys -> foldr extract_lty acc tys + HsFunTy ty1 ty2 -> extract_lty ty1 (extract_lty ty2 acc) + HsPredTy p -> extract_pred p acc + HsOpTy ty1 (L loc tv) ty2 -> extract_tv loc tv (extract_lty ty1 (extract_lty ty2 acc)) + HsParTy ty -> extract_lty ty acc + HsNumTy num -> acc + HsSpliceTy _ -> acc -- Type splices mention no type variables + HsKindSig ty k -> extract_lty ty acc + HsForAllTy exp [] cx ty -> extract_lctxt cx (extract_lty ty acc) + HsForAllTy exp tvs cx ty -> acc ++ (filter ((`notElem` locals) . unLoc) $ + extract_lctxt cx (extract_lty ty [])) + where + locals = hsLTyVarNames tvs + +extract_tv :: SrcSpan -> RdrName -> [Located RdrName] -> [Located RdrName] +extract_tv loc tv acc | isRdrTyVar tv = L loc tv : acc + | otherwise = acc + +extractGenericPatTyVars :: LHsBinds RdrName -> [Located RdrName] +-- Get the type variables out of the type patterns in a bunch of +-- possibly-generic bindings in a class declaration +extractGenericPatTyVars binds + = nubBy eqLocated (foldrBag get [] binds) + where + get (L _ (FunBind { fun_matches = MatchGroup ms _ })) acc = foldr (get_m.unLoc) acc ms + get other acc = acc + + get_m (Match (L _ (TypePat ty) : _) _ _) acc = extract_lty ty acc + get_m other acc = acc +\end{code} + + +%************************************************************************ +%* * +\subsection{Construction functions for Rdr stuff} +%* * +%************************************************************************ + +mkClassDecl builds a RdrClassDecl, filling in the names for tycon and datacon +by deriving them from the name of the class. We fill in the names for the +tycon and datacon corresponding to the class, by deriving them from the +name of the class itself. This saves recording the names in the interface +file (which would be equally good). + +Similarly for mkConDecl, mkClassOpSig and default-method names. + + *** See "THE NAMING STORY" in HsDecls **** + +\begin{code} +mkClassDecl (cxt, cname, tyvars) fds sigs mbinds + = ClassDecl { tcdCtxt = cxt, tcdLName = cname, tcdTyVars = tyvars, + tcdFDs = fds, + tcdSigs = sigs, + tcdMeths = mbinds + } + +mkTyData new_or_data (context, tname, tyvars) ksig data_cons maybe_deriv + = TyData { tcdND = new_or_data, tcdCtxt = context, tcdLName = tname, + tcdTyVars = tyvars, tcdCons = data_cons, + tcdKindSig = ksig, tcdDerivs = maybe_deriv } +\end{code} + +\begin{code} +mkHsNegApp :: LHsExpr RdrName -> HsExpr RdrName +-- RdrName If the type checker sees (negate 3#) it will barf, because negate +-- can't take an unboxed arg. But that is exactly what it will see when +-- we write "-3#". So we have to do the negation right now! +mkHsNegApp (L loc e) = f e + where f (HsLit (HsIntPrim i)) = HsLit (HsIntPrim (-i)) + f (HsLit (HsFloatPrim i)) = HsLit (HsFloatPrim (-i)) + f (HsLit (HsDoublePrim i)) = HsLit (HsDoublePrim (-i)) + f expr = NegApp (L loc e) noSyntaxExpr +\end{code} + +%************************************************************************ +%* * +\subsection[cvBinds-etc]{Converting to @HsBinds@, etc.} +%* * +%************************************************************************ + +Function definitions are restructured here. Each is assumed to be recursive +initially, and non recursive definitions are discovered by the dependency +analyser. + + +\begin{code} +-- | Groups together bindings for a single function +cvTopDecls :: OrdList (LHsDecl RdrName) -> [LHsDecl RdrName] +cvTopDecls decls = go (fromOL decls) + where + go :: [LHsDecl RdrName] -> [LHsDecl RdrName] + go [] = [] + go (L l (ValD b) : ds) = L l' (ValD b') : go ds' + where (L l' b', ds') = getMonoBind (L l b) ds + go (d : ds) = d : go ds + +cvBindGroup :: OrdList (LHsDecl RdrName) -> HsValBinds RdrName +cvBindGroup binding + = case (cvBindsAndSigs binding) of { (mbs, sigs) -> + ValBindsIn mbs sigs + } + +cvBindsAndSigs :: OrdList (LHsDecl RdrName) + -> (Bag (LHsBind RdrName), [LSig RdrName]) +-- Input decls contain just value bindings and signatures +cvBindsAndSigs fb = go (fromOL fb) + where + go [] = (emptyBag, []) + go (L l (SigD s) : ds) = (bs, L l s : ss) + where (bs,ss) = go ds + go (L l (ValD b) : ds) = (b' `consBag` bs, ss) + where (b',ds') = getMonoBind (L l b) ds + (bs,ss) = go ds' + +----------------------------------------------------------------------------- +-- Group function bindings into equation groups + +getMonoBind :: LHsBind RdrName -> [LHsDecl RdrName] + -> (LHsBind RdrName, [LHsDecl RdrName]) +-- Suppose (b',ds') = getMonoBind b ds +-- ds is a *reversed* list of parsed bindings +-- b is a MonoBinds that has just been read off the front + +-- Then b' is the result of grouping more equations from ds that +-- belong with b into a single MonoBinds, and ds' is the depleted +-- list of parsed bindings. +-- +-- No AndMonoBinds or EmptyMonoBinds here; just single equations + +getMonoBind (L loc bind@(FunBind { fun_id = L _ f, fun_matches = MatchGroup mtchs _ })) binds + | has_args mtchs + = go mtchs loc binds + where + go mtchs1 loc1 (L loc2 (ValD (FunBind { fun_id = L _ f2, fun_matches = MatchGroup mtchs2 _ })) : binds) + | f == f2 = go (mtchs2++mtchs1) loc binds + where loc = combineSrcSpans loc1 loc2 + go mtchs1 loc binds + = (L loc (bind { fun_matches = mkMatchGroup (reverse mtchs1) }), binds) + -- Reverse the final matches, to get it back in the right order + +getMonoBind bind binds = (bind, binds) + +has_args ((L _ (Match args _ _)) : _) = not (null args) + -- Don't group together FunBinds if they have + -- no arguments. This is necessary now that variable bindings + -- with no arguments are now treated as FunBinds rather + -- than pattern bindings (tests/rename/should_fail/rnfail002). +\end{code} + +\begin{code} +findSplice :: [LHsDecl a] -> (HsGroup a, Maybe (SpliceDecl a, [LHsDecl a])) +findSplice ds = addl emptyRdrGroup ds + +mkGroup :: [LHsDecl a] -> HsGroup a +mkGroup ds = addImpDecls emptyRdrGroup ds + +addImpDecls :: HsGroup a -> [LHsDecl a] -> HsGroup a +-- The decls are imported, and should not have a splice +addImpDecls group decls = case addl group decls of + (group', Nothing) -> group' + other -> panic "addImpDecls" + +addl :: HsGroup a -> [LHsDecl a] -> (HsGroup a, Maybe (SpliceDecl a, [LHsDecl a])) + -- This stuff reverses the declarations (again) but it doesn't matter + +-- Base cases +addl gp [] = (gp, Nothing) +addl gp (L l d : ds) = add gp l d ds + + +add :: HsGroup a -> SrcSpan -> HsDecl a -> [LHsDecl a] + -> (HsGroup a, Maybe (SpliceDecl a, [LHsDecl a])) + +add gp l (SpliceD e) ds = (gp, Just (e, ds)) + +-- Class declarations: pull out the fixity signatures to the top +add gp@(HsGroup {hs_tyclds = ts, hs_fixds = fs}) l (TyClD d) ds + | isClassDecl d = + let fsigs = [ L l f | L l (FixSig f) <- tcdSigs d ] in + addl (gp { hs_tyclds = L l d : ts, hs_fixds = fsigs ++ fs }) ds + | otherwise = + addl (gp { hs_tyclds = L l d : ts }) ds + +-- Signatures: fixity sigs go a different place than all others +add gp@(HsGroup {hs_fixds = ts}) l (SigD (FixSig f)) ds + = addl (gp {hs_fixds = L l f : ts}) ds +add gp@(HsGroup {hs_valds = ts}) l (SigD d) ds + = addl (gp {hs_valds = add_sig (L l d) ts}) ds + +-- Value declarations: use add_bind +add gp@(HsGroup {hs_valds = ts}) l (ValD d) ds + = addl (gp { hs_valds = add_bind (L l d) ts }) ds + +-- The rest are routine +add gp@(HsGroup {hs_instds = ts}) l (InstD d) ds + = addl (gp { hs_instds = L l d : ts }) ds +add gp@(HsGroup {hs_defds = ts}) l (DefD d) ds + = addl (gp { hs_defds = L l d : ts }) ds +add gp@(HsGroup {hs_fords = ts}) l (ForD d) ds + = addl (gp { hs_fords = L l d : ts }) ds +add gp@(HsGroup {hs_depds = ts}) l (DeprecD d) ds + = addl (gp { hs_depds = L l d : ts }) ds +add gp@(HsGroup {hs_ruleds = ts}) l (RuleD d) ds + = addl (gp { hs_ruleds = L l d : ts }) ds + +add_bind b (ValBindsIn bs sigs) = ValBindsIn (bs `snocBag` b) sigs +add_sig s (ValBindsIn bs sigs) = ValBindsIn bs (s:sigs) +\end{code} + +%************************************************************************ +%* * +\subsection[PrefixToHS-utils]{Utilities for conversion} +%* * +%************************************************************************ + + +\begin{code} +----------------------------------------------------------------------------- +-- mkPrefixCon + +-- When parsing data declarations, we sometimes inadvertently parse +-- a constructor application as a type (eg. in data T a b = C a b `D` E a b) +-- This function splits up the type application, adds any pending +-- arguments, and converts the type constructor back into a data constructor. + +mkPrefixCon :: LHsType RdrName -> [LBangType RdrName] + -> P (Located RdrName, HsConDetails RdrName (LBangType RdrName)) +mkPrefixCon ty tys + = split ty tys + where + split (L _ (HsAppTy t u)) ts = split t (u : ts) + split (L l (HsTyVar tc)) ts = do data_con <- tyConToDataCon l tc + return (data_con, PrefixCon ts) + split (L l _) _ = parseError l "parse error in data/newtype declaration" + +mkRecCon :: Located RdrName -> [([Located RdrName], LBangType RdrName)] + -> P (Located RdrName, HsConDetails RdrName (LBangType RdrName)) +mkRecCon (L loc con) fields + = do data_con <- tyConToDataCon loc con + return (data_con, RecCon [ (l,t) | (ls,t) <- fields, l <- ls ]) + +tyConToDataCon :: SrcSpan -> RdrName -> P (Located RdrName) +tyConToDataCon loc tc + | isTcOcc (rdrNameOcc tc) + = return (L loc (setRdrNameSpace tc srcDataName)) + | otherwise + = parseError loc (showSDoc (text "Not a constructor:" <+> quotes (ppr tc))) + +---------------------------------------------------------------------------- +-- Various Syntactic Checks + +checkInstType :: LHsType RdrName -> P (LHsType RdrName) +checkInstType (L l t) + = case t of + HsForAllTy exp tvs ctxt ty -> do + dict_ty <- checkDictTy ty + return (L l (HsForAllTy exp tvs ctxt dict_ty)) + + HsParTy ty -> checkInstType ty + + ty -> do dict_ty <- checkDictTy (L l ty) + return (L l (HsForAllTy Implicit [] (noLoc []) dict_ty)) + +checkTyVars :: [LHsType RdrName] -> P [LHsTyVarBndr RdrName] +checkTyVars tvs + = mapM chk tvs + where + -- Check that the name space is correct! + chk (L l (HsKindSig (L _ (HsTyVar tv)) k)) + | isRdrTyVar tv = return (L l (KindedTyVar tv k)) + chk (L l (HsTyVar tv)) + | isRdrTyVar tv = return (L l (UserTyVar tv)) + chk (L l other) + = parseError l "Type found where type variable expected" + +checkSynHdr :: LHsType RdrName -> P (Located RdrName, [LHsTyVarBndr RdrName]) +checkSynHdr ty = do { (_, tc, tvs) <- checkTyClHdr (noLoc []) ty + ; return (tc, tvs) } + +checkTyClHdr :: LHsContext RdrName -> LHsType RdrName + -> P (LHsContext RdrName, Located RdrName, [LHsTyVarBndr RdrName]) +-- The header of a type or class decl should look like +-- (C a, D b) => T a b +-- or T a b +-- or a + b +-- etc +checkTyClHdr (L l cxt) ty + = do (tc, tvs) <- gol ty [] + mapM_ chk_pred cxt + return (L l cxt, tc, tvs) + where + gol (L l ty) acc = go l ty acc + + go l (HsTyVar tc) acc + | not (isRdrTyVar tc) = checkTyVars acc >>= \ tvs -> + return (L l tc, tvs) + go l (HsOpTy t1 tc t2) acc = checkTyVars (t1:t2:acc) >>= \ tvs -> + return (tc, tvs) + go l (HsParTy ty) acc = gol ty acc + go l (HsAppTy t1 t2) acc = gol t1 (t2:acc) + go l other acc = parseError l "Malformed LHS to type of class declaration" + + -- The predicates in a type or class decl must all + -- be HsClassPs. They need not all be type variables, + -- even in Haskell 98. E.g. class (Monad m, Monad (t m)) => MonadT t m + chk_pred (L l (HsClassP _ args)) = return () + chk_pred (L l _) + = parseError l "Malformed context in type or class declaration" + + +checkContext :: LHsType RdrName -> P (LHsContext RdrName) +checkContext (L l t) + = check t + where + check (HsTupleTy _ ts) -- (Eq a, Ord b) shows up as a tuple type + = do ctx <- mapM checkPred ts + return (L l ctx) + + check (HsParTy ty) -- to be sure HsParTy doesn't get into the way + = check (unLoc ty) + + check (HsTyVar t) -- Empty context shows up as a unit type () + | t == getRdrName unitTyCon = return (L l []) + + check t + = do p <- checkPred (L l t) + return (L l [p]) + + +checkPred :: LHsType RdrName -> P (LHsPred RdrName) +-- Watch out.. in ...deriving( Show )... we use checkPred on +-- the list of partially applied predicates in the deriving, +-- so there can be zero args. +checkPred (L spn (HsPredTy (HsIParam n ty))) + = return (L spn (HsIParam n ty)) +checkPred (L spn ty) + = check spn ty [] + where + checkl (L l ty) args = check l ty args + + check _loc (HsTyVar t) args | not (isRdrTyVar t) + = return (L spn (HsClassP t args)) + check _loc (HsAppTy l r) args = checkl l (r:args) + check _loc (HsOpTy l (L loc tc) r) args = check loc (HsTyVar tc) (l:r:args) + check _loc (HsParTy t) args = checkl t args + check loc _ _ = parseError loc "malformed class assertion" + +checkDictTy :: LHsType RdrName -> P (LHsType RdrName) +checkDictTy (L spn ty) = check ty [] + where + check (HsTyVar t) args | not (isRdrTyVar t) + = return (L spn (HsPredTy (HsClassP t args))) + check (HsAppTy l r) args = check (unLoc l) (r:args) + check (HsParTy t) args = check (unLoc t) args + check _ _ = parseError spn "Malformed context in instance header" + +--------------------------------------------------------------------------- +-- Checking statements in a do-expression +-- We parse do { e1 ; e2 ; } +-- as [ExprStmt e1, ExprStmt e2] +-- checkDo (a) checks that the last thing is an ExprStmt +-- (b) returns it separately +-- same comments apply for mdo as well + +checkDo = checkDoMDo "a " "'do'" +checkMDo = checkDoMDo "an " "'mdo'" + +checkDoMDo :: String -> String -> SrcSpan -> [LStmt RdrName] -> P ([LStmt RdrName], LHsExpr RdrName) +checkDoMDo pre nm loc [] = parseError loc ("Empty " ++ nm ++ " construct") +checkDoMDo pre nm loc ss = do + check ss + where + check [L l (ExprStmt e _ _)] = return ([], e) + check [L l _] = parseError l ("The last statement in " ++ pre ++ nm ++ + " construct must be an expression") + check (s:ss) = do + (ss',e') <- check ss + return ((s:ss'),e') + +-- ------------------------------------------------------------------------- +-- Checking Patterns. + +-- We parse patterns as expressions and check for valid patterns below, +-- converting the expression into a pattern at the same time. + +checkPattern :: LHsExpr RdrName -> P (LPat RdrName) +checkPattern e = checkLPat e + +checkPatterns :: [LHsExpr RdrName] -> P [LPat RdrName] +checkPatterns es = mapM checkPattern es + +checkLPat :: LHsExpr RdrName -> P (LPat RdrName) +checkLPat e@(L l _) = checkPat l e [] + +checkPat :: SrcSpan -> LHsExpr RdrName -> [LPat RdrName] -> P (LPat RdrName) +checkPat loc (L l (HsVar c)) args + | isRdrDataCon c = return (L loc (ConPatIn (L l c) (PrefixCon args))) +checkPat loc e args -- OK to let this happen even if bang-patterns + -- are not enabled, because there is no valid + -- non-bang-pattern parse of (C ! e) + | Just (e', args') <- splitBang e + = do { args'' <- checkPatterns args' + ; checkPat loc e' (args'' ++ args) } +checkPat loc (L _ (HsApp f x)) args + = do { x <- checkLPat x; checkPat loc f (x:args) } +checkPat loc (L _ e) [] + = do { p <- checkAPat loc e; return (L loc p) } +checkPat loc pat _some_args + = patFail loc + +checkAPat loc e = case e of + EWildPat -> return (WildPat placeHolderType) + HsVar x | isQual x -> parseError loc ("Qualified variable in pattern: " + ++ showRdrName x) + | otherwise -> return (VarPat x) + HsLit l -> return (LitPat l) + + -- Overloaded numeric patterns (e.g. f 0 x = x) + -- Negation is recorded separately, so that the literal is zero or +ve + -- NB. Negative *primitive* literals are already handled by + -- RdrHsSyn.mkHsNegApp + HsOverLit pos_lit -> return (mkNPat pos_lit Nothing) + NegApp (L _ (HsOverLit pos_lit)) _ + -> return (mkNPat pos_lit (Just noSyntaxExpr)) + + SectionR (L _ (HsVar bang)) e + | bang == bang_RDR -> checkLPat e >>= (return . BangPat) + ELazyPat e -> checkLPat e >>= (return . LazyPat) + EAsPat n e -> checkLPat e >>= (return . AsPat n) + ExprWithTySig e t -> checkLPat e >>= \e -> + -- Pattern signatures are parsed as sigtypes, + -- but they aren't explicit forall points. Hence + -- we have to remove the implicit forall here. + let t' = case t of + L _ (HsForAllTy Implicit _ (L _ []) ty) -> ty + other -> other + in + return (SigPatIn e t') + + -- n+k patterns + OpApp (L nloc (HsVar n)) (L _ (HsVar plus)) _ + (L _ (HsOverLit lit@(HsIntegral _ _))) + | plus == plus_RDR + -> return (mkNPlusKPat (L nloc n) lit) + + OpApp l op fix r -> checkLPat l >>= \l -> + checkLPat r >>= \r -> + case op of + L cl (HsVar c) | isDataOcc (rdrNameOcc c) + -> return (ConPatIn (L cl c) (InfixCon l r)) + _ -> patFail loc + + HsPar e -> checkLPat e >>= (return . ParPat) + ExplicitList _ es -> mapM (\e -> checkLPat e) es >>= \ps -> + return (ListPat ps placeHolderType) + ExplicitPArr _ es -> mapM (\e -> checkLPat e) es >>= \ps -> + return (PArrPat ps placeHolderType) + + ExplicitTuple es b -> mapM (\e -> checkLPat e) es >>= \ps -> + return (TuplePat ps b placeHolderType) + + RecordCon c _ fs -> mapM checkPatField fs >>= \fs -> + return (ConPatIn c (RecCon fs)) +-- Generics + HsType ty -> return (TypePat ty) + _ -> patFail loc + +plus_RDR, bang_RDR :: RdrName +plus_RDR = mkUnqual varName FSLIT("+") -- Hack +bang_RDR = mkUnqual varName FSLIT("!") -- Hack + +checkPatField :: (Located RdrName, LHsExpr RdrName) -> P (Located RdrName, LPat RdrName) +checkPatField (n,e) = do + p <- checkLPat e + return (n,p) + +patFail loc = parseError loc "Parse error in pattern" + + +--------------------------------------------------------------------------- +-- Check Equation Syntax + +checkValDef :: LHsExpr RdrName + -> Maybe (LHsType RdrName) + -> Located (GRHSs RdrName) + -> P (HsBind RdrName) + +checkValDef lhs opt_sig grhss + = do { mb_fun <- isFunLhs lhs + ; case mb_fun of + Just (fun, is_infix, pats) -> checkFunBind (getLoc lhs) + fun is_infix pats opt_sig grhss + Nothing -> checkPatBind lhs grhss } + +checkFunBind lhs_loc fun is_infix pats opt_sig (L rhs_span grhss) + | isQual (unLoc fun) + = parseError (getLoc fun) ("Qualified name in function definition: " ++ + showRdrName (unLoc fun)) + | otherwise + = do ps <- checkPatterns pats + let match_span = combineSrcSpans lhs_loc rhs_span + matches = mkMatchGroup [L match_span (Match ps opt_sig grhss)] + return (FunBind { fun_id = fun, fun_infix = is_infix, fun_matches = matches, + fun_co_fn = idCoercion, bind_fvs = placeHolderNames }) + -- The span of the match covers the entire equation. + -- That isn't quite right, but it'll do for now. + +checkPatBind lhs (L _ grhss) + = do { lhs <- checkPattern lhs + ; return (PatBind lhs grhss placeHolderType placeHolderNames) } + +checkValSig + :: LHsExpr RdrName + -> LHsType RdrName + -> P (Sig RdrName) +checkValSig (L l (HsVar v)) ty + | isUnqual v && not (isDataOcc (rdrNameOcc v)) + = return (TypeSig (L l v) ty) +checkValSig (L l other) ty + = parseError l "Invalid type signature" + +mkGadtDecl + :: Located RdrName + -> LHsType RdrName -- assuming HsType + -> ConDecl RdrName +mkGadtDecl name (L _ (HsForAllTy _ qvars cxt ty)) = ConDecl + { con_name = name + , con_explicit = Implicit + , con_qvars = qvars + , con_cxt = cxt + , con_details = PrefixCon args + , con_res = ResTyGADT res + } + where + (args, res) = splitHsFunType ty +mkGadtDecl name ty = ConDecl + { con_name = name + , con_explicit = Implicit + , con_qvars = [] + , con_cxt = noLoc [] + , con_details = PrefixCon args + , con_res = ResTyGADT res + } + where + (args, res) = splitHsFunType ty + +-- A variable binding is parsed as a FunBind. + + + -- The parser left-associates, so there should + -- not be any OpApps inside the e's +splitBang :: LHsExpr RdrName -> Maybe (LHsExpr RdrName, [LHsExpr RdrName]) +-- Splits (f ! g a b) into (f, [(! g), a, g]) +splitBang (L loc (OpApp l_arg bang@(L loc' (HsVar op)) _ r_arg)) + | op == bang_RDR = Just (l_arg, L loc (SectionR bang arg1) : argns) + where + (arg1,argns) = split_bang r_arg [] + split_bang (L _ (HsApp f e)) es = split_bang f (e:es) + split_bang e es = (e,es) +splitBang other = Nothing + +isFunLhs :: LHsExpr RdrName + -> P (Maybe (Located RdrName, Bool, [LHsExpr RdrName])) +-- Just (fun, is_infix, arg_pats) if e is a function LHS +isFunLhs e = go e [] + where + go (L loc (HsVar f)) es + | not (isRdrDataCon f) = return (Just (L loc f, False, es)) + go (L _ (HsApp f e)) es = go f (e:es) + go (L _ (HsPar e)) es@(_:_) = go e es + go e@(L loc (OpApp l (L loc' (HsVar op)) fix r)) es + | Just (e',es') <- splitBang e + = do { bang_on <- extension bangPatEnabled + ; if bang_on then go e' (es' ++ es) + else return (Just (L loc' op, True, (l:r:es))) } + -- No bangs; behave just like the next case + | not (isRdrDataCon op) + = return (Just (L loc' op, True, (l:r:es))) + | otherwise + = do { mb_l <- go l es + ; case mb_l of + Just (op', True, j : k : es') + -> return (Just (op', True, j : op_app : es')) + where + op_app = L loc (OpApp k (L loc' (HsVar op)) fix r) + _ -> return Nothing } + go _ _ = return Nothing + +--------------------------------------------------------------------------- +-- Miscellaneous utilities + +checkPrecP :: Located Int -> P Int +checkPrecP (L l i) + | 0 <= i && i <= maxPrecedence = return i + | otherwise = parseError l "Precedence out of range" + +mkRecConstrOrUpdate + :: LHsExpr RdrName + -> SrcSpan + -> HsRecordBinds RdrName + -> P (HsExpr RdrName) + +mkRecConstrOrUpdate (L l (HsVar c)) loc fs | isRdrDataCon c + = return (RecordCon (L l c) noPostTcExpr fs) +mkRecConstrOrUpdate exp loc fs@(_:_) + = return (RecordUpd exp fs placeHolderType placeHolderType) +mkRecConstrOrUpdate _ loc [] + = parseError loc "Empty record update" + +mkInlineSpec :: Maybe Activation -> Bool -> InlineSpec +-- The Maybe is becuase the user can omit the activation spec (and usually does) +mkInlineSpec Nothing True = alwaysInlineSpec -- INLINE +mkInlineSpec Nothing False = neverInlineSpec -- NOINLINE +mkInlineSpec (Just act) inl = Inline act inl + + +----------------------------------------------------------------------------- +-- utilities for foreign declarations + +-- supported calling conventions +-- +data CallConv = CCall CCallConv -- ccall or stdcall + | DNCall -- .NET + +-- construct a foreign import declaration +-- +mkImport :: CallConv + -> Safety + -> (Located FastString, Located RdrName, LHsType RdrName) + -> P (HsDecl RdrName) +mkImport (CCall cconv) safety (entity, v, ty) = do + importSpec <- parseCImport entity cconv safety v + return (ForD (ForeignImport v ty importSpec False)) +mkImport (DNCall ) _ (entity, v, ty) = do + spec <- parseDImport entity + return $ ForD (ForeignImport v ty (DNImport spec) False) + +-- parse the entity string of a foreign import declaration for the `ccall' or +-- `stdcall' calling convention' +-- +parseCImport :: Located FastString + -> CCallConv + -> Safety + -> Located RdrName + -> P ForeignImport +parseCImport (L loc entity) cconv safety v + -- FIXME: we should allow white space around `dynamic' and `wrapper' -=chak + | entity == FSLIT ("dynamic") = + return $ CImport cconv safety nilFS nilFS (CFunction DynamicTarget) + | entity == FSLIT ("wrapper") = + return $ CImport cconv safety nilFS nilFS CWrapper + | otherwise = parse0 (unpackFS entity) + where + -- using the static keyword? + parse0 (' ': rest) = parse0 rest + parse0 ('s':'t':'a':'t':'i':'c':rest) = parse1 rest + parse0 rest = parse1 rest + -- check for header file name + parse1 "" = parse4 "" nilFS False nilFS + parse1 (' ':rest) = parse1 rest + parse1 str@('&':_ ) = parse2 str nilFS + parse1 str@('[':_ ) = parse3 str nilFS False + parse1 str + | ".h" `isSuffixOf` first = parse2 rest (mkFastString first) + | otherwise = parse4 str nilFS False nilFS + where + (first, rest) = break (\c -> c == ' ' || c == '&' || c == '[') str + -- check for address operator (indicating a label import) + parse2 "" header = parse4 "" header False nilFS + parse2 (' ':rest) header = parse2 rest header + parse2 ('&':rest) header = parse3 rest header True + parse2 str@('[':_ ) header = parse3 str header False + parse2 str header = parse4 str header False nilFS + -- check for library object name + parse3 (' ':rest) header isLbl = parse3 rest header isLbl + parse3 ('[':rest) header isLbl = + case break (== ']') rest of + (lib, ']':rest) -> parse4 rest header isLbl (mkFastString lib) + _ -> parseError loc "Missing ']' in entity" + parse3 str header isLbl = parse4 str header isLbl nilFS + -- check for name of C function + parse4 "" header isLbl lib = build (mkExtName (unLoc v)) header isLbl lib + parse4 (' ':rest) header isLbl lib = parse4 rest header isLbl lib + parse4 str header isLbl lib + | all (== ' ') rest = build (mkFastString first) header isLbl lib + | otherwise = parseError loc "Malformed entity string" + where + (first, rest) = break (== ' ') str + -- + build cid header False lib = return $ + CImport cconv safety header lib (CFunction (StaticTarget cid)) + build cid header True lib = return $ + CImport cconv safety header lib (CLabel cid ) + +-- +-- Unravel a dotnet spec string. +-- +parseDImport :: Located FastString -> P DNCallSpec +parseDImport (L loc entity) = parse0 comps + where + comps = words (unpackFS entity) + + parse0 [] = d'oh + parse0 (x : xs) + | x == "static" = parse1 True xs + | otherwise = parse1 False (x:xs) + + parse1 _ [] = d'oh + parse1 isStatic (x:xs) + | x == "method" = parse2 isStatic DNMethod xs + | x == "field" = parse2 isStatic DNField xs + | x == "ctor" = parse2 isStatic DNConstructor xs + parse1 isStatic xs = parse2 isStatic DNMethod xs + + parse2 _ _ [] = d'oh + parse2 isStatic kind (('[':x):xs) = + case x of + [] -> d'oh + vs | last vs == ']' -> parse3 isStatic kind (init vs) xs + parse2 isStatic kind xs = parse3 isStatic kind "" xs + + parse3 isStatic kind assem [x] = + return (DNCallSpec isStatic kind assem x + -- these will be filled in once known. + (error "FFI-dotnet-args") + (error "FFI-dotnet-result")) + parse3 _ _ _ _ = d'oh + + d'oh = parseError loc "Malformed entity string" + +-- construct a foreign export declaration +-- +mkExport :: CallConv + -> (Located FastString, Located RdrName, LHsType RdrName) + -> P (HsDecl RdrName) +mkExport (CCall cconv) (L loc entity, v, ty) = return $ + ForD (ForeignExport v ty (CExport (CExportStatic entity' cconv)) False) + where + entity' | nullFS entity = mkExtName (unLoc v) + | otherwise = entity +mkExport DNCall (L loc entity, v, ty) = + parseError (getLoc v){-TODO: not quite right-} + "Foreign export is not yet supported for .NET" + +-- Supplying the ext_name in a foreign decl is optional; if it +-- isn't there, the Haskell name is assumed. Note that no transformation +-- of the Haskell name is then performed, so if you foreign export (++), +-- it's external name will be "++". Too bad; it's important because we don't +-- want z-encoding (e.g. names with z's in them shouldn't be doubled) +-- +mkExtName :: RdrName -> CLabelString +mkExtName rdrNm = mkFastString (occNameString (rdrNameOcc rdrNm)) +\end{code} + + +----------------------------------------------------------------------------- +-- Misc utils + +\begin{code} +showRdrName :: RdrName -> String +showRdrName r = showSDoc (ppr r) + +parseError :: SrcSpan -> String -> P a +parseError span s = failSpanMsgP span s +\end{code} diff --git a/compiler/parser/cutils.c b/compiler/parser/cutils.c new file mode 100644 index 0000000000..08832f298d --- /dev/null +++ b/compiler/parser/cutils.c @@ -0,0 +1,70 @@ +/* +These utility routines are used various +places in the GHC library. +*/ + +/* For GHC 4.08, we are relying on the fact that RtsFlags has + * compatible layout with the current version, because we're + * #including the current version of RtsFlags.h below. 4.08 didn't + * ship with its own RtsFlags.h, unfortunately. For later GHC + * versions, we #include the correct RtsFlags.h. + */ +#if __GLASGOW_HASKELL__ < 502 +#include "../includes/Rts.h" +#include "../includes/RtsFlags.h" +#else +#include "Rts.h" +#include "RtsFlags.h" +#endif + +#include "HsFFI.h" + +#include <string.h> + +#ifdef HAVE_UNISTD_H +#include <unistd.h> +#endif + +/* +Calling 'strlen' and 'memcpy' directly gives problems with GCC's inliner, +and causes gcc to require too many registers on x84 +*/ + +HsInt +ghc_strlen( HsAddr a ) +{ + return (strlen((char *)a)); +} + +HsInt +ghc_memcmp( HsAddr a1, HsAddr a2, HsInt len ) +{ + return (memcmp((char *)a1, a2, len)); +} + +HsInt +ghc_memcmp_off( HsAddr a1, HsInt i, HsAddr a2, HsInt len ) +{ + return (memcmp((char *)a1 + i, a2, len)); +} + +void +enableTimingStats( void ) /* called from the driver */ +{ +#if __GLASGOW_HASKELL__ >= 411 + RtsFlags.GcFlags.giveStats = ONELINE_GC_STATS; +#endif + /* ignored when bootstrapping with an older GHC */ +} + +void +setHeapSize( HsInt size ) +{ + RtsFlags.GcFlags.heapSizeSuggestion = size / BLOCK_SIZE; + if (RtsFlags.GcFlags.maxHeapSize != 0 && + RtsFlags.GcFlags.heapSizeSuggestion > RtsFlags.GcFlags.maxHeapSize) { + RtsFlags.GcFlags.maxHeapSize = RtsFlags.GcFlags.heapSizeSuggestion; + } +} + + diff --git a/compiler/parser/cutils.h b/compiler/parser/cutils.h new file mode 100644 index 0000000000..c7c1867ded --- /dev/null +++ b/compiler/parser/cutils.h @@ -0,0 +1,16 @@ +/* ----------------------------------------------------------------------------- + * + * Utility C functions. + * + * -------------------------------------------------------------------------- */ + +#include "HsFFI.h" + +// Out-of-line string functions, see PrimPacked.lhs +HsInt ghc_strlen( HsAddr a ); +HsInt ghc_memcmp( HsAddr a1, HsAddr a2, HsInt len ); +HsInt ghc_memcmp_off( HsAddr a1, HsInt i, HsAddr a2, HsInt len ); + + +void enableTimingStats( void ); +void setHeapSize( HsInt size ); diff --git a/compiler/parser/hschooks.c b/compiler/parser/hschooks.c new file mode 100644 index 0000000000..f3e7447a49 --- /dev/null +++ b/compiler/parser/hschooks.c @@ -0,0 +1,55 @@ +/* +These routines customise the error messages +for various bits of the RTS. They are linked +in instead of the defaults. +*/ + +/* For GHC 4.08, we are relying on the fact that RtsFlags has + * compatible layout with the current version, because we're + * #including the current version of RtsFlags.h below. 4.08 didn't + * ship with its own RtsFlags.h, unfortunately. For later GHC + * versions, we #include the correct RtsFlags.h. + */ +#if __GLASGOW_HASKELL__ < 502 +#include "../includes/Rts.h" +#include "../includes/RtsFlags.h" +#else +#include "Rts.h" +#include "RtsFlags.h" +#endif + +#include "HsFFI.h" + +#include <string.h> + +#ifdef HAVE_UNISTD_H +#include <unistd.h> +#endif + +void +defaultsHook (void) +{ + RtsFlags.GcFlags.heapSizeSuggestion = 6*1024*1024 / BLOCK_SIZE; + RtsFlags.GcFlags.maxStkSize = 8*1024*1024 / sizeof(W_); +#if __GLASGOW_HASKELL__ >= 411 + /* GHC < 4.11 didn't have these */ + RtsFlags.GcFlags.giveStats = COLLECT_GC_STATS; + RtsFlags.GcFlags.statsFile = stderr; +#endif +} + +void +OutOfHeapHook (unsigned long request_size/* always zero these days */, + unsigned long heap_size) + /* both in bytes */ +{ + fprintf(stderr, "GHC's heap exhausted: current limit is %lu bytes;\nUse the `-M<size>' option to increase the total heap size.\n", + heap_size); +} + +void +StackOverflowHook (unsigned long stack_size) /* in bytes */ +{ + fprintf(stderr, "GHC stack-space overflow: current limit is %ld bytes.\nUse the `-K<size>' option to increase it.\n", stack_size); +} + diff --git a/compiler/parser/hschooks.h b/compiler/parser/hschooks.h new file mode 100644 index 0000000000..4ce1c0f93d --- /dev/null +++ b/compiler/parser/hschooks.h @@ -0,0 +1,9 @@ +/* ----------------------------------------------------------------------------- + * $Id: hschooks.h,v 1.4 2002/04/22 14:54:10 simonmar Exp $ + * + * Hooks into the RTS from the compiler. + * + * -------------------------------------------------------------------------- */ + +#include "HsFFI.h" + |