diff options
author | Vladislav Zavialov <vlad.z.4096@gmail.com> | 2020-02-15 12:32:30 +0300 |
---|---|---|
committer | Vladislav Zavialov <vlad.z.4096@gmail.com> | 2020-02-22 20:17:37 +0300 |
commit | a5ec380589c7e6dc9e3416b6409ad49153acdfff (patch) | |
tree | 610784e13d2cbdfe71cfe845182dcf6e71c87182 /compiler/GHC/Cmm/Lexer.hs | |
parent | 9d09411122b9b534b96e988b6d3f6d7eb04b8f66 (diff) | |
download | haskell-a5ec380589c7e6dc9e3416b6409ad49153acdfff.tar.gz |
WIP: Use alex/happy as -pgmF preprocessorswip/pgmf
Diffstat (limited to 'compiler/GHC/Cmm/Lexer.hs')
-rw-r--r-- | compiler/GHC/Cmm/Lexer.hs | 370 |
1 files changed, 370 insertions, 0 deletions
diff --git a/compiler/GHC/Cmm/Lexer.hs b/compiler/GHC/Cmm/Lexer.hs new file mode 100644 index 0000000000..eeb68459a2 --- /dev/null +++ b/compiler/GHC/Cmm/Lexer.hs @@ -0,0 +1,370 @@ +{-# OPTIONS -pgmF utils/alex.sh -F #-} + +----------------------------------------------------------------------------- +-- +-- (c) The University of Glasgow, 2004-2006 +-- +-- Lexer for concrete Cmm. We try to stay close to the C-- spec, but there +-- are a few minor differences: +-- +-- * extra keywords for our macros, and float32/float64 types +-- * global registers (Sp,Hp, etc.) +-- +----------------------------------------------------------------------------- + +{ +module GHC.Cmm.Lexer ( + CmmToken(..), cmmlex, + ) where + +import GhcPrelude + +import GHC.Cmm.Expr + +import Lexer +import GHC.Cmm.Monad +import SrcLoc +import UniqFM +import StringBuffer +import FastString +import Ctype +import Util +--import TRACE + +import Data.Word +import Data.Char +} + +$whitechar = [\ \t\n\r\f\v\xa0] -- \xa0 is Unicode no-break space +$white_no_nl = $whitechar # \n + +$ascdigit = 0-9 +$unidigit = \x01 -- Trick Alex into handling Unicode. See alexGetChar. +$digit = [$ascdigit $unidigit] +$octit = 0-7 +$hexit = [$digit A-F a-f] + +$unilarge = \x03 -- Trick Alex into handling Unicode. See alexGetChar. +$asclarge = [A-Z \xc0-\xd6 \xd8-\xde] +$large = [$asclarge $unilarge] + +$unismall = \x04 -- Trick Alex into handling Unicode. See alexGetChar. +$ascsmall = [a-z \xdf-\xf6 \xf8-\xff] +$small = [$ascsmall $unismall \_] + +$namebegin = [$large $small \. \$ \@] +$namechar = [$namebegin $digit] + +@decimal = $digit+ +@octal = $octit+ +@hexadecimal = $hexit+ +@exponent = [eE] [\-\+]? @decimal + +@floating_point = @decimal \. @decimal @exponent? | @decimal @exponent + +@escape = \\ ([abfnrt\\\'\"\?] | x $hexit{1,2} | $octit{1,3}) +@strchar = ($printable # [\"\\]) | @escape + +cmm :- + +$white_no_nl+ ; +^\# pragma .* \n ; -- Apple GCC 3.3 CPP generates pragmas in its output + +^\# (line)? { begin line_prag } + +-- single-line line pragmas, of the form +-- # <line> "<file>" <extra-stuff> \n +<line_prag> $digit+ { setLine line_prag1 } +<line_prag1> \" [^\"]* \" { setFile line_prag2 } +<line_prag2> .* { pop } + +<0> { + \n ; + + [\:\;\{\}\[\]\(\)\=\`\~\/\*\%\-\+\&\^\|\>\<\,\!] { special_char } + + ".." { kw CmmT_DotDot } + "::" { kw CmmT_DoubleColon } + ">>" { kw CmmT_Shr } + "<<" { kw CmmT_Shl } + ">=" { kw CmmT_Ge } + "<=" { kw CmmT_Le } + "==" { kw CmmT_Eq } + "!=" { kw CmmT_Ne } + "&&" { kw CmmT_BoolAnd } + "||" { kw CmmT_BoolOr } + + "True" { kw CmmT_True } + "False" { kw CmmT_False } + "likely" { kw CmmT_likely} + + P@decimal { global_regN (\n -> VanillaReg n VGcPtr) } + R@decimal { global_regN (\n -> VanillaReg n VNonGcPtr) } + F@decimal { global_regN FloatReg } + D@decimal { global_regN DoubleReg } + L@decimal { global_regN LongReg } + Sp { global_reg Sp } + SpLim { global_reg SpLim } + Hp { global_reg Hp } + HpLim { global_reg HpLim } + CCCS { global_reg CCCS } + CurrentTSO { global_reg CurrentTSO } + CurrentNursery { global_reg CurrentNursery } + HpAlloc { global_reg HpAlloc } + BaseReg { global_reg BaseReg } + MachSp { global_reg MachSp } + UnwindReturnReg { global_reg UnwindReturnReg } + + $namebegin $namechar* { name } + + 0 @octal { tok_octal } + @decimal { tok_decimal } + 0[xX] @hexadecimal { tok_hexadecimal } + @floating_point { strtoken tok_float } + + \" @strchar* \" { strtoken tok_string } +} + +{ +data CmmToken + = CmmT_SpecChar Char + | CmmT_DotDot + | CmmT_DoubleColon + | CmmT_Shr + | CmmT_Shl + | CmmT_Ge + | CmmT_Le + | CmmT_Eq + | CmmT_Ne + | CmmT_BoolAnd + | CmmT_BoolOr + | CmmT_CLOSURE + | CmmT_INFO_TABLE + | CmmT_INFO_TABLE_RET + | CmmT_INFO_TABLE_FUN + | CmmT_INFO_TABLE_CONSTR + | CmmT_INFO_TABLE_SELECTOR + | CmmT_else + | CmmT_export + | CmmT_section + | CmmT_goto + | CmmT_if + | CmmT_call + | CmmT_jump + | CmmT_foreign + | CmmT_never + | CmmT_prim + | CmmT_reserve + | CmmT_return + | CmmT_returns + | CmmT_import + | CmmT_switch + | CmmT_case + | CmmT_default + | CmmT_push + | CmmT_unwind + | CmmT_bits8 + | CmmT_bits16 + | CmmT_bits32 + | CmmT_bits64 + | CmmT_bits128 + | CmmT_bits256 + | CmmT_bits512 + | CmmT_float32 + | CmmT_float64 + | CmmT_gcptr + | CmmT_GlobalReg GlobalReg + | CmmT_Name FastString + | CmmT_String String + | CmmT_Int Integer + | CmmT_Float Rational + | CmmT_EOF + | CmmT_False + | CmmT_True + | CmmT_likely + deriving (Show) + +-- ----------------------------------------------------------------------------- +-- Lexer actions + +type Action = RealSrcSpan -> StringBuffer -> Int -> PD (RealLocated CmmToken) + +begin :: Int -> Action +begin code _span _str _len = do liftP (pushLexState code); lexToken + +pop :: Action +pop _span _buf _len = liftP popLexState >> lexToken + +special_char :: Action +special_char span buf _len = return (L span (CmmT_SpecChar (currentChar buf))) + +kw :: CmmToken -> Action +kw tok span _buf _len = return (L span tok) + +global_regN :: (Int -> GlobalReg) -> Action +global_regN con span buf len + = return (L span (CmmT_GlobalReg (con (fromIntegral n)))) + where buf' = stepOn buf + n = parseUnsignedInteger buf' (len-1) 10 octDecDigit + +global_reg :: GlobalReg -> Action +global_reg r span _buf _len = return (L span (CmmT_GlobalReg r)) + +strtoken :: (String -> CmmToken) -> Action +strtoken f span buf len = + return (L span $! (f $! lexemeToString buf len)) + +name :: Action +name span buf len = + case lookupUFM reservedWordsFM fs of + Just tok -> return (L span tok) + Nothing -> return (L span (CmmT_Name fs)) + where + fs = lexemeToFastString buf len + +reservedWordsFM = listToUFM $ + map (\(x, y) -> (mkFastString x, y)) [ + ( "CLOSURE", CmmT_CLOSURE ), + ( "INFO_TABLE", CmmT_INFO_TABLE ), + ( "INFO_TABLE_RET", CmmT_INFO_TABLE_RET ), + ( "INFO_TABLE_FUN", CmmT_INFO_TABLE_FUN ), + ( "INFO_TABLE_CONSTR", CmmT_INFO_TABLE_CONSTR ), + ( "INFO_TABLE_SELECTOR",CmmT_INFO_TABLE_SELECTOR ), + ( "else", CmmT_else ), + ( "export", CmmT_export ), + ( "section", CmmT_section ), + ( "goto", CmmT_goto ), + ( "if", CmmT_if ), + ( "call", CmmT_call ), + ( "jump", CmmT_jump ), + ( "foreign", CmmT_foreign ), + ( "never", CmmT_never ), + ( "prim", CmmT_prim ), + ( "reserve", CmmT_reserve ), + ( "return", CmmT_return ), + ( "returns", CmmT_returns ), + ( "import", CmmT_import ), + ( "switch", CmmT_switch ), + ( "case", CmmT_case ), + ( "default", CmmT_default ), + ( "push", CmmT_push ), + ( "unwind", CmmT_unwind ), + ( "bits8", CmmT_bits8 ), + ( "bits16", CmmT_bits16 ), + ( "bits32", CmmT_bits32 ), + ( "bits64", CmmT_bits64 ), + ( "bits128", CmmT_bits128 ), + ( "bits256", CmmT_bits256 ), + ( "bits512", CmmT_bits512 ), + ( "float32", CmmT_float32 ), + ( "float64", CmmT_float64 ), +-- New forms + ( "b8", CmmT_bits8 ), + ( "b16", CmmT_bits16 ), + ( "b32", CmmT_bits32 ), + ( "b64", CmmT_bits64 ), + ( "b128", CmmT_bits128 ), + ( "b256", CmmT_bits256 ), + ( "b512", CmmT_bits512 ), + ( "f32", CmmT_float32 ), + ( "f64", CmmT_float64 ), + ( "gcptr", CmmT_gcptr ), + ( "likely", CmmT_likely), + ( "True", CmmT_True ), + ( "False", CmmT_False ) + ] + +tok_decimal span buf len + = return (L span (CmmT_Int $! parseUnsignedInteger buf len 10 octDecDigit)) + +tok_octal span buf len + = return (L span (CmmT_Int $! parseUnsignedInteger (offsetBytes 1 buf) (len-1) 8 octDecDigit)) + +tok_hexadecimal span buf len + = return (L span (CmmT_Int $! parseUnsignedInteger (offsetBytes 2 buf) (len-2) 16 hexDigit)) + +tok_float str = CmmT_Float $! readRational str + +tok_string str = CmmT_String (read str) + -- urk, not quite right, but it'll do for now + +-- ----------------------------------------------------------------------------- +-- Line pragmas + +setLine :: Int -> Action +setLine code span buf len = do + let line = parseUnsignedInteger buf len 10 octDecDigit + liftP $ do + setSrcLoc (mkRealSrcLoc (srcSpanFile span) (fromIntegral line - 1) 1) + -- subtract one: the line number refers to the *following* line + -- trace ("setLine " ++ show line) $ do + popLexState >> pushLexState code + lexToken + +setFile :: Int -> Action +setFile code span buf len = do + let file = lexemeToFastString (stepOn buf) (len-2) + liftP $ do + setSrcLoc (mkRealSrcLoc file (srcSpanEndLine span) (srcSpanEndCol span)) + popLexState >> pushLexState code + lexToken + +-- ----------------------------------------------------------------------------- +-- This is the top-level function: called from the parser each time a +-- new token is to be read from the input. + +cmmlex :: (Located CmmToken -> PD a) -> PD a +cmmlex cont = do + (L span tok) <- lexToken + --trace ("token: " ++ show tok) $ do + cont (L (RealSrcSpan span) tok) + +lexToken :: PD (RealLocated CmmToken) +lexToken = do + inp@(loc1,buf) <- getInput + sc <- liftP getLexState + case alexScan inp sc of + AlexEOF -> do let span = mkRealSrcSpan loc1 loc1 + liftP (setLastToken span 0) + return (L span CmmT_EOF) + AlexError (loc2,_) -> liftP $ failLocMsgP loc1 loc2 "lexical error" + AlexSkip inp2 _ -> do + setInput inp2 + lexToken + AlexToken inp2@(end,_buf2) len t -> do + setInput inp2 + let span = mkRealSrcSpan loc1 end + span `seq` liftP (setLastToken span len) + t span buf len + +-- ----------------------------------------------------------------------------- +-- Monad stuff + +-- Stuff that Alex needs to know about our input type: +type AlexInput = (RealSrcLoc,StringBuffer) + +alexInputPrevChar :: AlexInput -> Char +alexInputPrevChar (_,s) = prevChar s '\n' + +-- backwards compatibility for Alex 2.x +alexGetChar :: AlexInput -> Maybe (Char,AlexInput) +alexGetChar inp = case alexGetByte inp of + Nothing -> Nothing + Just (b,i) -> c `seq` Just (c,i) + where c = chr $ fromIntegral b + +alexGetByte :: AlexInput -> Maybe (Word8,AlexInput) +alexGetByte (loc,s) + | atEnd s = Nothing + | otherwise = b `seq` loc' `seq` s' `seq` Just (b, (loc', s')) + where c = currentChar s + b = fromIntegral $ ord $ c + loc' = advanceSrcLoc loc c + s' = stepOn s + +getInput :: PD AlexInput +getInput = PD $ \_ s@PState{ loc=l, buffer=b } -> POk s (l,b) + +setInput :: AlexInput -> PD () +setInput (l,b) = PD $ \_ s -> POk s{ loc=l, buffer=b } () +} |