summaryrefslogtreecommitdiff
path: root/compiler/GHC/Cmm/Lexer.hs
diff options
context:
space:
mode:
authorVladislav Zavialov <vlad.z.4096@gmail.com>2020-02-15 12:32:30 +0300
committerVladislav Zavialov <vlad.z.4096@gmail.com>2020-02-22 20:17:37 +0300
commita5ec380589c7e6dc9e3416b6409ad49153acdfff (patch)
tree610784e13d2cbdfe71cfe845182dcf6e71c87182 /compiler/GHC/Cmm/Lexer.hs
parent9d09411122b9b534b96e988b6d3f6d7eb04b8f66 (diff)
downloadhaskell-a5ec380589c7e6dc9e3416b6409ad49153acdfff.tar.gz
WIP: Use alex/happy as -pgmF preprocessorswip/pgmf
Diffstat (limited to 'compiler/GHC/Cmm/Lexer.hs')
-rw-r--r--compiler/GHC/Cmm/Lexer.hs370
1 files changed, 370 insertions, 0 deletions
diff --git a/compiler/GHC/Cmm/Lexer.hs b/compiler/GHC/Cmm/Lexer.hs
new file mode 100644
index 0000000000..eeb68459a2
--- /dev/null
+++ b/compiler/GHC/Cmm/Lexer.hs
@@ -0,0 +1,370 @@
+{-# OPTIONS -pgmF utils/alex.sh -F #-}
+
+-----------------------------------------------------------------------------
+--
+-- (c) The University of Glasgow, 2004-2006
+--
+-- Lexer for concrete Cmm. We try to stay close to the C-- spec, but there
+-- are a few minor differences:
+--
+-- * extra keywords for our macros, and float32/float64 types
+-- * global registers (Sp,Hp, etc.)
+--
+-----------------------------------------------------------------------------
+
+{
+module GHC.Cmm.Lexer (
+ CmmToken(..), cmmlex,
+ ) where
+
+import GhcPrelude
+
+import GHC.Cmm.Expr
+
+import Lexer
+import GHC.Cmm.Monad
+import SrcLoc
+import UniqFM
+import StringBuffer
+import FastString
+import Ctype
+import Util
+--import TRACE
+
+import Data.Word
+import Data.Char
+}
+
+$whitechar = [\ \t\n\r\f\v\xa0] -- \xa0 is Unicode no-break space
+$white_no_nl = $whitechar # \n
+
+$ascdigit = 0-9
+$unidigit = \x01 -- Trick Alex into handling Unicode. See alexGetChar.
+$digit = [$ascdigit $unidigit]
+$octit = 0-7
+$hexit = [$digit A-F a-f]
+
+$unilarge = \x03 -- Trick Alex into handling Unicode. See alexGetChar.
+$asclarge = [A-Z \xc0-\xd6 \xd8-\xde]
+$large = [$asclarge $unilarge]
+
+$unismall = \x04 -- Trick Alex into handling Unicode. See alexGetChar.
+$ascsmall = [a-z \xdf-\xf6 \xf8-\xff]
+$small = [$ascsmall $unismall \_]
+
+$namebegin = [$large $small \. \$ \@]
+$namechar = [$namebegin $digit]
+
+@decimal = $digit+
+@octal = $octit+
+@hexadecimal = $hexit+
+@exponent = [eE] [\-\+]? @decimal
+
+@floating_point = @decimal \. @decimal @exponent? | @decimal @exponent
+
+@escape = \\ ([abfnrt\\\'\"\?] | x $hexit{1,2} | $octit{1,3})
+@strchar = ($printable # [\"\\]) | @escape
+
+cmm :-
+
+$white_no_nl+ ;
+^\# pragma .* \n ; -- Apple GCC 3.3 CPP generates pragmas in its output
+
+^\# (line)? { begin line_prag }
+
+-- single-line line pragmas, of the form
+-- # <line> "<file>" <extra-stuff> \n
+<line_prag> $digit+ { setLine line_prag1 }
+<line_prag1> \" [^\"]* \" { setFile line_prag2 }
+<line_prag2> .* { pop }
+
+<0> {
+ \n ;
+
+ [\:\;\{\}\[\]\(\)\=\`\~\/\*\%\-\+\&\^\|\>\<\,\!] { special_char }
+
+ ".." { kw CmmT_DotDot }
+ "::" { kw CmmT_DoubleColon }
+ ">>" { kw CmmT_Shr }
+ "<<" { kw CmmT_Shl }
+ ">=" { kw CmmT_Ge }
+ "<=" { kw CmmT_Le }
+ "==" { kw CmmT_Eq }
+ "!=" { kw CmmT_Ne }
+ "&&" { kw CmmT_BoolAnd }
+ "||" { kw CmmT_BoolOr }
+
+ "True" { kw CmmT_True }
+ "False" { kw CmmT_False }
+ "likely" { kw CmmT_likely}
+
+ P@decimal { global_regN (\n -> VanillaReg n VGcPtr) }
+ R@decimal { global_regN (\n -> VanillaReg n VNonGcPtr) }
+ F@decimal { global_regN FloatReg }
+ D@decimal { global_regN DoubleReg }
+ L@decimal { global_regN LongReg }
+ Sp { global_reg Sp }
+ SpLim { global_reg SpLim }
+ Hp { global_reg Hp }
+ HpLim { global_reg HpLim }
+ CCCS { global_reg CCCS }
+ CurrentTSO { global_reg CurrentTSO }
+ CurrentNursery { global_reg CurrentNursery }
+ HpAlloc { global_reg HpAlloc }
+ BaseReg { global_reg BaseReg }
+ MachSp { global_reg MachSp }
+ UnwindReturnReg { global_reg UnwindReturnReg }
+
+ $namebegin $namechar* { name }
+
+ 0 @octal { tok_octal }
+ @decimal { tok_decimal }
+ 0[xX] @hexadecimal { tok_hexadecimal }
+ @floating_point { strtoken tok_float }
+
+ \" @strchar* \" { strtoken tok_string }
+}
+
+{
+data CmmToken
+ = CmmT_SpecChar Char
+ | CmmT_DotDot
+ | CmmT_DoubleColon
+ | CmmT_Shr
+ | CmmT_Shl
+ | CmmT_Ge
+ | CmmT_Le
+ | CmmT_Eq
+ | CmmT_Ne
+ | CmmT_BoolAnd
+ | CmmT_BoolOr
+ | CmmT_CLOSURE
+ | CmmT_INFO_TABLE
+ | CmmT_INFO_TABLE_RET
+ | CmmT_INFO_TABLE_FUN
+ | CmmT_INFO_TABLE_CONSTR
+ | CmmT_INFO_TABLE_SELECTOR
+ | CmmT_else
+ | CmmT_export
+ | CmmT_section
+ | CmmT_goto
+ | CmmT_if
+ | CmmT_call
+ | CmmT_jump
+ | CmmT_foreign
+ | CmmT_never
+ | CmmT_prim
+ | CmmT_reserve
+ | CmmT_return
+ | CmmT_returns
+ | CmmT_import
+ | CmmT_switch
+ | CmmT_case
+ | CmmT_default
+ | CmmT_push
+ | CmmT_unwind
+ | CmmT_bits8
+ | CmmT_bits16
+ | CmmT_bits32
+ | CmmT_bits64
+ | CmmT_bits128
+ | CmmT_bits256
+ | CmmT_bits512
+ | CmmT_float32
+ | CmmT_float64
+ | CmmT_gcptr
+ | CmmT_GlobalReg GlobalReg
+ | CmmT_Name FastString
+ | CmmT_String String
+ | CmmT_Int Integer
+ | CmmT_Float Rational
+ | CmmT_EOF
+ | CmmT_False
+ | CmmT_True
+ | CmmT_likely
+ deriving (Show)
+
+-- -----------------------------------------------------------------------------
+-- Lexer actions
+
+type Action = RealSrcSpan -> StringBuffer -> Int -> PD (RealLocated CmmToken)
+
+begin :: Int -> Action
+begin code _span _str _len = do liftP (pushLexState code); lexToken
+
+pop :: Action
+pop _span _buf _len = liftP popLexState >> lexToken
+
+special_char :: Action
+special_char span buf _len = return (L span (CmmT_SpecChar (currentChar buf)))
+
+kw :: CmmToken -> Action
+kw tok span _buf _len = return (L span tok)
+
+global_regN :: (Int -> GlobalReg) -> Action
+global_regN con span buf len
+ = return (L span (CmmT_GlobalReg (con (fromIntegral n))))
+ where buf' = stepOn buf
+ n = parseUnsignedInteger buf' (len-1) 10 octDecDigit
+
+global_reg :: GlobalReg -> Action
+global_reg r span _buf _len = return (L span (CmmT_GlobalReg r))
+
+strtoken :: (String -> CmmToken) -> Action
+strtoken f span buf len =
+ return (L span $! (f $! lexemeToString buf len))
+
+name :: Action
+name span buf len =
+ case lookupUFM reservedWordsFM fs of
+ Just tok -> return (L span tok)
+ Nothing -> return (L span (CmmT_Name fs))
+ where
+ fs = lexemeToFastString buf len
+
+reservedWordsFM = listToUFM $
+ map (\(x, y) -> (mkFastString x, y)) [
+ ( "CLOSURE", CmmT_CLOSURE ),
+ ( "INFO_TABLE", CmmT_INFO_TABLE ),
+ ( "INFO_TABLE_RET", CmmT_INFO_TABLE_RET ),
+ ( "INFO_TABLE_FUN", CmmT_INFO_TABLE_FUN ),
+ ( "INFO_TABLE_CONSTR", CmmT_INFO_TABLE_CONSTR ),
+ ( "INFO_TABLE_SELECTOR",CmmT_INFO_TABLE_SELECTOR ),
+ ( "else", CmmT_else ),
+ ( "export", CmmT_export ),
+ ( "section", CmmT_section ),
+ ( "goto", CmmT_goto ),
+ ( "if", CmmT_if ),
+ ( "call", CmmT_call ),
+ ( "jump", CmmT_jump ),
+ ( "foreign", CmmT_foreign ),
+ ( "never", CmmT_never ),
+ ( "prim", CmmT_prim ),
+ ( "reserve", CmmT_reserve ),
+ ( "return", CmmT_return ),
+ ( "returns", CmmT_returns ),
+ ( "import", CmmT_import ),
+ ( "switch", CmmT_switch ),
+ ( "case", CmmT_case ),
+ ( "default", CmmT_default ),
+ ( "push", CmmT_push ),
+ ( "unwind", CmmT_unwind ),
+ ( "bits8", CmmT_bits8 ),
+ ( "bits16", CmmT_bits16 ),
+ ( "bits32", CmmT_bits32 ),
+ ( "bits64", CmmT_bits64 ),
+ ( "bits128", CmmT_bits128 ),
+ ( "bits256", CmmT_bits256 ),
+ ( "bits512", CmmT_bits512 ),
+ ( "float32", CmmT_float32 ),
+ ( "float64", CmmT_float64 ),
+-- New forms
+ ( "b8", CmmT_bits8 ),
+ ( "b16", CmmT_bits16 ),
+ ( "b32", CmmT_bits32 ),
+ ( "b64", CmmT_bits64 ),
+ ( "b128", CmmT_bits128 ),
+ ( "b256", CmmT_bits256 ),
+ ( "b512", CmmT_bits512 ),
+ ( "f32", CmmT_float32 ),
+ ( "f64", CmmT_float64 ),
+ ( "gcptr", CmmT_gcptr ),
+ ( "likely", CmmT_likely),
+ ( "True", CmmT_True ),
+ ( "False", CmmT_False )
+ ]
+
+tok_decimal span buf len
+ = return (L span (CmmT_Int $! parseUnsignedInteger buf len 10 octDecDigit))
+
+tok_octal span buf len
+ = return (L span (CmmT_Int $! parseUnsignedInteger (offsetBytes 1 buf) (len-1) 8 octDecDigit))
+
+tok_hexadecimal span buf len
+ = return (L span (CmmT_Int $! parseUnsignedInteger (offsetBytes 2 buf) (len-2) 16 hexDigit))
+
+tok_float str = CmmT_Float $! readRational str
+
+tok_string str = CmmT_String (read str)
+ -- urk, not quite right, but it'll do for now
+
+-- -----------------------------------------------------------------------------
+-- Line pragmas
+
+setLine :: Int -> Action
+setLine code span buf len = do
+ let line = parseUnsignedInteger buf len 10 octDecDigit
+ liftP $ do
+ setSrcLoc (mkRealSrcLoc (srcSpanFile span) (fromIntegral line - 1) 1)
+ -- subtract one: the line number refers to the *following* line
+ -- trace ("setLine " ++ show line) $ do
+ popLexState >> pushLexState code
+ lexToken
+
+setFile :: Int -> Action
+setFile code span buf len = do
+ let file = lexemeToFastString (stepOn buf) (len-2)
+ liftP $ do
+ setSrcLoc (mkRealSrcLoc file (srcSpanEndLine span) (srcSpanEndCol span))
+ popLexState >> pushLexState code
+ lexToken
+
+-- -----------------------------------------------------------------------------
+-- This is the top-level function: called from the parser each time a
+-- new token is to be read from the input.
+
+cmmlex :: (Located CmmToken -> PD a) -> PD a
+cmmlex cont = do
+ (L span tok) <- lexToken
+ --trace ("token: " ++ show tok) $ do
+ cont (L (RealSrcSpan span) tok)
+
+lexToken :: PD (RealLocated CmmToken)
+lexToken = do
+ inp@(loc1,buf) <- getInput
+ sc <- liftP getLexState
+ case alexScan inp sc of
+ AlexEOF -> do let span = mkRealSrcSpan loc1 loc1
+ liftP (setLastToken span 0)
+ return (L span CmmT_EOF)
+ AlexError (loc2,_) -> liftP $ failLocMsgP loc1 loc2 "lexical error"
+ AlexSkip inp2 _ -> do
+ setInput inp2
+ lexToken
+ AlexToken inp2@(end,_buf2) len t -> do
+ setInput inp2
+ let span = mkRealSrcSpan loc1 end
+ span `seq` liftP (setLastToken span len)
+ t span buf len
+
+-- -----------------------------------------------------------------------------
+-- Monad stuff
+
+-- Stuff that Alex needs to know about our input type:
+type AlexInput = (RealSrcLoc,StringBuffer)
+
+alexInputPrevChar :: AlexInput -> Char
+alexInputPrevChar (_,s) = prevChar s '\n'
+
+-- backwards compatibility for Alex 2.x
+alexGetChar :: AlexInput -> Maybe (Char,AlexInput)
+alexGetChar inp = case alexGetByte inp of
+ Nothing -> Nothing
+ Just (b,i) -> c `seq` Just (c,i)
+ where c = chr $ fromIntegral b
+
+alexGetByte :: AlexInput -> Maybe (Word8,AlexInput)
+alexGetByte (loc,s)
+ | atEnd s = Nothing
+ | otherwise = b `seq` loc' `seq` s' `seq` Just (b, (loc', s'))
+ where c = currentChar s
+ b = fromIntegral $ ord $ c
+ loc' = advanceSrcLoc loc c
+ s' = stepOn s
+
+getInput :: PD AlexInput
+getInput = PD $ \_ s@PState{ loc=l, buffer=b } -> POk s (l,b)
+
+setInput :: AlexInput -> PD ()
+setInput (l,b) = PD $ \_ s -> POk s{ loc=l, buffer=b } ()
+}