compiler/cmm/CmmLex.x


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336

-----------------------------------------------------------------------------
--
-- (c) The University of Glasgow, 2004-2006
--
-- Lexer for concrete Cmm.  We try to stay close to the C-- spec, but there
-- are a few minor differences:
--
--   * extra keywords for our macros, and float32/float64 types
--   * global registers (Sp,Hp, etc.)
--
-----------------------------------------------------------------------------

{
{-# LANGUAGE BangPatterns #-}
{-# OPTIONS -Wwarn -w #-}
-- The above -Wwarn supression flag is a temporary kludge.
-- While working on this module you are encouraged to remove it and fix
-- any warnings in the module. See
--     http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings
-- for details

module CmmLex (
   CmmToken(..), cmmlex,
  ) where

import Cmm
import Lexer

import SrcLoc
import UniqFM
import StringBuffer
import FastString
import Ctype
import Util
--import TRACE
}

$whitechar   = [\ \t\n\r\f\v\xa0] -- \xa0 is Unicode no-break space
$white_no_nl = $whitechar # \n

$ascdigit  = 0-9
$unidigit  = \x01 -- Trick Alex into handling Unicode. See alexGetChar.
$digit     = [$ascdigit $unidigit]
$octit	   = 0-7
$hexit     = [$digit A-F a-f]

$unilarge  = \x03 -- Trick Alex into handling Unicode. See alexGetChar.
$asclarge  = [A-Z \xc0-\xd6 \xd8-\xde]
$large     = [$asclarge $unilarge]

$unismall  = \x04 -- Trick Alex into handling Unicode. See alexGetChar.
$ascsmall  = [a-z \xdf-\xf6 \xf8-\xff]
$small     = [$ascsmall $unismall \_]

$namebegin = [$large $small \. \$ \@]
$namechar  = [$namebegin $digit]

@decimal     = $digit+
@octal       = $octit+
@hexadecimal = $hexit+
@exponent    = [eE] [\-\+]? @decimal

@floating_point = @decimal \. @decimal @exponent? | @decimal @exponent

@escape      = \\ ([abfnrt\\\'\"\?] | x $hexit{1,2} | $octit{1,3})
@strchar     = ($printable # [\"\\]) | @escape

cmm :-

$white_no_nl+		;
^\# pragma .* \n        ; -- Apple GCC 3.3 CPP generates pragmas in its output

^\# (line)? 		{ begin line_prag }

-- single-line line pragmas, of the form
--    # <line> "<file>" <extra-stuff> \n
<line_prag> $digit+			{ setLine line_prag1 }
<line_prag1> \" [^\"]* \"	{ setFile line_prag2 }
<line_prag2> .*				{ pop }

<0> {
  \n			;

  [\:\;\{\}\[\]\(\)\=\`\~\/\*\%\-\+\&\^\|\>\<\,\!]	{ special_char }
  
  ".." 			{ kw CmmT_DotDot }
  "::" 			{ kw CmmT_DoubleColon }
  ">>"			{ kw CmmT_Shr }
  "<<"			{ kw CmmT_Shl }
  ">="			{ kw CmmT_Ge }
  "<="			{ kw CmmT_Le }
  "=="			{ kw CmmT_Eq }
  "!="			{ kw CmmT_Ne }
  "&&"			{ kw CmmT_BoolAnd }
  "||"			{ kw CmmT_BoolOr }
  
  P@decimal		{ global_regN (\n -> VanillaReg n VGcPtr) }
  R@decimal		{ global_regN (\n -> VanillaReg n VNonGcPtr) }
  F@decimal		{ global_regN FloatReg }
  D@decimal		{ global_regN DoubleReg }
  L@decimal		{ global_regN LongReg }
  Sp			{ global_reg Sp }
  SpLim			{ global_reg SpLim }
  Hp			{ global_reg Hp }
  HpLim			{ global_reg HpLim }
  CurrentTSO		{ global_reg CurrentTSO }
  CurrentNursery	{ global_reg CurrentNursery }
  HpAlloc		{ global_reg HpAlloc }
  BaseReg		{ global_reg BaseReg }
  
  $namebegin $namechar*	{ name }
  
  0 @octal		{ tok_octal }
  @decimal		{ tok_decimal }
  0[xX] @hexadecimal	{ tok_hexadecimal }
  @floating_point	{ strtoken tok_float }
  
  \" @strchar* \"	{ strtoken tok_string }
}

{
data CmmToken
  = CmmT_SpecChar  Char
  | CmmT_DotDot
  | CmmT_DoubleColon
  | CmmT_Shr
  | CmmT_Shl
  | CmmT_Ge
  | CmmT_Le
  | CmmT_Eq
  | CmmT_Ne
  | CmmT_BoolAnd
  | CmmT_BoolOr
  | CmmT_CLOSURE
  | CmmT_INFO_TABLE
  | CmmT_INFO_TABLE_RET
  | CmmT_INFO_TABLE_FUN
  | CmmT_INFO_TABLE_CONSTR
  | CmmT_INFO_TABLE_SELECTOR
  | CmmT_else
  | CmmT_export
  | CmmT_section
  | CmmT_align
  | CmmT_goto
  | CmmT_if
  | CmmT_jump
  | CmmT_foreign
  | CmmT_never
  | CmmT_prim
  | CmmT_return
  | CmmT_returns
  | CmmT_import
  | CmmT_switch
  | CmmT_case
  | CmmT_default
  | CmmT_bits8
  | CmmT_bits16
  | CmmT_bits32
  | CmmT_bits64
  | CmmT_float32
  | CmmT_float64
  | CmmT_gcptr
  | CmmT_GlobalReg GlobalReg
  | CmmT_Name	   FastString
  | CmmT_String	   String
  | CmmT_Int	   Integer
  | CmmT_Float     Rational
  | CmmT_EOF
#ifdef DEBUG
  deriving (Show)
#endif

-- -----------------------------------------------------------------------------
-- Lexer actions

type Action = SrcSpan -> StringBuffer -> Int -> P (Located CmmToken)

begin :: Int -> Action
begin code _span _str _len = do pushLexState code; lexToken

pop :: Action
pop _span _buf _len = do popLexState; lexToken

special_char :: Action
special_char span buf len = return (L span (CmmT_SpecChar (currentChar buf)))

kw :: CmmToken -> Action
kw tok span buf len = return (L span tok)

global_regN :: (Int -> GlobalReg) -> Action
global_regN con span buf len 
  = return (L span (CmmT_GlobalReg (con (fromIntegral n))))
  where buf' = stepOn buf
	n = parseUnsignedInteger buf' (len-1) 10 octDecDigit

global_reg :: GlobalReg -> Action
global_reg r span buf len = return (L span (CmmT_GlobalReg r))

strtoken :: (String -> CmmToken) -> Action
strtoken f span buf len = 
  return (L span $! (f $! lexemeToString buf len))

name :: Action
name span buf len = 
  case lookupUFM reservedWordsFM fs of
	Just tok -> return (L span tok)
	Nothing  -> return (L span (CmmT_Name fs))
  where
	fs = lexemeToFastString buf len

reservedWordsFM = listToUFM $
	map (\(x, y) -> (mkFastString x, y)) [
	( "CLOSURE",		CmmT_CLOSURE ),
	( "INFO_TABLE",		CmmT_INFO_TABLE ),
	( "INFO_TABLE_RET",	CmmT_INFO_TABLE_RET ),
	( "INFO_TABLE_FUN",	CmmT_INFO_TABLE_FUN ),
	( "INFO_TABLE_CONSTR",	CmmT_INFO_TABLE_CONSTR ),
	( "INFO_TABLE_SELECTOR",CmmT_INFO_TABLE_SELECTOR ),
	( "else",		CmmT_else ),
	( "export",		CmmT_export ),
	( "section",		CmmT_section ),
	( "align",		CmmT_align ),
	( "goto",		CmmT_goto ),
	( "if",			CmmT_if ),
	( "jump",		CmmT_jump ),
	( "foreign",		CmmT_foreign ),
	( "never",		CmmT_never ),
	( "prim",		CmmT_prim ),
	( "return",		CmmT_return ),
	( "returns",		CmmT_returns ),
	( "import",		CmmT_import ),
	( "switch",		CmmT_switch ),
	( "case",		CmmT_case ),
	( "default",		CmmT_default ),
	( "bits8",		CmmT_bits8 ),
	( "bits16",		CmmT_bits16 ),
	( "bits32",		CmmT_bits32 ),
	( "bits64",		CmmT_bits64 ),
	( "float32",		CmmT_float32 ),
	( "float64",		CmmT_float64 ),
-- New forms
	( "b8",			CmmT_bits8 ),
	( "b16",		CmmT_bits16 ),
	( "b32",		CmmT_bits32 ),
	( "b64",		CmmT_bits64 ),
	( "f32",		CmmT_float32 ),
	( "f64",		CmmT_float64 ),
	( "gcptr",		CmmT_gcptr )
	]

tok_decimal span buf len 
  = return (L span (CmmT_Int  $! parseUnsignedInteger buf len 10 octDecDigit))

tok_octal span buf len 
  = return (L span (CmmT_Int  $! parseUnsignedInteger (offsetBytes 1 buf) (len-1) 8 octDecDigit))

tok_hexadecimal span buf len 
  = return (L span (CmmT_Int  $! parseUnsignedInteger (offsetBytes 2 buf) (len-2) 16 hexDigit))

tok_float str = CmmT_Float $! readRational str

tok_string str = CmmT_String (read str)
		 -- urk, not quite right, but it'll do for now

-- -----------------------------------------------------------------------------
-- Line pragmas

setLine :: Int -> Action
setLine code span buf len = do
  let line = parseUnsignedInteger buf len 10 octDecDigit
  setSrcLoc (mkSrcLoc (srcSpanFile span) (fromIntegral line - 1) 1)
	-- subtract one: the line number refers to the *following* line
  -- trace ("setLine "  ++ show line) $ do
  popLexState
  pushLexState code
  lexToken

setFile :: Int -> Action
setFile code span buf len = do
  let file = lexemeToFastString (stepOn buf) (len-2)
  setSrcLoc (mkSrcLoc file (srcSpanEndLine span) (srcSpanEndCol span))
  popLexState
  pushLexState code
  lexToken

-- -----------------------------------------------------------------------------
-- This is the top-level function: called from the parser each time a
-- new token is to be read from the input.

cmmlex :: (Located CmmToken -> P a) -> P a
cmmlex cont = do
  tok@(L _ tok__) <- lexToken
  --trace ("token: " ++ show tok__) $ do
  cont tok

lexToken :: P (Located CmmToken)
lexToken = do
  inp@(loc1,buf) <- getInput
  sc <- getLexState
  case alexScan inp sc of
    AlexEOF -> do let span = mkSrcSpan loc1 loc1
		  setLastToken span 0
		  return (L span CmmT_EOF)
    AlexError (loc2,_) -> do failLocMsgP loc1 loc2 "lexical error"
    AlexSkip inp2 _ -> do
	setInput inp2
	lexToken
    AlexToken inp2@(end,buf2) len t -> do
	setInput inp2
	let span = mkSrcSpan loc1 end
	span `seq` setLastToken span len
	t span buf len

-- -----------------------------------------------------------------------------
-- Monad stuff

-- Stuff that Alex needs to know about our input type:
type AlexInput = (SrcLoc,StringBuffer)

alexInputPrevChar :: AlexInput -> Char
alexInputPrevChar (_,s) = prevChar s '\n'

alexGetChar :: AlexInput -> Maybe (Char,AlexInput)
alexGetChar (loc,s) 
  | atEnd s   = Nothing
  | otherwise = c `seq` loc' `seq` s' `seq` Just (c, (loc', s'))
  where c = currentChar s
        loc' = advanceSrcLoc loc c
	s'   = stepOn s

getInput :: P AlexInput
getInput = P $ \s@PState{ loc=l, buffer=b } -> POk s (l,b)

setInput :: AlexInput -> P ()
setInput (l,b) = P $ \s -> POk s{ loc=l, buffer=b } ()
}