1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
|
-----------------------------------------------------------------------------
--
-- (c) The University of Glasgow, 2004-2006
--
-- Lexer for concrete Cmm. We try to stay close to the C-- spec, but there
-- are a few minor differences:
--
-- * extra keywords for our macros, and float32/float64 types
-- * global registers (Sp,Hp, etc.)
--
-----------------------------------------------------------------------------
{
-- See Note [Warnings in code generated by Alex] in compiler/parser/Lexer.x
{-# OPTIONS_GHC -fno-warn-unused-matches #-}
{-# OPTIONS_GHC -fno-warn-unused-binds #-}
{-# OPTIONS_GHC -fno-warn-unused-imports #-}
{-# OPTIONS_GHC -fno-warn-tabs #-}
{-# OPTIONS_GHC -fno-warn-missing-signatures #-}
module CmmLex (
CmmToken(..), cmmlex,
) where
import GhcPrelude
import CmmExpr
import Lexer
import CmmMonad
import SrcLoc
import UniqFM
import StringBuffer
import FastString
import Ctype
import Util
--import TRACE
import Data.Word
import Data.Char
}
$whitechar = [\ \t\n\r\f\v\xa0] -- \xa0 is Unicode no-break space
$white_no_nl = $whitechar # \n
$ascdigit = 0-9
$unidigit = \x01 -- Trick Alex into handling Unicode. See alexGetChar.
$digit = [$ascdigit $unidigit]
$octit = 0-7
$hexit = [$digit A-F a-f]
$unilarge = \x03 -- Trick Alex into handling Unicode. See alexGetChar.
$asclarge = [A-Z \xc0-\xd6 \xd8-\xde]
$large = [$asclarge $unilarge]
$unismall = \x04 -- Trick Alex into handling Unicode. See alexGetChar.
$ascsmall = [a-z \xdf-\xf6 \xf8-\xff]
$small = [$ascsmall $unismall \_]
$namebegin = [$large $small \. \$ \@]
$namechar = [$namebegin $digit]
@decimal = $digit+
@octal = $octit+
@hexadecimal = $hexit+
@exponent = [eE] [\-\+]? @decimal
@floating_point = @decimal \. @decimal @exponent? | @decimal @exponent
@escape = \\ ([abfnrt\\\'\"\?] | x $hexit{1,2} | $octit{1,3})
@strchar = ($printable # [\"\\]) | @escape
cmm :-
$white_no_nl+ ;
^\# pragma .* \n ; -- Apple GCC 3.3 CPP generates pragmas in its output
^\# (line)? { begin line_prag }
-- single-line line pragmas, of the form
-- # <line> "<file>" <extra-stuff> \n
<line_prag> $digit+ { setLine line_prag1 }
<line_prag1> \" [^\"]* \" { setFile line_prag2 }
<line_prag2> .* { pop }
<0> {
\n ;
[\:\;\{\}\[\]\(\)\=\`\~\/\*\%\-\+\&\^\|\>\<\,\!] { special_char }
".." { kw CmmT_DotDot }
"::" { kw CmmT_DoubleColon }
">>" { kw CmmT_Shr }
"<<" { kw CmmT_Shl }
">=" { kw CmmT_Ge }
"<=" { kw CmmT_Le }
"==" { kw CmmT_Eq }
"!=" { kw CmmT_Ne }
"&&" { kw CmmT_BoolAnd }
"||" { kw CmmT_BoolOr }
P@decimal { global_regN (\n -> VanillaReg n VGcPtr) }
R@decimal { global_regN (\n -> VanillaReg n VNonGcPtr) }
F@decimal { global_regN FloatReg }
D@decimal { global_regN DoubleReg }
L@decimal { global_regN LongReg }
Sp { global_reg Sp }
SpLim { global_reg SpLim }
Hp { global_reg Hp }
HpLim { global_reg HpLim }
CCCS { global_reg CCCS }
CurrentTSO { global_reg CurrentTSO }
CurrentNursery { global_reg CurrentNursery }
HpAlloc { global_reg HpAlloc }
BaseReg { global_reg BaseReg }
MachSp { global_reg MachSp }
UnwindReturnReg { global_reg UnwindReturnReg }
$namebegin $namechar* { name }
0 @octal { tok_octal }
@decimal { tok_decimal }
0[xX] @hexadecimal { tok_hexadecimal }
@floating_point { strtoken tok_float }
\" @strchar* \" { strtoken tok_string }
}
{
data CmmToken
= CmmT_SpecChar Char
| CmmT_DotDot
| CmmT_DoubleColon
| CmmT_Shr
| CmmT_Shl
| CmmT_Ge
| CmmT_Le
| CmmT_Eq
| CmmT_Ne
| CmmT_BoolAnd
| CmmT_BoolOr
| CmmT_CLOSURE
| CmmT_INFO_TABLE
| CmmT_INFO_TABLE_RET
| CmmT_INFO_TABLE_FUN
| CmmT_INFO_TABLE_CONSTR
| CmmT_INFO_TABLE_SELECTOR
| CmmT_else
| CmmT_export
| CmmT_section
| CmmT_goto
| CmmT_if
| CmmT_call
| CmmT_jump
| CmmT_foreign
| CmmT_never
| CmmT_prim
| CmmT_reserve
| CmmT_return
| CmmT_returns
| CmmT_import
| CmmT_switch
| CmmT_case
| CmmT_default
| CmmT_push
| CmmT_unwind
| CmmT_bits8
| CmmT_bits16
| CmmT_bits32
| CmmT_bits64
| CmmT_bits128
| CmmT_bits256
| CmmT_bits512
| CmmT_float32
| CmmT_float64
| CmmT_gcptr
| CmmT_GlobalReg GlobalReg
| CmmT_Name FastString
| CmmT_String String
| CmmT_Int Integer
| CmmT_Float Rational
| CmmT_EOF
deriving (Show)
-- -----------------------------------------------------------------------------
-- Lexer actions
type Action = RealSrcSpan -> StringBuffer -> Int -> PD (RealLocated CmmToken)
begin :: Int -> Action
begin code _span _str _len = do liftP (pushLexState code); lexToken
pop :: Action
pop _span _buf _len = liftP popLexState >> lexToken
special_char :: Action
special_char span buf _len = return (L span (CmmT_SpecChar (currentChar buf)))
kw :: CmmToken -> Action
kw tok span _buf _len = return (L span tok)
global_regN :: (Int -> GlobalReg) -> Action
global_regN con span buf len
= return (L span (CmmT_GlobalReg (con (fromIntegral n))))
where buf' = stepOn buf
n = parseUnsignedInteger buf' (len-1) 10 octDecDigit
global_reg :: GlobalReg -> Action
global_reg r span _buf _len = return (L span (CmmT_GlobalReg r))
strtoken :: (String -> CmmToken) -> Action
strtoken f span buf len =
return (L span $! (f $! lexemeToString buf len))
name :: Action
name span buf len =
case lookupUFM reservedWordsFM fs of
Just tok -> return (L span tok)
Nothing -> return (L span (CmmT_Name fs))
where
fs = lexemeToFastString buf len
reservedWordsFM = listToUFM $
map (\(x, y) -> (mkFastString x, y)) [
( "CLOSURE", CmmT_CLOSURE ),
( "INFO_TABLE", CmmT_INFO_TABLE ),
( "INFO_TABLE_RET", CmmT_INFO_TABLE_RET ),
( "INFO_TABLE_FUN", CmmT_INFO_TABLE_FUN ),
( "INFO_TABLE_CONSTR", CmmT_INFO_TABLE_CONSTR ),
( "INFO_TABLE_SELECTOR",CmmT_INFO_TABLE_SELECTOR ),
( "else", CmmT_else ),
( "export", CmmT_export ),
( "section", CmmT_section ),
( "goto", CmmT_goto ),
( "if", CmmT_if ),
( "call", CmmT_call ),
( "jump", CmmT_jump ),
( "foreign", CmmT_foreign ),
( "never", CmmT_never ),
( "prim", CmmT_prim ),
( "reserve", CmmT_reserve ),
( "return", CmmT_return ),
( "returns", CmmT_returns ),
( "import", CmmT_import ),
( "switch", CmmT_switch ),
( "case", CmmT_case ),
( "default", CmmT_default ),
( "push", CmmT_push ),
( "unwind", CmmT_unwind ),
( "bits8", CmmT_bits8 ),
( "bits16", CmmT_bits16 ),
( "bits32", CmmT_bits32 ),
( "bits64", CmmT_bits64 ),
( "bits128", CmmT_bits128 ),
( "bits256", CmmT_bits256 ),
( "bits512", CmmT_bits512 ),
( "float32", CmmT_float32 ),
( "float64", CmmT_float64 ),
-- New forms
( "b8", CmmT_bits8 ),
( "b16", CmmT_bits16 ),
( "b32", CmmT_bits32 ),
( "b64", CmmT_bits64 ),
( "b128", CmmT_bits128 ),
( "b256", CmmT_bits256 ),
( "b512", CmmT_bits512 ),
( "f32", CmmT_float32 ),
( "f64", CmmT_float64 ),
( "gcptr", CmmT_gcptr )
]
tok_decimal span buf len
= return (L span (CmmT_Int $! parseUnsignedInteger buf len 10 octDecDigit))
tok_octal span buf len
= return (L span (CmmT_Int $! parseUnsignedInteger (offsetBytes 1 buf) (len-1) 8 octDecDigit))
tok_hexadecimal span buf len
= return (L span (CmmT_Int $! parseUnsignedInteger (offsetBytes 2 buf) (len-2) 16 hexDigit))
tok_float str = CmmT_Float $! readRational str
tok_string str = CmmT_String (read str)
-- urk, not quite right, but it'll do for now
-- -----------------------------------------------------------------------------
-- Line pragmas
setLine :: Int -> Action
setLine code span buf len = do
let line = parseUnsignedInteger buf len 10 octDecDigit
liftP $ do
setSrcLoc (mkRealSrcLoc (srcSpanFile span) (fromIntegral line - 1) 1)
-- subtract one: the line number refers to the *following* line
-- trace ("setLine " ++ show line) $ do
popLexState >> pushLexState code
lexToken
setFile :: Int -> Action
setFile code span buf len = do
let file = lexemeToFastString (stepOn buf) (len-2)
liftP $ do
setSrcLoc (mkRealSrcLoc file (srcSpanEndLine span) (srcSpanEndCol span))
popLexState >> pushLexState code
lexToken
-- -----------------------------------------------------------------------------
-- This is the top-level function: called from the parser each time a
-- new token is to be read from the input.
cmmlex :: (Located CmmToken -> PD a) -> PD a
cmmlex cont = do
(L span tok) <- lexToken
--trace ("token: " ++ show tok) $ do
cont (L (RealSrcSpan span) tok)
lexToken :: PD (RealLocated CmmToken)
lexToken = do
inp@(loc1,buf) <- getInput
sc <- liftP getLexState
case alexScan inp sc of
AlexEOF -> do let span = mkRealSrcSpan loc1 loc1
liftP (setLastToken span 0)
return (L span CmmT_EOF)
AlexError (loc2,_) -> liftP $ failLocMsgP loc1 loc2 "lexical error"
AlexSkip inp2 _ -> do
setInput inp2
lexToken
AlexToken inp2@(end,_buf2) len t -> do
setInput inp2
let span = mkRealSrcSpan loc1 end
span `seq` liftP (setLastToken span len)
t span buf len
-- -----------------------------------------------------------------------------
-- Monad stuff
-- Stuff that Alex needs to know about our input type:
type AlexInput = (RealSrcLoc,StringBuffer)
alexInputPrevChar :: AlexInput -> Char
alexInputPrevChar (_,s) = prevChar s '\n'
-- backwards compatibility for Alex 2.x
alexGetChar :: AlexInput -> Maybe (Char,AlexInput)
alexGetChar inp = case alexGetByte inp of
Nothing -> Nothing
Just (b,i) -> c `seq` Just (c,i)
where c = chr $ fromIntegral b
alexGetByte :: AlexInput -> Maybe (Word8,AlexInput)
alexGetByte (loc,s)
| atEnd s = Nothing
| otherwise = b `seq` loc' `seq` s' `seq` Just (b, (loc', s'))
where c = currentChar s
b = fromIntegral $ ord $ c
loc' = advanceSrcLoc loc c
s' = stepOn s
getInput :: PD AlexInput
getInput = PD $ \_ s@PState{ loc=l, buffer=b } -> POk s (l,b)
setInput :: AlexInput -> PD ()
setInput (l,b) = PD $ \_ s -> POk s{ loc=l, buffer=b } ()
}
|