1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
|
{-# LANGUAGE BinaryLiterals #-}
module ParserM (
-- Parser Monad
ParserM(..), AlexInput(..), run_parser,
-- Parser state
St,
StartCode, start_code, set_start_code,
inc_brace_depth, dec_brace_depth,
-- Tokens
Token(..),
-- Actions
Action, andBegin, mkT, mkTv,
-- Positions
get_pos, show_pos,
-- Input
alexGetByte, alexInputPrevChar,
-- Other
happyError
) where
import Control.Applicative
import Prelude hiding (fail)
import Control.Monad.Fail (MonadFail (..))
import Control.Monad (ap, liftM)
import Data.Bits ((.&.), (.|.), shiftR)
import Data.Char (ord)
import Data.Word (Word8)
-- Parser Monad
newtype ParserM a = ParserM (AlexInput -> St -> Either String (AlexInput, St, a))
instance Functor ParserM where
fmap = liftM
instance Applicative ParserM where
pure a = ParserM $ \i s -> Right (i, s, a)
(<*>) = ap
instance Monad ParserM where
ParserM m >>= k = ParserM $ \i s -> case m i s of
Right (i', s', x) ->
case k x of
ParserM y -> y i' s'
Left err ->
Left err
instance MonadFail ParserM where
fail err = ParserM $ \_ _ -> Left err
run_parser :: ParserM a -> (String -> Either String a)
run_parser (ParserM f)
= \s -> case f (AlexInput init_pos [] s) init_state of
Left es -> Left es
Right (_, _, x) -> Right x
-- Parser state
data St = St {
start_code :: !StartCode,
brace_depth :: !Int
}
deriving Show
type StartCode = Int
init_state :: St
init_state = St {
start_code = 0,
brace_depth = 0
}
-- Tokens
data Token = TEOF
| TArrow
| TDArrow
| TEquals
| TComma
| TOpenParen
| TCloseParen
| TOpenParenHash
| THashCloseParen
| TOpenBrace
| TCloseBrace
| TOpenBracket
| TCloseBracket
| TOpenAngle
| TCloseAngle
| TSection
| TPrimop
| TPseudoop
| TPrimtype
| TWith
| TDefaults
| TTrue
| TFalse
| TCompare
| TGenPrimOp
| TThatsAllFolks
| TLowerName String
| TUpperName String
| TString String
| TNoBraces String
| TInteger Int
| TFixity
| TInfixN
| TInfixL
| TInfixR
| TNothing
| TVector
| TSCALAR
| TVECTOR
| TVECTUPLE
deriving Show
-- Actions
type Action = String -> ParserM Token
set_start_code :: StartCode -> ParserM ()
set_start_code sc = ParserM $ \i st -> Right (i, st { start_code = sc }, ())
inc_brace_depth :: ParserM ()
inc_brace_depth = ParserM $ \i st ->
Right (i, st { brace_depth = brace_depth st + 1 }, ())
dec_brace_depth :: ParserM ()
dec_brace_depth = ParserM $ \i st ->
let bd = brace_depth st - 1
sc = if bd == 0 then 0 else 1
in Right (i, st { brace_depth = bd, start_code = sc }, ())
andBegin :: Action -> StartCode -> Action
(act `andBegin` sc) x = do set_start_code sc
act x
mkT :: Token -> Action
mkT t = mkTv (const t)
mkTv :: (String -> Token) -> Action
mkTv f str = ParserM (\i st -> Right (i, st, f str))
-- Positions
data Pos = Pos !Int{- Line -} !Int{- Column -}
get_pos :: ParserM Pos
get_pos = ParserM $ \i@(AlexInput p _ _) st -> Right (i, st, p)
alexMove :: Pos -> Char -> Pos
alexMove (Pos l _) '\n' = Pos (l+1) 1
alexMove (Pos l c) '\t' = Pos l ((c+8) `div` 8 * 8)
alexMove (Pos l c) _ = Pos l (c+1)
init_pos :: Pos
init_pos = Pos 1 0
show_pos :: Pos -> String
show_pos (Pos l c) = "line " ++ show l ++ ", column " ++ show c
-- Input
data AlexInput = AlexInput
{ position :: !Pos
, char_bytes :: [Word8]
, input :: String
}
alexGetByte :: AlexInput -> Maybe (Word8, AlexInput)
alexGetByte (AlexInput p (w:ws) cs)
= Just (w, AlexInput p ws cs)
alexGetByte (AlexInput p [] (c:cs))
= alexGetByte (AlexInput (alexMove p c) (utf8_encode c) cs)
alexGetByte (AlexInput _ [] [])
= Nothing
-- annoyingly, this doesn't seem to exist anywhere else as a standalone function
utf8_encode :: Char -> [Word8]
utf8_encode c = case ord c of
n | n < 0x80 -> [ fromIntegral n ]
| n < 0x800 -> [ fromIntegral $ 0b11000000 .|. (n `shiftR` 6)
, fromIntegral $ 0b10000000 .|. (n .&. 0b111111) ]
| n < 0x10000 -> [ fromIntegral $ 0b11100000 .|. (n `shiftR` 12)
, fromIntegral $ 0b10000000 .|. ((n `shiftR` 6) .&. 0b111111)
, fromIntegral $ 0b10000000 .|. (n .&. 0b111111) ]
| otherwise -> [ fromIntegral $ 0b11110000 .|. (n `shiftR` 18)
, fromIntegral $ 0b10000000 .|. ((n `shiftR` 12) .&. 0b111111)
, fromIntegral $ 0b10000000 .|. ((n `shiftR` 6) .&. 0b111111)
, fromIntegral $ 0b10000000 .|. (n .&. 0b111111) ]
alexInputPrevChar :: AlexInput -> Char
alexInputPrevChar _ = error "Lexer doesn't implement alexInputPrevChar"
happyError :: ParserM a
happyError = do p <- get_pos
fail $ "Parse error at " ++ show_pos p
|