blob: 00821d51407571f5559b3165e3a42a3f4c022a89 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
|
/* The grammar for lexer definitions */
%{
(* The semantic actions below use unqualified constructors and helpers
   (Lexdef, Location, Characters, all_chars, regexp_for_string, ...).
   Presumably they are provided by these two modules -- confirm against
   their sources, which are not part of this file. *)
open Syntax
open Gram_aux
%}
/* Tokens carrying a semantic value; the type between angle brackets is the
   OCaml type of that value. */
%token <string> Tident
%token <char> Tchar
%token <string> Tstring
%token <Syntax.location> Taction
/* Keyword and punctuation tokens without a semantic value. */
%token Trule Tparse Tand Tequal Tend Tor Tunderscore Teof Tlbracket Trbracket
%token Tstar Tmaybe Tplus Tlparen Trparen Tcaret Tdash
/* Precedence declarations, listed from lowest to highest precedence (yacc
   convention).  CONCAT is not declared with %token in this file; it is only
   referenced through %prec on the juxtaposition rules of regexp and
   char_class1. */
%left Tor
%left CONCAT
%nonassoc Tmaybe
%left Tstar
%left Tplus
/* Entry point of the generated parser and the type of the value it returns. */
%start lexer_definition
%type <Syntax.lexer_definition> lexer_definition
%%
/* Start symbol: an optional header, the Trule keyword, one definition,
   any number of Tand-separated further definitions, then Tend.
   other_definitions accumulates its list in reverse source order, so it is
   reversed here before the first definition is put in front. */
lexer_definition:
    header Trule definition other_definitions Tend
      { let later_definitions = List.rev $4 in
        Lexdef ($1, $3 :: later_definitions) }
  ;
/* Optional leading action of a lexer definition.
   With a Taction token, the value is that token's semantic value.  The OCaml
   comment embedded in that action is a deliberate stress test of ocamlyacc's
   comment handling: it contains a character literal and string literals that
   hold comment delimiters.  It contributes nothing to the value and must be
   kept exactly as written.
   With no header at all, the value is Location(0,0). */
header:
Taction
{ $1 (* '"' test that ocamlyacc can
handle comments correctly"*)" "(*" *) }
|
{ Location(0,0) }
;
/* Zero or more further definitions, each introduced by Tand.
   The rule is left-recursive and conses each new definition onto the list
   built so far, so the resulting list is in reverse source order.
   The empty alternative yields the empty list. */
other_definitions:
    other_definitions Tand definition
      { let seen_so_far = $1 in
        $3 :: seen_so_far }
  | /* empty */
      { [] }
  ;
/* A single definition: identifier, Tequal, then an entry.
   The value is the pair (identifier string, entry value). */
definition:
    Tident Tequal entry
      { let name = $1 and body = $3 in
        (name, body) }
  ;
/* The body of a definition: Tparse, a first case, then further cases.
   rest_of_entry accumulates its cases in reverse source order, so it is
   reversed before the first case is put in front. */
entry:
    Tparse case rest_of_entry
      { let first_case = $2
        and later_cases = List.rev $3 in
        first_case :: later_cases }
  ;
/* Zero or more further cases, each introduced by Tor.
   Left-recursive; each new case is consed onto the list built so far, so
   the resulting list is in reverse source order.
   The empty alternative yields the empty list. */
rest_of_entry:
    rest_of_entry Tor case
      { let seen_so_far = $1 in
        $3 :: seen_so_far }
  | /* empty */
      { [] }
  ;
/* One case of an entry: a regular expression followed by an action token.
   The value is the pair (regexp value, Taction value). */
case:
    regexp Taction
      { let pattern = $1 and action = $2 in
        (pattern, action) }
  ;
/* Regular expressions.
   Atoms: Tunderscore (the all_chars class), Teof (the class holding only
   the character '\000'), a character, a string, or a bracketed class.
   Postfix operators: Tstar, Tmaybe (alternative with Epsilon) and Tplus
   (the operand followed by its repetition).
   Binary forms: alternation with Tor and sequencing by juxtaposition; the
   juxtaposition rule has no token of its own and takes its precedence from
   CONCAT through %prec.
   Parentheses only group: the inner value is passed through unchanged. */
regexp:
    Tunderscore
      { Characters all_chars }
  | Teof
      { Characters ['\000'] }
  | Tchar
      { Characters [$1] }
  | Tstring
      { regexp_for_string $1 }
  | Tlbracket char_class Trbracket
      { Characters $2 }
  | regexp Tstar
      { Repetition $1 }
  | regexp Tmaybe
      { Alternative ($1, Epsilon) }
  | regexp Tplus
      { let operand = $1 in
        Sequence (operand, Repetition operand) }
  | regexp Tor regexp
      { Alternative ($1, $3) }
  | regexp regexp %prec CONCAT
      { Sequence ($1, $2) }
  | Tlparen regexp Trparen
      { $2 }
  ;
/* Contents of a bracketed character class.
   With a leading Tcaret the value is subtract all_chars applied to the
   listed characters (subtract is defined outside this file; presumably it
   is list difference -- confirm).  Without a caret the listed characters
   are returned as they are. */
char_class:
    Tcaret char_class1
      { let excluded = $2 in
        subtract all_chars excluded }
  | char_class1
      { $1 }
  ;
/* One or more character-class items, without any leading caret.
     Tchar Tdash Tchar   a range; the value comes from the char_class helper
                         function called in the action (an OCaml function
                         defined outside this file, distinct from the
                         char_class nonterminal of this grammar)
     Tchar               a single character, as a one-element list
     item item           juxtaposition; the two lists are appended

   The juxtaposition alternative recurses on char_class1 only.  It used to
   read "char_class char_class", which made char_class and char_class1
   mutually recursive: a caret could then appear in the middle of a class,
   and an input such as [^ a b] had two derivations, ^(a b) and (^a) b.
   That ambiguity shows up as a reduce/reduce conflict after
   "Tcaret char_class1"; with the usual yacc default (earlier rule wins) the
   complement would be applied to the first item only.  Recursing on
   char_class1 keeps the caret strictly at the front of the class, so the
   complement always covers every listed item. */
char_class1:
    Tchar Tdash Tchar
      { char_class $1 $3 }
  | Tchar
      { [$1] }
  | char_class1 char_class1 %prec CONCAT
      { $1 @ $2 }
  ;
%%
|