summary | refs | log | tree | commit | diff
path: root/grammar
diff options
context:
space:
mode:
author Adrian Thurston <thurston@colm.net> 2019-12-13 04:51:45 +0200
committer Adrian Thurston <thurston@colm.net> 2019-12-13 04:51:45 +0200
commit 402fc46c2a8c8f00c3d5d270ee001ce22776dd41 (patch)
tree 77de29c63c6ab3f74bd87c0eff22c3dc3995ad0f /grammar
parent 38d2a988b816471e2cf2f414773470e28b39c93c (diff)
download colm-402fc46c2a8c8f00c3d5d270ee001ce22776dd41.tar.gz
go grammar: added lexical definitions and semi insertion
Ooof this makes a big output file. May need to do something about it. refs #82
Diffstat (limited to 'grammar')
-rw-r--r-- grammar/go/Makefile 5
-rw-r--r-- grammar/go/go.lm 177
-rw-r--r-- grammar/go/insert-semi.lm 111
-rw-r--r-- grammar/go/parsego.lm 10
4 files changed, 185 insertions, 118 deletions
diff --git a/grammar/go/Makefile b/grammar/go/Makefile
index f4a39bad..8f5d8447 100644
--- a/grammar/go/Makefile
+++ b/grammar/go/Makefile
@@ -1,14 +1,11 @@
COLM = ../../colm/colm
RAGEL = ../../ragel/ragel
-all: go insert-semi
+all: go
go: go.lm parsego.lm utf8.lm $(COLM)
$(COLM) -o go parsego.lm
-insert-semi: insert-semi.lm
- $(COLM) $<
-
utf8.lm: Makefile utf8pat.rb UnicodeData.txt
echo 'rl unicode_letter /' >utf8.lm
ruby utf8pat.rb 'L[ultmo]' 1 UnicodeData.txt >> utf8.lm
diff --git a/grammar/go/go.lm b/grammar/go/go.lm
index 0dec2329..0ab8f7fd 100644
--- a/grammar/go/go.lm
+++ b/grammar/go/go.lm
@@ -3,6 +3,9 @@ include 'utf8.lm'
token BOM / 0xEF 0xBB 0xBF /
lex
+ #
+ # Definitions.
+ #
rl newline / 0x0A /
rl valid_utf8 /
@@ -12,12 +15,180 @@ lex
0xF0 .. 0xF7 any any any
/
- rl unicode_char / valid_utf8 - 0x0A /
+ rl unicode_char / valid_utf8 - 0x0A /
- token id / unicode_letter unicode_letter* /
+ rl letter / unicode_letter | '_' /
+ rl binary_digit / '0' | '1' /
+ rl octal_digit / '0' .. '7' /
+ rl decimal_digit / '0' .. '9' /
+ rl hex_digit / '0' .. '9' | 'A' .. 'F' | 'a' .. 'f' /
+ #
+ # Tokens
+ #
+
+ literal
+ `+ `& `+= `&= `&& `== `!= `( `)
+ `- `| `-= `|= `|| `< `<= `[ `]
+ `* `^ `*= `^= `<- `> `>= `{ `}
+ `/ `<< `/= `<<= `++ `= `:= `, #;
+ `% `>> `%= `>>= `-- `! `... `. `:
+ `&^ `&^=
+
+ token SEMI /';'/
+
+ literal
+ `break `default `func `interface `select
+ `case `defer `go `map `struct
+ `chan `else `goto `package `switch
+ `const `fallthrough `if `range `type
+ `continue `for `import `return `var
+
+ token id
+ / letter ( letter | unicode_digit )* /
+
+ #
+ # Non-float numbers
+ #
+
+ rl binary_digits / binary_digit ( '_'? binary_digit )* /
+ rl octal_digits / octal_digit ( '_'? octal_digit )* /
+ rl decimal_digits / decimal_digit ( '_'? decimal_digit )* /
+ rl hex_digits / hex_digit ( '_'? hex_digit )* /
+
+ token binary_lit / '0' ( 'b' | 'B' ) '_'? binary_digits /
+ token octal_lit / '0' ( 'o' | 'O' )? '_'? octal_digits /
+ token decimal_lit / '0' | ( '1' .. '9' ) ( '_'? decimal_digits )? /
+ token hex_lit / '0' ( 'x' | 'X' ) '_'? hex_digits /
+
+ rl int_lit
+ / decimal_lit | binary_lit | octal_lit | hex_lit /
+
+ def int_lit
+ [decimal_lit] | [binary_lit] | [octal_lit] | [hex_lit]
+
+ rl decimal_exponent / ( 'e' | 'E' ) ( '+' | '-' )? decimal_digits /
+
+ #
+ # Floats
+ #
+ token decimal_float_lit /
+ decimal_digits '.' decimal_digits? decimal_exponent? |
+ decimal_digits decimal_exponent |
+ '.' decimal_digits decimal_exponent? /
+
+ rl hex_mantissa
+ / '_'? hex_digits '.' hex_digits? | '_'? hex_digits | '.' hex_digits /
+ rl hex_exponent
+ / ( 'p' | 'P' ) ( '+' | '-' )? decimal_digits /
+
+ token hex_float_lit
+ / '0' ( 'x' | 'X' ) hex_mantissa hex_exponent /
+
+ rl float_lit
+ / decimal_float_lit | hex_float_lit /
+
+ def float_lit
+ [decimal_float_lit] | [hex_float_lit]
+
+ #
+ # Imaginary
+ #
+ token imaginary_lit /
+ ( decimal_digits | int_lit | float_lit ) 'i' /
+
+ #
+ # Rune literals
+ #
+
+
+ rl escaped_char
+ / '\\' ( 'a' | 'b' | 'f' | 'n' | 'r' | 't' | 'v' | '\\' | "'" | '"' ) /
+
+ rl octal_byte_value / '\\' octal_digit octal_digit octal_digit /
+ rl hex_byte_value / '\\' 'x' hex_digit hex_digit /
+ rl little_u_value / '\\' 'u' hex_digit hex_digit hex_digit hex_digit /
+ rl big_u_value / '\\' 'U' hex_digit hex_digit hex_digit hex_digit
+ hex_digit hex_digit hex_digit hex_digit /
+
+ rl byte_value / octal_byte_value | hex_byte_value /
+ rl unicode_value / unicode_char | little_u_value | big_u_value | escaped_char /
+
+ token rune_lit / "'" ( unicode_value | byte_value ) "'" /
+
+ #
+ # String literals
+ #
+ rl raw_string_lit / "`" ( ( unicode_char | newline ) - '`' )* "`" /
+ rl interpreted_string_lit / '"' ( ( unicode_value | byte_value ) - '"' )* '"' /
+ token string_lit / raw_string_lit | interpreted_string_lit /
+
+ #
+ # Comments
+ #
+ rl line_comment
+ / '//' [^\n]* '\n' /
+ # Block comment. Finish-guarded (:>>) so a lone '*' in the body does not abandon any* before the full '*/' is seen.
+ rl general_comment
+ / '/*' any* :>> '*/'/
+ # What may precede an inserted SEMI: an id that is not one of the 25 keywords, or any literal. Every keyword needs its own '-'.
+ rl pre_insert_semi /
+ ( id -
+ 'break' - 'default' - 'func' - 'interface' - 'select' -
+ 'case' - 'defer' - 'go' - 'map' - 'struct' -
+ 'chan' - 'else' - 'goto' - 'package' - 'switch' -
+ 'const' - 'fallthrough' - 'if' - 'range' - 'type' -
+ 'continue' - 'for' - 'import' - 'return' - 'var' ) |
+ int_lit |
+ float_lit |
+ imaginary_lit |
+ rune_lit |
+ string_lit
+ /
+
+ #
+ # Semi-colons
+ #
+ token insert_semi /
+ pre_insert_semi
+ ( [ \t]+ | general_comment )*
+ ( '/*' [^\n]* | line_comment | '\n' )
+ /
+ {
+ parse BA: break_apart::break_apart[match_text]
+
+ Prefix: str = input->pull( BA.pre_semi.data.length )
+ input->push( make_token( typeid<SEMI>, ';' ) )
+ input->push( Prefix )
+ }
+
+ ignore /line_comment/
+ ignore /general_comment/
ignore /[ \t\n\r]+/
end
+namespace break_apart
+ lex
+ token pre_semi / pre_insert_semi /
+ ignore /line_comment/
+ ignore /'/*' any*/
+ ignore /[ \t\n\r]+/
+ end
+
+ def break_apart
+ [pre_semi]
+end
+
+def item
+ [id]
+| [int_lit]
+| [float_lit]
+| [imaginary_lit]
+| [rune_lit]
+| [string_lit]
+
+def stmt
+ [item+ SEMI]
+
def program
- [BOM? id*]
+ [BOM? stmt*]
diff --git a/grammar/go/insert-semi.lm b/grammar/go/insert-semi.lm
deleted file mode 100644
index 3271216a..00000000
--- a/grammar/go/insert-semi.lm
+++ /dev/null
@@ -1,111 +0,0 @@
-lex
- literal `var `if `then `else `while `do `for `read `write
- `end `to `goto
-
- literal `:= `!= `+ `- `* `/ `= `( `) `: `;
-
- ignore /'//' [^\n]* '\n'/
- ignore /[\n\t ]+/
-
- token id /[a-zA-Z_]+/
- token integer /[0-9]+/
- token stringlit /'"' [^"]* '"'/
-
- token insert_semi /
- ( ( id -
- 'var' - 'if' - 'then' - 'else' - 'while' -
- 'do' - 'for' - 'read' - 'write' - 'end' -
- 'to' - 'goto' ) |
- integer | stringlit | ')' ) '\n' /
- {
- Prefix: str = input->pull( match_length - 1 )
- input->push( ";" )
- input->push( Prefix )
- }
-
-end
-
-def program
- [statement*]
-
-def statement
- [declaration]
-| [assignment_statement]
-| [if_statement]
-| [while_statement]
-| [do_statement]
-| [for_statement]
-| [read_statement]
-| [write_statement]
-| [labelled_statement]
-| [goto_statement]
-
-def declaration
- [`var id `;]
-
-def assignment_statement
- [id `:= expression `;]
-
-def if_statement
- [`if expression `then statement* opt_else_statement `end]
-
-def opt_else_statement
- [`else statement*]
-| []
-
-def while_statement
- [`while expression `do statement* `end]
-
-def do_statement
- [`do statement* `while expression `;]
-
-def for_statement
- [`for id `:= expression `to expression `do statement* `end]
-
-def read_statement
- [`read id `;]
-
-def write_statement
- [`write expression `;]
-
-def expression
- [term]
-| [expression eqop term]
-
-def eqop [`=] | [`!=]
-
-def term
- [factor]
-| [term addop factor]
-
-def addop [`+] | [`-]
-
-def factor
- [primary]
-| [factor mulop primary]
-
-def mulop [`*] | [`/]
-
-def primary
- [id]
-| [lit]
-| [`( expression `)]
-
-def lit
- [integer]
-| [stringlit]
-
-def labelled_statement
- [id `: statement]
-
-def goto_statement
- [`goto id `;]
-
-parse P: program[stdin]
-
-if P
- print[ P ]
-else {
- print "[error]
- exit( 1 )
-}
diff --git a/grammar/go/parsego.lm b/grammar/go/parsego.lm
index 659fcb0e..31eda462 100644
--- a/grammar/go/parsego.lm
+++ b/grammar/go/parsego.lm
@@ -8,4 +8,14 @@ if !P {
}
else {
print [P]
+
+ for IL: int_lit in P {
+ print "int_lit: [^IL]-
+ }
+ for RL: rune_lit in P {
+ print "rune_lit: [^RL]-
+ }
+ for SL: string_lit in P {
+ print "string_lit: [^SL]-
+ }
}