From 402fc46c2a8c8f00c3d5d270ee001ce22776dd41 Mon Sep 17 00:00:00 2001
From: Adrian Thurston
Date: Fri, 13 Dec 2019 04:51:45 +0200
Subject: go grammar: added lexical definitions and semi insertion

Ooof this makes a big output file. May need to do something about it.

refs #82
---
 grammar/go/Makefile       |   5 +-
 grammar/go/go.lm          | 177 +++++++++++++++++++++++++++++++++++++++++++++-
 grammar/go/insert-semi.lm | 111 -----------------------------
 grammar/go/parsego.lm     |  10 +++
 4 files changed, 185 insertions(+), 118 deletions(-)
 delete mode 100644 grammar/go/insert-semi.lm

(limited to 'grammar')

diff --git a/grammar/go/Makefile b/grammar/go/Makefile
index f4a39bad..8f5d8447 100644
--- a/grammar/go/Makefile
+++ b/grammar/go/Makefile
@@ -1,14 +1,11 @@
 COLM = ../../colm/colm
 RAGEL = ../../ragel/ragel
 
-all: go insert-semi
+all: go
 
 go: go.lm parsego.lm utf8.lm $(COLM)
 	$(COLM) -o go parsego.lm
 
-insert-semi: insert-semi.lm
-	$(COLM) $<
-
 utf8.lm: Makefile utf8pat.rb UnicodeData.txt
 	echo 'rl unicode_letter /' >utf8.lm
 	ruby utf8pat.rb 'L[ultmo]' 1 UnicodeData.txt >> utf8.lm
diff --git a/grammar/go/go.lm b/grammar/go/go.lm
index 0dec2329..0ab8f7fd 100644
--- a/grammar/go/go.lm
+++ b/grammar/go/go.lm
@@ -3,6 +3,9 @@ include 'utf8.lm'
 token BOM / 0xEF 0xBB 0xBF /
 
 lex
+	#
+	# Definitions.
+	#
 	rl newline / 0x0A /
 
 	rl valid_utf8 /
@@ -12,12 +15,180 @@ lex
 		0xF0 .. 0xF7 any any any
 	/
 
-	rl unicode_char / valid_utf8 - 0x0A /
+	rl unicode_char  / valid_utf8 - 0x0A /
 
-	token id / unicode_letter unicode_letter* /
+	rl letter        / unicode_letter | '_' /
+	rl binary_digit  / '0' | '1' /
+	rl octal_digit   / '0' .. '7' /
+	rl decimal_digit / '0' .. '9' /
+	rl hex_digit     / '0' .. '9' | 'A' .. 'F' | 'a' .. 'f' /
 
+	#
+	# Tokens
+	#
+
+	literal
+		`+ `& `+= `&= `&& `== `!= `( `)
+		`- `| `-= `|= `|| `< `<= `[ `]
+		`* `^ `*= `^= `<- `> `>= `{ `}
+		`/ `<< `/= `<<= `++ `= `:= `, #;
+		`% `>> `%= `>>= `-- `! `... `. `:
+		`&^ `&^=
+
+	token SEMI /';'/
+
+	literal
+		`break `default `func `interface `select
+		`case `defer `go `map `struct
+		`chan `else `goto `package `switch
+		`const `fallthrough `if `range `type
+		`continue `for `import `return `var
+
+	token id
+		/ letter ( letter | unicode_digit )* /
+
+	#
+	# Non-float numbers
+	#
+
+	rl binary_digits / binary_digit ( '_'? binary_digit )* /
+	rl octal_digits / octal_digit ( '_'? octal_digit )* /
+	rl decimal_digits / decimal_digit ( '_'? decimal_digit )* /
+	rl hex_digits / hex_digit ( '_'? hex_digit )* /
+
+	token binary_lit / '0' ( 'b' | 'B' ) '_'? binary_digits /
+	token octal_lit / '0' ( 'o' | 'O' )? '_'? octal_digits /
+	token decimal_lit / '0' | ( '1' .. '9' ) ( '_'? decimal_digits )? /
+	token hex_lit / '0' ( 'x' | 'X' ) '_'? hex_digits /
+
+	rl int_lit
+		/ decimal_lit | binary_lit | octal_lit | hex_lit /
+
+	def int_lit
+		[decimal_lit] | [binary_lit] | [octal_lit] | [hex_lit]
+
+	rl decimal_exponent / ( 'e' | 'E' ) ( '+' | '-' )? decimal_digits /
+
+	#
+	# Floats
+	#
+	token decimal_float_lit /
+		decimal_digits '.' decimal_digits? decimal_exponent? |
+		decimal_digits decimal_exponent |
+		'.' decimal_digits decimal_exponent? /
+
+	rl hex_mantissa
+		/ '_'? hex_digits '.' hex_digits? | '_'? hex_digits | '.' hex_digits /
+	rl hex_exponent
+		/ ( 'p' | 'P' ) ( '+' | '-' )? decimal_digits /
+
+	token hex_float_lit
+		/ '0' ( 'x' | 'X' ) hex_mantissa hex_exponent /
+
+	rl float_lit
+		/ decimal_float_lit | hex_float_lit /
+
+	def float_lit
+		[decimal_float_lit] | [hex_float_lit]
+
+	#
+	# Imaginary
+	#
+	token imaginary_lit /
+		( decimal_digits | int_lit | float_lit ) 'i' /
+
+	#
+	# Rune literals
+	#
+
+
+	rl escaped_char
+		/ '\\' ( 'a' | 'b' | 'f' | 'n' | 'r' | 't' | 'v' | '\\' | "'" | '"' ) /
+
+	rl octal_byte_value / '\\' octal_digit octal_digit octal_digit /
+	rl hex_byte_value / '\\' 'x' hex_digit hex_digit /
+	rl little_u_value / '\\' 'u' hex_digit hex_digit hex_digit hex_digit /
+	rl big_u_value / '\\' 'U' hex_digit hex_digit hex_digit hex_digit
+		hex_digit hex_digit hex_digit hex_digit /
+
+	rl byte_value / octal_byte_value | hex_byte_value /
+	rl unicode_value / unicode_char | little_u_value | big_u_value | escaped_char /
+
+	token rune_lit / "'" ( unicode_value | byte_value ) "'" /
+
+	#
+	# String literals
+	#
+	rl raw_string_lit / "`" ( ( unicode_char | newline ) - '`' )* "`" /
+	rl interpreted_string_lit / '"' ( ( unicode_value | byte_value ) - '"' )* '"' /
+	token string_lit / raw_string_lit | interpreted_string_lit /
+
+	#
+	# Comments
+	#
+	rl line_comment
+		/ '//' [^\n]* '\n' /
+
+	rl general_comment
+		/ '/*' any* :> '*/'/
+
+	rl pre_insert_semi /
+		( id -
+			'break' - 'default' - 'func' - 'interface' - 'select' -
+			'case' - 'defer' - 'go' - 'map' - 'struct' -
+			'chan' - 'else' - 'goto' - 'package' - 'switch' -
+			'const' - 'fallthrough' - 'if' - 'range' - 'type' -
+			'continue' - 'for' - 'import' - 'return' - 'var' ) |
+		int_lit |
+		float_lit |
+		imaginary_lit |
+		rune_lit |
+		string_lit
+	/
+
+	# Semi-colons. pre_insert_semi above is an id minus every keyword, or a
+	# literal; each keyword needs its own '-' (juxtaposed strings concatenate).
+	# insert_semi: pull that token's text, push a SEMI, then push the text back.
+	token insert_semi /
+		pre_insert_semi
+		( [ \t]+ | general_comment )*
+		( '/*' [^\n]* | line_comment | '\n' )
+	/
+	{
+		parse BA: break_apart::break_apart[match_text]
+
+		Prefix: str = input->pull( BA.pre_semi.data.length )
+		input->push( make_token( typeid<SEMI>, ';' ) )
+		input->push( Prefix )
+	}
+
+	ignore /line_comment/
+	ignore /general_comment/
 	ignore /[ \t\n\r]+/
 end
 
+namespace break_apart
+	lex
+		token pre_semi / pre_insert_semi /
+		ignore /line_comment/
+		ignore /'/*' any*/
+		ignore /[ \t\n\r]+/
+	end
+
+	def break_apart
+		[pre_semi]
+end
+
+def item
+	[id]
+|	[int_lit]
+|	[float_lit]
+|	[imaginary_lit]
+|	[rune_lit]
+|	[string_lit]
+
+def stmt
+	[item+ SEMI]
+
 def program
-	[BOM? id*]
+	[BOM? stmt*]
diff --git a/grammar/go/insert-semi.lm b/grammar/go/insert-semi.lm
deleted file mode 100644
index 3271216a..00000000
--- a/grammar/go/insert-semi.lm
+++ /dev/null
@@ -1,111 +0,0 @@
-lex
-	literal `var `if `then `else `while `do `for `read `write
-		`end `to `goto
-
-	literal `:= `!= `+ `- `* `/ `= `( `) `: `;
-
-	ignore /'//' [^\n]* '\n'/
-	ignore /[\n\t ]+/
-
-	token id /[a-zA-Z_]+/
-	token integer /[0-9]+/
-	token stringlit /'"' [^"]* '"'/
-
-	token insert_semi /
-		( ( id -
-			'var' - 'if' - 'then' - 'else' - 'while' -
-			'do' - 'for' - 'read' - 'write' - 'end' -
-			'to' - 'goto' ) |
-		integer | stringlit | ')' ) '\n' /
-	{
-		Prefix: str = input->pull( match_length - 1 )
-		input->push( ";" )
-		input->push( Prefix )
-	}
-
-end
-
-def program
-	[statement*]
-
-def statement
-	[declaration]
-|	[assignment_statement]
-|	[if_statement]
-|	[while_statement]
-|	[do_statement]
-|	[for_statement]
-|	[read_statement]
-|	[write_statement]
-|	[labelled_statement]
-|	[goto_statement]
-
-def declaration
-	[`var id `;]
-
-def assignment_statement
-	[id `:= expression `;]
-
-def if_statement
-	[`if expression `then statement* opt_else_statement `end]
-
-def opt_else_statement
-	[`else statement*]
-|	[]
-
-def while_statement
-	[`while expression `do statement* `end]
-
-def do_statement
-	[`do statement* `while expression `;]
-
-def for_statement
-	[`for id `:= expression `to expression `do statement* `end]
-
-def read_statement
-	[`read id `;]
-
-def write_statement
-	[`write expression `;]
-
-def expression
-	[term]
-|	[expression eqop term]
-
-def eqop [`=] | [`!=]
-
-def term
-	[factor]
-|	[term addop factor]
-
-def addop [`+] | [`-]
-
-def factor
-	[primary]
-|	[factor mulop primary]
-
-def mulop [`*] | [`/]
-
-def primary
-	[id]
-|	[lit]
-|	[`( expression `)]
-
-def lit
-	[integer]
-|	[stringlit]
-
-def labelled_statement
-	[id `: statement]
-
-def goto_statement
-	[`goto id `;]
-
-parse P: program[stdin]
-
-if P
-	print[ P ]
-else {
-	print "[error]
-	exit( 1 )
-}
diff --git a/grammar/go/parsego.lm b/grammar/go/parsego.lm
index 659fcb0e..31eda462 100644
--- a/grammar/go/parsego.lm
+++ b/grammar/go/parsego.lm
@@ -8,4 +8,14 @@ if !P {
 }
 else {
 	print [P]
+
+	for IL: int_lit in P {
+		print "int_lit: [^IL]-
+	}
+	for RL: rune_lit in P {
+		print "rune_lit: [^RL]-
+	}
+	for SL: string_lit in P {
+		print "string_lit: [^SL]-
+	}
 }
-- 
cgit v1.2.1