diff options
Diffstat (limited to 'test/trans.d/case/clang4_ruby.rl')
-rw-r--r-- | test/trans.d/case/clang4_ruby.rl | 184 |
1 files changed, 184 insertions, 0 deletions
diff --git a/test/trans.d/case/clang4_ruby.rl b/test/trans.d/case/clang4_ruby.rl new file mode 100644 index 00000000..383d6d34 --- /dev/null +++ b/test/trans.d/case/clang4_ruby.rl @@ -0,0 +1,184 @@ +# +# @LANG: ruby +# @GENERATED: true +# + + +%%{ + machine clang; + + # Function to buffer a character. + action bufChar { buffer[blen] = fc; + blen += 1; +} + + # Function to clear the buffer. + action clearBuf { blen = 0; +} + + action incLine {line = line + 1; +} + + # Functions to dump tokens as they are matched. + action ident {print( "ident(" ); +print( line ); +print( "," ); +print( blen ); +print( "): " ); +print( buffer[0..blen-1].pack( "c*" ) ); +print( "\n" ); +} + action literal {print( "literal(" ); +print( line ); +print( "," ); +print( blen ); +print( "): " ); +print( buffer[0..blen-1].pack( "c*" ) ); +print( "\n" ); +} + action float {print( "float(" ); +print( line ); +print( "," ); +print( blen ); +print( "): " ); +print( buffer[0..blen-1].pack( "c*" ) ); +print( "\n" ); +} + action integer {print( "int(" ); +print( line ); +print( "," ); +print( blen ); +print( "): " ); +print( buffer[0..blen-1].pack( "c*" ) ); +print( "\n" ); +} + action hex {print( "hex(" ); +print( line ); +print( "," ); +print( blen ); +print( "): " ); +print( buffer[0..blen-1].pack( "c*" ) ); +print( "\n" ); +} + action symbol {print( "symbol(" ); +print( line ); +print( "," ); +print( blen ); +print( "): " ); +print( buffer[0..blen-1].pack( "c*" ) ); +print( "\n" ); +} + + # Alpha numberic characters or underscore. + alnumu = alnum | '_'; + + # Alpha charactres or underscore. + alphau = alpha | '_'; + + # Symbols. Upon entering clear the buffer. On all transitions + # buffer a character. Upon leaving dump the symbol. + symbol = ( punct - [_'"] ) >clearBuf $bufChar %symbol; + + # Identifier. Upon entering clear the buffer. On all transitions + # buffer a character. Upon leaving, dump the identifier. + ident = (alphau . alnumu*) >clearBuf $bufChar %ident; + + # Match single characters inside literal strings. Or match + # an escape sequence. Buffers the charater matched. + sliteralChar = + ( extend - ['\\] ) @bufChar | + ( '\\' . extend @bufChar ); + dliteralChar = + ( extend - ["\\] ) @bufChar | + ( '\\' . extend @bufChar ); + + # Single quote and double quota literals. At the start clear + # the buffer. Upon leaving dump the literal. + sliteral = ('\'' @clearBuf . sliteralChar* . '\'' ) %literal; + dliteral = ('"' @clearBuf . dliteralChar* . '"' ) %literal; + literal = sliteral | dliteral; + + # Whitespace is standard ws, newlines and control codes. + whitespace = any - 33 .. 126; + + # Describe both c style comments and c++ style comments. The + # priority bump on tne terminator of the comments brings us + # out of the extend* which matches everything. + ccComment = '//' . extend* $0 . '\n' @1; + cComment = '/!' . extend* $0 . '!/' @1; + + # Match an integer. We don't bother clearing the buf or filling it. + # The float machine overlaps with int and it will do it. + integer = digit+ %integer; + + # Match a float. Upon entering the machine clear the buf, buffer + # characters on every trans and dump the float upon leaving. + float = ( digit+ . '.' . digit+ ) >clearBuf $bufChar %float; + + # Match a hex. Upon entering the hex part, clear the buf, buffer characters + # on every trans and dump the hex on leaving transitions. + hex = '0x' . xdigit+ >clearBuf $bufChar %hex; + + # Or together all the lanuage elements. + fin = ( ccComment | + cComment | + symbol | + ident | + literal | + whitespace | + integer | + float | + hex ); + + # Star the language elements. It is critical in this type of application + # that we decrease the priority of out transitions before doing so. This + # is so that when we see 'aa' we stay in the fin machine to match an ident + # of length two and not wrap around to the front to match two idents of + # length one. + clang_main = ( fin $1 %0 )*; + + # This machine matches everything, taking note of newlines. + newline = ( any | '\n' @incLine )*; + + # The final fsm is the lexer intersected with the newline machine which + # will count lines for us. Since the newline machine accepts everything, + # the strings accepted is goverened by the clang_main machine, onto which + # the newline machine overlays line counting. + main := clang_main & newline; +}%% + + + +%% write data; + +def run_machine( data ) + p = 0 + pe = data.length + eof = data.length + cs = 0; + _m = + _a = + buffer = Array.new + blen = 0 +pos = 1 +line = 1 +pos = 0; +line = 1; + %% write init; + %% write exec; + if cs >= clang_first_final + puts "ACCEPT" + else + puts "FAIL" + end +end + +inp = [ +"999 0xaAFF99 99.99 /!\n!/ 'lksdj' //\n\"\n\nliteral\n\n\n\"0x00aba foobardd.ddsf 0x0.9\n", +"wordwithnum00asdf\n000wordfollowsnum,makes new symbol\n\nfinishing early /! unfinished ...\n", +] + +inplen = 2 + +inp.each { |str| run_machine(str) } + |