diff options
Diffstat (limited to 'test/ruby')
-rw-r--r-- | test/ruby/Makefile | 34 | ||||
-rw-r--r-- | test/ruby/ruby.lm | 627 |
2 files changed, 661 insertions, 0 deletions
diff --git a/test/ruby/Makefile b/test/ruby/Makefile new file mode 100644 index 00000000..22647b42 --- /dev/null +++ b/test/ruby/Makefile @@ -0,0 +1,34 @@ +# +# Copyright 2008 Adrian Thurston <thurston@cs.queensu.ca> +# + +# This file is part of Ragel. +# +# Ragel is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# Ragel is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with Ragel; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + +SRC = $(wildcard *.lm) +BIN = $(SRC:%.lm=%.bin) +COLM = ../../colm/colm + +all: $(BIN) + +$(BIN): $(COLM) + +$(BIN): %.bin: %.lm + $(COLM) $< + +clean: + rm -f *.cpp *.bin diff --git a/test/ruby/ruby.lm b/test/ruby/ruby.lm new file mode 100644 index 00000000..b9505515 --- /dev/null +++ b/test/ruby/ruby.lm @@ -0,0 +1,627 @@ +# +# Grammar +# + +# The items in this scanner may have newline in front of them. +lex start +{ + # Reserved Words. + literal '__LINE__', '__FILE__', '__ENCODING__', 'BEGIN', 'END', 'alias', + 'and', 'begin', 'break', 'case', 'class', 'def', 'defined?', 'do', + 'else', 'elsif', 'end', 'ensure', 'false', 'for', 'in', 'module', + 'next', 'nil', 'not', 'or', 'redo', 'rescue', 'retry', 'return', + 'self', 'super', 'then', 'true', 'undef', 'when', 'yield', 'if', + 'unless', 'while', 'until' + + token tNTH_REF /'$' [0-9]+/ + token tBACK_REF /'$' ( '&' | '`' | '\'' | '+' ) / + + literal ')', ',', ']' + literal '{', '}', ':' + literal '.', '::' + literal '->' + + # Unary operators. + literal '!', '~' + token tUPLUS /'+'/ + token tUMINUS /'-'/ + + token tLBRACK /'['/ + token tLPAREN /'('/ + token tSTAR /'*'/ + token tBAR /'|'/ + token tAMPER /'&'/ + + token tIDENTIFIER /[a-z][a-zA-Z_]*/ + token tFID /[a-z][a-zA-Z_]* ('!'|'?')/ + token tCONSTANT /[A-Z][a-zA-Z_]*/ + token tGVAR /'$' [a-zA-Z_]+/ + token tIVAR /'@' [a-zA-Z_]+/ + token tCVAR /'@@' [a-zA-Z_]+/ + + token tINTEGER /[0-9]+/ + token tFLOAT /[0-9]+ '.' [0-9]+/ + + token tDSTRING_BEG /'"'/ + token tSSTRING_BEG /'\''/ + token tXSTRING_BEG /'`'/ + + ignore /[ \t\n]+/ + ignore comment /'#' [^\n]* '\n'/ +} + +# These items cannot appear at the beginning of a line (except maybe the first). +lex expr_cont_ops +{ + ignore /[\t ]+/ + + literal '+', '-', '*', '**', '/', '%', '^' + literal '|', '&', '||', '&&' + literal '[', '(' + literal '=' + literal '<<', '>>' + literal '?' + literal '<=>' + literal '=>' + literal '[]', '[]=' + literal '=~', '!~' + literal '<', '>', '>=', '<=' + literal '!=', '==', '===' + literal '..', '...' +} +lex terms +{ + ignore /[\t ]+/ + ignore /'#' [^\n]*/ + literal ';' + literal '\n' +} + + +lex dstring_contents +{ + token dstring_contents /[^"]+/ + token tDSTRING_END /'"'/ +} + +lex sstring_contents +{ + token sstring_contents /[^']+/ + token tSSTRING_END /'\''/ +} + +lex xstring_contents +{ + token xstring_contents /[^`]+/ + token tXSTRING_END /'`'/ +} + +def ruby + [compstmt] + +def compstmt + [stmts opt_terms] + +def bodystmt + [compstmt opt_rescue opt_else opt_ensure] + +def opt_rescue +# ['rescue' exc_list exc_var then compstmt opt_rescue] | + [] + +def then + [term] +| ['then'] +| [term 'then'] + +def do + [term] +| ['do'] + +def if_tail + [opt_else] +| ['elsif' expr_value then compstmt if_tail] + +def opt_else + ['else' compstmt] +| [] + +def opt_ensure + ['ensure' compstmt] +| [] + +def stmts + [stmts terms stmt] +| [stmt] +| [] + +def opt_terms + [terms] +| [] + +def terms + [term] +| [terms ';'] + +def term + [';'] +| ['\n'] + +def stmt + ['alias' fitem fitem] +| ['undef' undef_list] +| [stmt 'if' expr_value] +| [stmt 'unless' expr_value] +| [stmt 'while' expr_value] +| [stmt 'until' expr_value] +| [stmt 'rescue' stmt] +| ['BEGIN' '{' compstmt '}'] +| ['END' '{' compstmt '}'] +| [lhs '=' mrhs] +| [mlhs '=' arg_value] +| [mlhs '=' mrhs] +| [expr] + +def mlhs + [mlhs_basic] +| [tLPAREN mlhs ')'] + +def mlhs_basic + [mlhs_head] + +def mlhs_head + [mlhs_item ',' mlhs_head] +| [mlhs_item] + +def mlhs_item + [variable] +| ['*' mlhs_item] +| ['*'] +| [primary_value '[' opt_call_args ']'] +| [primary_value '.' tIDENTIFIER] +| [primary_value '.' tCONSTANT] +| [primary_value '::' tIDENTIFIER] +| [primary_value '::' tCONSTANT] +| ['::' tCONSTANT] +| [backref] +| [tLPAREN mlhs ')'] + +def lhs + [variable] +| [primary_value '[' opt_call_args ']'] +| [primary_value '.' tIDENTIFIER] +| [primary_value '.' tCONSTANT] +| [primary_value '::' tIDENTIFIER] +| [primary_value '::' tCONSTANT] +| ['::' tCONSTANT] +| [backref] + +def mrhs + [args ',' arg_value] +| [args ',' '*' arg_value] +| ['*' arg_value] + +def expr + [expr 'and' expr] +| [expr 'or' expr] +| ['not' expr] +| [arg] + +def expr_value + [expr] + +def opt_brace_block + [brace_block] +| [] + +def block_param_def + [tBAR opt_bv_decl tBAR] +| [tBAR block_param opt_bv_decl tBAR] + +def block_param + [block_arg_list] +| [] + +def block_arg_list + [block_arg_list ',' block_arg_item] +| [block_arg_item] + +def block_arg_item + [f_norm_arg] +| [f_rest_arg] +| [f_block_arg] +| ['(' f_args ')'] + +def opt_bv_decl + [';' bv_decls] +| [] + +def bv_decls + [bvar] +| [bv_decls ',' bvar] + +def bvar + [tIDENTIFIER] + +def opt_block_param + [block_param_def] +| [] + +def operation + [tIDENTIFIER] +| [tCONSTANT] +| [tFID] + +def operation2 + [tIDENTIFIER] +| [tCONSTANT] +| [tFID] +| [op] + +def operation3 + [tIDENTIFIER] +| [tFID] +| [op] + +def op + ['|'] | ['^'] | ['&'] | ['<=>'] | ['=='] | ['==='] | ['=~'] | ['!~'] | + ['>'] | ['>='] | ['<'] | ['<='] | ['!='] | ['<<'] | ['>>'] | ['+'] | + ['-'] | ['*'] | ['/'] | ['%'] | ['**'] | ['!'] | ['~'] | ['[]'] | ['[]='] | + [tXSTRING_BEG] + +def opt_call_args + [call_args] +| [] + +def call_args + [args opt_block_arg] +| [assocs opt_block_arg] +| [args ',' assocs opt_block_arg] +| [block_arg] + +def args + [arg_value] +| ['*' arg_value] +| [args ',' arg_value] +| [args ',' '*' arg_value] + +def arg_value + [arg] + +def opt_block_arg + [',' block_arg] +| [] + +def block_arg + [tAMPER arg_value] + +def arg + ['defined?' arg] +| [arg_assign] + +def arg_assign + [lhs '=' arg_assign] +| [lhs '=' arg_assign 'rescue' arg] +| [arg_sel] + +def arg_sel + [arg_dot '?' arg_sel ':' arg_sel] +| [arg_dot] + +def arg_dot + [arg_logical '..' arg_dot] +| [arg_logical '...' arg_dot] +| [arg_logical] + +def arg_logical + [arg_eq '&&' arg_logical] +| [arg_eq '||' arg_logical] +| [arg_eq] + +def arg_eq + [arg_cmp '<=>' arg_eq] +| [arg_cmp '==' arg_eq] +| [arg_cmp '===' arg_eq] +| [arg_cmp '!=' arg_eq] +| [arg_cmp '=~' arg_eq] +| [arg_cmp '!~' arg_eq] +| [arg_cmp] + +def arg_cmp + [arg_bitor '>=' arg_cmp] +| [arg_bitor '<=' arg_cmp] +| [arg_bitor '>' arg_cmp] +| [arg_bitor '<' arg_cmp] +| [arg_bitor] + +def arg_bitor + [arg_bitand '|' arg_bitor] +| [arg_bitand '^' arg_bitor] +| [arg_bitand] + +def arg_bitand + [arg_shift '&' arg_bitand] +| [arg_shift] + +def arg_shift + [arg_add '<<' arg_shift] +| [arg_add '>>' arg_shift] +| [arg_add] + +def arg_add + [arg_mult '+' arg_add] +| [arg_mult '-' arg_add] +| [arg_mult] + +def arg_mult + [arg_pow '*' arg_mult] +| [arg_pow '/' arg_mult] +| [arg_pow '%' arg_mult] +| [arg_pow] + +def arg_pow + [arg_unary '**' arg_pow] +| [arg_unary] + +def arg_unary + ['!' primary] +| ['~' primary] +| [tUPLUS primary] +| [tUMINUS primary] +| [primary] + +def primary_value + [primary] + +def primary + [pliteral] +| [strings] +| [xstring] +#| [regexp] +#| [words] +#| [qwords] +| [var_ref] +| [backref] +| [tFID] +| ['begin' bodystmt 'end'] +| [tLPAREN compstmt ')'] +| [primary_value '::' tCONSTANT] +| ['::' tCONSTANT] +| [tLBRACK aref_args ']'] +| ['{' assoc_list '}'] +| ['defined?' '(' expr ')'] +| [operation brace_block] +| [method_call] +| [method_call brace_block] +| ['->' lambda] +| ['if' expr_value then compstmt if_tail 'end'] +| ['unless' expr_value then compstmt opt_else 'end'] +| ['while' expr_value do compstmt 'end'] +| ['until' expr_value do compstmt 'end'] +#| ['case' expr_value opt_terms case_body 'end'] +#| ['case' opt_terms case_body 'end'] +| ['for' for_var 'in' expr_value do compstmt 'end'] +| ['class' cpath superclass bodystmt 'end'] +| ['class' '<<' expr term bodystmt 'end'] +| ['module' cpath bodystmt 'end'] +| ['def' fname f_arglist bodystmt 'end'] +| ['def' singleton dot_or_colon fname f_arglist bodystmt 'end'] +| ['break'] +| ['next'] +| ['redo'] +| ['retry'] + +def for_var + [lhs] +| [mlhs] + +def lambda + [f_larglist lambda_body] + +def f_larglist + ['(' f_args opt_bv_decl ')'] +| [f_args opt_bv_decl] + +def lambda_body + ['{' compstmt '}'] +| ['do' compstmt 'end'] + +def assoc_list + [assocs trailer] +| [] + +def assocs + [assocs ',' assoc] +| [assoc] + +def assoc + [arg_value '=>' arg_value] +| [':' arg_value] + +def singleton + [var_ref] +| ['(' expr ')'] + +def dot_or_colon + ['.'] +| ['::'] + +def aref_args + [args trailer] +| [args ',' assocs trailer] +| [assocs trailer] +| [] + +def trailer + [','] +| [] + +def brace_block + ['{' opt_block_param compstmt '}'] +| ['do' opt_block_param compstmt 'end'] + +def f_arglist + ['(' f_args ')'] +| [f_args term] + +def f_args + [f_arg_list] +| [] + +def f_arg_list + [f_arg_list ',' f_arg_item] +| [f_arg_item] + +def f_arg_item + [f_norm_arg] +| [f_opt] +| [f_rest_arg] +| [f_block_arg] +| ['(' f_args ')'] + +def f_opt + [tIDENTIFIER '=' arg_value] + +def f_rest_arg + ['*' tIDENTIFIER] +| ['*'] + +def f_block_arg + [tAMPER tIDENTIFIER] + +def f_norm_arg + [tIDENTIFIER] + +def backref + [tNTH_REF] | [tBACK_REF] + +def superclass + [term] +| ['<' expr_value term] + +def cpath + ['::' cname] +| [cname] +| [primary_value '::' cname] + +def fname + [tIDENTIFIER] +| [tCONSTANT] +| [tFID] +| [op] +| [reswords] + +def reswords + ['__LINE__'] | ['__FILE__'] | ['__ENCODING__'] | ['BEGIN'] | ['END'] | + ['alias'] | ['and'] | ['begin'] | ['break'] | ['case'] | ['class'] | + ['def'] | ['defined?'] | ['do'] | ['else'] | ['elsif'] | ['end'] | + ['ensure'] | ['false'] | ['for'] | ['in'] | ['module'] | + ['next'] | ['nil'] | ['not'] | ['or'] | ['redo'] | ['rescue'] | + ['retry'] | ['return'] | ['self'] | ['super'] | ['then'] | ['true'] | + ['undef'] | ['when'] | ['yield'] | ['if'] | ['unless'] | ['while'] | + ['until'] + +def cname + [tIDENTIFIER] +| [tCONSTANT] + +def pliteral + [numeric] +| [symbol] +#| [dsym] + +def strings + [string] + +def string +# [tCHAR] + [string1] +| [string string1] + +def string1 + [tSSTRING_BEG sstring_contents? tSSTRING_END] +| [tDSTRING_BEG dstring_contents? tDSTRING_END] + +def xstring + [tXSTRING_BEG xstring_contents? tXSTRING_END] + +def numeric + [tINTEGER] +| [tFLOAT] + +def symbol + [':' sym] + +def sym + [fname] +| [tIVAR] +| [tGVAR] +| [tCVAR] + +def fitem + [fsym] +#| [dsym] + +def undef_list + [fitem] +| [undef_list ',' fitem] + +def fsym + [fname] +| [symbol] + +#def dsym +# [':' xstring_contents tDSTRING_END] + +def var_ref + [variable] + +def variable + [tIDENTIFIER] | [tIVAR] | [tGVAR] | [tCONSTANT] | [tCVAR] | ['nil'] | + ['self'] | ['true'] | ['false'] | ['__FILE__'] | ['__LINE__'] | + ['__ENCODING__'] + + +# Required whitespace, but newline is not allowed. +token ws_no_nl + /[ \t]+[^ \t\n]/ + { + send( make_token( typeid ws_no_nl, pull(stdin, match_length-1) ) ) + } + +def method_call + [operation paren_args] +| [operation ws_no_nl call_args] +| [primary_value '.' operation2 opt_paren_args] +| [primary_value '.' operation2 ws_no_nl call_args] +| [primary_value '::' operation2 opt_paren_args] +| [primary_value '::' operation2 ws_no_nl call_args] +| [primary_value '.' paren_args] +| [primary_value '::' paren_args] +| ['super' paren_args] +| ['super' ws_no_nl call_args] +| ['super'] +| ['yield' paren_args] +| ['yield' ws_no_nl call_args] +| ['yield'] +| ['return' call_args] +| ['return'] +| [primary_value '[' opt_call_args ']'] + +def opt_paren_args + [paren_args] +| [] + +def paren_args + ['(' opt_call_args ')'] + +# +# Grammar finished +# + +ruby R = parse ruby(stdin) + +print_xml( R ) + +#for T: primary in R +# print_xml( T, '\n\n' ) |