diff options
Diffstat (limited to 'src/colm.lm')
-rw-r--r-- | src/colm.lm | 765 |
1 files changed, 765 insertions, 0 deletions
diff --git a/src/colm.lm b/src/colm.lm new file mode 100644 index 0000000..e0ba8c7 --- /dev/null +++ b/src/colm.lm @@ -0,0 +1,765 @@ +# Main region. +lex + token DEF / 'def' / + token LEX / 'lex' / + token END / 'end' / + token TOKEN / 'token' / + token RL / 'rl' / + token IGNORE / 'ignore' / + token PRINT / 'print' / + token PRINTS / 'prints' / + token PRINT_XML / 'print_xml' / + token PRINT_XML_AC / 'print_xml_ac' / + token PARSE / 'parse' / + token PARSE_STOP / 'parse_stop' / + token CONS / 'construct' | 'cons' / + token MATCH / 'match' / + token REQUIRE / 'require' / + token SEND / 'send' / + token NAMESPACE / 'namespace' / + token FOR / 'for' / + token IF / 'if' / + token YIELD / 'yield' / + token WHILE / 'while' / + token ELSIF / 'elsif' / + token ELSE / 'else' / + token IN / 'in' / + token PARSER / 'parser' | 'accum' / + token LIST / 'list' / + token VECTOR / 'vector' / + token MAP / 'map' / + token PTR / 'ptr' / + token ITER / 'iter' / + token REF / 'ref' / + token EXPORT / 'export' / + token RETURN / 'return' / + token BREAK / 'break' / + token REJECT / 'reject' / + token REDUCEFIRST / 'reducefirst' / + token ALIAS / 'alias' / + token COMMIT / 'commit' / + token NEW / 'new' / + token PREEOF / 'preeof' / + token GLOBAL / 'global' / + token EOS / 'eos' / + token CAST / 'cast' / + + token MAKE_TOKEN / 'make_token' / + token MAKE_TREE / 'make_tree' / + + token TYPEID / 'typeid' / + + token LITERAL /'literal'/ + token CONTEXT /'context'/ + token NI /'ni'/ + + token NIL / 'nil' / + token TRUE / 'true' / + token FALSE / 'false' / + + token LEFT /'left'/ + token RIGHT /'right'/ + token NONASSOC /'nonassoc'/ + + token INCLUDE /'include'/ + + token id / + ( 'a' .. 'z' | 'A' .. 'Z' | '_' ) . + ( 'a' .. 'z' | 'A' .. 'Z' | '0' .. '9' | '_' ) * + / + + token number + / ( '0' .. '9' ) + / + + token backtick_lit + / '`' . ^( ' ' | '\n' | '\t' | ']' )+ | '`]' / + + token DQ / '\"' / ni + token SQ / '\'' / ni + token TILDE / '~' / ni + + token SQOPEN /'['/ + token SQCLOSE /']'/ + token BAR /'|'/ + token FSLASH /'/'/ + token COLON /':'/ + token DOUBLE_COLON /'::'/ + token DOT /'.'/ + token ARROW /'->'/ + token POPEN /'('/ + token PCLOSE /')'/ + token COPEN /'{'/ + token CCLOSE /'}'/ + token STAR /'*'/ + token QUESTION /'?'/ + token EQUALS /'='/ + token EQEQ /'=='/ + token NEQ /'!='/ + token COMMA /','/ + token LT /'<'/ + token GT /'>'/ + token LTEQ /'<='/ + token GTEQ /'>='/ + token BANG /'!'/ + token DOLLAR /'$'/ + token CARET /'^'/ + token PERCENT /'%'/ + token PLUS /'+'/ + token MINUS /'-'/ + token AMPAMP /'&&'/ + token BARBAR /'||'/ + + ignore / ( '\n' | '\t' | ' ' )+ / + ignore / '#' . ( ^'\n' )* . '\n' / +end + +lex + token CONS_DQ / '\"' / + token CONS_DQ_NL / '\n' / + token CONS_SQOPEN / '[' / + token CONS_SQCLOSE / ']' / + + token dq_cons_data + / ( ^( '\n' | '\"' | '[' | ']' | '\\' ) | '\\' . any )+ / +end + +lex + token CONS_SQ / '\'' / + token CONS_SQ_NL / '\n' / + + token sq_cons_data + / ( ^( '\n' | '\'' | '\\' ) | '\\' . any )+ / +end + +lex + token TILDE_NL / '\n' / + token tilde_data + / ( ^'\n' )+ / +end + +lex + token lex_id / + ( 'a' .. 'z' | 'A' .. 'Z' | '_' ) . + ( 'a' .. 'z' | 'A' .. 'Z' | '0' .. '9' | '_' ) * + / + + token lex_uint + / ( '0' .. '9' )+ / + + token lex_hex + / '0x' . ( '0' .. '9' | 'a' .. 'f' | 'A' .. 'F' )+ / + + token lex_lit / + '\'' . ( ^( '\'' | '\\' ) | '\\' . any )* . ( '\'' | '\'i' ) | + '\"' . ( ^( '\"' | '\\' ) | '\\' . any )* . ( '\"' | '\"i' ) + / + + token LEX_DOT /'.'/ + token LEX_BAR /'|'/ + token LEX_AMP /'&'/ + token LEX_DASH /'-'/ + token LEX_POPEN /'('/ + token LEX_PCLOSE /')'/ + token LEX_STAR /'*'/ + token LEX_STARSTAR /'**'/ + token LEX_QUESTION /'?'/ + token LEX_PLUS /'+'/ + token LEX_CARET /'^'/ + token LEX_DOTDOT /'..'/ + token LEX_SQOPEN_POS /'['/ ni + token LEX_SQOPEN_NEG /'[^'/ ni + token LEX_FSLASH /'/'/ + + token LEX_DASHDASH /'--'/ + token LEX_COLON_GT /':>'/ + token LEX_COLON_GTGT /':>>'/ + token LEX_LT_COLON /'<:'/ + + ignore / + ( '\n' | '\t' | ' ' ) . + ( '\n' | '\t' | ' ' )* + / + + ignore / '#' . ( ^'\n' )* . '\n' / +end + +lex + token RE_DASH / '-' / + token RE_CHAR / ^( '\\' | '-' | ']' ) | '\\' . any / + token RE_SQCLOSE / ']' / +end + +def start + [RootItemList: root_item*] + +def root_item + [rl_def] :Rl commit +| [literal_def] :Literal commit +| [token_def] :Token commit +| [ic_def] :IgnoreCollector commit +| [ignore_def] :Ignore commit +| [cfl_def] :Cfl commit +| [region_def] :Region commit +| [context_def] :Context commit +| [namespace_def] :Namespace commit +| [function_def] :Function commit +| [iter_def] :Iter commit +| [statement] :Statement commit +| [global_def] :Global commit +| [export_def] :Export commit +| [pre_eof_def] :PreEof commit +| [precedence_def] :Precedence commit +| [alias_def] :Alias commit +| [include] :Include commit + +def include + [INCLUDE SQ SqConsDataList: sq_cons_data* sq_lit_term] + +def precedence_def + [pred_type pred_token_list] + +def pred_type + [LEFT] :Left +| [RIGHT] :Right +| [NONASSOC] :NonAssoc + +def pred_token_list + [pred_token_list COMMA pred_token] :List +| [pred_token] :Base + +def pred_token + [region_qual id] :Id +| [region_qual backtick_lit] :Lit + +def pre_eof_def + [PREEOF COPEN lang_stmt_list CCLOSE] + +def alias_def + [ALIAS id type_ref] + +def context_item + [context_var_def] :ContextVar commit +| [literal_def] :Literal commit +| [rl_def] :Rl commit +| [token_def] :Token commit +| [ic_def] :IgnoreCollector commit +| [ignore_def] :Ignore commit +| [cfl_def] :Cfl commit +| [region_def] :Region commit +| [context_def] :Context commit +| [function_def] :Function commit +| [iter_def] :Iter commit +| [export_def] :Export commit +| [pre_eof_def] :PreEof commit +| [precedence_def] :Precedence commit + +def export_def + [EXPORT var_def opt_def_init] + +def global_def + [GLOBAL var_def opt_def_init] + +def iter_def + [ITER id POPEN ParamVarDefList: param_var_def_list PCLOSE + COPEN lang_stmt_list CCLOSE] + +def reference_type_ref + [REF LT type_ref GT] + +def param_var_def_list + [param_var_def param_var_def_list] +| [] + +def param_var_def + [id COLON type_ref] :Type +| [id COLON reference_type_ref] :Ref + +def opt_export + [EXPORT] :Export +| [] + +def function_def + [opt_export type_ref id + POPEN ParamVarDefList: param_var_def_list PCLOSE + COPEN lang_stmt_list CCLOSE] + +def context_var_def + [var_def] + +def context_def + [CONTEXT id ContextItemList: context_item* END] + +def literal_def + [LITERAL literal_list] + +def literal_list + [literal_list literal_item] :Item +| [literal_item] :Base + +def literal_item + [no_ignore_left backtick_lit no_ignore_right] + +def no_ignore_left + [NI MINUS] :Ni +| [] + +def no_ignore_right + [MINUS NI] :Ni +| [] + +def namespace_def + [NAMESPACE id ItemList: namespace_item* END] + +def namespace_item + [rl_def] :Rl commit +| [literal_def] :Literal commit +| [token_def] :Token commit +| [ic_def] :IgnoreCollector commit +| [ignore_def] :Ignore commit +| [cfl_def] :Cfl commit +| [region_def] :Region commit +| [context_def] :Context commit +| [namespace_def] :Namespace commit +| [function_def] :Function commit +| [iter_def] :Iter commit +| [pre_eof_def] :PreEof commit +| [precedence_def] :Precedence commit +| [alias_def] :Alias commit +| [include] :Include commit + +def obj_var_list + [] + +def opt_reduce_first + [REDUCEFIRST] +| [] + +def cfl_def + [DEF id + VarDefList: var_def* + opt_reduce_first + prod_list] + +def region_def + [LEX RootItemList: root_item* END] + +def rl_def + [RL id LEX_FSLASH lex_expr LEX_FSLASH] + +def opt_lex_expr + [lex_expr] +| [] + +def token_def + [TOKEN id VarDefList: var_def* + no_ignore_left + LEX_FSLASH opt_lex_expr LEX_FSLASH + no_ignore_right + opt_translate] + +def ic_def + [TOKEN id MINUS] + +def opt_translate + [COPEN lang_stmt_list CCLOSE] :Translate +| [] + +def opt_id + [id] :Id +| [] + +def ignore_def + [IGNORE opt_id LEX_FSLASH opt_lex_expr LEX_FSLASH] + +def prod_el + [opt_prod_el_name region_qual id opt_repeat] :Id +| [opt_prod_el_name region_qual backtick_lit opt_repeat] :Lit + +def opt_prod_el_name + [id COLON] :Name +| [] + +def prod_el_list + [prod_el_list prod_el] :List +| [] + +def opt_commit + [COMMIT] :Commit +| [] + +def opt_prod_name + [COLON id] :Name +| [] + +def prod + [SQOPEN prod_el_list SQCLOSE + opt_prod_name + opt_commit + opt_reduce] + +def opt_reduce + [COPEN lang_stmt_list CCLOSE] :Reduce +| [] + +def prod_list + [prod_list BAR prod] :List +| [prod] :Base + +def statement + [print_stmt] :Print +| [expr_stmt] :Expr +| [var_def opt_def_init] :VarDef +| [FOR id COLON type_ref IN iter_call block_or_single] :For +| [IF code_expr block_or_single elsif_list] :If +| [WHILE code_expr block_or_single] :While +| [var_ref EQUALS code_expr] :LhsVarRef +| [YIELD var_ref] :Yield +| [RETURN code_expr] :Return +| [BREAK] :Break +| [REJECT] :Reject + +def elsif_list + [elsif_clause elsif_list] :Clause +| [optional_else] :OptElse + +def elsif_clause + [ELSIF code_expr block_or_single] + +def optional_else + [ELSE block_or_single] :Else +| [] + +def call_arg_list + [code_expr call_arg_list] +| [] + +def iter_call + [E1 var_ref POPEN call_arg_list PCLOSE] :Call +| [E2 id] :Id +| [E3 code_expr] :Expr + +def block_or_single + [COPEN lang_stmt_list CCLOSE] :Block +| [statement] :Single + +def require_pattern + [REQUIRE var_ref pattern] + +def opt_require_stmt + [require_pattern lang_stmt_list] :Require +| [] :Base + +def lang_stmt_list + [StmtList: statement* opt_require_stmt] + +def opt_def_init + [EQUALS code_expr] :Init +| [] :Base + +def var_def + [id COLON type_ref] + +def print_stmt + [PRINT POPEN call_arg_list PCLOSE] :Tree +| [PRINTS POPEN call_arg_list PCLOSE] :PrintStream +| [PRINT_XML POPEN call_arg_list PCLOSE] :Xml +| [PRINT_XML_AC POPEN call_arg_list PCLOSE] :XmlAc + +def expr_stmt + [code_expr] + +def code_expr + [code_expr AMPAMP code_relational] :AmpAmp +| [code_expr BARBAR code_relational] :BarBar +| [code_relational] :Base + +def code_relational + [code_relational EQEQ code_additive] :EqEq +| [code_relational NEQ code_additive] :Neq +| [code_relational LT code_additive] :Lt +| [code_relational GT code_additive] :Gt +| [code_relational LTEQ code_additive] :LtEq +| [code_relational GTEQ code_additive] :GtEq +| [code_additive] :Base + +def code_additive + [code_additive PLUS code_multiplicitive] :Plus +| [code_additive MINUS code_multiplicitive] :Minus +| [code_multiplicitive] :Base + +def code_multiplicitive + [code_multiplicitive STAR code_unary] :Star +| [code_multiplicitive FSLASH code_unary] :Fslash +| [code_unary] :Base + +def code_unary + [BANG code_factor] :Bang +| [DOLLAR code_factor] :Dollar +| [CARET code_factor] :Caret +| [PERCENT code_factor] :Percent +| [code_factor] :Base + +def opt_eos + [DOT] :Dot +| [EOS] :Eos +| [] + +def code_factor + [number] :Number +| [var_ref POPEN call_arg_list PCLOSE] :Call +| [var_ref] :VarRef +| [NIL] :Nil +| [TRUE] :True +| [FALSE] :False +| [POPEN code_expr PCLOSE] :Paren +| [SEND var_ref accumulate opt_eos] :Send +| [PARSE opt_capture type_ref opt_field_init accumulate] :Parse +| [PARSE_STOP opt_capture type_ref opt_field_init accumulate] :ParseStop +| [CONS opt_capture type_ref opt_field_init constructor] :Cons +| [MATCH var_ref pattern] :Match +| [string] :String +| [type_ref IN var_ref] :In +| [MAKE_TREE POPEN call_arg_list PCLOSE] :MakeTree +| [MAKE_TOKEN POPEN call_arg_list PCLOSE] :MakeToken +| [TYPEID LT type_ref GT] :TypeId +| [NEW code_factor] :New +| [CAST LT type_ref GT code_factor] :Cast + +def type_ref + [region_qual id opt_repeat] :Id +| [PTR LT region_qual id opt_repeat GT] :Ptr +| [MAP LT MapKeyType: type_ref MapValueType: type_ref GT] :Map +| [LIST LT type_ref GT] :List +| [VECTOR LT type_ref GT] :Vector +| [PARSER LT type_ref GT] :Parser + +def region_qual + [region_qual id DOUBLE_COLON] :Qual +| [] :Base + +def opt_repeat + [STAR] :Star +| [PLUS] :Plus +| [QUESTION] :Question +| [] + +def opt_capture + [id COLON] :Id +| [] + +def opt_field_init + [POPEN FieldInitList: field_init* PCLOSE] :Init +| [] :Base + +def field_init + [code_expr] + + +# +# Pattern +# + +def opt_label + [id COLON] :Id +| [] + +def dq_lit_term + [CONS_DQ] | [CONS_DQ_NL] + +def sq_lit_term + [CONS_SQ] | [CONS_SQ_NL] + +def opt_tilde_data + [tilde_data] +| [] + +def pattern_el_lel + [region_qual id opt_repeat] :Id +| [region_qual backtick_lit opt_repeat] :Lit + +def pattern_el + [opt_label pattern_el_lel] :PatternEl +| [DQ LitpatElList: litpat_el* dq_lit_term] :Dq +| [SQ SqConsDataList: sq_cons_data* sq_lit_term] :Sq +| [TILDE opt_tilde_data TILDE_NL] :Tilde + +def litpat_el + [dq_cons_data] :ConsData +| [CONS_SQOPEN PatternElList: pattern_el* CONS_SQCLOSE] :SubList + +def pattern_top_el + [DQ LitpatElList: litpat_el* dq_lit_term] :Dq +| [SQ SqConsDataList: sq_cons_data* sq_lit_term] :Sq +| [TILDE opt_tilde_data TILDE_NL] :Tilde +| [SQOPEN PatternElList: pattern_el* SQCLOSE] :SubList + +def pattern_list + [pattern_list pattern_top_el] :List +| [pattern_top_el] :Base + +def pattern + [pattern_list] + +# +# Constructor List +# + +def E1 [] +def E2 [] +def E3 [] +def E4 [] + +def cons_el + [E1 region_qual backtick_lit] :Lit +| [E1 DQ LitConsElList: lit_cons_el* dq_lit_term] :Dq +| [E1 SQ SqConsDataList: sq_cons_data* sq_lit_term] :Sq +| [E1 TILDE opt_tilde_data TILDE_NL] :Tilde +| [E2 code_expr] :CodeExpr + +def lit_cons_el + [dq_cons_data] :ConsData +| [CONS_SQOPEN ConsElList: cons_el* CONS_SQCLOSE] :SubList + +def cons_top_el + [DQ LitConsElList: lit_cons_el* dq_lit_term] :Dq +| [SQ SqConsDataList: sq_cons_data* sq_lit_term] :Sq +| [TILDE opt_tilde_data TILDE_NL] :Tilde +| [SQOPEN ConsElList: cons_el* SQCLOSE] :SubList + +def cons_list + [cons_top_el cons_list] :List +| [cons_top_el] :Base + +def constructor + [cons_list] + +# +# Accumulate +# + +def accum_el + [E1 DQ LitAccumElList: lit_accum_el* dq_lit_term] :Dq +| [E1 SQ SqConsDataList: sq_cons_data* sq_lit_term] :Sq +| [E1 TILDE opt_tilde_data TILDE_NL] :Tilde +| [E2 code_expr] :CodeExpr + +def lit_accum_el + [dq_cons_data] :ConsData +| [CONS_SQOPEN AccumElList: accum_el* CONS_SQCLOSE] :SubList + +def accum_top_el + [DQ LitAccumElList: lit_accum_el* dq_lit_term] :Dq +| [SQ SqConsDataList: sq_cons_data* sq_lit_term] :Sq +| [TILDE opt_tilde_data TILDE_NL] :Tilde +| [SQOPEN AccumElList: accum_el* SQCLOSE] :SubList + +def accum_list + [accum_top_el accum_list] :List +| [accum_top_el] :Base + +def accumulate + [accum_list] + +# +# String List +# + +def string_el + [E1 DQ LitStringElList: lit_string_el* dq_lit_term] :Dq +| [E1 SQ SqConsDataList: sq_cons_data* sq_lit_term] :Sq +| [E1 TILDE opt_tilde_data TILDE_NL] :Tilde +| [E2 code_expr] :CodeExpr + +def lit_string_el + [dq_cons_data] :ConsData +| [CONS_SQOPEN StringElList: string_el* CONS_SQCLOSE] :SubList + +def string_top_el + [DQ LitStringElList: lit_string_el* dq_lit_term] :Dq +| [SQ SqConsDataList: sq_cons_data* sq_lit_term] :Sq +| [TILDE opt_tilde_data TILDE_NL] :Tilde +| [SQOPEN StringElList: string_el* SQCLOSE] :SubList + +def string_list + [string_top_el string_list] :List +| [string_top_el] :Base + +def string + [string_list] + +# +# Variable References +# + +def var_ref + [qual id] + +def qual + [qual id DOT] :Dot +| [qual id ARROW] :Arrow +| [] :Base + +# +# Lexical analysis. +# + +def lex_expr + [lex_expr LEX_BAR lex_term] :Bar +| [lex_expr LEX_AMP lex_term] :Amp +| [lex_expr LEX_DASH lex_term] :Dash +| [lex_expr LEX_DASHDASH lex_term] :DashDash +| [lex_term] :Base + +def opt_lex_dot + [LEX_DOT] +| [] + +def lex_term + [lex_term opt_lex_dot lex_factor_rep] :Dot +| [lex_term LEX_COLON_GT lex_factor_rep] :ColonGt +| [lex_term LEX_COLON_GTGT lex_factor_rep] :ColonGtGt +| [lex_term LEX_LT_COLON lex_factor_rep] :LtColon +| [lex_factor_rep] :Base + +def lex_factor_rep + [lex_factor_rep LEX_STAR] :Star +| [lex_factor_rep LEX_STARSTAR] :StarStar +| [lex_factor_rep LEX_PLUS] :Plus +| [lex_factor_rep LEX_QUESTION] :Question +| [lex_factor_rep COPEN lex_uint CCLOSE ] :Exact +| [lex_factor_rep COPEN COMMA lex_uint CCLOSE ] :Max +| [lex_factor_rep COPEN lex_uint COMMA CCLOSE ] :Min +| [lex_factor_rep COPEN Low: lex_uint COMMA High: lex_uint CCLOSE ] :Range +| [lex_factor_neg] :Base + +def lex_factor_neg + [LEX_CARET lex_factor_neg] :Caret +| [lex_factor] :Base + +def lex_range_lit + [lex_lit] :Lit +| [lex_num] :Number + +def lex_num + [lex_uint] +| [lex_hex] + +#| [LEX_DASH num] + +def lex_factor + [lex_lit] :Literal +| [lex_id] :Id +| [lex_uint] :Number +| [lex_hex] :Hex +| [Low: lex_range_lit LEX_DOTDOT High: lex_range_lit] :Range +| [LEX_SQOPEN_POS reg_or_data RE_SQCLOSE] :PosOrBlock +| [LEX_SQOPEN_NEG reg_or_data RE_SQCLOSE] :NegOrBlock +| [LEX_POPEN lex_expr LEX_PCLOSE] :Paren + +def reg_or_data + [reg_or_data reg_or_char] :Data +| [] :Base + +def reg_or_char + [RE_CHAR] :Char +| [Low: RE_CHAR RE_DASH High: RE_CHAR] :Range |