diff options
author | Adrian Thurston <thurston@complang.org> | 2013-03-21 22:47:03 +0000 |
---|---|---|
committer | Adrian Thurston <thurston@complang.org> | 2013-03-21 22:47:03 +0000 |
commit | cf1738582f8ea68335f5a20820007b26d293faa6 (patch) | |
tree | 043de8c0147e2ae68ab7e409a2055c2afe9d63e5 | |
parent | 6b95c1a385937290cd7716163de7ebd600c79dc8 (diff) | |
download | colm-cf1738582f8ea68335f5a20820007b26d293faa6.tar.gz |
no-ignore between regions, accumulate, print_xml
* Fixed the dependencies, which was necessary after the file renaming.
* Added print_xml and print_xml_ac.
* Added no-ignore to the bootstrap language.
* Using no-ignore around the boundaries between regions.
* Worked on accumulate.
-rw-r--r-- | colm/Makefile.am | 4 | ||||
-rw-r--r-- | colm/colm.lm | 54 | ||||
-rw-r--r-- | colm/conscolm.cc | 10 | ||||
-rw-r--r-- | colm/consinit.cc | 38 | ||||
-rw-r--r-- | colm/consinit.h | 4 | ||||
-rw-r--r-- | colm/load.cc | 101 | ||||
-rw-r--r-- | colm/load.h | 18 | ||||
-rw-r--r-- | test/reor2.lm | 24 | ||||
-rw-r--r-- | test/undofrag2.exp | 1 | ||||
-rw-r--r-- | test/undofrag2.in | 1 | ||||
-rw-r--r-- | test/undofrag2.lm | 7 | ||||
-rw-r--r-- | test/undofrag3.exp | 1 | ||||
-rw-r--r-- | test/undofrag3.in | 1 | ||||
-rw-r--r-- | test/undofrag3.lm | 7 |
14 files changed, 229 insertions, 42 deletions
diff --git a/colm/Makefile.am b/colm/Makefile.am index f34fd896..9d5fe0bd 100644 --- a/colm/Makefile.am +++ b/colm/Makefile.am @@ -93,10 +93,10 @@ exports2.h: parse2.c exports2.cc: parse2.c bootstrap1-exports1.$(OBJEXT): exports1.h exports1.cc parse1.c -bootstrap1-loadcolm.$(OBJEXT): exports1.h exports1.cc parse1.c +bootstrap1-conscolm.$(OBJEXT): exports1.h exports1.cc parse1.c colm-exports2.$(OBJEXT): exports2.h exports2.cc parse2.c -colm-loadsrc.$(OBJEXT): exports2.h exports2.cc parse2.c +colm-load.$(OBJEXT): exports2.h exports2.cc parse2.c BUILT_SOURCES = version.h diff --git a/colm/colm.lm b/colm/colm.lm index 8fbcab27..9977edcd 100644 --- a/colm/colm.lm +++ b/colm/colm.lm @@ -6,6 +6,8 @@ lex token TOKEN / 'token' / token IGNORE / 'ignore' / token PRINT / 'print' / + token PRINT_XML / 'print_xml' / + token PRINT_XML_AC / 'print_xml_ac' / token PARSE / 'parse' / token CONS / 'construct' | 'cons' / token SEND / 'send' / @@ -34,10 +36,7 @@ lex ( 'a' .. 'z' | 'A' .. 'Z' | '0' .. '9' | '_' ) * / - token number / - ( '0' .. '9' ) . - ( '0' .. '9' ) * - / + token number / ( '0' .. '9' ) + / token lit / '\'' . ( ^( '\'' | '\\' ) | '\\' . any )* . '\'' @@ -48,7 +47,7 @@ lex token SQOPEN /'['/ token SQCLOSE /']'/ token BAR /'|'/ - token FSLASH /'/'/ + token FSLASH /'/'/ ni token COLON /':'/ token DOUBLE_COLON /'::'/ token DOT /'.'/ @@ -93,8 +92,9 @@ lex token LEX_PLUS /'+'/ token LEX_CARET /'^'/ token LEX_DOTDOT /'..'/ - token LEX_SQOPEN_POS /'['/ - token LEX_SQOPEN_NEG /'[^'/ + token LEX_SQOPEN_POS /'['/ ni + token LEX_SQOPEN_NEG /'[^'/ ni + token LEX_FSLASH ni /'/'/ ignore / ( '\n' | '\t' | ' ' ) . 
@@ -176,10 +176,10 @@ def region_def [LEX RootItemList: root_item* END] def token_def - [TOKEN Id: id FSLASH Expr: lex_expr FSLASH] + [TOKEN Id: id FSLASH Expr: lex_expr LEX_FSLASH] def ignore_def - [IGNORE FSLASH Expr: lex_expr FSLASH] + [IGNORE FSLASH Expr: lex_expr LEX_FSLASH] def prod_el [OptName: opt_prod_name Id: id OptRepeat: opt_prod_repeat] @@ -246,7 +246,9 @@ def var_def [Id: id COLON TypeRef: type_ref] def print_stmt - [PRINT POPEN CodeExprList: code_expr* PCLOSE] + [Tree: PRINT POPEN CodeExprList: code_expr* PCLOSE] +| [Xml: PRINT_XML POPEN CodeExprList: code_expr* PCLOSE] +| [XmlAc: PRINT_XML_AC POPEN CodeExprList: code_expr* PCLOSE] def expr_stmt [CodeExpr: code_expr] @@ -296,10 +298,34 @@ def opt_field_init def field_init [CodeExpr: code_expr] -def accumulate - [SQOPEN Id: id SQCLOSE] -| [DQ DLit: dlit TDQ] -| [DQ TDQ] +#def accumulate +# [SQOPEN Id: id SQCLOSE] +#| [DQ DLit: dlit TDQ] +#| [DQ TDQ] + +# +# Accumulate +# + +def accum_el + [CodeExpr: code_expr] +#| ['"' lit_accum_el* '"'] + +def lit_accum_el + [DLit: dlit] +#| ['[' accum_el_list ']'] + +def accum_top_el + [DQ LitAccumElList: lit_accum_el* TDQ] +| [SQOPEN AccumElList: accum_el* SQCLOSE] + +def accum_list + [AccumTopEl: accum_top_el AccumList: accum_list] +| [AccumTopEl: accum_top_el] + +def accumulate + [AccumList: accum_list] + def var_ref [Qual: qual Id: id] diff --git a/colm/conscolm.cc b/colm/conscolm.cc index bbd3f83a..8c5a330c 100644 --- a/colm/conscolm.cc +++ b/colm/conscolm.cc @@ -195,6 +195,11 @@ LexExpression *LoadColm::walkLexExpr( lex_expr &LexExprTree ) } } +bool walkNoIgnore( opt_ni OptNi ) +{ + return OptNi.Ni() != 0; +} + void LoadColm::walkTokenList( token_list &tokenList ) { if ( tokenList.TokenList() != 0 ) { @@ -212,7 +217,10 @@ void LoadColm::walkTokenList( token_list &tokenList ) LexExpression *expr = walkLexExpr( LexExpr ); LexJoin *join = LexJoin::cons( expr ); - defineToken( internal, name, join, objectDef, 0, false, false, false ); + bool leftNi = walkNoIgnore( 
tokenDef.LeftNi() ); + bool rightNi = walkNoIgnore( tokenDef.RightNi() ); + + defineToken( internal, name, join, objectDef, 0, false, leftNi, rightNi ); } if ( tokenList.IgnoreDef() != 0 ) { diff --git a/colm/consinit.cc b/colm/consinit.cc index 7c98ee0e..0692acdf 100644 --- a/colm/consinit.cc +++ b/colm/consinit.cc @@ -427,6 +427,21 @@ Production *ConsInit::production( ProdEl *prodEl1, ProdEl *prodEl2, return BaseParser::production( internal, prodElList, false, 0, 0 ); } +Production *ConsInit::production( ProdEl *prodEl1, ProdEl *prodEl2, + ProdEl *prodEl3, ProdEl *prodEl4, ProdEl *prodEl5, + ProdEl *prodEl6, ProdEl *prodEl7 ) +{ + ProdElList *prodElList = new ProdElList; + appendProdEl( prodElList, prodEl1 ); + appendProdEl( prodElList, prodEl2 ); + appendProdEl( prodElList, prodEl3 ); + appendProdEl( prodElList, prodEl4 ); + appendProdEl( prodElList, prodEl5 ); + appendProdEl( prodElList, prodEl6 ); + appendProdEl( prodElList, prodEl7 ); + return BaseParser::production( internal, prodElList, false, 0, 0 ); +} + void ConsInit::definition( const String &name, Production *prod1, Production *prod2, Production *prod3, Production *prod4 ) { @@ -553,10 +568,13 @@ void ConsInit::token() { ProdEl *prodEl1 = prodRefLit( "'token'" ); ProdEl *prodEl2 = prodRefName( "Id", "id" ); - ProdEl *prodEl3 = prodRefLit( "'/'" ); - ProdEl *prodEl4 = prodRefName( "Expr", "lex_expr" ); - ProdEl *prodEl5 = prodRefLit( "'/'" ); - Production *prod1 = production( prodEl1, prodEl2, prodEl3, prodEl4, prodEl5 ); + ProdEl *prodEl3 = prodRefName( "LeftNi", "opt_ni" ); + ProdEl *prodEl4 = prodRefLit( "'/'" ); + ProdEl *prodEl5 = prodRefName( "Expr", "lex_expr" ); + ProdEl *prodEl6 = prodRefLit( "'/'" ); + ProdEl *prodEl7 = prodRefName( "RightNi", "opt_ni" ); + Production *prod1 = production( prodEl1, prodEl2, prodEl3, + prodEl4, prodEl5, prodEl6, prodEl7 ); definition( "token_def", prod1 ); } @@ -605,6 +623,16 @@ void ConsInit::optProdName() definition( "opt_prod_name", prod1, prod2 ); } +void 
ConsInit::optNi() +{ + ProdEl *prodEl1 = prodRefName( "Ni", "NI" ); + Production *prod1 = production( prodEl1 ); + + Production *prod2 = production(); + + definition( "opt_ni", prod1, prod2 ); +} + void ConsInit::optRepeat() { ProdEl *prodEl1 = prodRefName( "Star", "STAR" ); @@ -753,6 +781,7 @@ void ConsInit::go() keyword( "'end'" ); keyword( "'token'" ); keyword( "'ignore'" ); + keyword( "NI", "'ni'" ); idToken(); literalToken(); @@ -778,6 +807,7 @@ void ConsInit::go() lexTerm(); lexExpr(); + optNi(); optRepeat(); optProdName(); prodEl(); diff --git a/colm/consinit.h b/colm/consinit.h index a52c6cf9..3490c2c5 100644 --- a/colm/consinit.h +++ b/colm/consinit.h @@ -48,6 +48,9 @@ struct ConsInit ProdEl *prodEl3, ProdEl *prodEl4 ); Production *production( ProdEl *prodEl1, ProdEl *prodEl2, ProdEl *prodEl3, ProdEl *prodEl4, ProdEl *prodEl5 ); + Production *production( ProdEl *prodEl1, ProdEl *prodEl2, + ProdEl *prodEl3, ProdEl *prodEl4, ProdEl *prodEl5, + ProdEl *prodEl6, ProdEl *prodEl7 ); void definition( const String &name, Production *prod ); void definition( const String &name, Production *prod1, Production *prod2 ); @@ -79,6 +82,7 @@ struct ConsInit Production *prodProd(); Production *prodLex(); + void optNi(); void optRepeat(); void optProdName(); void prodEl(); diff --git a/colm/load.cc b/colm/load.cc index ead1a470..1eef7c13 100644 --- a/colm/load.cc +++ b/colm/load.cc @@ -217,13 +217,14 @@ String transReChars( const String &s ) case 'r': *d++ = '\r'; break; default: *d++ = s[i+1]; break; } - i+= 2; + i += 2; } else { *d++ = s[i]; i += 1; } } + *d = 0; return out; } @@ -289,7 +290,7 @@ LexFactor *LoadSource::walkLexFactor( lex_factor &LexFactorTree ) factor = LexFactor::cons( ReItem::cons( internal, block, ReItem::OrBlock ) ); } else if ( LexFactorTree.NegData() != 0 ) { - ReOrBlock *block = walkRegOrData( LexFactorTree.PosData() ); + ReOrBlock *block = walkRegOrData( LexFactorTree.NegData() ); factor = LexFactor::cons( ReItem::cons( internal, block, 
ReItem::NegOrBlock ) ); } return factor; @@ -447,7 +448,16 @@ LangStmt *LoadSource::walkPrintStmt( print_stmt &printStmt ) { _repeat_code_expr codeExprList = printStmt.CodeExprList(); ExprVect *exprVect = walkCodeExprList( codeExprList ); - return LangStmt::cons( internal, LangStmt::PrintType, exprVect ); + + LangStmt::Type type; + if ( printStmt.Tree() != 0 ) + type = LangStmt::PrintType; + else if ( printStmt.Xml() != 0 ) + type = LangStmt::PrintXMLType; + else if ( printStmt.XmlAc() != 0 ) + type = LangStmt::PrintXMLACType; + + return LangStmt::cons( internal, type, exprVect ); } QualItemVect *LoadSource::walkQual( qual &Qual ) @@ -485,25 +495,88 @@ ObjectField *walkOptCapture( opt_capture optCapture ) return objField; } -ConsItemList *walkAccumulate( accumulate Accumulate ) + +//def lit_accum_el +// [DLit: dlit] +//#| ['[' accum_el_list ']'] + +//def accum_top_el +// [DQ LitAccumElList: lit_accum_el* TDQ] +//#| [ accum_el_list ']'] + +// [AccumTopEl: accum_top_el AccumList: accum_list] +//| [AccumTopEl: accum_top_el] + +ConsItemList *LoadSource::walkLitAccumEl( lit_accum_el litAccumEl ) { ConsItemList *list = 0; - if ( Accumulate.Id() != 0 ) { - String id = Accumulate.Id().text().c_str(); - LangVarRef *varRef = LangVarRef::cons( internal, new QualItemVect, id ); - LangExpr *accumExpr = LangExpr::cons( LangTerm::cons( internal, LangTerm::VarRefType, varRef ) ); + if ( litAccumEl.DLit() != 0 ) { + String dlit = litAccumEl.DLit().text().c_str(); + ConsItem *consItem = ConsItem::cons( internal, ConsItem::InputText, dlit ); + list = ConsItemList::cons( consItem ); + } + return list; +} +ConsItemList *LoadSource::walkLitAccumElList( _repeat_lit_accum_el litAccumElList ) +{ + ConsItemList *list = new ConsItemList; + while ( !litAccumElList.end() ) { + ConsItemList *extension = walkLitAccumEl( litAccumElList.value() ); + list = consListConcat( list, extension ); + litAccumElList = litAccumElList.next(); + } + return list; +} + +ConsItemList *LoadSource::walkAccumEl( 
accum_el accumEl ) +{ + ConsItemList *list = 0; + if ( accumEl.CodeExpr() != 0 ) { + LangExpr *accumExpr = walkCodeExpr( accumEl.CodeExpr() ); ConsItem *consItem = ConsItem::cons( internal, ConsItem::ExprType, accumExpr ); list = ConsItemList::cons( consItem ); } - else if ( Accumulate.DLit() != 0 ) { - String dlit = Accumulate.DLit().text().c_str(); - ConsItem *consItem = ConsItem::cons( internal, ConsItem::InputText, dlit ); - list = ConsItemList::cons( consItem ); + return list; +} + +ConsItemList *LoadSource::walkAccumElList( _repeat_accum_el accumElList ) +{ + ConsItemList *list = new ConsItemList; + while ( !accumElList.end() ) { + ConsItemList *extension = walkAccumEl( accumElList.value() ); + list = consListConcat( list, extension ); + accumElList = accumElList.next(); } - else { - list = new ConsItemList; + return list; +} + +ConsItemList *LoadSource::walkAccumTopEl( accum_top_el accumTopEl ) +{ + ConsItemList *list = 0; + if ( accumTopEl.LitAccumElList() != 0 ) + list = walkLitAccumElList( accumTopEl.LitAccumElList() ); + else if ( accumTopEl.AccumElList() != 0 ) { + list = walkAccumElList( accumTopEl.AccumElList() ); + } + return list; +} + +ConsItemList *LoadSource::walkAccumList( accum_list accumList ) +{ + ConsItemList *list = walkAccumTopEl( accumList.AccumTopEl() ); + + if ( accumList.AccumList() != 0 ) { + ConsItemList *extension = walkAccumList( accumList.AccumList() ); + consListConcat( list, extension ); } + + return list; +} + +ConsItemList *LoadSource::walkAccumulate( accumulate Accumulate ) +{ + ConsItemList *list = walkAccumList( Accumulate.AccumList() ); return list; } diff --git a/colm/load.h b/colm/load.h index 2a714b74..bba93dfe 100644 --- a/colm/load.h +++ b/colm/load.h @@ -69,6 +69,13 @@ struct context_var_def; struct opt_reduce; struct field_init; struct opt_field_init; +struct lit_accum_el; +struct _repeat_lit_accum_el; +struct accum_top_el; +struct accum_list; +struct accumulate; +struct accum_el; +struct _repeat_accum_el; struct 
LoadSource : @@ -82,6 +89,8 @@ struct LoadSource const char *inputFileName; + void go(); + ObjectField *walkVarDef( var_def varDef ); NamespaceQual *walkRegionQual( region_qual regionQual ); RepeatType walkOptRepeat( opt_repeat OptRepeat ); @@ -132,5 +141,12 @@ struct LoadSource void walkFieldInit( FieldInitVect *list, field_init fieldInit ); FieldInitVect *walkOptFieldInit( opt_field_init optFieldInit ); - void go(); + + ConsItemList *walkLitAccumEl( lit_accum_el litAccumEl ); + ConsItemList *walkLitAccumElList( _repeat_lit_accum_el litAccumElList ); + ConsItemList *walkAccumTopEl( accum_top_el accumTopEl ); + ConsItemList *walkAccumList( accum_list accumList ); + ConsItemList *walkAccumulate( accumulate Accumulate ); + ConsItemList *walkAccumEl( accum_el accumEl ); + ConsItemList *walkAccumElList( _repeat_accum_el accumElList ); }; diff --git a/test/reor2.lm b/test/reor2.lm new file mode 100644 index 00000000..fe839f11 --- /dev/null +++ b/test/reor2.lm @@ -0,0 +1,24 @@ +##### LM ##### +context undo + + lex + ignore /[ ]+/ + literal ';', '\n' + token id /[a-zA-Z_]+/ + end + + def item + [id] + + def start + [item* ';' '\n'] + +end + +cons Undo: undo[] +parse Input: undo::start( Undo )[ stdin ] +print( Input.tree ) +##### IN ##### +a b; +##### EXP ##### +a b; diff --git a/test/undofrag2.exp b/test/undofrag2.exp deleted file mode 100644 index 8731daaf..00000000 --- a/test/undofrag2.exp +++ /dev/null @@ -1 +0,0 @@ -a b c; diff --git a/test/undofrag2.in b/test/undofrag2.in deleted file mode 100644 index 8731daaf..00000000 --- a/test/undofrag2.in +++ /dev/null @@ -1 +0,0 @@ -a b c; diff --git a/test/undofrag2.lm b/test/undofrag2.lm index 5a5b4580..cb25b444 100644 --- a/test/undofrag2.lm +++ b/test/undofrag2.lm @@ -1,7 +1,8 @@ +##### LM ##### context undo lex - ignore /[ \t]+/ + ignore /( ' ' | '\t')+/ literal '*', '(', ')', '^', ';', '\n' token id /[a-zA-Z_]+/ end @@ -43,3 +44,7 @@ Undo.Out = construct parser<undo::out> [] parse InputP: undo::start(Undo)[ stdin ] 
Input: undo::start = InputP.tree print( Input ) +##### IN ##### +a b c; +##### EXP ##### +a b c; diff --git a/test/undofrag3.exp b/test/undofrag3.exp deleted file mode 100644 index b7f65215..00000000 --- a/test/undofrag3.exp +++ /dev/null @@ -1 +0,0 @@ -<undo::out><undo::_repeat_out_item><undo::out_item><undo::id>a</undo::id></undo::out_item></undo::_repeat_out_item></undo::out><undo::out><undo::_repeat_out_item><undo::out_item><undo::id>a</undo::id></undo::out_item></undo::_repeat_out_item></undo::out>a . ; diff --git a/test/undofrag3.in b/test/undofrag3.in deleted file mode 100644 index 4f3692dd..00000000 --- a/test/undofrag3.in +++ /dev/null @@ -1 +0,0 @@ -a . ; diff --git a/test/undofrag3.lm b/test/undofrag3.lm index 37b1554d..75ffd3c5 100644 --- a/test/undofrag3.lm +++ b/test/undofrag3.lm @@ -1,7 +1,8 @@ +##### LM ##### context undo lex - ignore /[ \t]+/ + ignore /( ' ' | '\t' )+/ literal '*', '(', ')', '^', ';', '\n', '.' token id /[a-zA-Z_]+/ end @@ -47,3 +48,7 @@ Undo.Out = construct parser<undo::out> [] parse Input: undo::start(Undo)[ stdin ] print( Input.tree ) +##### IN ##### +a . ; +##### EXP ##### +<undo::out><undo::_repeat_out_item><undo::out_item><undo::id>a</undo::id></undo::out_item></undo::_repeat_out_item></undo::out><undo::out><undo::_repeat_out_item><undo::out_item><undo::id>a</undo::id></undo::out_item></undo::_repeat_out_item></undo::out>a . ; |