diff options
author | Adrian Thurston <thurston@complang.org> | 2012-05-24 19:05:56 -0400 |
---|---|---|
committer | Adrian Thurston <thurston@complang.org> | 2012-05-24 19:11:27 -0400 |
commit | 270fd57e97eeda35b09fd2c5669ab9fdaeee290e (patch) | |
tree | 41087e1c4b3ad0638bc14f92323270899629aef9 | |
parent | 64c59bd0dd83af9f329efeaf4e444c28caec1353 (diff) | |
download | colm-270fd57e97eeda35b09fd2c5669ab9fdaeee290e.tar.gz |
collect-ignore implementation
It is now possible to parse patterns that have collect-ignores. Sometimes they
need to be present in the input stream, namely when passing over the production;
other times they do not, namely when passing over the nonterminal. Skipping of
them is now built into the backtracker.
-rw-r--r-- | colm/bytecode.h | 1 | ||||
-rw-r--r-- | colm/lmparse.kh | 2 | ||||
-rw-r--r-- | colm/lmparse.kl | 12 | ||||
-rw-r--r-- | colm/lmscan.rl | 11 | ||||
-rw-r--r-- | colm/pdabuild.cc | 3 | ||||
-rw-r--r-- | colm/pdarun.c | 44 |
6 files changed, 52 insertions, 21 deletions
diff --git a/colm/bytecode.h b/colm/bytecode.h index aa395d6f..1efe2739 100644 --- a/colm/bytecode.h +++ b/colm/bytecode.h @@ -361,6 +361,7 @@ typedef unsigned char uchar; #define PF_RIGHT_IL_ATTACHED 0x0800 #define PF_CI 0x1000 +#define PF_UNDO_CI 0x2000 #define AF_LEFT_IGNORE 0x0100 #define AF_RIGHT_IGNORE 0x0200 diff --git a/colm/lmparse.kh b/colm/lmparse.kh index 469e7ec3..722e8082 100644 --- a/colm/lmparse.kh +++ b/colm/lmparse.kh @@ -58,7 +58,7 @@ struct ColmParser # Patterns. token KW_Match, KW_Construct, KW_Parse, KW_ParseStop, KW_New, KW_MakeToken, - KW_MakeTree, KW_TypeId, KW_Alias, KW_Send, KW_Ni; + KW_MakeTree, KW_TypeId, KW_Alias, KW_Send, KW_Ni, KW_Ci; token KW_Include, KW_Preeof; diff --git a/colm/lmparse.kl b/colm/lmparse.kl index b58821f4..b93c1da0 100644 --- a/colm/lmparse.kl +++ b/colm/lmparse.kl @@ -879,8 +879,16 @@ nonterm class opt_collect_ignore String region; }; -opt_collect_ignore: KW_Ni TK_Word final { $$->value = true; $$->region = $2->data; }; -opt_collect_ignore: final { $$->value = false; }; +opt_collect_ignore: + KW_Ci '<' TK_Word '>' + final { + $$->value = true; + $$->region = $3->data; + }; +opt_collect_ignore: + final { + $$->value = false; + }; nonterm prod_el { diff --git a/colm/lmscan.rl b/colm/lmscan.rl index dbe79ca0..cadc8dc3 100644 --- a/colm/lmscan.rl +++ b/colm/lmscan.rl @@ -472,11 +472,12 @@ void Scanner::endSection( ) 'right' => { token( KW_Right ); }; 'nonassoc' => { token( KW_Nonassoc ); }; 'prec' => { token( KW_Prec ); }; - 'include' => {token( KW_Include ); }; - 'context' => {token( KW_Context ); }; - 'alias' => {token( KW_Alias ); }; - 'send' => {token( KW_Send ); }; - 'ni' => {token( KW_Ni ); }; + 'include' => { token( KW_Include ); }; + 'context' => { token( KW_Context ); }; + 'alias' => { token( KW_Alias ); }; + 'send' => { token( KW_Send ); }; + 'ni' => { token( KW_Ni ); }; + 'ci' => { token( KW_Ci ); }; # Identifiers. 
ident => { token( TK_Word, ts, te ); } ; diff --git a/colm/pdabuild.cc b/colm/pdabuild.cc index de0bbac8..c22369b3 100644 --- a/colm/pdabuild.cc +++ b/colm/pdabuild.cc @@ -1662,6 +1662,9 @@ void fillNodes( Program *prg, int &nextAvail, Bindings *bindings, long &bindId, ? parseTree->child : 0; + if ( ptChild != 0 && ptChild->flags & PF_CI ) + ptChild = ptChild->next; + /* Set up the fields. */ node.id = kid->tree->id; node.prodNum = kid->tree->prodNum; diff --git a/colm/pdarun.c b/colm/pdarun.c index d6886dc5..e5dd5736 100644 --- a/colm/pdarun.c +++ b/colm/pdarun.c @@ -1206,7 +1206,7 @@ case PcrStart: } } - if ( ( pdaRun->tokenId == SCAN_ERROR /*|| pdaRun->tokenId == SCAN_LANG_EL*/ ) && + if ( pdaRun->tokenId == SCAN_ERROR && ( prg->rtd->regionInfo[fsmRun->region].ciLelId > 0 ) ) { debug( REALM_PARSE, "sending a collect ignore\n" ); @@ -1214,6 +1214,16 @@ case PcrStart: goto yes; } + if ( pdaRun->tokenId == SCAN_LANG_EL && + ( prg->rtd->regionInfo[fsmRun->region].ciLelId > 0 ) ) + { + debug( REALM_PARSE, "sending a collect ignore\n" ); + sendCi( prg, sp, inputStream, fsmRun, pdaRun, prg->rtd->regionInfo[fsmRun->region].ciLelId ); + pdaRun->parseInput->flags |= PF_UNDO_CI; + pdaRun->numRetry += 1; + goto yes; + } + if ( pdaRun->tokenId == SCAN_TRY_AGAIN_LATER ) { debug( REALM_PARSE, "scanner says try again later\n" ); break; @@ -1922,10 +1932,10 @@ again: child->next = last; dataChild->next = dataLast; -// if ( child->flags & PF_CI ) { -// debug( REALM_PARSE, "advancing over CI\n" ); -// dataChild = dataChild->next; -// } + if ( child->flags & PF_CI ) { + debug( REALM_PARSE, "advancing over CI\n" ); + dataChild = dataChild->next; + } /* Track last for reversal. */ last = child; @@ -2102,14 +2112,22 @@ case PcrReverse: pdaRun->parseInput = pdaRun->undoLel; } else { - long region = pdaRun->parseInput->region; - pdaRun->next = region > 0 ? 
region + 1 : 0; - pdaRun->checkNext = true; - pdaRun->checkStop = true; - - sendBack( prg, sp, pdaRun, fsmRun, inputStream, pdaRun->parseInput ); - - pdaRun->parseInput = 0; + if ( pdaRun->stackTop->flags & PF_UNDO_CI ) { + pdaRun->cs = pdaRun->stackTop->state; + pdaRun->numRetry -= 1; + pdaRun->stackTop = pdaRun->stackTop->next; + goto again; + } + else { + long region = pdaRun->parseInput->region; + pdaRun->next = region > 0 ? region + 1 : 0; + pdaRun->checkNext = true; + pdaRun->checkStop = true; + + sendBack( prg, sp, pdaRun, fsmRun, inputStream, pdaRun->parseInput ); + + pdaRun->parseInput = 0; + } } } else if ( pdaRun->parseInput->flags & PF_HAS_RCODE ) { |