summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Adrian Thurston <thurston@complang.org> 2012-05-24 19:05:56 -0400
committer Adrian Thurston <thurston@complang.org> 2012-05-24 19:11:27 -0400
commit 270fd57e97eeda35b09fd2c5669ab9fdaeee290e (patch)
tree 41087e1c4b3ad0638bc14f92323270899629aef9
parent 64c59bd0dd83af9f329efeaf4e444c28caec1353 (diff)
download colm-270fd57e97eeda35b09fd2c5669ab9fdaeee290e.tar.gz
collect-ignore implementation
It is now possible to parse patterns that contain collect-ignores. Sometimes the collect-ignores must be present in the input stream, namely when passing over the production; other times they are not needed, namely when passing over the nonterminal. Skipping over them is now built into the backtracker.
-rw-r--r-- colm/bytecode.h 1
-rw-r--r-- colm/lmparse.kh 2
-rw-r--r-- colm/lmparse.kl 12
-rw-r--r-- colm/lmscan.rl 11
-rw-r--r-- colm/pdabuild.cc 3
-rw-r--r-- colm/pdarun.c 44
6 files changed, 52 insertions, 21 deletions
diff --git a/colm/bytecode.h b/colm/bytecode.h
index aa395d6f..1efe2739 100644
--- a/colm/bytecode.h
+++ b/colm/bytecode.h
@@ -361,6 +361,7 @@ typedef unsigned char uchar;
#define PF_RIGHT_IL_ATTACHED 0x0800
#define PF_CI 0x1000
+#define PF_UNDO_CI 0x2000
#define AF_LEFT_IGNORE 0x0100
#define AF_RIGHT_IGNORE 0x0200
diff --git a/colm/lmparse.kh b/colm/lmparse.kh
index 469e7ec3..722e8082 100644
--- a/colm/lmparse.kh
+++ b/colm/lmparse.kh
@@ -58,7 +58,7 @@ struct ColmParser
# Patterns.
token KW_Match, KW_Construct, KW_Parse, KW_ParseStop, KW_New, KW_MakeToken,
- KW_MakeTree, KW_TypeId, KW_Alias, KW_Send, KW_Ni;
+ KW_MakeTree, KW_TypeId, KW_Alias, KW_Send, KW_Ni, KW_Ci;
token KW_Include, KW_Preeof;
diff --git a/colm/lmparse.kl b/colm/lmparse.kl
index b58821f4..b93c1da0 100644
--- a/colm/lmparse.kl
+++ b/colm/lmparse.kl
@@ -879,8 +879,16 @@ nonterm class opt_collect_ignore
String region;
};
-opt_collect_ignore: KW_Ni TK_Word final { $$->value = true; $$->region = $2->data; };
-opt_collect_ignore: final { $$->value = false; };
+opt_collect_ignore:
+ KW_Ci '<' TK_Word '>'
+ final {
+ $$->value = true;
+ $$->region = $3->data;
+ };
+opt_collect_ignore:
+ final {
+ $$->value = false;
+ };
nonterm prod_el
{
diff --git a/colm/lmscan.rl b/colm/lmscan.rl
index dbe79ca0..cadc8dc3 100644
--- a/colm/lmscan.rl
+++ b/colm/lmscan.rl
@@ -472,11 +472,12 @@ void Scanner::endSection( )
'right' => { token( KW_Right ); };
'nonassoc' => { token( KW_Nonassoc ); };
'prec' => { token( KW_Prec ); };
- 'include' => {token( KW_Include ); };
- 'context' => {token( KW_Context ); };
- 'alias' => {token( KW_Alias ); };
- 'send' => {token( KW_Send ); };
- 'ni' => {token( KW_Ni ); };
+ 'include' => { token( KW_Include ); };
+ 'context' => { token( KW_Context ); };
+ 'alias' => { token( KW_Alias ); };
+ 'send' => { token( KW_Send ); };
+ 'ni' => { token( KW_Ni ); };
+ 'ci' => { token( KW_Ci ); };
# Identifiers.
ident => { token( TK_Word, ts, te ); } ;
diff --git a/colm/pdabuild.cc b/colm/pdabuild.cc
index de0bbac8..c22369b3 100644
--- a/colm/pdabuild.cc
+++ b/colm/pdabuild.cc
@@ -1662,6 +1662,9 @@ void fillNodes( Program *prg, int &nextAvail, Bindings *bindings, long &bindId,
?
parseTree->child : 0;
+ if ( ptChild != 0 && ptChild->flags & PF_CI )
+ ptChild = ptChild->next;
+
/* Set up the fields. */
node.id = kid->tree->id;
node.prodNum = kid->tree->prodNum;
diff --git a/colm/pdarun.c b/colm/pdarun.c
index d6886dc5..e5dd5736 100644
--- a/colm/pdarun.c
+++ b/colm/pdarun.c
@@ -1206,7 +1206,7 @@ case PcrStart:
}
}
- if ( ( pdaRun->tokenId == SCAN_ERROR /*|| pdaRun->tokenId == SCAN_LANG_EL*/ ) &&
+ if ( pdaRun->tokenId == SCAN_ERROR &&
( prg->rtd->regionInfo[fsmRun->region].ciLelId > 0 ) )
{
debug( REALM_PARSE, "sending a collect ignore\n" );
@@ -1214,6 +1214,16 @@ case PcrStart:
goto yes;
}
+ if ( pdaRun->tokenId == SCAN_LANG_EL &&
+ ( prg->rtd->regionInfo[fsmRun->region].ciLelId > 0 ) )
+ {
+ debug( REALM_PARSE, "sending a collect ignore\n" );
+ sendCi( prg, sp, inputStream, fsmRun, pdaRun, prg->rtd->regionInfo[fsmRun->region].ciLelId );
+ pdaRun->parseInput->flags |= PF_UNDO_CI;
+ pdaRun->numRetry += 1;
+ goto yes;
+ }
+
if ( pdaRun->tokenId == SCAN_TRY_AGAIN_LATER ) {
debug( REALM_PARSE, "scanner says try again later\n" );
break;
@@ -1922,10 +1932,10 @@ again:
child->next = last;
dataChild->next = dataLast;
-// if ( child->flags & PF_CI ) {
-// debug( REALM_PARSE, "advancing over CI\n" );
-// dataChild = dataChild->next;
-// }
+ if ( child->flags & PF_CI ) {
+ debug( REALM_PARSE, "advancing over CI\n" );
+ dataChild = dataChild->next;
+ }
/* Track last for reversal. */
last = child;
@@ -2102,14 +2112,22 @@ case PcrReverse:
pdaRun->parseInput = pdaRun->undoLel;
}
else {
- long region = pdaRun->parseInput->region;
- pdaRun->next = region > 0 ? region + 1 : 0;
- pdaRun->checkNext = true;
- pdaRun->checkStop = true;
-
- sendBack( prg, sp, pdaRun, fsmRun, inputStream, pdaRun->parseInput );
-
- pdaRun->parseInput = 0;
+ if ( pdaRun->stackTop->flags & PF_UNDO_CI ) {
+ pdaRun->cs = pdaRun->stackTop->state;
+ pdaRun->numRetry -= 1;
+ pdaRun->stackTop = pdaRun->stackTop->next;
+ goto again;
+ }
+ else {
+ long region = pdaRun->parseInput->region;
+ pdaRun->next = region > 0 ? region + 1 : 0;
+ pdaRun->checkNext = true;
+ pdaRun->checkStop = true;
+
+ sendBack( prg, sp, pdaRun, fsmRun, inputStream, pdaRun->parseInput );
+
+ pdaRun->parseInput = 0;
+ }
}
}
else if ( pdaRun->parseInput->flags & PF_HAS_RCODE ) {