summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAdrian Thurston <thurston@complang.org>2008-11-07 20:19:21 +0000
committerAdrian Thurston <thurston@complang.org>2008-11-07 20:19:21 +0000
commit8e568f07ea6f54fb5e9ad61f264a4f29972fb693 (patch)
tree25b3f5e4e9750bd16ec9d9284eac6f16ef6c2aaf
parent1624689f25b16426d150f65d1f7d47bf9d5629b8 (diff)
downloadcolm-8e568f07ea6f54fb5e9ad61f264a4f29972fb693.tar.gz
Reverse code is now transferred to the allReverseCode buffer after each
execution. This is enabled by adding a no-token if there is reverse code but nothing left in the queue after a generation action.
-rw-r--r--colm/bytecode.cpp4
-rw-r--r--colm/fsmrun.cpp159
-rw-r--r--colm/parsedata.h2
-rw-r--r--colm/pdabuild.cpp32
-rw-r--r--colm/pdacodegen.cpp3
-rw-r--r--colm/pdarun.cpp2
-rw-r--r--colm/pdarun.h3
7 files changed, 112 insertions, 93 deletions
diff --git a/colm/bytecode.cpp b/colm/bytecode.cpp
index 5f7c4dab..092ac299 100644
--- a/colm/bytecode.cpp
+++ b/colm/bytecode.cpp
@@ -376,7 +376,7 @@ void Program::run()
cerr << "freeing the root reverse code" << endl;
#endif
- bool hasrcode = makeReverseCode( allReverseCode, reverseCode );
+ bool hasrcode = make_reverse_code( allReverseCode, reverseCode );
if ( hasrcode )
rcode_downref( root, this, allReverseCode->data );
delete allReverseCode;
@@ -635,7 +635,7 @@ void Execution::execute( Tree **root )
assert( sp == root );
}
-bool makeReverseCode( CodeVect *all, CodeVect &reverseCode )
+bool make_reverse_code( CodeVect *all, CodeVect &reverseCode )
{
/* Do we need to revert the left hand side? */
diff --git a/colm/fsmrun.cpp b/colm/fsmrun.cpp
index ef45022b..82a8d483 100644
--- a/colm/fsmrun.cpp
+++ b/colm/fsmrun.cpp
@@ -322,6 +322,27 @@ void FsmRun::sendBack( Kid *input )
prg->kidPool.free( input );
}
+/* If no token was generated but there is reverse code then we must generate
+ * a fake token so we can attach the reverse code to it. */
+void add_notoken( Program *prg, PdaRun *parser )
+{
+ /* Check if there was anything generated. */
+ if ( parser->queue == 0 && parser->reverseCode.length() > 0 ) {
+ #ifdef COLM_LOG_PARSE
+ cerr << "found reverse code but no token, sending _notoken" << endl;
+ #endif
+
+ Tree *tree = prg->treePool.allocate();
+ tree->refs = 1;
+ tree->id = prg->rtd->noTokenId;
+ tree->tokdata = 0;
+
+ parser->queue = prg->kidPool.allocate();
+ parser->queue->tree = tree;
+ parser->queue->next = 0;
+ }
+}
+
/* Sets the AF_GROUP_MEM so the backtracker can tell which tokens were sent
* generated from a single action. */
void set_AF_GROUP_MEM( PdaRun *parser )
@@ -372,53 +393,6 @@ void send_queued_tokens( FsmRun *fsmRun, PdaRun *parser )
}
}
-
-void FsmRun::sendEOF( )
-{
- #ifdef COLM_LOG_PARSE
- cerr << "token: _EOF" << endl;
- #endif
-
- Kid *input = prg->kidPool.allocate();
- input->tree = prg->treePool.allocate();
- input->tree->alg = prg->algPool.allocate();
-
- input->tree->refs = 1;
- input->tree->id = parser->tables->gbl->eofId;
-
- bool ctxDepParsing = prg->ctxDepParsing;
- long frameId = parser->tables->gbl->regionInfo[region].eofFrameId;
- if ( ctxDepParsing && frameId >= 0 ) {
- #ifdef COLM_LOG_PARSE
- cerr << "HAVE PRE_EOF BLOCK" << endl;
- #endif
-
- Code *code = parser->tables->gbl->frameInfo[frameId].codeWV;
-
- /* Execute the translation. */
- Execution execution( prg, parser->reverseCode,
- parser, code, 0, 0 );
- execution.execute( parser->root );
-
- /* Mark generated tokens as belonging to a group. */
- set_AF_GROUP_MEM( parser );
-
- /* Send the generated tokens. */
- send_queued_tokens( this, parser );
- }
-
- parser->send( input );
-
- if ( parser->errCount > 0 ) {
- parser->parse_error( parser->tables->gbl->eofId, input->tree ) <<
- "parse error" << endp;
- }
-
- tokstart = 0;
- region = parser->getNextRegion();
- cs = tables->entryByRegion[region];
-}
-
void FsmRun::sendToken( long id )
{
#ifdef COLM_LOG_PARSE
@@ -477,6 +451,30 @@ void FsmRun::sendNamedLangEl()
send_handle_error( this, parser, input );
}
+void execute_generation_action( Program *prg, PdaRun *parser, Code *code, Head *tokdata )
+{
+ /* Execute the translation. */
+ Execution execution( prg, parser->reverseCode, parser, code, 0, tokdata );
+ execution.execute( parser->root );
+
+ /* If there is reverse code but nothing generated we need a noToken. */
+ add_notoken( prg, parser );
+
+ /* If there is reverse code then add_notoken will guarantee that the
+ * queue is not empty. Pull the reverse code out and store in the
+ * token. */
+ Tree *tree = parser->queue->tree;
+ bool hasrcode = make_reverse_code( parser->allReverseCode, parser->reverseCode );
+ if ( hasrcode ) {
+ if ( tree->alg == 0 )
+ tree->alg = prg->algPool.allocate();
+ tree->alg->flags |= AF_HAS_RCODE;
+ }
+
+ /* Mark generated tokens as belonging to a group. */
+ set_AF_GROUP_MEM( parser );
+}
+
/*
* Not supported:
* -invoke failure (the backtracker)
@@ -492,18 +490,13 @@ void FsmRun::generationAction( int id, Head *tokdata, bool namedLangEl, int bind
/* Find the code. */
Code *code = parser->tables->gbl->frameInfo[
parser->tables->gbl->lelInfo[id].frameId].codeWV;
-
- /* Execute the translation. */
- Execution execution( prg, parser->reverseCode,
- parser, code, 0, tokdata );
- execution.execute( parser->root );
+
+ /* Execute the action and process the queue. */
+ execute_generation_action( prg, parser, code, tokdata );
/* Finished with the match text. */
string_free( prg, tokdata );
- /* Mark generated tokens as belonging to a group. */
- set_AF_GROUP_MEM( parser );
-
/* Send the queued tokens. */
send_queued_tokens( this, parser );
}
@@ -580,11 +573,6 @@ void PdaRun::send( Kid *input )
ignore->next = child;
}
- /* Pull the reverse code out and store in the token. */
- bool hasrcode = makeReverseCode( allReverseCode, reverseCode );
- if ( hasrcode )
- input->tree->alg->flags |= AF_HAS_RCODE;
-
parseToken( input );
}
@@ -626,14 +614,6 @@ void PdaRun::ignore( Tree *tree )
Kid *ignore = prg->kidPool.allocate();
ignore->tree = tree;
- /* Pull the reverse code out and store in the token. */
- bool hasrcode = makeReverseCode( allReverseCode, reverseCode );
- if ( hasrcode ) {
- if ( tree->alg == 0 )
- tree->alg = prg->algPool.allocate();
- tree->alg->flags |= AF_HAS_RCODE;
- }
-
/* Prepend it to the list of ignore tokens. */
ignore->next = accumIgnore;
accumIgnore = ignore;
@@ -706,6 +686,49 @@ Head *FsmRun::extractToken( long length )
return tokdata;
}
+void FsmRun::sendEOF( )
+{
+ #ifdef COLM_LOG_PARSE
+ cerr << "token: _EOF" << endl;
+ #endif
+
+ Kid *input = prg->kidPool.allocate();
+ input->tree = prg->treePool.allocate();
+ input->tree->alg = prg->algPool.allocate();
+
+ input->tree->refs = 1;
+ input->tree->id = parser->tables->gbl->eofId;
+
+ bool ctxDepParsing = prg->ctxDepParsing;
+ long frameId = parser->tables->gbl->regionInfo[region].eofFrameId;
+ if ( ctxDepParsing && frameId >= 0 ) {
+ #ifdef COLM_LOG_PARSE
+ cerr << "HAVE PRE_EOF BLOCK" << endl;
+ #endif
+
+ /* Get the code for the pre-eof block. */
+ Code *code = parser->tables->gbl->frameInfo[frameId].codeWV;
+
+ /* Execute the action and process the queue. */
+ execute_generation_action( prg, parser, code, 0 );
+
+ /* Send the generated tokens. */
+ send_queued_tokens( this, parser );
+ }
+
+ parser->send( input );
+
+ if ( parser->errCount > 0 ) {
+ parser->parse_error( parser->tables->gbl->eofId, input->tree ) <<
+ "parse error" << endp;
+ }
+
+ tokstart = 0;
+ region = parser->getNextRegion();
+ cs = tables->entryByRegion[region];
+}
+
+
void FsmRun::attachInputStream( InputStream *in )
{
/* Run buffers need to stick around because
diff --git a/colm/parsedata.h b/colm/parsedata.h
index f1808595..5912e8e6 100644
--- a/colm/parsedata.h
+++ b/colm/parsedata.h
@@ -777,7 +777,7 @@ struct ParseData
KlangEl *streamKlangEl;
KlangEl *anyKlangEl;
KlangEl *rootKlangEl;
- KlangEl *noTokenEl;
+ KlangEl *noTokenKlangEl;
KlangEl *eofKlangEl;
KlangEl *errorKlangEl;
KlangEl *defaultCharKlangEl;
diff --git a/colm/pdabuild.cpp b/colm/pdabuild.cpp
index 98df9af1..c77a74a9 100644
--- a/colm/pdabuild.cpp
+++ b/colm/pdabuild.cpp
@@ -208,10 +208,11 @@ void ParseData::makeKlangElIds()
* that needs to be associated with a language element. This allows us to
* always associate reverse code with the first language element produced
* after a generation action. */
- noTokenEl = new KlangEl( rootNamespace, strdup("_notoken"), KlangEl::Term );
- noTokenEl->ignore = true;
- langEls.prepend( noTokenEl );
- SymbolMapEl *noTokenMapEl = rootNamespace->symbolMap.insert( noTokenEl->name, noTokenEl );
+ noTokenKlangEl = new KlangEl( rootNamespace, strdup("_notoken"), KlangEl::Term );
+ noTokenKlangEl->ignore = true;
+ langEls.prepend( noTokenKlangEl );
+ SymbolMapEl *noTokenMapEl = rootNamespace->symbolMap.insert(
+ noTokenKlangEl->name, noTokenKlangEl );
assert( noTokenMapEl != 0 );
/* Make the EOF language element. */
@@ -252,27 +253,19 @@ void ParseData::makeKlangElIds()
for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) {
/* Must be a term, and not any of the special reserved terminals.
* Remember if the non terminal is a user non terminal. */
- if ( lel->type == KlangEl::Term && lel != eofKlangEl && lel != errorKlangEl ) {
+ if ( lel->type == KlangEl::Term &&
+ lel != eofKlangEl &&
+ lel != errorKlangEl &&
+ lel != noTokenKlangEl )
+ {
lel->isUserTerm = true;
lel->id = nextSymbolId++;
}
}
- /* Next assign to the eof token, which we always create. */
+ /* Next assign to the eof and notoken elements, which we always create. */
eofKlangEl->id = nextSymbolId++;
-
- /* First pass assigns to the user terminals. */
- for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) {
- if ( lel->id < 0 ) {
- /* Must be a term, and not any of the special reserved terminals.
- * Remember if the non terminal is a user non terminal. */
- if ( lel->type == KlangEl::Term && lel != eofKlangEl && lel != errorKlangEl ) {
- assert( false );
- lel->isUserTerm = true;
- lel->id = nextSymbolId++;
- }
- }
- }
+ noTokenKlangEl->id = nextSymbolId++;
/* Possibly assign to the error language element. */
if ( errorKlangEl != 0 )
@@ -1385,6 +1378,7 @@ void ParseData::makeRuntimeData()
runtimeData->stringId = strKlangEl->id;
runtimeData->anyId = anyKlangEl->id;
runtimeData->eofId = eofKlangEl->id;
+ runtimeData->noTokenId = noTokenKlangEl->id;
}
/* Borrow alg->state for mapsTo. */
diff --git a/colm/pdacodegen.cpp b/colm/pdacodegen.cpp
index 399209a8..8067dc06 100644
--- a/colm/pdacodegen.cpp
+++ b/colm/pdacodegen.cpp
@@ -406,7 +406,8 @@ void PdaCodeGen::writeRuntimeData( RuntimeData *runtimeData )
" " << runtimeData->integerId << ",\n"
" " << runtimeData->stringId << ",\n"
" " << runtimeData->anyId << ",\n"
- " " << runtimeData->eofId << "\n"
+ " " << runtimeData->eofId << ",\n"
+ " " << runtimeData->noTokenId << "\n"
"};\n"
"\n";
}
diff --git a/colm/pdarun.cpp b/colm/pdarun.cpp
index d058df73..ef22665e 100644
--- a/colm/pdarun.cpp
+++ b/colm/pdarun.cpp
@@ -427,7 +427,7 @@ again:
execution.execute( root );
/* Pull out the reverse code, if any. */
- bool hasrcode = makeReverseCode( allReverseCode, reverseCode );
+ bool hasrcode = make_reverse_code( allReverseCode, reverseCode );
if ( hasrcode )
redAlg->flags |= AF_HAS_RCODE;
diff --git a/colm/pdarun.h b/colm/pdarun.h
index 8f6f2c26..ccb0290a 100644
--- a/colm/pdarun.h
+++ b/colm/pdarun.h
@@ -410,6 +410,7 @@ struct RuntimeData
long stringId;
long anyId;
long eofId;
+ long noTokenId;
};
struct PdaTables
@@ -438,7 +439,7 @@ struct PdaTables
RuntimeData *gbl;
};
-bool makeReverseCode( CodeVect *all, CodeVect &reverseCode );
+bool make_reverse_code( CodeVect *all, CodeVect &reverseCode );
typedef Vector<Tree*> Bindings;