summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Adrian Thurston <thurston@complang.org> 2009-03-07 18:03:29 +0000
committer Adrian Thurston <thurston@complang.org> 2009-03-07 18:03:29 +0000
commit c2210d0045411c83ca57406daf2c858d4cdf8d2a (patch)
tree 01d6104cd68c7ad221ac5157388972243c514016
parent 13f3e768a8e8c5964d45d13b86a1d72f1770ce33 (diff)
download colm-c2210d0045411c83ca57406daf2c858d4cdf8d2a.tar.gz
Took the 'parser' variable out of FsmRun. Changed FsmRun::run to FsmRun::scan
and started on a separate scan-parse-repeat loop.
-rw-r--r-- colm/bytecode.cpp 8
-rw-r--r-- colm/fsmcodegen.cpp 1
-rw-r--r-- colm/fsmrun.cpp 94
-rw-r--r-- colm/fsmrun.h 30
-rw-r--r-- colm/parsedata.cpp 4
-rw-r--r-- colm/pdarun.cpp 2
6 files changed, 84 insertions, 55 deletions
diff --git a/colm/bytecode.cpp b/colm/bytecode.cpp
index 03474e73..b40326f3 100644
--- a/colm/bytecode.cpp
+++ b/colm/bytecode.cpp
@@ -133,7 +133,7 @@ Tree *call_parser( Tree **&sp, Program *prg, Stream *stream,
{
PdaTables *tables = prg->rtd->pdaTables;
PdaRun parser( sp, prg, tables, parserId, stream->fsmRun, stopId, revertOn );
- stream->fsmRun->run( &parser );
+ parse( stream->fsmRun, &parser );
commit_full( &parser, 0 );
Tree *tree = parser.getParsedRoot( stopId > 0 );
tree_upref( tree );
@@ -161,10 +161,10 @@ void undo_parse( Tree **&sp, Program *prg, Stream *stream,
parser.undoParse( tree, rev );
}
-Tree *stream_pull( Program *prg, Stream *stream, Tree *length )
+Tree *stream_pull( Program *prg, PdaRun *parser, Stream *stream, Tree *length )
{
long len = ((Int*)length)->value;
- Head *tokdata = stream->fsmRun->extractToken( len );
+ Head *tokdata = stream->fsmRun->extractToken( parser, len );
return construct_string( prg, tokdata );
}
@@ -1989,7 +1989,7 @@ again:
#endif
Tree *len = pop();
Tree *stream = pop();
- Tree *string = stream_pull( prg, (Stream*)stream, len );
+ Tree *string = stream_pull( prg, parser, (Stream*)stream, len );
tree_upref( string );
push( string );
diff --git a/colm/fsmcodegen.cpp b/colm/fsmcodegen.cpp
index 9e1ff32d..63fec1c3 100644
--- a/colm/fsmcodegen.cpp
+++ b/colm/fsmcodegen.cpp
@@ -1012,6 +1012,7 @@ void FsmCodeGen::writeExec()
out <<
"void FsmRun::execute()\n"
"{\n"
+ " matchedToken = 0;\n"
"/*_resume:*/\n";
if ( redFsm->errState != 0 ) {
diff --git a/colm/fsmrun.cpp b/colm/fsmrun.cpp
index c1be11d7..1ee8b918 100644
--- a/colm/fsmrun.cpp
+++ b/colm/fsmrun.cpp
@@ -44,8 +44,8 @@ void operator<<( ostream &out, exit_object & )
FsmRun::FsmRun( Program *prg ) :
prg(prg),
- tables(prg->rtd->fsmTables),
- parser(0)
+ tables(prg->rtd->fsmTables)
+ //,parser(0)
{
}
@@ -197,7 +197,7 @@ void FsmRun::sendBackText( const char *data, long length )
tokstart = 0;
}
-void FsmRun::queueBack( Kid *input )
+void FsmRun::queueBack( PdaRun *parser, Kid *input )
{
if ( input->tree->flags & AF_GROUP_MEM ) {
#ifdef COLM_LOG_PARSE
@@ -230,7 +230,7 @@ void FsmRun::queueBack( Kid *input )
/* Send them back. */
while ( last != 0 ) {
Kid *next = last->next;
- sendBack( last );
+ sendBack( parser, last );
last = next;
}
@@ -238,11 +238,11 @@ void FsmRun::queueBack( Kid *input )
}
/* Now that the queue is flushed, can send back the original item. */
- sendBack( input );
+ sendBack( parser, input );
}
}
-void FsmRun::sendBackIgnore( Kid *ignore )
+void FsmRun::sendBackIgnore( PdaRun *parser, Kid *ignore )
{
/* Ignore tokens are queued in reverse order. */
while ( tree_is_ignore( prg, ignore ) ) {
@@ -276,7 +276,7 @@ void FsmRun::sendBackIgnore( Kid *ignore )
}
}
-void FsmRun::sendBack( Kid *input )
+void FsmRun::sendBack( PdaRun *parser, Kid *input )
{
#ifdef COLM_LOG_PARSE
if ( colm_log_parse ) {
@@ -315,7 +315,7 @@ void FsmRun::sendBack( Kid *input )
}
/* Always push back the ignore text. */
- sendBackIgnore( tree_ignore( prg, input->tree ) );
+ sendBackIgnore( parser, tree_ignore( prg, input->tree ) );
/* If eof was just sent back remember that it needs to be sent again. */
if ( input->tree->id == parser->tables->rtd->eofLelIds[parser->parserId] )
@@ -415,7 +415,7 @@ void send_queued_tokens( FsmRun *fsmRun, PdaRun *parser )
}
}
-void FsmRun::sendNamedLangEl()
+void FsmRun::sendNamedLangEl( PdaRun *parser )
{
/* All three set by getLangEl. */
long bindId;
@@ -437,7 +437,7 @@ void FsmRun::sendNamedLangEl()
if ( data != 0 )
tokdata = string_alloc_new( prg, data, length );
- Kid *input = makeToken( klangEl->id, tokdata, true, bindId );
+ Kid *input = makeToken( parser, klangEl->id, tokdata, true, bindId );
send_handle_error( this, parser, input );
}
@@ -467,7 +467,7 @@ void execute_generation_action( Program *prg, PdaRun *parser, Code *code, long i
* -invoke failure (the backtracker)
*/
-void FsmRun::generationAction( int id, Head *tokdata, bool namedLangEl, int bindId )
+void FsmRun::generationAction( PdaRun *parser, int id, Head *tokdata, bool namedLangEl, int bindId )
{
#ifdef COLM_LOG_PARSE
if ( colm_log_parse ) {
@@ -490,7 +490,7 @@ void FsmRun::generationAction( int id, Head *tokdata, bool namedLangEl, int bind
send_queued_tokens( this, parser );
}
-Kid *FsmRun::makeToken( int id, Head *tokdata, bool namedLangEl, int bindId )
+Kid *FsmRun::makeToken( PdaRun *parser, int id, Head *tokdata, bool namedLangEl, int bindId )
{
/* Make the token object. */
long objectLength = parser->tables->rtd->lelInfo[id].objectLength;
@@ -533,10 +533,10 @@ Kid *FsmRun::makeToken( int id, Head *tokdata, bool namedLangEl, int bindId )
}
/* Send back the accumulated ignore tokens. */
-void PdaRun::sendBackIgnore()
+void PdaRun::sendBackIgnore( )
{
Kid *ignore = extractIgnore();
- fsmRun->sendBackIgnore( ignore );
+ fsmRun->sendBackIgnore( this, ignore );
while ( ignore != 0 ) {
Kid *next = ignore->next;
tree_downref( prg, root, ignore->tree );
@@ -614,7 +614,7 @@ void PdaRun::ignore( Tree *tree )
accumIgnore = ignore;
}
-void FsmRun::execGen( long id )
+void FsmRun::execGen( PdaRun *parser, long id )
{
#ifdef COLM_LOG_PARSE
if ( colm_log_parse ) {
@@ -636,12 +636,12 @@ void FsmRun::execGen( long id )
p = tokstart;
tokstart = 0;
- generationAction( id, tokdata, false, 0 );
+ generationAction( parser, id, tokdata, false, 0 );
memset( mark, 0, sizeof(mark) );
}
-void FsmRun::sendIgnore( long id )
+void FsmRun::sendIgnore( PdaRun *parser, long id )
{
#ifdef COLM_LOG_PARSE
if ( colm_log_parse ) {
@@ -674,7 +674,7 @@ void FsmRun::sendIgnore( long id )
memset( mark, 0, sizeof(mark) );
}
-void FsmRun::sendToken( long id )
+void FsmRun::sendToken( PdaRun *parser, long id )
{
#ifdef COLM_LOG_PARSE
if ( colm_log_parse ) {
@@ -695,7 +695,7 @@ void FsmRun::sendToken( long id )
* need to reset tokstart. */
tokstart = 0;
- Kid *input = makeToken( id, tokdata, false, 0 );
+ Kid *input = makeToken( parser, id, tokdata, false, 0 );
send_handle_error( this, parser, input );
memset( mark, 0, sizeof(mark) );
@@ -717,7 +717,7 @@ void FsmRun::emitToken( KlangEl *token )
/* Load up a token, starting from tokstart if it is set. If not set then
* start it at p. */
-Head *FsmRun::extractToken( long length )
+Head *FsmRun::extractToken( PdaRun *parser, long length )
{
/* How much do we have already? Tokstart may or may not be set. */
assert( tokstart == 0 );
@@ -746,7 +746,7 @@ Head *FsmRun::extractToken( long length )
return tokdata;
}
-void FsmRun::sendEOF( )
+void FsmRun::sendEOF( PdaRun *parser )
{
#ifdef COLM_LOG_PARSE
if ( colm_log_parse ) {
@@ -818,12 +818,12 @@ long PdaRun::undoParse( Tree *tree, CodeVect *rev )
numRetry += 1;
allReverseCode = rev;
- PdaRun *prevParser = fsmRun->parser;
- fsmRun->parser = this;
+// PdaRun *prevParser = fsmRun->parser;
+// fsmRun->parser = this;
parseToken( 0 );
- fsmRun->parser = prevParser;
+// fsmRun->parser = prevParser;
assert( stackTop->next == 0 );
@@ -832,14 +832,36 @@ long PdaRun::undoParse( Tree *tree, CodeVect *rev )
return 0;
}
-long FsmRun::run( PdaRun *destParser )
+void parse( FsmRun *fsmRun, PdaRun *parser )
{
- long space, prevState = cs;
+ parser->init();
- PdaRun *prevParser = parser;
- parser = destParser;
+ while ( true ) {
+ int tokenId = fsmRun->scan( parser );
- parser->init();
+ if ( tokenId == 0 )
+ break;
+
+ bool ctxDepParsing = fsmRun->prg->ctxDepParsing;
+ LangElInfo *lelInfo = parser->tables->rtd->lelInfo;
+ if ( ctxDepParsing && lelInfo[tokenId].frameId >= 0 )
+ fsmRun->execGen( parser, tokenId );
+ else if ( lelInfo[tokenId].ignore )
+ fsmRun->sendIgnore( parser, tokenId );
+ else
+ fsmRun->sendToken( parser, tokenId );
+ }
+}
+
+long FsmRun::scan( PdaRun *parser )
+{
+ long space;
+
+// long prevState = cs;
+// PdaRun *prevParser = parser;
+// parser = destParser;
+
+// parser->init();
act = 0;
tokstart = 0;
@@ -861,7 +883,7 @@ long FsmRun::run( PdaRun *destParser )
}
else {
eofSent = true;
- sendEOF();
+ sendEOF( parser );
if ( !eofSent )
continue;
break;
@@ -871,7 +893,7 @@ long FsmRun::run( PdaRun *destParser )
if ( p == pe ) {
/* We don't have any data. What is next in the input stream? */
if ( inputStream->isLangEl() )
- sendNamedLangEl( );
+ sendNamedLangEl( parser );
else {
space = runBuf->buf + FSM_BUFSIZE - pe;
@@ -887,6 +909,10 @@ long FsmRun::run( PdaRun *destParser )
execute();
+ /* First check if scanning stopped because we have a token. */
+ if ( matchedToken > 0 )
+ return matchedToken;
+
/* Fall through here either when the input buffer has been exhausted
* or the scanner is in an error state. Otherwise we must continue. */
@@ -910,7 +936,7 @@ long FsmRun::run( PdaRun *destParser )
* then send it and continue with the processing loop. */
if ( parser->tables->rtd->regionInfo[region].defaultToken >= 0 ) {
tokstart = tokend = p;
- sendToken( parser->tables->rtd->regionInfo[region].defaultToken );
+ sendToken( parser, parser->tables->rtd->regionInfo[region].defaultToken );
continue;
}
@@ -1010,7 +1036,7 @@ long FsmRun::run( PdaRun *destParser )
}
done:
- parser = prevParser;
- cs = prevState;
+ //parser = prevParser;
+ //cs = prevState;
return 0;
}
diff --git a/colm/fsmrun.h b/colm/fsmrun.h
index ac3274ca..95c7b706 100644
--- a/colm/fsmrun.h
+++ b/colm/fsmrun.h
@@ -83,38 +83,40 @@ struct RunBuf
#define MARK_SLOTS 32
+void parse( FsmRun *fsmRun, PdaRun *parser );
+
struct FsmRun
{
FsmRun( Program *prg );
~FsmRun();
- Kid *makeToken( int id, Head *tokdata, bool namedLangEl, int bindId );
- void generationAction( int id, Head *tokdata, bool namedLangEl, int bindId );
- void sendNamedLangEl();
- void sendEOF();
- void sendIgnore( long id );
- void sendToken( long id );
- void execGen( long id );
-
- void sendBackIgnore( Kid *ignore );
- void sendBack( Kid *input );
- void queueBack( Kid *input );
+ Kid *makeToken( PdaRun *parser, int id, Head *tokdata, bool namedLangEl, int bindId );
+ void generationAction( PdaRun *parser, int id, Head *tokdata, bool namedLangEl, int bindId );
+ void sendNamedLangEl( PdaRun *parser );
+ void sendEOF( PdaRun *parser );
+ void sendIgnore( PdaRun *parser, long id );
+ void sendToken( PdaRun *parser, long id );
+ void execGen( PdaRun *parser, long id );
+
+ void sendBackIgnore( PdaRun *parser, Kid *ignore );
+ void sendBack( PdaRun *parser, Kid *input );
+ void queueBack( PdaRun *parser, Kid *input );
void sendBackText( const char *data, long length );
void emitToken( KlangEl *token );
void execAction( GenAction *action );
- long run( PdaRun *parser );
+ long scan( PdaRun *parser );
void attachInputStream( InputStream *in );
void streamPush( const char *data, long length );
void undoStreamPush( long length );
- Head *extractToken( long len );
+ Head *extractToken( PdaRun *parser, long len );
void execute();
Program *prg;
FsmTables *tables;
- PdaRun *parser;
+// PdaRun *parser;
InputStream *inputStream;
/* FsmRun State. */
diff --git a/colm/parsedata.cpp b/colm/parsedata.cpp
index 32da2e5c..42fb87e6 100644
--- a/colm/parsedata.cpp
+++ b/colm/parsedata.cpp
@@ -1698,7 +1698,7 @@ void ParseData::parsePatterns()
repl->pdaRun = new PdaRun( root, &program,
pdaTables, repl->langEl->parserId, &fsmRun, 0, false );
- fsmRun.run( repl->pdaRun );
+ parse( &fsmRun, repl->pdaRun );
//#ifdef COLM_LOG_COMPILE
//if ( colm_log_compile ) {
@@ -1714,7 +1714,7 @@ void ParseData::parsePatterns()
pat->pdaRun = new PdaRun( root, &program,
pdaTables, pat->langEl->parserId, &fsmRun, 0, false );
- fsmRun.run( pat->pdaRun );
+ parse( &fsmRun, pat->pdaRun );
//#ifdef COLM_LOG_COMPILE
//if ( colm_log_compile ) {
diff --git a/colm/pdarun.cpp b/colm/pdarun.cpp
index b85a32c0..8ed8cf09 100644
--- a/colm/pdarun.cpp
+++ b/colm/pdarun.cpp
@@ -567,7 +567,7 @@ parseError:
if ( pt(input->tree)->causeReduce == 0 ) {
int next = pt(input->tree)->region + 1;
- fsmRun->queueBack( input );
+ fsmRun->queueBack( this, input );
input = 0;
if ( tables->tokenRegions[next] != 0 ) {
#ifdef COLM_LOG_PARSE