diff options
author | Adrian Thurston <thurston@complang.org> | 2009-03-07 18:03:29 +0000 |
---|---|---|
committer | Adrian Thurston <thurston@complang.org> | 2009-03-07 18:03:29 +0000 |
commit | c2210d0045411c83ca57406daf2c858d4cdf8d2a (patch) | |
tree | 01d6104cd68c7ad221ac5157388972243c514016 | |
parent | 13f3e768a8e8c5964d45d13b86a1d72f1770ce33 (diff) | |
download | colm-c2210d0045411c83ca57406daf2c858d4cdf8d2a.tar.gz |
Took the 'parser' variable out of FsmRun. Changed FsmRun::run to FsmRun::scan
and started on a separate scan-parse-repeat loop.
-rw-r--r-- | colm/bytecode.cpp | 8 | ||||
-rw-r--r-- | colm/fsmcodegen.cpp | 1 | ||||
-rw-r--r-- | colm/fsmrun.cpp | 94 | ||||
-rw-r--r-- | colm/fsmrun.h | 30 | ||||
-rw-r--r-- | colm/parsedata.cpp | 4 | ||||
-rw-r--r-- | colm/pdarun.cpp | 2 |
6 files changed, 84 insertions, 55 deletions
diff --git a/colm/bytecode.cpp b/colm/bytecode.cpp index 03474e73..b40326f3 100644 --- a/colm/bytecode.cpp +++ b/colm/bytecode.cpp @@ -133,7 +133,7 @@ Tree *call_parser( Tree **&sp, Program *prg, Stream *stream, { PdaTables *tables = prg->rtd->pdaTables; PdaRun parser( sp, prg, tables, parserId, stream->fsmRun, stopId, revertOn ); - stream->fsmRun->run( &parser ); + parse( stream->fsmRun, &parser ); commit_full( &parser, 0 ); Tree *tree = parser.getParsedRoot( stopId > 0 ); tree_upref( tree ); @@ -161,10 +161,10 @@ void undo_parse( Tree **&sp, Program *prg, Stream *stream, parser.undoParse( tree, rev ); } -Tree *stream_pull( Program *prg, Stream *stream, Tree *length ) +Tree *stream_pull( Program *prg, PdaRun *parser, Stream *stream, Tree *length ) { long len = ((Int*)length)->value; - Head *tokdata = stream->fsmRun->extractToken( len ); + Head *tokdata = stream->fsmRun->extractToken( parser, len ); return construct_string( prg, tokdata ); } @@ -1989,7 +1989,7 @@ again: #endif Tree *len = pop(); Tree *stream = pop(); - Tree *string = stream_pull( prg, (Stream*)stream, len ); + Tree *string = stream_pull( prg, parser, (Stream*)stream, len ); tree_upref( string ); push( string ); diff --git a/colm/fsmcodegen.cpp b/colm/fsmcodegen.cpp index 9e1ff32d..63fec1c3 100644 --- a/colm/fsmcodegen.cpp +++ b/colm/fsmcodegen.cpp @@ -1012,6 +1012,7 @@ void FsmCodeGen::writeExec() out << "void FsmRun::execute()\n" "{\n" + " matchedToken = 0;\n" "/*_resume:*/\n"; if ( redFsm->errState != 0 ) { diff --git a/colm/fsmrun.cpp b/colm/fsmrun.cpp index c1be11d7..1ee8b918 100644 --- a/colm/fsmrun.cpp +++ b/colm/fsmrun.cpp @@ -44,8 +44,8 @@ void operator<<( ostream &out, exit_object & ) FsmRun::FsmRun( Program *prg ) : prg(prg), - tables(prg->rtd->fsmTables), - parser(0) + tables(prg->rtd->fsmTables) + //,parser(0) { } @@ -197,7 +197,7 @@ void FsmRun::sendBackText( const char *data, long length ) tokstart = 0; } -void FsmRun::queueBack( Kid *input ) +void FsmRun::queueBack( PdaRun *parser, 
Kid *input ) { if ( input->tree->flags & AF_GROUP_MEM ) { #ifdef COLM_LOG_PARSE @@ -230,7 +230,7 @@ void FsmRun::queueBack( Kid *input ) /* Send them back. */ while ( last != 0 ) { Kid *next = last->next; - sendBack( last ); + sendBack( parser, last ); last = next; } @@ -238,11 +238,11 @@ void FsmRun::queueBack( Kid *input ) } /* Now that the queue is flushed, can send back the original item. */ - sendBack( input ); + sendBack( parser, input ); } } -void FsmRun::sendBackIgnore( Kid *ignore ) +void FsmRun::sendBackIgnore( PdaRun *parser, Kid *ignore ) { /* Ignore tokens are queued in reverse order. */ while ( tree_is_ignore( prg, ignore ) ) { @@ -276,7 +276,7 @@ void FsmRun::sendBackIgnore( Kid *ignore ) } } -void FsmRun::sendBack( Kid *input ) +void FsmRun::sendBack( PdaRun *parser, Kid *input ) { #ifdef COLM_LOG_PARSE if ( colm_log_parse ) { @@ -315,7 +315,7 @@ void FsmRun::sendBack( Kid *input ) } /* Always push back the ignore text. */ - sendBackIgnore( tree_ignore( prg, input->tree ) ); + sendBackIgnore( parser, tree_ignore( prg, input->tree ) ); /* If eof was just sent back remember that it needs to be sent again. */ if ( input->tree->id == parser->tables->rtd->eofLelIds[parser->parserId] ) @@ -415,7 +415,7 @@ void send_queued_tokens( FsmRun *fsmRun, PdaRun *parser ) } } -void FsmRun::sendNamedLangEl() +void FsmRun::sendNamedLangEl( PdaRun *parser ) { /* All three set by getLangEl. 
*/ long bindId; @@ -437,7 +437,7 @@ void FsmRun::sendNamedLangEl() if ( data != 0 ) tokdata = string_alloc_new( prg, data, length ); - Kid *input = makeToken( klangEl->id, tokdata, true, bindId ); + Kid *input = makeToken( parser, klangEl->id, tokdata, true, bindId ); send_handle_error( this, parser, input ); } @@ -467,7 +467,7 @@ void execute_generation_action( Program *prg, PdaRun *parser, Code *code, long i * -invoke failure (the backtracker) */ -void FsmRun::generationAction( int id, Head *tokdata, bool namedLangEl, int bindId ) +void FsmRun::generationAction( PdaRun *parser, int id, Head *tokdata, bool namedLangEl, int bindId ) { #ifdef COLM_LOG_PARSE if ( colm_log_parse ) { @@ -490,7 +490,7 @@ void FsmRun::generationAction( int id, Head *tokdata, bool namedLangEl, int bind send_queued_tokens( this, parser ); } -Kid *FsmRun::makeToken( int id, Head *tokdata, bool namedLangEl, int bindId ) +Kid *FsmRun::makeToken( PdaRun *parser, int id, Head *tokdata, bool namedLangEl, int bindId ) { /* Make the token object. */ long objectLength = parser->tables->rtd->lelInfo[id].objectLength; @@ -533,10 +533,10 @@ Kid *FsmRun::makeToken( int id, Head *tokdata, bool namedLangEl, int bindId ) } /* Send back the accumulated ignore tokens. 
*/ -void PdaRun::sendBackIgnore() +void PdaRun::sendBackIgnore( ) { Kid *ignore = extractIgnore(); - fsmRun->sendBackIgnore( ignore ); + fsmRun->sendBackIgnore( this, ignore ); while ( ignore != 0 ) { Kid *next = ignore->next; tree_downref( prg, root, ignore->tree ); @@ -614,7 +614,7 @@ void PdaRun::ignore( Tree *tree ) accumIgnore = ignore; } -void FsmRun::execGen( long id ) +void FsmRun::execGen( PdaRun *parser, long id ) { #ifdef COLM_LOG_PARSE if ( colm_log_parse ) { @@ -636,12 +636,12 @@ void FsmRun::execGen( long id ) p = tokstart; tokstart = 0; - generationAction( id, tokdata, false, 0 ); + generationAction( parser, id, tokdata, false, 0 ); memset( mark, 0, sizeof(mark) ); } -void FsmRun::sendIgnore( long id ) +void FsmRun::sendIgnore( PdaRun *parser, long id ) { #ifdef COLM_LOG_PARSE if ( colm_log_parse ) { @@ -674,7 +674,7 @@ void FsmRun::sendIgnore( long id ) memset( mark, 0, sizeof(mark) ); } -void FsmRun::sendToken( long id ) +void FsmRun::sendToken( PdaRun *parser, long id ) { #ifdef COLM_LOG_PARSE if ( colm_log_parse ) { @@ -695,7 +695,7 @@ void FsmRun::sendToken( long id ) * need to reset tokstart. */ tokstart = 0; - Kid *input = makeToken( id, tokdata, false, 0 ); + Kid *input = makeToken( parser, id, tokdata, false, 0 ); send_handle_error( this, parser, input ); memset( mark, 0, sizeof(mark) ); @@ -717,7 +717,7 @@ void FsmRun::emitToken( KlangEl *token ) /* Load up a token, starting from tokstart if it is set. If not set then * start it at p. */ -Head *FsmRun::extractToken( long length ) +Head *FsmRun::extractToken( PdaRun *parser, long length ) { /* How much do we have already? Tokstart may or may not be set. 
*/ assert( tokstart == 0 ); @@ -746,7 +746,7 @@ Head *FsmRun::extractToken( long length ) return tokdata; } -void FsmRun::sendEOF( ) +void FsmRun::sendEOF( PdaRun *parser ) { #ifdef COLM_LOG_PARSE if ( colm_log_parse ) { @@ -818,12 +818,12 @@ long PdaRun::undoParse( Tree *tree, CodeVect *rev ) numRetry += 1; allReverseCode = rev; - PdaRun *prevParser = fsmRun->parser; - fsmRun->parser = this; +// PdaRun *prevParser = fsmRun->parser; +// fsmRun->parser = this; parseToken( 0 ); - fsmRun->parser = prevParser; +// fsmRun->parser = prevParser; assert( stackTop->next == 0 ); @@ -832,14 +832,36 @@ long PdaRun::undoParse( Tree *tree, CodeVect *rev ) return 0; } -long FsmRun::run( PdaRun *destParser ) +void parse( FsmRun *fsmRun, PdaRun *parser ) { - long space, prevState = cs; + parser->init(); - PdaRun *prevParser = parser; - parser = destParser; + while ( true ) { + int tokenId = fsmRun->scan( parser ); - parser->init(); + if ( tokenId == 0 ) + break; + + bool ctxDepParsing = fsmRun->prg->ctxDepParsing; + LangElInfo *lelInfo = parser->tables->rtd->lelInfo; + if ( ctxDepParsing && lelInfo[tokenId].frameId >= 0 ) + fsmRun->execGen( parser, tokenId ); + else if ( lelInfo[tokenId].ignore ) + fsmRun->sendIgnore( parser, tokenId ); + else + fsmRun->sendToken( parser, tokenId ); + } +} + +long FsmRun::scan( PdaRun *parser ) +{ + long space; + +// long prevState = cs; +// PdaRun *prevParser = parser; +// parser = destParser; + +// parser->init(); act = 0; tokstart = 0; @@ -861,7 +883,7 @@ long FsmRun::run( PdaRun *destParser ) } else { eofSent = true; - sendEOF(); + sendEOF( parser ); if ( !eofSent ) continue; break; @@ -871,7 +893,7 @@ long FsmRun::run( PdaRun *destParser ) if ( p == pe ) { /* We don't have any data. What is next in the input stream? 
*/ if ( inputStream->isLangEl() ) - sendNamedLangEl( ); + sendNamedLangEl( parser ); else { space = runBuf->buf + FSM_BUFSIZE - pe; @@ -887,6 +909,10 @@ long FsmRun::run( PdaRun *destParser ) execute(); + /* First check if scanning stopped because we have a token. */ + if ( matchedToken > 0 ) + return matchedToken; + /* Fall through here either when the input buffer has been exhausted * or the scanner is in an error state. Otherwise we must continue. */ @@ -910,7 +936,7 @@ long FsmRun::run( PdaRun *destParser ) * then send it and continue with the processing loop. */ if ( parser->tables->rtd->regionInfo[region].defaultToken >= 0 ) { tokstart = tokend = p; - sendToken( parser->tables->rtd->regionInfo[region].defaultToken ); + sendToken( parser, parser->tables->rtd->regionInfo[region].defaultToken ); continue; } @@ -1010,7 +1036,7 @@ long FsmRun::run( PdaRun *destParser ) } done: - parser = prevParser; - cs = prevState; + //parser = prevParser; + //cs = prevState; return 0; } diff --git a/colm/fsmrun.h b/colm/fsmrun.h index ac3274ca..95c7b706 100644 --- a/colm/fsmrun.h +++ b/colm/fsmrun.h @@ -83,38 +83,40 @@ struct RunBuf #define MARK_SLOTS 32 +void parse( FsmRun *fsmRun, PdaRun *parser ); + struct FsmRun { FsmRun( Program *prg ); ~FsmRun(); - Kid *makeToken( int id, Head *tokdata, bool namedLangEl, int bindId ); - void generationAction( int id, Head *tokdata, bool namedLangEl, int bindId ); - void sendNamedLangEl(); - void sendEOF(); - void sendIgnore( long id ); - void sendToken( long id ); - void execGen( long id ); - - void sendBackIgnore( Kid *ignore ); - void sendBack( Kid *input ); - void queueBack( Kid *input ); + Kid *makeToken( PdaRun *parser, int id, Head *tokdata, bool namedLangEl, int bindId ); + void generationAction( PdaRun *parser, int id, Head *tokdata, bool namedLangEl, int bindId ); + void sendNamedLangEl( PdaRun *parser ); + void sendEOF( PdaRun *parser ); + void sendIgnore( PdaRun *parser, long id ); + void sendToken( PdaRun *parser, long id ); + 
void execGen( PdaRun *parser, long id ); + + void sendBackIgnore( PdaRun *parser, Kid *ignore ); + void sendBack( PdaRun *parser, Kid *input ); + void queueBack( PdaRun *parser, Kid *input ); void sendBackText( const char *data, long length ); void emitToken( KlangEl *token ); void execAction( GenAction *action ); - long run( PdaRun *parser ); + long scan( PdaRun *parser ); void attachInputStream( InputStream *in ); void streamPush( const char *data, long length ); void undoStreamPush( long length ); - Head *extractToken( long len ); + Head *extractToken( PdaRun *parser, long len ); void execute(); Program *prg; FsmTables *tables; - PdaRun *parser; +// PdaRun *parser; InputStream *inputStream; /* FsmRun State. */ diff --git a/colm/parsedata.cpp b/colm/parsedata.cpp index 32da2e5c..42fb87e6 100644 --- a/colm/parsedata.cpp +++ b/colm/parsedata.cpp @@ -1698,7 +1698,7 @@ void ParseData::parsePatterns() repl->pdaRun = new PdaRun( root, &program, pdaTables, repl->langEl->parserId, &fsmRun, 0, false ); - fsmRun.run( repl->pdaRun ); + parse( &fsmRun, repl->pdaRun ); //#ifdef COLM_LOG_COMPILE //if ( colm_log_compile ) { @@ -1714,7 +1714,7 @@ void ParseData::parsePatterns() pat->pdaRun = new PdaRun( root, &program, pdaTables, pat->langEl->parserId, &fsmRun, 0, false ); - fsmRun.run( pat->pdaRun ); + parse( &fsmRun, pat->pdaRun ); //#ifdef COLM_LOG_COMPILE //if ( colm_log_compile ) { diff --git a/colm/pdarun.cpp b/colm/pdarun.cpp index b85a32c0..8ed8cf09 100644 --- a/colm/pdarun.cpp +++ b/colm/pdarun.cpp @@ -567,7 +567,7 @@ parseError: if ( pt(input->tree)->causeReduce == 0 ) { int next = pt(input->tree)->region + 1; - fsmRun->queueBack( input ); + fsmRun->queueBack( this, input ); input = 0; if ( tables->tokenRegions[next] != 0 ) { #ifdef COLM_LOG_PARSE |