diff options
author | Adrian Thurston <thurston@complang.org> | 2008-11-07 20:19:21 +0000 |
---|---|---|
committer | Adrian Thurston <thurston@complang.org> | 2008-11-07 20:19:21 +0000 |
commit | 8e568f07ea6f54fb5e9ad61f264a4f29972fb693 (patch) | |
tree | 25b3f5e4e9750bd16ec9d9284eac6f16ef6c2aaf | |
parent | 1624689f25b16426d150f65d1f7d47bf9d5629b8 (diff) | |
download | colm-8e568f07ea6f54fb5e9ad61f264a4f29972fb693.tar.gz |
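Reverse code is now transferred to the allReverseCode buffer after each
execution (previously this happened later, when the next token was sent or
ignored). This is made possible by queueing a placeholder _notoken token
whenever a generation action leaves reverse code behind but nothing in the
token queue, so the reverse code always has a token to be attached to.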
-rw-r--r-- | colm/bytecode.cpp | 4 | ||||
-rw-r--r-- | colm/fsmrun.cpp | 159 | ||||
-rw-r--r-- | colm/parsedata.h | 2 | ||||
-rw-r--r-- | colm/pdabuild.cpp | 32 | ||||
-rw-r--r-- | colm/pdacodegen.cpp | 3 | ||||
-rw-r--r-- | colm/pdarun.cpp | 2 | ||||
-rw-r--r-- | colm/pdarun.h | 3 |
7 files changed, 112 insertions, 93 deletions
diff --git a/colm/bytecode.cpp b/colm/bytecode.cpp index 5f7c4dab..092ac299 100644 --- a/colm/bytecode.cpp +++ b/colm/bytecode.cpp @@ -376,7 +376,7 @@ void Program::run() cerr << "freeing the root reverse code" << endl; #endif - bool hasrcode = makeReverseCode( allReverseCode, reverseCode ); + bool hasrcode = make_reverse_code( allReverseCode, reverseCode ); if ( hasrcode ) rcode_downref( root, this, allReverseCode->data ); delete allReverseCode; @@ -635,7 +635,7 @@ void Execution::execute( Tree **root ) assert( sp == root ); } -bool makeReverseCode( CodeVect *all, CodeVect &reverseCode ) +bool make_reverse_code( CodeVect *all, CodeVect &reverseCode ) { /* Do we need to revert the left hand side? */ diff --git a/colm/fsmrun.cpp b/colm/fsmrun.cpp index ef45022b..82a8d483 100644 --- a/colm/fsmrun.cpp +++ b/colm/fsmrun.cpp @@ -322,6 +322,27 @@ void FsmRun::sendBack( Kid *input ) prg->kidPool.free( input ); } +/* If no token was generated but there is reverse code then we must generate + * a fake token so we can attach the reverse code to it. */ +void add_notoken( Program *prg, PdaRun *parser ) +{ + /* Check if there was anything generated. */ + if ( parser->queue == 0 && parser->reverseCode.length() > 0 ) { + #ifdef COLM_LOG_PARSE + cerr << "found reverse code but no token, sending _notoken" << endl; + #endif + + Tree *tree = prg->treePool.allocate(); + tree->refs = 1; + tree->id = prg->rtd->noTokenId; + tree->tokdata = 0; + + parser->queue = prg->kidPool.allocate(); + parser->queue->tree = tree; + parser->queue->next = 0; + } +} + /* Sets the AF_GROUP_MEM so the backtracker can tell which tokens were sent * generated from a single action. 
*/ void set_AF_GROUP_MEM( PdaRun *parser ) @@ -372,53 +393,6 @@ void send_queued_tokens( FsmRun *fsmRun, PdaRun *parser ) } } - -void FsmRun::sendEOF( ) -{ - #ifdef COLM_LOG_PARSE - cerr << "token: _EOF" << endl; - #endif - - Kid *input = prg->kidPool.allocate(); - input->tree = prg->treePool.allocate(); - input->tree->alg = prg->algPool.allocate(); - - input->tree->refs = 1; - input->tree->id = parser->tables->gbl->eofId; - - bool ctxDepParsing = prg->ctxDepParsing; - long frameId = parser->tables->gbl->regionInfo[region].eofFrameId; - if ( ctxDepParsing && frameId >= 0 ) { - #ifdef COLM_LOG_PARSE - cerr << "HAVE PRE_EOF BLOCK" << endl; - #endif - - Code *code = parser->tables->gbl->frameInfo[frameId].codeWV; - - /* Execute the translation. */ - Execution execution( prg, parser->reverseCode, - parser, code, 0, 0 ); - execution.execute( parser->root ); - - /* Mark generated tokens as belonging to a group. */ - set_AF_GROUP_MEM( parser ); - - /* Send the generated tokens. */ - send_queued_tokens( this, parser ); - } - - parser->send( input ); - - if ( parser->errCount > 0 ) { - parser->parse_error( parser->tables->gbl->eofId, input->tree ) << - "parse error" << endp; - } - - tokstart = 0; - region = parser->getNextRegion(); - cs = tables->entryByRegion[region]; -} - void FsmRun::sendToken( long id ) { #ifdef COLM_LOG_PARSE @@ -477,6 +451,30 @@ void FsmRun::sendNamedLangEl() send_handle_error( this, parser, input ); } +void execute_generation_action( Program *prg, PdaRun *parser, Code *code, Head *tokdata ) +{ + /* Execute the translation. */ + Execution execution( prg, parser->reverseCode, parser, code, 0, tokdata ); + execution.execute( parser->root ); + + /* If there is revese code but nothing generated we need a noToken. */ + add_notoken( prg, parser ); + + /* If there is reverse code then add_notoken will guarantee that the + * queue is not empty. Pull the reverse code out and store in the + * token. 
*/ + Tree *tree = parser->queue->tree; + bool hasrcode = make_reverse_code( parser->allReverseCode, parser->reverseCode ); + if ( hasrcode ) { + if ( tree->alg == 0 ) + tree->alg = prg->algPool.allocate(); + tree->alg->flags |= AF_HAS_RCODE; + } + + /* Mark generated tokens as belonging to a group. */ + set_AF_GROUP_MEM( parser ); +} + /* * Not supported: * -invoke failure (the backtracker) @@ -492,18 +490,13 @@ void FsmRun::generationAction( int id, Head *tokdata, bool namedLangEl, int bind /* Find the code. */ Code *code = parser->tables->gbl->frameInfo[ parser->tables->gbl->lelInfo[id].frameId].codeWV; - - /* Execute the translation. */ - Execution execution( prg, parser->reverseCode, - parser, code, 0, tokdata ); - execution.execute( parser->root ); + + /* Execute the action and process the queue. */ + execute_generation_action( prg, parser, code, tokdata ); /* Finished with the match text. */ string_free( prg, tokdata ); - /* Mark generated tokens as belonging to a group. */ - set_AF_GROUP_MEM( parser ); - /* Send the queued tokens. */ send_queued_tokens( this, parser ); } @@ -580,11 +573,6 @@ void PdaRun::send( Kid *input ) ignore->next = child; } - /* Pull the reverse code out and store in the token. */ - bool hasrcode = makeReverseCode( allReverseCode, reverseCode ); - if ( hasrcode ) - input->tree->alg->flags |= AF_HAS_RCODE; - parseToken( input ); } @@ -626,14 +614,6 @@ void PdaRun::ignore( Tree *tree ) Kid *ignore = prg->kidPool.allocate(); ignore->tree = tree; - /* Pull the reverse code out and store in the token. */ - bool hasrcode = makeReverseCode( allReverseCode, reverseCode ); - if ( hasrcode ) { - if ( tree->alg == 0 ) - tree->alg = prg->algPool.allocate(); - tree->alg->flags |= AF_HAS_RCODE; - } - /* Prepend it to the list of ignore tokens. 
*/ ignore->next = accumIgnore; accumIgnore = ignore; @@ -706,6 +686,49 @@ Head *FsmRun::extractToken( long length ) return tokdata; } +void FsmRun::sendEOF( ) +{ + #ifdef COLM_LOG_PARSE + cerr << "token: _EOF" << endl; + #endif + + Kid *input = prg->kidPool.allocate(); + input->tree = prg->treePool.allocate(); + input->tree->alg = prg->algPool.allocate(); + + input->tree->refs = 1; + input->tree->id = parser->tables->gbl->eofId; + + bool ctxDepParsing = prg->ctxDepParsing; + long frameId = parser->tables->gbl->regionInfo[region].eofFrameId; + if ( ctxDepParsing && frameId >= 0 ) { + #ifdef COLM_LOG_PARSE + cerr << "HAVE PRE_EOF BLOCK" << endl; + #endif + + /* Get the code for the pre-eof block. */ + Code *code = parser->tables->gbl->frameInfo[frameId].codeWV; + + /* Execute the action and process the queue. */ + execute_generation_action( prg, parser, code, 0 ); + + /* Send the generated tokens. */ + send_queued_tokens( this, parser ); + } + + parser->send( input ); + + if ( parser->errCount > 0 ) { + parser->parse_error( parser->tables->gbl->eofId, input->tree ) << + "parse error" << endp; + } + + tokstart = 0; + region = parser->getNextRegion(); + cs = tables->entryByRegion[region]; +} + + void FsmRun::attachInputStream( InputStream *in ) { /* Run buffers need to stick around because diff --git a/colm/parsedata.h b/colm/parsedata.h index f1808595..5912e8e6 100644 --- a/colm/parsedata.h +++ b/colm/parsedata.h @@ -777,7 +777,7 @@ struct ParseData KlangEl *streamKlangEl; KlangEl *anyKlangEl; KlangEl *rootKlangEl; - KlangEl *noTokenEl; + KlangEl *noTokenKlangEl; KlangEl *eofKlangEl; KlangEl *errorKlangEl; KlangEl *defaultCharKlangEl; diff --git a/colm/pdabuild.cpp b/colm/pdabuild.cpp index 98df9af1..c77a74a9 100644 --- a/colm/pdabuild.cpp +++ b/colm/pdabuild.cpp @@ -208,10 +208,11 @@ void ParseData::makeKlangElIds() * that needs to be associated with a language element. 
This allows us to * always associate reverse code with the first language element produced * after a generation action. */ - noTokenEl = new KlangEl( rootNamespace, strdup("_notoken"), KlangEl::Term ); - noTokenEl->ignore = true; - langEls.prepend( noTokenEl ); - SymbolMapEl *noTokenMapEl = rootNamespace->symbolMap.insert( noTokenEl->name, noTokenEl ); + noTokenKlangEl = new KlangEl( rootNamespace, strdup("_notoken"), KlangEl::Term ); + noTokenKlangEl->ignore = true; + langEls.prepend( noTokenKlangEl ); + SymbolMapEl *noTokenMapEl = rootNamespace->symbolMap.insert( + noTokenKlangEl->name, noTokenKlangEl ); assert( noTokenMapEl != 0 ); /* Make the EOF language element. */ @@ -252,27 +253,19 @@ void ParseData::makeKlangElIds() for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) { /* Must be a term, and not any of the special reserved terminals. * Remember if the non terminal is a user non terminal. */ - if ( lel->type == KlangEl::Term && lel != eofKlangEl && lel != errorKlangEl ) { + if ( lel->type == KlangEl::Term && + lel != eofKlangEl && + lel != errorKlangEl && + lel != noTokenKlangEl ) + { lel->isUserTerm = true; lel->id = nextSymbolId++; } } - /* Next assign to the eof token, which we always create. */ + /* Next assign to the eof notoken, which we always create. */ eofKlangEl->id = nextSymbolId++; - - /* First pass assigns to the user terminals. */ - for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) { - if ( lel->id < 0 ) { - /* Must be a term, and not any of the special reserved terminals. - * Remember if the non terminal is a user non terminal. */ - if ( lel->type == KlangEl::Term && lel != eofKlangEl && lel != errorKlangEl ) { - assert( false ); - lel->isUserTerm = true; - lel->id = nextSymbolId++; - } - } - } + noTokenKlangEl->id = nextSymbolId++; /* Possibly assign to the error language element. 
*/ if ( errorKlangEl != 0 ) @@ -1385,6 +1378,7 @@ void ParseData::makeRuntimeData() runtimeData->stringId = strKlangEl->id; runtimeData->anyId = anyKlangEl->id; runtimeData->eofId = eofKlangEl->id; + runtimeData->noTokenId = noTokenKlangEl->id; } /* Borrow alg->state for mapsTo. */ diff --git a/colm/pdacodegen.cpp b/colm/pdacodegen.cpp index 399209a8..8067dc06 100644 --- a/colm/pdacodegen.cpp +++ b/colm/pdacodegen.cpp @@ -406,7 +406,8 @@ void PdaCodeGen::writeRuntimeData( RuntimeData *runtimeData ) " " << runtimeData->integerId << ",\n" " " << runtimeData->stringId << ",\n" " " << runtimeData->anyId << ",\n" - " " << runtimeData->eofId << "\n" + " " << runtimeData->eofId << ",\n" + " " << runtimeData->noTokenId << "\n" "};\n" "\n"; } diff --git a/colm/pdarun.cpp b/colm/pdarun.cpp index d058df73..ef22665e 100644 --- a/colm/pdarun.cpp +++ b/colm/pdarun.cpp @@ -427,7 +427,7 @@ again: execution.execute( root ); /* Pull out the reverse code, if any. */ - bool hasrcode = makeReverseCode( allReverseCode, reverseCode ); + bool hasrcode = make_reverse_code( allReverseCode, reverseCode ); if ( hasrcode ) redAlg->flags |= AF_HAS_RCODE; diff --git a/colm/pdarun.h b/colm/pdarun.h index 8f6f2c26..ccb0290a 100644 --- a/colm/pdarun.h +++ b/colm/pdarun.h @@ -410,6 +410,7 @@ struct RuntimeData long stringId; long anyId; long eofId; + long noTokenId; }; struct PdaTables @@ -438,7 +439,7 @@ struct PdaTables RuntimeData *gbl; }; -bool makeReverseCode( CodeVect *all, CodeVect &reverseCode ); +bool make_reverse_code( CodeVect *all, CodeVect &reverseCode ); typedef Vector<Tree*> Bindings; |