From f9a1603eead3b6daa97b79eab8a6445fb46af5de Mon Sep 17 00:00:00 2001 From: Adrian Thurston Date: Fri, 13 Feb 2009 04:20:27 +0000 Subject: Now using a single parse table with multiple entry points, instead of multiple disjoint tables, one for each nonterminal that must be parsed. I had pursued this before but gave up because the EOF token of one parser could interfere with another parser. The observation I was missing was that we can use unique EOF tokens for each nonterminal we parse, thus avoiding interference. --- colm/bytecode.cpp | 8 ++-- colm/closure.cpp | 40 ++++++++++------- colm/compile.cpp | 2 + colm/dotgen.cpp | 18 ++++---- colm/fsmrun.cpp | 4 +- colm/parsedata.cpp | 108 +++++++++++++++++--------------------------- colm/parsedata.h | 25 +++++++---- colm/pdabuild.cpp | 127 ++++++++++++++++++++++++++++++++++++++-------------- colm/pdacodegen.cpp | 19 ++++---- colm/pdacodegen.h | 2 +- colm/pdagraph.cpp | 6 ++- colm/pdagraph.h | 1 + colm/pdarun.cpp | 5 ++- colm/pdarun.h | 9 ++-- 14 files changed, 220 insertions(+), 154 deletions(-) diff --git a/colm/bytecode.cpp b/colm/bytecode.cpp index 787fa353..58db7499 100644 --- a/colm/bytecode.cpp +++ b/colm/bytecode.cpp @@ -131,8 +131,8 @@ void send( Tree **root, Program *prg, PdaRun *parser, Tree *tree, bool ignore ) Tree *call_parser( Tree **&sp, Program *prg, Stream *stream, long parserId, long stopId, CodeVect *&cv, bool revertOn ) { - PdaTables *tables = prg->rtd->parsers[parserId]; - PdaRun parser( sp, prg, tables, stream->scanner, stopId, revertOn ); + PdaTables *tables = prg->rtd->pdaTables; + PdaRun parser( sp, prg, tables, parserId, stream->scanner, stopId, revertOn ); parser.run(); commit_full( &parser, 0 ); Tree *tree = parser.getParsedRoot( stopId > 0 ); @@ -156,8 +156,8 @@ Tree *call_parser( Tree **&sp, Program *prg, Stream *stream, void undo_parse( Tree **&sp, Program *prg, Stream *stream, long parserId, Tree *tree, CodeVect *rev ) { - PdaTables *tables = prg->rtd->parsers[parserId]; - PdaRun parser( sp, prg, tables, stream->scanner, 0, false ); + PdaTables *tables = prg->rtd->pdaTables; + PdaRun parser( sp, prg, tables, parserId, stream->scanner, 0, false ); parser.undoParse( tree, rev ); } diff --git a/colm/closure.cpp b/colm/closure.cpp index b3c17b1c..5ee6538e 100644 --- a/colm/closure.cpp +++ b/colm/closure.cpp @@ -320,7 +320,7 @@ void ParseData::lalr1AddFollow1( PdaGraph *pdaGraph, PdaTrans *trans ) } /* Add follow sets to an LR(0) graph to make it LALR(1). */ -void ParseData::lalr1AddFollowSets( PdaGraph *pdaGraph, KlangEl *rootEl ) +void ParseData::lalr1AddFollowSets( PdaGraph *pdaGraph, KlangElSet &parserEls ) { /* Make the state that all reduction actions go to. Since a reduction pops * states of the stack and sets the new target state, this state is @@ -328,13 +328,15 @@ void ParseData::lalr1AddFollowSets( PdaGraph *pdaGraph, KlangEl *rootEl ) actionDestState = pdaGraph->addState(); pdaGraph->setFinState( actionDestState ); - /* Get the entry into the graph and traverse over start. */ - PdaState *overStart = pdaGraph->followFsm( pdaGraph->startState, rootEl->rootDef->fsm ); + for ( KlangElSet::Iter pe = parserEls; pe.lte(); pe++ ) { + /* Get the entry into the graph and traverse over start. */ + PdaState *overStart = pdaGraph->followFsm( (*pe)->startState, (*pe)->rootDef->fsm ); - /* Add _eof after the initial _start. */ - PdaTrans *eofTrans = pdaGraph->insertNewTrans( overStart, actionDestState, - eofKlangEl->id, eofKlangEl->id ); - eofTrans->isShift = true; + /* Add _eof after the initial _start. */ + PdaTrans *eofTrans = pdaGraph->insertNewTrans( overStart, actionDestState, + (*pe)->eofLel->id, (*pe)->eofLel->id ); + eofTrans->isShift = true; + } /* This was used during lr0 table construction. */ pdaGraph->transClosureQueue.abandon(); @@ -416,20 +418,26 @@ void ParseData::addDupTerms( PdaGraph *pdaGraph ) } /* Generate a LALR(1) graph. */ -void ParseData::lalr1GenerateParser( PdaGraph *pdaGraph, KlangEl *rootEl ) +void ParseData::lalr1GenerateParser( PdaGraph *pdaGraph, KlangElSet &parserEls ) { /* Make the intial graph. */ pdaGraph->langElIndex = langElIndex; - PdaState *start = pdaGraph->addState(); - pdaGraph->setStartState( start ); + for ( Vector::Iter r = parserEls; r.lte(); r++ ) { + /* Create the entry point. */ + PdaState *rs = pdaGraph->addState(); + pdaGraph->entryStateSet.insert( rs ); + + /* State set of just one state. */ + rs->stateSet = new PdaStateSet; + rs->stateSet->insert( (*r)->rootDef->fsm->startState ); - start->stateSet = new PdaStateSet; - start->stateSet->insert( rootEl->rootDef->fsm->startState ); + /* Queue the start state for closure. */ + rs->onClosureQueue = true; + pdaGraph->stateClosureQueue.append( rs ); - /* Queue the start state for closure. */ - start->onClosureQueue = true; - pdaGraph->stateClosureQueue.append( start ); + (*r)->startState = rs; + } /* Run the lr0 closure. */ lr0CloseAllStates( pdaGraph ); @@ -441,7 +449,7 @@ void ParseData::lalr1GenerateParser( PdaGraph *pdaGraph, KlangEl *rootEl ) linkExpansions( pdaGraph ); /* Walk the graph adding follow sets to the LR(0) graph. */ - lalr1AddFollowSets( pdaGraph, rootEl ); + lalr1AddFollowSets( pdaGraph, parserEls ); // /* Set the commit on the final eof shift. */ // PdaTrans *overStart = pdaGraph->startState->findTrans( rootEl->id ); diff --git a/colm/compile.cpp b/colm/compile.cpp index aab44efc..1f6068bb 100644 --- a/colm/compile.cpp +++ b/colm/compile.cpp @@ -1147,6 +1147,8 @@ UniqueType *LangTerm::evaluateParse( ParseData *pd, CodeVect &code, bool stop ) /* Allocate a parser id. This will cause a parser to be built for * the type. */ ut->langEl->parserId = pd->nextParserId++; + if ( stop ) + ut->langEl->parseStop = true; /* Parse instruction, dependent on whether or not we are * producing revert or commit code. */ diff --git a/colm/dotgen.cpp b/colm/dotgen.cpp index 2fd9bf09..30788a4c 100644 --- a/colm/dotgen.cpp +++ b/colm/dotgen.cpp @@ -79,8 +79,12 @@ void ParseData::writeDotFile( PdaGraph *graph ) /* Define the psuedo states. Transitions will be done after the states * have been defined as either final or not final. */ out << - " node [ shape = point ];\n" - " ENTRY [ label = \"\" ];\n" + " node [ shape = point ];\n"; + + for ( int i = 0; i < graph->entryStateSet.length(); i++ ) + out << "\tENTRY" << i << " [ label = \"\" ];\n"; + + out << "\n" " node [ shape = circle, fixedsize = true, height = 0.2 ];\n"; @@ -94,8 +98,9 @@ void ParseData::writeDotFile( PdaGraph *graph ) for ( PdaStateList::Iter st = graph->stateList; st.lte(); st++ ) writeTransList( st ); - /* Transitions into the start state. */ - out << " ENTRY -> " << graph->startState->stateNum << " [ label = \"\" ];\n"; + /* Start state and other entry points. */ + for ( PdaStateSet::Iter st = graph->entryStateSet; st.lte(); st++ ) + out << "\tENTRY" << st.pos() << " -> " << (*st)->stateNum << " [ label = \"\" ];\n"; out << "}\n"; @@ -103,9 +108,6 @@ void ParseData::writeDotFile( PdaGraph *graph ) void ParseData::writeDotFile() { - for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) { - if ( lel->parserId >= 0 ) - writeDotFile( lel->pdaGraph ); - } + writeDotFile( pdaGraph ); } diff --git a/colm/fsmrun.cpp b/colm/fsmrun.cpp index f7602d2f..2ddf8751 100644 --- a/colm/fsmrun.cpp +++ b/colm/fsmrun.cpp @@ -724,7 +724,7 @@ void FsmRun::sendEOF( ) input->tree->flags |= AF_PARSE_TREE; input->tree->refs = 1; - input->tree->id = parser->tables->rtd->eofId; + input->tree->id = parser->tables->rtd->eofLelIds[parser->parserId]; bool ctxDepParsing = prg->ctxDepParsing; long frameId = parser->tables->rtd->regionInfo[region].eofFrameId; @@ -748,7 +748,7 @@ void FsmRun::sendEOF( ) parser->send( input ); if ( parser->errCount > 0 ) { - parser->parse_error( parser->tables->rtd->eofId, input->tree ) << + parser->parse_error( input->tree->id, input->tree ) << "parse error" << endp; } diff --git a/colm/parsedata.cpp b/colm/parsedata.cpp index ba7f14cd..fd38e64e 100644 --- a/colm/parsedata.cpp +++ b/colm/parsedata.cpp @@ -1603,29 +1603,6 @@ void InputStreamRepl::pushBackNamed() } -void ParseData::makePatternParsers() -{ - for ( PatternList::Iter pat = patternList; pat.lte(); pat++ ) { - /* We assume the reduction action compilation phase was run before - * pattern parsing and it decorated the pattern with the target type. */ - assert( pat->langEl != 0 ); - if ( pat->langEl->type != KlangEl::NonTerm ) - error(pat->loc) << "pattern type is not a non-terminal" << endp; - - /* Make a parser for the language element. */ - makeParser( pat->langEl ); - } - - for ( ReplList::Iter repl = replList; repl.lte(); repl++ ) { - /* We assume the reduction action compilation phase was run before - * replacement parsing decorated the replacement with the target type. */ - assert( repl->langEl != 0 ); - - /* Make a parser for the language element. */ - makeParser( repl->langEl ); - } -} - void ParseData::parsePatterns() { Program program( false, runtimeData ); @@ -1639,7 +1616,7 @@ void ParseData::parsePatterns() fsmRun.attachInputStream( &in ); repl->pdaRun = new PdaRun( root, &program, - repl->langEl->pdaTables, &fsmRun, 0, false ); + pdaTables, repl->langEl->parserId, &fsmRun, 0, false ); repl->pdaRun->run(); //#ifdef COLM_LOG_COMPILE @@ -1655,7 +1632,7 @@ void ParseData::parsePatterns() fsmRun.attachInputStream( &in ); pat->pdaRun = new PdaRun( root, &program, - pat->langEl->pdaTables, &fsmRun, 0, false ); + pdaTables, pat->langEl->parserId, &fsmRun, 0, false ); pat->pdaRun->run(); //#ifdef COLM_LOG_COMPILE @@ -1668,31 +1645,6 @@ void ParseData::parsePatterns() fillInPatterns( &program ); } -void ParseData::verifyParseStopGrammar( KlangEl *langEl ) -{ - PdaGraph *pdaGraph = langEl->pdaGraph; - - /* Get the entry into the graph and traverse over the root. The resulting - * state can have eof, nothing else can. */ - PdaState *overStart = pdaGraph->followFsm( - pdaGraph->startState, - langEl->rootDef->fsm ); - - /* The graph must reduce to root all on it's own. It cannot depend on - * require EOF. */ - for ( PdaStateList::Iter st = pdaGraph->stateList; st.lte(); st++ ) { - if ( st == overStart ) - continue; - - for ( TransMap::Iter tr = st->transMap; tr.lte(); tr++ ) { - if ( tr->value->lowKey == eofKlangEl->id ) { - /* This needs a better error message. Appears to be voodoo. */ - error() << "grammar is not usable with parse_stop" << endp; - } - } - } -} - void ParseData::resolveUses() { for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) { @@ -1710,6 +1662,41 @@ void ParseData::resolveUses() } } +void ParseData::collectParserEls( BstSet &parserEls ) +{ + for ( PatternList::Iter pat = patternList; pat.lte(); pat++ ) { + /* We assume the reduction action compilation phase was run before + * pattern parsing and it decorated the pattern with the target type. */ + assert( pat->langEl != 0 ); + if ( pat->langEl->type != KlangEl::NonTerm ) + error(pat->loc) << "pattern type is not a non-terminal" << endp; + + if ( pat->langEl->parserId < 0 ) { + /* Make a parser for the language element. */ + parserEls.insert( pat->langEl ); + pat->langEl->parserId = nextParserId++; + } + } + + for ( ReplList::Iter repl = replList; repl.lte(); repl++ ) { + /* We assume the reduction action compilation phase was run before + * replacement parsing decorated the replacement with the target type. */ + assert( repl->langEl != 0 ); + + if ( repl->langEl->parserId < 0 ) { + /* Make a parser for the language element. */ + parserEls.insert( repl->langEl ); + repl->langEl->parserId = nextParserId++; + } + } + + /* Make parsers that we need. */ + for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) { + if ( lel->parserId >= 0 ) + parserEls.insert( lel ); + } +} + void ParseData::semanticAnalysis() { beginProcessing(); @@ -1749,18 +1736,10 @@ void ParseData::semanticAnalysis() RedFsmBuild reduce( sectionName, this, fsmGraph ); redFsm = reduce.reduceMachine(); - /* Build the parsers used for patterns and replacements. */ - makePatternParsers(); + BstSet parserEls; + collectParserEls( parserEls ); - /* Make parsers that we need. */ - for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) { - if ( lel->parserId >= 0 ) { - makeParser( lel ); - - if ( lel->parseStop ) - verifyParseStopGrammar( lel ); - } - } + makeParser( parserEls ); /* Make the scanner tables. */ fsmTables = redFsm->makeFsmTables(); @@ -1787,13 +1766,10 @@ void ParseData::generateOutput() fsmGen->writeCode(); /* Make parsers that we need. */ - for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) { - if ( lel->parserId >= 0 ) - pdaGen->writeParserData( lel->parserId, lel->pdaTables ); - } + pdaGen->writeParserData( 0, pdaTables ); /* Write the runtime data. */ - pdaGen->writeRuntimeData( runtimeData ); + pdaGen->writeRuntimeData( runtimeData, pdaTables ); outStream->flush(); } diff --git a/colm/parsedata.h b/colm/parsedata.h index f2c382f5..522a7aea 100644 --- a/colm/parsedata.h +++ b/colm/parsedata.h @@ -147,6 +147,7 @@ typedef Vector< TokenDef* > TokenDefVect; struct UniqueType; typedef Vector KlangElVect; +typedef BstSet KlangElSet; /* A language element class. Can be a nonTerm or a term. */ struct KlangEl : public DListEl @@ -178,6 +179,7 @@ struct KlangEl : public DListEl bool isRepeat; bool isOpt; bool parseStop; + bool isEOF; /* Productions from the language element if it is a non-terminal. */ LelDefList defList; @@ -185,10 +187,13 @@ struct KlangEl : public DListEl TokenDef *tokenDef; Definition *rootDef; KlangEl *termDup; + KlangEl *eofLel; PdaGraph *pdaGraph; PdaTables *pdaTables; + PdaState *startState; + CodeBlock *transBlock; ObjectDef *objectDef; @@ -635,14 +640,14 @@ struct ParseData void lalr1AddFollow2( PdaGraph *pdaGraph, PdaTrans *trans, long followKey, long prior ); void lalr1AddFollow1( PdaGraph *pdaGraph, PdaTrans *trans ); - void lalr1AddFollowSets( PdaGraph *pdaGraph, KlangEl *rootEl ); + void lalr1AddFollowSets( PdaGraph *pdaGraph, KlangElSet &parserEls ); void lr0BringInItem( PdaGraph *pdaGraph, PdaState *dest, PdaState *prodState, PdaTrans *expandFrom, Definition *prod ); void lr0InvokeClosure( PdaGraph *pdaGraph, PdaState *state ); void lr0CloseAllStates( PdaGraph *pdaGraph ); - void lalr1GenerateParser( PdaGraph *pdaGraph, KlangEl *rootEl ); + void lalr1GenerateParser( PdaGraph *pdaGraph, KlangElSet &parserEls ); void reduceActions( PdaGraph *pdaGraph ); @@ -657,13 +662,13 @@ struct ParseData PdaState *followProd( PdaState *tabState, PdaState *prodState ); void findFollow( AlphSet &result, PdaState *overTab, PdaState *overSrc, Definition *parentDef ); - void pdaActionOrder( PdaGraph *pdaGraph, KlangEl *rootEl ); + void pdaActionOrder( PdaGraph *pdaGraph, KlangElSet &parserEls ); void pdaOrderFollow( KlangEl *rootEl, PdaState *tabState, PdaTrans *tabTrans, PdaTrans *srcTrans, Definition *parentDef, Definition *definition, long &time ); void pdaOrderProd( KlangEl *rootEl, PdaState *tabState, PdaState *srcState, Definition *parentDef, long &time ); - void analyzeMachine( PdaGraph *pdaGraph, KlangEl *rootEl ); + void analyzeMachine( PdaGraph *pdaGraph, KlangElSet &parserEls ); void makeProdFsms(); void insertUniqueEmptyProductions(); @@ -688,12 +693,11 @@ struct ParseData void addProdRHSLoads( Definition *prod, CodeVect &code, long &insertPos ); void prepGrammar(); - - void makePatternParsers(); void parsePatterns(); - void makeParser( KlangEl *rootEl ); - PdaGraph *makePdaGraph( KlangEl *rootEl ); + void collectParserEls( KlangElSet &parserEls ); + void makeParser( KlangElSet &parserEls ); + PdaGraph *makePdaGraph( BstSet &parserEls ); PdaTables *makePdaTables( PdaGraph *pdaGraph ); void fillInPatterns( Program *prg ); @@ -702,7 +706,7 @@ struct ParseData /* Generate and write out the fsm. */ void generateGraphviz(); - void verifyParseStopGrammar( KlangEl *langEl ); + void verifyParseStopGrammar( KlangEl *langEl, PdaGraph *pdaGraph ); void initFieldInstructions( ObjField *el ); void initLocalInstructions( ObjField *el ); @@ -896,6 +900,9 @@ struct ParseData bool revertOn; RedFsm *redFsm; + + PdaGraph *pdaGraph; + PdaTables *pdaTables; }; void afterOpMinimize( FsmGraph *fsm, bool lastInSeq = true ); diff --git a/colm/pdabuild.cpp b/colm/pdabuild.cpp index 2c370644..17a85d03 100644 --- a/colm/pdabuild.cpp +++ b/colm/pdabuild.cpp @@ -73,9 +73,11 @@ KlangEl::KlangEl( Namespace *nspace, const String &name, Type type ) isRepeat(false), isOpt(false), parseStop(false), + isEOF(false), tokenDef(0), rootDef(0), termDup(0), + eofLel(0), pdaGraph(0), pdaTables(0), transBlock(0), @@ -218,10 +220,11 @@ void ParseData::makeKlangElIds() assert( noTokenMapEl != 0 ); /* Make the EOF language element. */ - eofKlangEl = new KlangEl( rootNamespace, strdup("_eof"), KlangEl::Term ); - langEls.prepend( eofKlangEl ); - SymbolMapEl *eofMapEl = rootNamespace->symbolMap.insert( eofKlangEl->name, eofKlangEl ); - assert( eofMapEl != 0 ); + eofKlangEl = 0; +// eofKlangEl = new KlangEl( rootNamespace, strdup("_eof"), KlangEl::Term ); +// langEls.prepend( eofKlangEl ); +// SymbolMapEl *eofMapEl = rootNamespace->symbolMap.insert( eofKlangEl->name, eofKlangEl ); +// assert( eofMapEl != 0 ); /* Make the "any" language element */ anyKlangEl = new KlangEl( rootNamespace, strdup("any"), KlangEl::NonTerm ); @@ -247,6 +250,25 @@ void ParseData::makeKlangElIds() } } + /* Make eof language elements for each user terminal. This is a bit excessive and + * need to be reduced to the ones that we need parsers for, but we don't know that yet. + * Another pass before this one is needed. */ + for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) { + if ( lel->eofLel == 0 && + lel != eofKlangEl && + lel != errorKlangEl && + lel != noTokenKlangEl ) + { + String name( lel->name.length() + 5, "_eof_%s", lel->name.data ); + KlangEl *eofLel = new KlangEl( lel->nspace, name, KlangEl::Term ); + + langEls.append( eofLel ); + lel->eofLel = eofLel; + eofLel->eofLel = lel; + eofLel->isEOF = true; + } + } + /* The first id 0 is reserved for the stack sentinal. A negative id means * error to the parsing function, inducing backtracking. */ nextSymbolId = 1; @@ -256,7 +278,7 @@ void ParseData::makeKlangElIds() /* Must be a term, and not any of the special reserved terminals. * Remember if the non terminal is a user non terminal. */ if ( lel->type == KlangEl::Term && - lel != eofKlangEl && + !lel->isEOF && lel != errorKlangEl && lel != noTokenKlangEl ) { @@ -265,8 +287,15 @@ void ParseData::makeKlangElIds() } } + //eofKlangEl->id = nextSymbolId++; + for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) { + /* Must be a term, and not any of the special reserved terminals. + * Remember if the non terminal is a user non terminal. */ + if ( lel->isEOF ) + lel->id = nextSymbolId++; + } + /* Next assign to the eof notoken, which we always create. */ - eofKlangEl->id = nextSymbolId++; noTokenKlangEl->id = nextSymbolId++; /* Possibly assign to the error language element. */ @@ -482,7 +511,7 @@ void ParseData::pdaOrderFollow( KlangEl *rootEl, PdaState *tabState, AlphSet alphSet; if ( parentDef == rootEl->rootDef ) - alphSet.insert( eofKlangEl->id ); + alphSet.insert( rootEl->eofLel->id ); else findFollow( alphSet, overTab, overSrc, parentDef ); @@ -519,7 +548,7 @@ void ParseData::addRegion( PdaState *tabState, long pdaKey ) /* If it is not the eof, then use the region associated * with the token definition. */ - if ( klangEl != eofKlangEl && klangEl->tokenDef != 0 ) + if ( !klangEl->isEOF && klangEl->tokenDef != 0 ) region = klangEl->tokenDef->tokenRegion; if ( region != 0 && !regionVectHas( tabState->regions, region ) ) @@ -620,7 +649,7 @@ void ParseData::pdaOrderProd( KlangEl *rootEl, PdaState *tabState, } } -void ParseData::pdaActionOrder( PdaGraph *pdaGraph, KlangEl *rootEl ) +void ParseData::pdaActionOrder( PdaGraph *pdaGraph, KlangElSet &parserEls ) { for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) { assert( (state->stateBits & SB_ISMARKED) == 0 ); @@ -636,14 +665,16 @@ void ParseData::pdaActionOrder( PdaGraph *pdaGraph, KlangEl *rootEl ) /* Compute the action orderings, record the max value. */ long time = 1; - PdaState *startState = rootEl->rootDef->fsm->startState; - pdaOrderProd( rootEl, pdaGraph->startState, startState, rootEl->rootDef, time ); - - /* Walk over the start lang el and set the time for shift of - * the eof action that completes the parse. */ - PdaTrans *overStart = pdaGraph->startState->findTrans( rootEl->id ); - PdaTrans *eofTrans = overStart->toState->findTrans( eofKlangEl->id ); - eofTrans->actOrds[0] = time++; + for ( KlangElSet::Iter pe = parserEls; pe.lte(); pe++ ) { + PdaState *startState = (*pe)->rootDef->fsm->startState; + pdaOrderProd( *pe, (*pe)->startState, startState, (*pe)->rootDef, time ); + + /* Walk over the start lang el and set the time for shift of + * the eof action that completes the parse. */ + PdaTrans *overStart = (*pe)->startState->findTrans( (*pe)->id ); + PdaTrans *eofTrans = overStart->toState->findTrans( (*pe)->eofLel->id ); + eofTrans->actOrds[0] = time++; + } for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) { if ( state->regions.length() == 0 ) { @@ -651,7 +682,8 @@ void ParseData::pdaActionOrder( PdaGraph *pdaGraph, KlangEl *rootEl ) /* There are no regions and EOF leaves the state. Add the eof * token region. */ PdaTrans *trans = tel->value; - if ( trans->lowKey == eofKlangEl->id ) + KlangEl *lel = langElIndex[trans->lowKey]; + if ( lel != 0 && lel->isEOF ) state->regions.append( eofTokenRegion ); } } @@ -859,7 +891,30 @@ void ParseData::reduceActions( PdaGraph *pdaGraph ) } } -void ParseData::analyzeMachine( PdaGraph *pdaGraph, KlangEl *rootEl ) +void ParseData::verifyParseStopGrammar( KlangEl *langEl, PdaGraph *pdaGraph ) +{ + /* Get the entry into the graph and traverse over the root. The resulting + * state can have eof, nothing else can. */ + PdaState *overStart = pdaGraph->followFsm( + langEl->startState, + langEl->rootDef->fsm ); + + /* The graph must reduce to root all on it's own. It cannot depend on + * require EOF. */ + for ( PdaStateList::Iter st = pdaGraph->stateList; st.lte(); st++ ) { + if ( st == overStart ) + continue; + + for ( TransMap::Iter tr = st->transMap; tr.lte(); tr++ ) { + if ( tr->value->lowKey == langEl->eofLel->id ) { + /* This needs a better error message. Appears to be voodoo. */ + error() << "grammar is not usable with parse_stop" << endp; + } + } + } +} + +void ParseData::analyzeMachine( PdaGraph *pdaGraph, KlangElSet &parserEls ) { pdaGraph->maxState = pdaGraph->stateList.length() - 1; pdaGraph->maxLelId = nextSymbolId - 1; @@ -879,7 +934,7 @@ void ParseData::analyzeMachine( PdaGraph *pdaGraph, KlangEl *rootEl ) } } - pdaActionOrder( pdaGraph, rootEl ); + pdaActionOrder( pdaGraph, parserEls ); sortActions( pdaGraph ); advanceReductions( pdaGraph ); pdaGraph->setStateNumbers(); @@ -922,6 +977,10 @@ void ParseData::analyzeMachine( PdaGraph *pdaGraph, KlangEl *rootEl ) } } } + + /* Verify that any type we parse_stop can actually be parsed that way. */ + for ( KlangElSet::Iter pe = parserEls; pe.lte(); pe++ ) + verifyParseStopGrammar(*pe, pdaGraph); } void ParseData::wrapNonTerminals() @@ -1359,12 +1418,16 @@ void ParseData::makeRuntimeData() } runtimeData->fsmTables = fsmTables; + runtimeData->pdaTables = pdaTables; - runtimeData->parsers = new PdaTables*[nextParserId]; + runtimeData->startStates = new int[nextParserId]; + runtimeData->eofLelIds = new int[nextParserId]; runtimeData->numParsers = nextParserId; for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) { - if ( lel->parserId >= 0 ) - runtimeData->parsers[lel->parserId] = lel->pdaTables; + if ( lel->parserId >= 0 ) { + runtimeData->startStates[lel->parserId] = lel->startState->stateNum; + runtimeData->eofLelIds[lel->parserId] = lel->eofLel->id; + } } @@ -1379,7 +1442,7 @@ void ParseData::makeRuntimeData() runtimeData->integerId = intKlangEl->id; runtimeData->stringId = strKlangEl->id; runtimeData->anyId = anyKlangEl->id; - runtimeData->eofId = eofKlangEl->id; + runtimeData->eofId = 0; //eofKlangEl->id; runtimeData->noTokenId = noTokenKlangEl->id; } @@ -1503,8 +1566,6 @@ PdaTables *ParseData::makePdaTables( PdaGraph *pdaGraph ) int count, curOffset, pos; PdaTables *pdaTables = new PdaTables; - pdaTables->startState = pdaGraph->startState->stateNum; - /* * Indicies. */ @@ -1687,25 +1748,23 @@ void ParseData::prepGrammar() runtimeData = new RuntimeData; } -PdaGraph *ParseData::makePdaGraph( KlangEl *rootEl ) +PdaGraph *ParseData::makePdaGraph( KlangElSet &parserEls ) { //for ( DefList::Iter prod = prodList; prod.lte(); prod++ ) // cerr << prod->prodId << " " << prod->data << endl; PdaGraph *pdaGraph = new PdaGraph(); - lalr1GenerateParser( pdaGraph, rootEl ); + lalr1GenerateParser( pdaGraph, parserEls ); pdaGraph->setStateNumbers(); - analyzeMachine( pdaGraph, rootEl ); + analyzeMachine( pdaGraph, parserEls ); //cerr << "NUMBER OF STATES: " << pdaGraph->stateList.length() << endl; return pdaGraph; } -void ParseData::makeParser( KlangEl *rootEl ) +void ParseData::makeParser( KlangElSet &parserEls ) { - if ( rootEl->pdaTables == 0 ) { - rootEl->pdaGraph = makePdaGraph( rootEl ); - rootEl->pdaTables = makePdaTables( rootEl->pdaGraph ); - } + pdaGraph = makePdaGraph( parserEls ); + pdaTables = makePdaTables( pdaGraph ); } diff --git a/colm/pdacodegen.cpp b/colm/pdacodegen.cpp index 8067dc06..53530e80 100644 --- a/colm/pdacodegen.cpp +++ b/colm/pdacodegen.cpp @@ -92,7 +92,7 @@ void PdaCodeGen::writeFirst() "\n"; } -void PdaCodeGen::writeRuntimeData( RuntimeData *runtimeData ) +void PdaCodeGen::writeRuntimeData( RuntimeData *runtimeData, PdaTables *pdaTables ) { /* * Blocks of code in frames. @@ -355,10 +355,15 @@ void PdaCodeGen::writeRuntimeData( RuntimeData *runtimeData ) out << "0, "; out << "};\n\n"; - /* Parsers. */ - out << "PdaTables *parsers[] = {\n\t"; + out << "int startStates[] = {\n\t"; for ( long i = 0; i < runtimeData->numParsers; i++ ) { - out << "&pid_" << i << "_pdaTables,\n"; + out << runtimeData->startStates[i] << ", "; + } + out << "};\n\n"; + + out << "int eofLelIds[] = {\n\t"; + for ( long i = 0; i < runtimeData->numParsers; i++ ) { + out << runtimeData->eofLelIds[i] << ", "; } out << "};\n\n"; @@ -398,7 +403,8 @@ void PdaCodeGen::writeRuntimeData( RuntimeData *runtimeData ) " " << runtimeData->numLiterals << ",\n" "\n" " &fsmTables_start,\n" - " parsers, " << runtimeData->numParsers << ",\n" + " &pid_0_pdaTables,\n" + " startStates, eofLelIds, " << runtimeData->numParsers << ",\n" "\n" " " << runtimeData->globalSize << ",\n" "\n" @@ -415,8 +421,6 @@ void PdaCodeGen::writeRuntimeData( RuntimeData *runtimeData ) void PdaCodeGen::writeParserData( long id, PdaTables *tables ) { String prefix = "pid_" + String(0, "%ld", id) + "_"; - out << "unsigned int " << prefix << startState() << " = " << - tables->startState << ";\n\n"; out << "int " << prefix << indicies() << "[] = {\n\t"; for ( int i = 0; i < tables->numIndicies; i++ ) { @@ -529,7 +533,6 @@ void PdaCodeGen::writeParserData( long id, PdaTables *tables ) out << "PdaTables " << prefix << "pdaTables =\n" "{\n" - " " << prefix << startState() << ",\n" " " << prefix << indicies() << ",\n" " " << prefix << keys() << ",\n" " " << prefix << offsets() << ",\n" diff --git a/colm/pdacodegen.h b/colm/pdacodegen.h index 216bd9e3..7ca32546 100644 --- a/colm/pdacodegen.h +++ b/colm/pdacodegen.h @@ -51,7 +51,7 @@ struct PdaCodeGen void writeRhsLocate( Definition *prod ); void writeFirst(); - void writeRuntimeData( RuntimeData *runtimeData ); + void writeRuntimeData( RuntimeData *runtimeData, PdaTables *pdaTables ); void writeParserData( long id, PdaTables *tables ); String PARSER() { return "parser_"; } diff --git a/colm/pdagraph.cpp b/colm/pdagraph.cpp index 72fd8dfc..ce3c3a7f 100644 --- a/colm/pdagraph.cpp +++ b/colm/pdagraph.cpp @@ -497,7 +497,11 @@ void PdaGraph::removeUnreachableStates() { /* Mark all the states that can be reached * through the existing set of entry points. */ - markReachableFromHere( startState ); + if ( startState != 0 ) + markReachableFromHere( startState ); + + for ( PdaStateSet::Iter si = entryStateSet; si.lte(); si++ ) + markReachableFromHere( *si ); /* Delete all states that are not marked * and unmark the ones that are marked. */ diff --git a/colm/pdagraph.h b/colm/pdagraph.h index fc0e5ef7..479a60de 100644 --- a/colm/pdagraph.h +++ b/colm/pdagraph.h @@ -388,6 +388,7 @@ struct PdaGraph /* The start state. */ PdaState *startState; + PdaStateSet entryStateSet; /* The set of final states. */ PdaStateSet finStateSet; diff --git a/colm/pdarun.cpp b/colm/pdarun.cpp index fd52668b..d3a3fd87 100644 --- a/colm/pdarun.cpp +++ b/colm/pdarun.cpp @@ -86,7 +86,8 @@ bool PdaRun::isParserStopFinished() void PdaRun::init() { - cs = tables->startState; + /* FIXME: need the right one here. */ + cs = prg->rtd->startStates[parserId]; /* Init the element allocation variables. */ stackTop = prg->kidPool.allocate(); @@ -111,7 +112,7 @@ long PdaRun::stackTopTarget() { long state; if ( pt(stackTop->tree)->state < 0 ) - state = tables->startState; + state = prg->rtd->startStates[parserId]; else { state = tables->targs[(int)tables->indicies[tables->offsets[ pt(stackTop->tree)->state] + diff --git a/colm/pdarun.h b/colm/pdarun.h index 9634b20c..61039a7e 100644 --- a/colm/pdarun.h +++ b/colm/pdarun.h @@ -420,7 +420,9 @@ struct RuntimeData long numLiterals; FsmTables *fsmTables; - PdaTables **parsers; + PdaTables *pdaTables; + int *startStates; + int *eofLelIds; long numParsers; long globalSize; @@ -437,7 +439,6 @@ struct RuntimeData struct PdaTables { /* Parser table data. */ - unsigned int startState; int *indicies; int *keys; unsigned int *offsets; @@ -466,12 +467,13 @@ typedef Vector Bindings; struct PdaRun { - PdaRun( Tree **root, Program *prg, PdaTables *tables, + PdaRun( Tree **root, Program *prg, PdaTables *tables, int parserId, FsmRun *scanner, long stopTarget, bool revertOn ) : root(root), prg(prg), tables(tables), + parserId(parserId), fsmRun(scanner), stopParsing(false), stopTarget(stopTarget), @@ -497,6 +499,7 @@ struct PdaRun Program *prg; PdaTables *tables; + int parserId; FsmRun *fsmRun; -- cgit v1.2.1