summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAdrian Thurston <thurston@complang.org>2009-02-13 04:20:27 +0000
committerAdrian Thurston <thurston@complang.org>2009-02-13 04:20:27 +0000
commitf9a1603eead3b6daa97b79eab8a6445fb46af5de (patch)
treeaa381b6c511f23e84d79104c2ae9a0fd4f4c9b9c
parentba86e1650f475034ef4ae4d03da4a5f17b81830b (diff)
downloadcolm-f9a1603eead3b6daa97b79eab8a6445fb46af5de.tar.gz
Now using a single parse table with multiple entry points, instead of multiple
disjoint tables, one for each nonterminal that must be parsed. I had pursued this before but gave up because the EOF token of one parser could interfere with another parser. The observation I was missing was that we can use unique EOF tokens for each nonterminal we parse, thus avoiding interference.
-rw-r--r--colm/bytecode.cpp8
-rw-r--r--colm/closure.cpp40
-rw-r--r--colm/compile.cpp2
-rw-r--r--colm/dotgen.cpp18
-rw-r--r--colm/fsmrun.cpp4
-rw-r--r--colm/parsedata.cpp108
-rw-r--r--colm/parsedata.h25
-rw-r--r--colm/pdabuild.cpp127
-rw-r--r--colm/pdacodegen.cpp19
-rw-r--r--colm/pdacodegen.h2
-rw-r--r--colm/pdagraph.cpp6
-rw-r--r--colm/pdagraph.h1
-rw-r--r--colm/pdarun.cpp5
-rw-r--r--colm/pdarun.h9
14 files changed, 220 insertions, 154 deletions
diff --git a/colm/bytecode.cpp b/colm/bytecode.cpp
index 787fa353..58db7499 100644
--- a/colm/bytecode.cpp
+++ b/colm/bytecode.cpp
@@ -131,8 +131,8 @@ void send( Tree **root, Program *prg, PdaRun *parser, Tree *tree, bool ignore )
Tree *call_parser( Tree **&sp, Program *prg, Stream *stream,
long parserId, long stopId, CodeVect *&cv, bool revertOn )
{
- PdaTables *tables = prg->rtd->parsers[parserId];
- PdaRun parser( sp, prg, tables, stream->scanner, stopId, revertOn );
+ PdaTables *tables = prg->rtd->pdaTables;
+ PdaRun parser( sp, prg, tables, parserId, stream->scanner, stopId, revertOn );
parser.run();
commit_full( &parser, 0 );
Tree *tree = parser.getParsedRoot( stopId > 0 );
@@ -156,8 +156,8 @@ Tree *call_parser( Tree **&sp, Program *prg, Stream *stream,
void undo_parse( Tree **&sp, Program *prg, Stream *stream,
long parserId, Tree *tree, CodeVect *rev )
{
- PdaTables *tables = prg->rtd->parsers[parserId];
- PdaRun parser( sp, prg, tables, stream->scanner, 0, false );
+ PdaTables *tables = prg->rtd->pdaTables;
+ PdaRun parser( sp, prg, tables, parserId, stream->scanner, 0, false );
parser.undoParse( tree, rev );
}
diff --git a/colm/closure.cpp b/colm/closure.cpp
index b3c17b1c..5ee6538e 100644
--- a/colm/closure.cpp
+++ b/colm/closure.cpp
@@ -320,7 +320,7 @@ void ParseData::lalr1AddFollow1( PdaGraph *pdaGraph, PdaTrans *trans )
}
/* Add follow sets to an LR(0) graph to make it LALR(1). */
-void ParseData::lalr1AddFollowSets( PdaGraph *pdaGraph, KlangEl *rootEl )
+void ParseData::lalr1AddFollowSets( PdaGraph *pdaGraph, KlangElSet &parserEls )
{
/* Make the state that all reduction actions go to. Since a reduction pops
* states of the stack and sets the new target state, this state is
@@ -328,13 +328,15 @@ void ParseData::lalr1AddFollowSets( PdaGraph *pdaGraph, KlangEl *rootEl )
actionDestState = pdaGraph->addState();
pdaGraph->setFinState( actionDestState );
- /* Get the entry into the graph and traverse over start. */
- PdaState *overStart = pdaGraph->followFsm( pdaGraph->startState, rootEl->rootDef->fsm );
+ for ( KlangElSet::Iter pe = parserEls; pe.lte(); pe++ ) {
+ /* Get the entry into the graph and traverse over start. */
+ PdaState *overStart = pdaGraph->followFsm( (*pe)->startState, (*pe)->rootDef->fsm );
- /* Add _eof after the initial _start. */
- PdaTrans *eofTrans = pdaGraph->insertNewTrans( overStart, actionDestState,
- eofKlangEl->id, eofKlangEl->id );
- eofTrans->isShift = true;
+ /* Add _eof after the initial _start. */
+ PdaTrans *eofTrans = pdaGraph->insertNewTrans( overStart, actionDestState,
+ (*pe)->eofLel->id, (*pe)->eofLel->id );
+ eofTrans->isShift = true;
+ }
/* This was used during lr0 table construction. */
pdaGraph->transClosureQueue.abandon();
@@ -416,20 +418,26 @@ void ParseData::addDupTerms( PdaGraph *pdaGraph )
}
/* Generate a LALR(1) graph. */
-void ParseData::lalr1GenerateParser( PdaGraph *pdaGraph, KlangEl *rootEl )
+void ParseData::lalr1GenerateParser( PdaGraph *pdaGraph, KlangElSet &parserEls )
{
/* Make the intial graph. */
pdaGraph->langElIndex = langElIndex;
- PdaState *start = pdaGraph->addState();
- pdaGraph->setStartState( start );
+ for ( Vector<KlangEl*>::Iter r = parserEls; r.lte(); r++ ) {
+ /* Create the entry point. */
+ PdaState *rs = pdaGraph->addState();
+ pdaGraph->entryStateSet.insert( rs );
+
+ /* State set of just one state. */
+ rs->stateSet = new PdaStateSet;
+ rs->stateSet->insert( (*r)->rootDef->fsm->startState );
- start->stateSet = new PdaStateSet;
- start->stateSet->insert( rootEl->rootDef->fsm->startState );
+ /* Queue the start state for closure. */
+ rs->onClosureQueue = true;
+ pdaGraph->stateClosureQueue.append( rs );
- /* Queue the start state for closure. */
- start->onClosureQueue = true;
- pdaGraph->stateClosureQueue.append( start );
+ (*r)->startState = rs;
+ }
/* Run the lr0 closure. */
lr0CloseAllStates( pdaGraph );
@@ -441,7 +449,7 @@ void ParseData::lalr1GenerateParser( PdaGraph *pdaGraph, KlangEl *rootEl )
linkExpansions( pdaGraph );
/* Walk the graph adding follow sets to the LR(0) graph. */
- lalr1AddFollowSets( pdaGraph, rootEl );
+ lalr1AddFollowSets( pdaGraph, parserEls );
// /* Set the commit on the final eof shift. */
// PdaTrans *overStart = pdaGraph->startState->findTrans( rootEl->id );
diff --git a/colm/compile.cpp b/colm/compile.cpp
index aab44efc..1f6068bb 100644
--- a/colm/compile.cpp
+++ b/colm/compile.cpp
@@ -1147,6 +1147,8 @@ UniqueType *LangTerm::evaluateParse( ParseData *pd, CodeVect &code, bool stop )
/* Allocate a parser id. This will cause a parser to be built for
* the type. */
ut->langEl->parserId = pd->nextParserId++;
+ if ( stop )
+ ut->langEl->parseStop = true;
/* Parse instruction, dependent on whether or not we are
* producing revert or commit code. */
diff --git a/colm/dotgen.cpp b/colm/dotgen.cpp
index 2fd9bf09..30788a4c 100644
--- a/colm/dotgen.cpp
+++ b/colm/dotgen.cpp
@@ -79,8 +79,12 @@ void ParseData::writeDotFile( PdaGraph *graph )
/* Define the psuedo states. Transitions will be done after the states
* have been defined as either final or not final. */
out <<
- " node [ shape = point ];\n"
- " ENTRY [ label = \"\" ];\n"
+ " node [ shape = point ];\n";
+
+ for ( int i = 0; i < graph->entryStateSet.length(); i++ )
+ out << "\tENTRY" << i << " [ label = \"\" ];\n";
+
+ out <<
"\n"
" node [ shape = circle, fixedsize = true, height = 0.2 ];\n";
@@ -94,8 +98,9 @@ void ParseData::writeDotFile( PdaGraph *graph )
for ( PdaStateList::Iter st = graph->stateList; st.lte(); st++ )
writeTransList( st );
- /* Transitions into the start state. */
- out << " ENTRY -> " << graph->startState->stateNum << " [ label = \"\" ];\n";
+ /* Start state and other entry points. */
+ for ( PdaStateSet::Iter st = graph->entryStateSet; st.lte(); st++ )
+ out << "\tENTRY" << st.pos() << " -> " << (*st)->stateNum << " [ label = \"\" ];\n";
out <<
"}\n";
@@ -103,9 +108,6 @@ void ParseData::writeDotFile( PdaGraph *graph )
void ParseData::writeDotFile()
{
- for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) {
- if ( lel->parserId >= 0 )
- writeDotFile( lel->pdaGraph );
- }
+ writeDotFile( pdaGraph );
}
diff --git a/colm/fsmrun.cpp b/colm/fsmrun.cpp
index f7602d2f..2ddf8751 100644
--- a/colm/fsmrun.cpp
+++ b/colm/fsmrun.cpp
@@ -724,7 +724,7 @@ void FsmRun::sendEOF( )
input->tree->flags |= AF_PARSE_TREE;
input->tree->refs = 1;
- input->tree->id = parser->tables->rtd->eofId;
+ input->tree->id = parser->tables->rtd->eofLelIds[parser->parserId];
bool ctxDepParsing = prg->ctxDepParsing;
long frameId = parser->tables->rtd->regionInfo[region].eofFrameId;
@@ -748,7 +748,7 @@ void FsmRun::sendEOF( )
parser->send( input );
if ( parser->errCount > 0 ) {
- parser->parse_error( parser->tables->rtd->eofId, input->tree ) <<
+ parser->parse_error( input->tree->id, input->tree ) <<
"parse error" << endp;
}
diff --git a/colm/parsedata.cpp b/colm/parsedata.cpp
index ba7f14cd..fd38e64e 100644
--- a/colm/parsedata.cpp
+++ b/colm/parsedata.cpp
@@ -1603,29 +1603,6 @@ void InputStreamRepl::pushBackNamed()
}
-void ParseData::makePatternParsers()
-{
- for ( PatternList::Iter pat = patternList; pat.lte(); pat++ ) {
- /* We assume the reduction action compilation phase was run before
- * pattern parsing and it decorated the pattern with the target type. */
- assert( pat->langEl != 0 );
- if ( pat->langEl->type != KlangEl::NonTerm )
- error(pat->loc) << "pattern type is not a non-terminal" << endp;
-
- /* Make a parser for the language element. */
- makeParser( pat->langEl );
- }
-
- for ( ReplList::Iter repl = replList; repl.lte(); repl++ ) {
- /* We assume the reduction action compilation phase was run before
- * replacement parsing decorated the replacement with the target type. */
- assert( repl->langEl != 0 );
-
- /* Make a parser for the language element. */
- makeParser( repl->langEl );
- }
-}
-
void ParseData::parsePatterns()
{
Program program( false, runtimeData );
@@ -1639,7 +1616,7 @@ void ParseData::parsePatterns()
fsmRun.attachInputStream( &in );
repl->pdaRun = new PdaRun( root, &program,
- repl->langEl->pdaTables, &fsmRun, 0, false );
+ pdaTables, repl->langEl->parserId, &fsmRun, 0, false );
repl->pdaRun->run();
//#ifdef COLM_LOG_COMPILE
@@ -1655,7 +1632,7 @@ void ParseData::parsePatterns()
fsmRun.attachInputStream( &in );
pat->pdaRun = new PdaRun( root, &program,
- pat->langEl->pdaTables, &fsmRun, 0, false );
+ pdaTables, pat->langEl->parserId, &fsmRun, 0, false );
pat->pdaRun->run();
//#ifdef COLM_LOG_COMPILE
@@ -1668,31 +1645,6 @@ void ParseData::parsePatterns()
fillInPatterns( &program );
}
-void ParseData::verifyParseStopGrammar( KlangEl *langEl )
-{
- PdaGraph *pdaGraph = langEl->pdaGraph;
-
- /* Get the entry into the graph and traverse over the root. The resulting
- * state can have eof, nothing else can. */
- PdaState *overStart = pdaGraph->followFsm(
- pdaGraph->startState,
- langEl->rootDef->fsm );
-
- /* The graph must reduce to root all on it's own. It cannot depend on
- * require EOF. */
- for ( PdaStateList::Iter st = pdaGraph->stateList; st.lte(); st++ ) {
- if ( st == overStart )
- continue;
-
- for ( TransMap::Iter tr = st->transMap; tr.lte(); tr++ ) {
- if ( tr->value->lowKey == eofKlangEl->id ) {
- /* This needs a better error message. Appears to be voodoo. */
- error() << "grammar is not usable with parse_stop" << endp;
- }
- }
- }
-}
-
void ParseData::resolveUses()
{
for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) {
@@ -1710,6 +1662,41 @@ void ParseData::resolveUses()
}
}
+void ParseData::collectParserEls( BstSet<KlangEl*> &parserEls )
+{
+ for ( PatternList::Iter pat = patternList; pat.lte(); pat++ ) {
+ /* We assume the reduction action compilation phase was run before
+ * pattern parsing and it decorated the pattern with the target type. */
+ assert( pat->langEl != 0 );
+ if ( pat->langEl->type != KlangEl::NonTerm )
+ error(pat->loc) << "pattern type is not a non-terminal" << endp;
+
+ if ( pat->langEl->parserId < 0 ) {
+ /* Make a parser for the language element. */
+ parserEls.insert( pat->langEl );
+ pat->langEl->parserId = nextParserId++;
+ }
+ }
+
+ for ( ReplList::Iter repl = replList; repl.lte(); repl++ ) {
+ /* We assume the reduction action compilation phase was run before
+ * replacement parsing decorated the replacement with the target type. */
+ assert( repl->langEl != 0 );
+
+ if ( repl->langEl->parserId < 0 ) {
+ /* Make a parser for the language element. */
+ parserEls.insert( repl->langEl );
+ repl->langEl->parserId = nextParserId++;
+ }
+ }
+
+ /* Make parsers that we need. */
+ for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) {
+ if ( lel->parserId >= 0 )
+ parserEls.insert( lel );
+ }
+}
+
void ParseData::semanticAnalysis()
{
beginProcessing();
@@ -1749,18 +1736,10 @@ void ParseData::semanticAnalysis()
RedFsmBuild reduce( sectionName, this, fsmGraph );
redFsm = reduce.reduceMachine();
- /* Build the parsers used for patterns and replacements. */
- makePatternParsers();
+ BstSet<KlangEl*> parserEls;
+ collectParserEls( parserEls );
- /* Make parsers that we need. */
- for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) {
- if ( lel->parserId >= 0 ) {
- makeParser( lel );
-
- if ( lel->parseStop )
- verifyParseStopGrammar( lel );
- }
- }
+ makeParser( parserEls );
/* Make the scanner tables. */
fsmTables = redFsm->makeFsmTables();
@@ -1787,13 +1766,10 @@ void ParseData::generateOutput()
fsmGen->writeCode();
/* Make parsers that we need. */
- for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) {
- if ( lel->parserId >= 0 )
- pdaGen->writeParserData( lel->parserId, lel->pdaTables );
- }
+ pdaGen->writeParserData( 0, pdaTables );
/* Write the runtime data. */
- pdaGen->writeRuntimeData( runtimeData );
+ pdaGen->writeRuntimeData( runtimeData, pdaTables );
outStream->flush();
}
diff --git a/colm/parsedata.h b/colm/parsedata.h
index f2c382f5..522a7aea 100644
--- a/colm/parsedata.h
+++ b/colm/parsedata.h
@@ -147,6 +147,7 @@ typedef Vector< TokenDef* > TokenDefVect;
struct UniqueType;
typedef Vector<KlangEl*> KlangElVect;
+typedef BstSet<KlangEl*> KlangElSet;
/* A language element class. Can be a nonTerm or a term. */
struct KlangEl : public DListEl<KlangEl>
@@ -178,6 +179,7 @@ struct KlangEl : public DListEl<KlangEl>
bool isRepeat;
bool isOpt;
bool parseStop;
+ bool isEOF;
/* Productions from the language element if it is a non-terminal. */
LelDefList defList;
@@ -185,10 +187,13 @@ struct KlangEl : public DListEl<KlangEl>
TokenDef *tokenDef;
Definition *rootDef;
KlangEl *termDup;
+ KlangEl *eofLel;
PdaGraph *pdaGraph;
PdaTables *pdaTables;
+ PdaState *startState;
+
CodeBlock *transBlock;
ObjectDef *objectDef;
@@ -635,14 +640,14 @@ struct ParseData
void lalr1AddFollow2( PdaGraph *pdaGraph, PdaTrans *trans, long followKey, long prior );
void lalr1AddFollow1( PdaGraph *pdaGraph, PdaTrans *trans );
- void lalr1AddFollowSets( PdaGraph *pdaGraph, KlangEl *rootEl );
+ void lalr1AddFollowSets( PdaGraph *pdaGraph, KlangElSet &parserEls );
void lr0BringInItem( PdaGraph *pdaGraph, PdaState *dest, PdaState *prodState,
PdaTrans *expandFrom, Definition *prod );
void lr0InvokeClosure( PdaGraph *pdaGraph, PdaState *state );
void lr0CloseAllStates( PdaGraph *pdaGraph );
- void lalr1GenerateParser( PdaGraph *pdaGraph, KlangEl *rootEl );
+ void lalr1GenerateParser( PdaGraph *pdaGraph, KlangElSet &parserEls );
void reduceActions( PdaGraph *pdaGraph );
@@ -657,13 +662,13 @@ struct ParseData
PdaState *followProd( PdaState *tabState, PdaState *prodState );
void findFollow( AlphSet &result, PdaState *overTab,
PdaState *overSrc, Definition *parentDef );
- void pdaActionOrder( PdaGraph *pdaGraph, KlangEl *rootEl );
+ void pdaActionOrder( PdaGraph *pdaGraph, KlangElSet &parserEls );
void pdaOrderFollow( KlangEl *rootEl, PdaState *tabState,
PdaTrans *tabTrans, PdaTrans *srcTrans,
Definition *parentDef, Definition *definition, long &time );
void pdaOrderProd( KlangEl *rootEl, PdaState *tabState,
PdaState *srcState, Definition *parentDef, long &time );
- void analyzeMachine( PdaGraph *pdaGraph, KlangEl *rootEl );
+ void analyzeMachine( PdaGraph *pdaGraph, KlangElSet &parserEls );
void makeProdFsms();
void insertUniqueEmptyProductions();
@@ -688,12 +693,11 @@ struct ParseData
void addProdRHSLoads( Definition *prod, CodeVect &code, long &insertPos );
void prepGrammar();
-
- void makePatternParsers();
void parsePatterns();
- void makeParser( KlangEl *rootEl );
- PdaGraph *makePdaGraph( KlangEl *rootEl );
+ void collectParserEls( KlangElSet &parserEls );
+ void makeParser( KlangElSet &parserEls );
+ PdaGraph *makePdaGraph( BstSet<KlangEl*> &parserEls );
PdaTables *makePdaTables( PdaGraph *pdaGraph );
void fillInPatterns( Program *prg );
@@ -702,7 +706,7 @@ struct ParseData
/* Generate and write out the fsm. */
void generateGraphviz();
- void verifyParseStopGrammar( KlangEl *langEl );
+ void verifyParseStopGrammar( KlangEl *langEl, PdaGraph *pdaGraph );
void initFieldInstructions( ObjField *el );
void initLocalInstructions( ObjField *el );
@@ -896,6 +900,9 @@ struct ParseData
bool revertOn;
RedFsm *redFsm;
+
+ PdaGraph *pdaGraph;
+ PdaTables *pdaTables;
};
void afterOpMinimize( FsmGraph *fsm, bool lastInSeq = true );
diff --git a/colm/pdabuild.cpp b/colm/pdabuild.cpp
index 2c370644..17a85d03 100644
--- a/colm/pdabuild.cpp
+++ b/colm/pdabuild.cpp
@@ -73,9 +73,11 @@ KlangEl::KlangEl( Namespace *nspace, const String &name, Type type )
isRepeat(false),
isOpt(false),
parseStop(false),
+ isEOF(false),
tokenDef(0),
rootDef(0),
termDup(0),
+ eofLel(0),
pdaGraph(0),
pdaTables(0),
transBlock(0),
@@ -218,10 +220,11 @@ void ParseData::makeKlangElIds()
assert( noTokenMapEl != 0 );
/* Make the EOF language element. */
- eofKlangEl = new KlangEl( rootNamespace, strdup("_eof"), KlangEl::Term );
- langEls.prepend( eofKlangEl );
- SymbolMapEl *eofMapEl = rootNamespace->symbolMap.insert( eofKlangEl->name, eofKlangEl );
- assert( eofMapEl != 0 );
+ eofKlangEl = 0;
+// eofKlangEl = new KlangEl( rootNamespace, strdup("_eof"), KlangEl::Term );
+// langEls.prepend( eofKlangEl );
+// SymbolMapEl *eofMapEl = rootNamespace->symbolMap.insert( eofKlangEl->name, eofKlangEl );
+// assert( eofMapEl != 0 );
/* Make the "any" language element */
anyKlangEl = new KlangEl( rootNamespace, strdup("any"), KlangEl::NonTerm );
@@ -247,6 +250,25 @@ void ParseData::makeKlangElIds()
}
}
+ /* Make eof language elements for each user terminal. This is a bit excessive and
+ * need to be reduced to the ones that we need parsers for, but we don't know that yet.
+ * Another pass before this one is needed. */
+ for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) {
+ if ( lel->eofLel == 0 &&
+ lel != eofKlangEl &&
+ lel != errorKlangEl &&
+ lel != noTokenKlangEl )
+ {
+ String name( lel->name.length() + 5, "_eof_%s", lel->name.data );
+ KlangEl *eofLel = new KlangEl( lel->nspace, name, KlangEl::Term );
+
+ langEls.append( eofLel );
+ lel->eofLel = eofLel;
+ eofLel->eofLel = lel;
+ eofLel->isEOF = true;
+ }
+ }
+
/* The first id 0 is reserved for the stack sentinal. A negative id means
* error to the parsing function, inducing backtracking. */
nextSymbolId = 1;
@@ -256,7 +278,7 @@ void ParseData::makeKlangElIds()
/* Must be a term, and not any of the special reserved terminals.
* Remember if the non terminal is a user non terminal. */
if ( lel->type == KlangEl::Term &&
- lel != eofKlangEl &&
+ !lel->isEOF &&
lel != errorKlangEl &&
lel != noTokenKlangEl )
{
@@ -265,8 +287,15 @@ void ParseData::makeKlangElIds()
}
}
+ //eofKlangEl->id = nextSymbolId++;
+ for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) {
+ /* Must be a term, and not any of the special reserved terminals.
+ * Remember if the non terminal is a user non terminal. */
+ if ( lel->isEOF )
+ lel->id = nextSymbolId++;
+ }
+
/* Next assign to the eof notoken, which we always create. */
- eofKlangEl->id = nextSymbolId++;
noTokenKlangEl->id = nextSymbolId++;
/* Possibly assign to the error language element. */
@@ -482,7 +511,7 @@ void ParseData::pdaOrderFollow( KlangEl *rootEl, PdaState *tabState,
AlphSet alphSet;
if ( parentDef == rootEl->rootDef )
- alphSet.insert( eofKlangEl->id );
+ alphSet.insert( rootEl->eofLel->id );
else
findFollow( alphSet, overTab, overSrc, parentDef );
@@ -519,7 +548,7 @@ void ParseData::addRegion( PdaState *tabState, long pdaKey )
/* If it is not the eof, then use the region associated
* with the token definition. */
- if ( klangEl != eofKlangEl && klangEl->tokenDef != 0 )
+ if ( !klangEl->isEOF && klangEl->tokenDef != 0 )
region = klangEl->tokenDef->tokenRegion;
if ( region != 0 && !regionVectHas( tabState->regions, region ) )
@@ -620,7 +649,7 @@ void ParseData::pdaOrderProd( KlangEl *rootEl, PdaState *tabState,
}
}
-void ParseData::pdaActionOrder( PdaGraph *pdaGraph, KlangEl *rootEl )
+void ParseData::pdaActionOrder( PdaGraph *pdaGraph, KlangElSet &parserEls )
{
for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) {
assert( (state->stateBits & SB_ISMARKED) == 0 );
@@ -636,14 +665,16 @@ void ParseData::pdaActionOrder( PdaGraph *pdaGraph, KlangEl *rootEl )
/* Compute the action orderings, record the max value. */
long time = 1;
- PdaState *startState = rootEl->rootDef->fsm->startState;
- pdaOrderProd( rootEl, pdaGraph->startState, startState, rootEl->rootDef, time );
-
- /* Walk over the start lang el and set the time for shift of
- * the eof action that completes the parse. */
- PdaTrans *overStart = pdaGraph->startState->findTrans( rootEl->id );
- PdaTrans *eofTrans = overStart->toState->findTrans( eofKlangEl->id );
- eofTrans->actOrds[0] = time++;
+ for ( KlangElSet::Iter pe = parserEls; pe.lte(); pe++ ) {
+ PdaState *startState = (*pe)->rootDef->fsm->startState;
+ pdaOrderProd( *pe, (*pe)->startState, startState, (*pe)->rootDef, time );
+
+ /* Walk over the start lang el and set the time for shift of
+ * the eof action that completes the parse. */
+ PdaTrans *overStart = (*pe)->startState->findTrans( (*pe)->id );
+ PdaTrans *eofTrans = overStart->toState->findTrans( (*pe)->eofLel->id );
+ eofTrans->actOrds[0] = time++;
+ }
for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) {
if ( state->regions.length() == 0 ) {
@@ -651,7 +682,8 @@ void ParseData::pdaActionOrder( PdaGraph *pdaGraph, KlangEl *rootEl )
/* There are no regions and EOF leaves the state. Add the eof
* token region. */
PdaTrans *trans = tel->value;
- if ( trans->lowKey == eofKlangEl->id )
+ KlangEl *lel = langElIndex[trans->lowKey];
+ if ( lel != 0 && lel->isEOF )
state->regions.append( eofTokenRegion );
}
}
@@ -859,7 +891,30 @@ void ParseData::reduceActions( PdaGraph *pdaGraph )
}
}
-void ParseData::analyzeMachine( PdaGraph *pdaGraph, KlangEl *rootEl )
+void ParseData::verifyParseStopGrammar( KlangEl *langEl, PdaGraph *pdaGraph )
+{
+ /* Get the entry into the graph and traverse over the root. The resulting
+ * state can have eof, nothing else can. */
+ PdaState *overStart = pdaGraph->followFsm(
+ langEl->startState,
+ langEl->rootDef->fsm );
+
+ /* The graph must reduce to root all on it's own. It cannot depend on
+ * require EOF. */
+ for ( PdaStateList::Iter st = pdaGraph->stateList; st.lte(); st++ ) {
+ if ( st == overStart )
+ continue;
+
+ for ( TransMap::Iter tr = st->transMap; tr.lte(); tr++ ) {
+ if ( tr->value->lowKey == langEl->eofLel->id ) {
+ /* This needs a better error message. Appears to be voodoo. */
+ error() << "grammar is not usable with parse_stop" << endp;
+ }
+ }
+ }
+}
+
+void ParseData::analyzeMachine( PdaGraph *pdaGraph, KlangElSet &parserEls )
{
pdaGraph->maxState = pdaGraph->stateList.length() - 1;
pdaGraph->maxLelId = nextSymbolId - 1;
@@ -879,7 +934,7 @@ void ParseData::analyzeMachine( PdaGraph *pdaGraph, KlangEl *rootEl )
}
}
- pdaActionOrder( pdaGraph, rootEl );
+ pdaActionOrder( pdaGraph, parserEls );
sortActions( pdaGraph );
advanceReductions( pdaGraph );
pdaGraph->setStateNumbers();
@@ -922,6 +977,10 @@ void ParseData::analyzeMachine( PdaGraph *pdaGraph, KlangEl *rootEl )
}
}
}
+
+ /* Verify that any type we parse_stop can actually be parsed that way. */
+ for ( KlangElSet::Iter pe = parserEls; pe.lte(); pe++ )
+ verifyParseStopGrammar(*pe, pdaGraph);
}
void ParseData::wrapNonTerminals()
@@ -1359,12 +1418,16 @@ void ParseData::makeRuntimeData()
}
runtimeData->fsmTables = fsmTables;
+ runtimeData->pdaTables = pdaTables;
- runtimeData->parsers = new PdaTables*[nextParserId];
+ runtimeData->startStates = new int[nextParserId];
+ runtimeData->eofLelIds = new int[nextParserId];
runtimeData->numParsers = nextParserId;
for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) {
- if ( lel->parserId >= 0 )
- runtimeData->parsers[lel->parserId] = lel->pdaTables;
+ if ( lel->parserId >= 0 ) {
+ runtimeData->startStates[lel->parserId] = lel->startState->stateNum;
+ runtimeData->eofLelIds[lel->parserId] = lel->eofLel->id;
+ }
}
@@ -1379,7 +1442,7 @@ void ParseData::makeRuntimeData()
runtimeData->integerId = intKlangEl->id;
runtimeData->stringId = strKlangEl->id;
runtimeData->anyId = anyKlangEl->id;
- runtimeData->eofId = eofKlangEl->id;
+ runtimeData->eofId = 0; //eofKlangEl->id;
runtimeData->noTokenId = noTokenKlangEl->id;
}
@@ -1503,8 +1566,6 @@ PdaTables *ParseData::makePdaTables( PdaGraph *pdaGraph )
int count, curOffset, pos;
PdaTables *pdaTables = new PdaTables;
- pdaTables->startState = pdaGraph->startState->stateNum;
-
/*
* Indicies.
*/
@@ -1687,25 +1748,23 @@ void ParseData::prepGrammar()
runtimeData = new RuntimeData;
}
-PdaGraph *ParseData::makePdaGraph( KlangEl *rootEl )
+PdaGraph *ParseData::makePdaGraph( KlangElSet &parserEls )
{
//for ( DefList::Iter prod = prodList; prod.lte(); prod++ )
// cerr << prod->prodId << " " << prod->data << endl;
PdaGraph *pdaGraph = new PdaGraph();
- lalr1GenerateParser( pdaGraph, rootEl );
+ lalr1GenerateParser( pdaGraph, parserEls );
pdaGraph->setStateNumbers();
- analyzeMachine( pdaGraph, rootEl );
+ analyzeMachine( pdaGraph, parserEls );
//cerr << "NUMBER OF STATES: " << pdaGraph->stateList.length() << endl;
return pdaGraph;
}
-void ParseData::makeParser( KlangEl *rootEl )
+void ParseData::makeParser( KlangElSet &parserEls )
{
- if ( rootEl->pdaTables == 0 ) {
- rootEl->pdaGraph = makePdaGraph( rootEl );
- rootEl->pdaTables = makePdaTables( rootEl->pdaGraph );
- }
+ pdaGraph = makePdaGraph( parserEls );
+ pdaTables = makePdaTables( pdaGraph );
}
diff --git a/colm/pdacodegen.cpp b/colm/pdacodegen.cpp
index 8067dc06..53530e80 100644
--- a/colm/pdacodegen.cpp
+++ b/colm/pdacodegen.cpp
@@ -92,7 +92,7 @@ void PdaCodeGen::writeFirst()
"\n";
}
-void PdaCodeGen::writeRuntimeData( RuntimeData *runtimeData )
+void PdaCodeGen::writeRuntimeData( RuntimeData *runtimeData, PdaTables *pdaTables )
{
/*
* Blocks of code in frames.
@@ -355,10 +355,15 @@ void PdaCodeGen::writeRuntimeData( RuntimeData *runtimeData )
out << "0, ";
out << "};\n\n";
- /* Parsers. */
- out << "PdaTables *parsers[] = {\n\t";
+ out << "int startStates[] = {\n\t";
for ( long i = 0; i < runtimeData->numParsers; i++ ) {
- out << "&pid_" << i << "_pdaTables,\n";
+ out << runtimeData->startStates[i] << ", ";
+ }
+ out << "};\n\n";
+
+ out << "int eofLelIds[] = {\n\t";
+ for ( long i = 0; i < runtimeData->numParsers; i++ ) {
+ out << runtimeData->eofLelIds[i] << ", ";
}
out << "};\n\n";
@@ -398,7 +403,8 @@ void PdaCodeGen::writeRuntimeData( RuntimeData *runtimeData )
" " << runtimeData->numLiterals << ",\n"
"\n"
" &fsmTables_start,\n"
- " parsers, " << runtimeData->numParsers << ",\n"
+ " &pid_0_pdaTables,\n"
+ " startStates, eofLelIds, " << runtimeData->numParsers << ",\n"
"\n"
" " << runtimeData->globalSize << ",\n"
"\n"
@@ -415,8 +421,6 @@ void PdaCodeGen::writeRuntimeData( RuntimeData *runtimeData )
void PdaCodeGen::writeParserData( long id, PdaTables *tables )
{
String prefix = "pid_" + String(0, "%ld", id) + "_";
- out << "unsigned int " << prefix << startState() << " = " <<
- tables->startState << ";\n\n";
out << "int " << prefix << indicies() << "[] = {\n\t";
for ( int i = 0; i < tables->numIndicies; i++ ) {
@@ -529,7 +533,6 @@ void PdaCodeGen::writeParserData( long id, PdaTables *tables )
out <<
"PdaTables " << prefix << "pdaTables =\n"
"{\n"
- " " << prefix << startState() << ",\n"
" " << prefix << indicies() << ",\n"
" " << prefix << keys() << ",\n"
" " << prefix << offsets() << ",\n"
diff --git a/colm/pdacodegen.h b/colm/pdacodegen.h
index 216bd9e3..7ca32546 100644
--- a/colm/pdacodegen.h
+++ b/colm/pdacodegen.h
@@ -51,7 +51,7 @@ struct PdaCodeGen
void writeRhsLocate( Definition *prod );
void writeFirst();
- void writeRuntimeData( RuntimeData *runtimeData );
+ void writeRuntimeData( RuntimeData *runtimeData, PdaTables *pdaTables );
void writeParserData( long id, PdaTables *tables );
String PARSER() { return "parser_"; }
diff --git a/colm/pdagraph.cpp b/colm/pdagraph.cpp
index 72fd8dfc..ce3c3a7f 100644
--- a/colm/pdagraph.cpp
+++ b/colm/pdagraph.cpp
@@ -497,7 +497,11 @@ void PdaGraph::removeUnreachableStates()
{
/* Mark all the states that can be reached
* through the existing set of entry points. */
- markReachableFromHere( startState );
+ if ( startState != 0 )
+ markReachableFromHere( startState );
+
+ for ( PdaStateSet::Iter si = entryStateSet; si.lte(); si++ )
+ markReachableFromHere( *si );
/* Delete all states that are not marked
* and unmark the ones that are marked. */
diff --git a/colm/pdagraph.h b/colm/pdagraph.h
index fc0e5ef7..479a60de 100644
--- a/colm/pdagraph.h
+++ b/colm/pdagraph.h
@@ -388,6 +388,7 @@ struct PdaGraph
/* The start state. */
PdaState *startState;
+ PdaStateSet entryStateSet;
/* The set of final states. */
PdaStateSet finStateSet;
diff --git a/colm/pdarun.cpp b/colm/pdarun.cpp
index fd52668b..d3a3fd87 100644
--- a/colm/pdarun.cpp
+++ b/colm/pdarun.cpp
@@ -86,7 +86,8 @@ bool PdaRun::isParserStopFinished()
void PdaRun::init()
{
- cs = tables->startState;
+ /* FIXME: need the right one here. */
+ cs = prg->rtd->startStates[parserId];
/* Init the element allocation variables. */
stackTop = prg->kidPool.allocate();
@@ -111,7 +112,7 @@ long PdaRun::stackTopTarget()
{
long state;
if ( pt(stackTop->tree)->state < 0 )
- state = tables->startState;
+ state = prg->rtd->startStates[parserId];
else {
state = tables->targs[(int)tables->indicies[tables->offsets[
pt(stackTop->tree)->state] +
diff --git a/colm/pdarun.h b/colm/pdarun.h
index 9634b20c..61039a7e 100644
--- a/colm/pdarun.h
+++ b/colm/pdarun.h
@@ -420,7 +420,9 @@ struct RuntimeData
long numLiterals;
FsmTables *fsmTables;
- PdaTables **parsers;
+ PdaTables *pdaTables;
+ int *startStates;
+ int *eofLelIds;
long numParsers;
long globalSize;
@@ -437,7 +439,6 @@ struct RuntimeData
struct PdaTables
{
/* Parser table data. */
- unsigned int startState;
int *indicies;
int *keys;
unsigned int *offsets;
@@ -466,12 +467,13 @@ typedef Vector<Tree*> Bindings;
struct PdaRun
{
- PdaRun( Tree **root, Program *prg, PdaTables *tables,
+ PdaRun( Tree **root, Program *prg, PdaTables *tables, int parserId,
FsmRun *scanner, long stopTarget, bool revertOn )
:
root(root),
prg(prg),
tables(tables),
+ parserId(parserId),
fsmRun(scanner),
stopParsing(false),
stopTarget(stopTarget),
@@ -497,6 +499,7 @@ struct PdaRun
Program *prg;
PdaTables *tables;
+ int parserId;
FsmRun *fsmRun;