summaryrefslogtreecommitdiff
path: root/colm/pdabuild.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'colm/pdabuild.cpp')
-rw-r--r--colm/pdabuild.cpp127
1 files changed, 93 insertions, 34 deletions
diff --git a/colm/pdabuild.cpp b/colm/pdabuild.cpp
index 2c370644..17a85d03 100644
--- a/colm/pdabuild.cpp
+++ b/colm/pdabuild.cpp
@@ -73,9 +73,11 @@ KlangEl::KlangEl( Namespace *nspace, const String &name, Type type )
isRepeat(false),
isOpt(false),
parseStop(false),
+ isEOF(false),
tokenDef(0),
rootDef(0),
termDup(0),
+ eofLel(0),
pdaGraph(0),
pdaTables(0),
transBlock(0),
@@ -218,10 +220,11 @@ void ParseData::makeKlangElIds()
assert( noTokenMapEl != 0 );
/* Make the EOF language element. */
- eofKlangEl = new KlangEl( rootNamespace, strdup("_eof"), KlangEl::Term );
- langEls.prepend( eofKlangEl );
- SymbolMapEl *eofMapEl = rootNamespace->symbolMap.insert( eofKlangEl->name, eofKlangEl );
- assert( eofMapEl != 0 );
+ eofKlangEl = 0;
+// eofKlangEl = new KlangEl( rootNamespace, strdup("_eof"), KlangEl::Term );
+// langEls.prepend( eofKlangEl );
+// SymbolMapEl *eofMapEl = rootNamespace->symbolMap.insert( eofKlangEl->name, eofKlangEl );
+// assert( eofMapEl != 0 );
/* Make the "any" language element */
anyKlangEl = new KlangEl( rootNamespace, strdup("any"), KlangEl::NonTerm );
@@ -247,6 +250,25 @@ void ParseData::makeKlangElIds()
}
}
+ /* Make eof language elements for each user terminal. This is a bit excessive and
+ * need to be reduced to the ones that we need parsers for, but we don't know that yet.
+ * Another pass before this one is needed. */
+ for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) {
+ if ( lel->eofLel == 0 &&
+ lel != eofKlangEl &&
+ lel != errorKlangEl &&
+ lel != noTokenKlangEl )
+ {
+ String name( lel->name.length() + 5, "_eof_%s", lel->name.data );
+ KlangEl *eofLel = new KlangEl( lel->nspace, name, KlangEl::Term );
+
+ langEls.append( eofLel );
+ lel->eofLel = eofLel;
+ eofLel->eofLel = lel;
+ eofLel->isEOF = true;
+ }
+ }
+
/* The first id 0 is reserved for the stack sentinal. A negative id means
* error to the parsing function, inducing backtracking. */
nextSymbolId = 1;
@@ -256,7 +278,7 @@ void ParseData::makeKlangElIds()
/* Must be a term, and not any of the special reserved terminals.
* Remember if the non terminal is a user non terminal. */
if ( lel->type == KlangEl::Term &&
- lel != eofKlangEl &&
+ !lel->isEOF &&
lel != errorKlangEl &&
lel != noTokenKlangEl )
{
@@ -265,8 +287,15 @@ void ParseData::makeKlangElIds()
}
}
+ //eofKlangEl->id = nextSymbolId++;
+ for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) {
+ /* Must be a term, and not any of the special reserved terminals.
+ * Remember if the non terminal is a user non terminal. */
+ if ( lel->isEOF )
+ lel->id = nextSymbolId++;
+ }
+
/* Next assign to the eof notoken, which we always create. */
- eofKlangEl->id = nextSymbolId++;
noTokenKlangEl->id = nextSymbolId++;
/* Possibly assign to the error language element. */
@@ -482,7 +511,7 @@ void ParseData::pdaOrderFollow( KlangEl *rootEl, PdaState *tabState,
AlphSet alphSet;
if ( parentDef == rootEl->rootDef )
- alphSet.insert( eofKlangEl->id );
+ alphSet.insert( rootEl->eofLel->id );
else
findFollow( alphSet, overTab, overSrc, parentDef );
@@ -519,7 +548,7 @@ void ParseData::addRegion( PdaState *tabState, long pdaKey )
/* If it is not the eof, then use the region associated
* with the token definition. */
- if ( klangEl != eofKlangEl && klangEl->tokenDef != 0 )
+ if ( !klangEl->isEOF && klangEl->tokenDef != 0 )
region = klangEl->tokenDef->tokenRegion;
if ( region != 0 && !regionVectHas( tabState->regions, region ) )
@@ -620,7 +649,7 @@ void ParseData::pdaOrderProd( KlangEl *rootEl, PdaState *tabState,
}
}
-void ParseData::pdaActionOrder( PdaGraph *pdaGraph, KlangEl *rootEl )
+void ParseData::pdaActionOrder( PdaGraph *pdaGraph, KlangElSet &parserEls )
{
for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) {
assert( (state->stateBits & SB_ISMARKED) == 0 );
@@ -636,14 +665,16 @@ void ParseData::pdaActionOrder( PdaGraph *pdaGraph, KlangEl *rootEl )
/* Compute the action orderings, record the max value. */
long time = 1;
- PdaState *startState = rootEl->rootDef->fsm->startState;
- pdaOrderProd( rootEl, pdaGraph->startState, startState, rootEl->rootDef, time );
-
- /* Walk over the start lang el and set the time for shift of
- * the eof action that completes the parse. */
- PdaTrans *overStart = pdaGraph->startState->findTrans( rootEl->id );
- PdaTrans *eofTrans = overStart->toState->findTrans( eofKlangEl->id );
- eofTrans->actOrds[0] = time++;
+ for ( KlangElSet::Iter pe = parserEls; pe.lte(); pe++ ) {
+ PdaState *startState = (*pe)->rootDef->fsm->startState;
+ pdaOrderProd( *pe, (*pe)->startState, startState, (*pe)->rootDef, time );
+
+ /* Walk over the start lang el and set the time for shift of
+ * the eof action that completes the parse. */
+ PdaTrans *overStart = (*pe)->startState->findTrans( (*pe)->id );
+ PdaTrans *eofTrans = overStart->toState->findTrans( (*pe)->eofLel->id );
+ eofTrans->actOrds[0] = time++;
+ }
for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) {
if ( state->regions.length() == 0 ) {
@@ -651,7 +682,8 @@ void ParseData::pdaActionOrder( PdaGraph *pdaGraph, KlangEl *rootEl )
/* There are no regions and EOF leaves the state. Add the eof
* token region. */
PdaTrans *trans = tel->value;
- if ( trans->lowKey == eofKlangEl->id )
+ KlangEl *lel = langElIndex[trans->lowKey];
+ if ( lel != 0 && lel->isEOF )
state->regions.append( eofTokenRegion );
}
}
@@ -859,7 +891,30 @@ void ParseData::reduceActions( PdaGraph *pdaGraph )
}
}
-void ParseData::analyzeMachine( PdaGraph *pdaGraph, KlangEl *rootEl )
+void ParseData::verifyParseStopGrammar( KlangEl *langEl, PdaGraph *pdaGraph )
+{
+ /* Get the entry into the graph and traverse over the root. The resulting
+ * state can have eof, nothing else can. */
+ PdaState *overStart = pdaGraph->followFsm(
+ langEl->startState,
+ langEl->rootDef->fsm );
+
+ /* The graph must reduce to root all on it's own. It cannot depend on
+ * require EOF. */
+ for ( PdaStateList::Iter st = pdaGraph->stateList; st.lte(); st++ ) {
+ if ( st == overStart )
+ continue;
+
+ for ( TransMap::Iter tr = st->transMap; tr.lte(); tr++ ) {
+ if ( tr->value->lowKey == langEl->eofLel->id ) {
+ /* This needs a better error message. Appears to be voodoo. */
+ error() << "grammar is not usable with parse_stop" << endp;
+ }
+ }
+ }
+}
+
+void ParseData::analyzeMachine( PdaGraph *pdaGraph, KlangElSet &parserEls )
{
pdaGraph->maxState = pdaGraph->stateList.length() - 1;
pdaGraph->maxLelId = nextSymbolId - 1;
@@ -879,7 +934,7 @@ void ParseData::analyzeMachine( PdaGraph *pdaGraph, KlangEl *rootEl )
}
}
- pdaActionOrder( pdaGraph, rootEl );
+ pdaActionOrder( pdaGraph, parserEls );
sortActions( pdaGraph );
advanceReductions( pdaGraph );
pdaGraph->setStateNumbers();
@@ -922,6 +977,10 @@ void ParseData::analyzeMachine( PdaGraph *pdaGraph, KlangEl *rootEl )
}
}
}
+
+ /* Verify that any type we parse_stop can actually be parsed that way. */
+ for ( KlangElSet::Iter pe = parserEls; pe.lte(); pe++ )
+ verifyParseStopGrammar(*pe, pdaGraph);
}
void ParseData::wrapNonTerminals()
@@ -1359,12 +1418,16 @@ void ParseData::makeRuntimeData()
}
runtimeData->fsmTables = fsmTables;
+ runtimeData->pdaTables = pdaTables;
- runtimeData->parsers = new PdaTables*[nextParserId];
+ runtimeData->startStates = new int[nextParserId];
+ runtimeData->eofLelIds = new int[nextParserId];
runtimeData->numParsers = nextParserId;
for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) {
- if ( lel->parserId >= 0 )
- runtimeData->parsers[lel->parserId] = lel->pdaTables;
+ if ( lel->parserId >= 0 ) {
+ runtimeData->startStates[lel->parserId] = lel->startState->stateNum;
+ runtimeData->eofLelIds[lel->parserId] = lel->eofLel->id;
+ }
}
@@ -1379,7 +1442,7 @@ void ParseData::makeRuntimeData()
runtimeData->integerId = intKlangEl->id;
runtimeData->stringId = strKlangEl->id;
runtimeData->anyId = anyKlangEl->id;
- runtimeData->eofId = eofKlangEl->id;
+ runtimeData->eofId = 0; //eofKlangEl->id;
runtimeData->noTokenId = noTokenKlangEl->id;
}
@@ -1503,8 +1566,6 @@ PdaTables *ParseData::makePdaTables( PdaGraph *pdaGraph )
int count, curOffset, pos;
PdaTables *pdaTables = new PdaTables;
- pdaTables->startState = pdaGraph->startState->stateNum;
-
/*
* Indicies.
*/
@@ -1687,25 +1748,23 @@ void ParseData::prepGrammar()
runtimeData = new RuntimeData;
}
-PdaGraph *ParseData::makePdaGraph( KlangEl *rootEl )
+PdaGraph *ParseData::makePdaGraph( KlangElSet &parserEls )
{
//for ( DefList::Iter prod = prodList; prod.lte(); prod++ )
// cerr << prod->prodId << " " << prod->data << endl;
PdaGraph *pdaGraph = new PdaGraph();
- lalr1GenerateParser( pdaGraph, rootEl );
+ lalr1GenerateParser( pdaGraph, parserEls );
pdaGraph->setStateNumbers();
- analyzeMachine( pdaGraph, rootEl );
+ analyzeMachine( pdaGraph, parserEls );
//cerr << "NUMBER OF STATES: " << pdaGraph->stateList.length() << endl;
return pdaGraph;
}
-void ParseData::makeParser( KlangEl *rootEl )
+void ParseData::makeParser( KlangElSet &parserEls )
{
- if ( rootEl->pdaTables == 0 ) {
- rootEl->pdaGraph = makePdaGraph( rootEl );
- rootEl->pdaTables = makePdaTables( rootEl->pdaGraph );
- }
+ pdaGraph = makePdaGraph( parserEls );
+ pdaTables = makePdaTables( pdaGraph );
}