summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAdrian Thurston <thurston@complang.org>2013-04-07 11:59:34 -0400
committerAdrian Thurston <thurston@complang.org>2013-04-07 11:59:34 -0400
commit8ec3a5a4c25c6be546d6c4b951e2c0678a5cdc2c (patch)
tree80b17c191dfdea3839bf1bcc90d554c8d9b316e9
parentf2c2fe5e637d3c03407ca835bba125784e0af1f1 (diff)
downloadcolm-8ec3a5a4c25c6be546d6c4b951e2c0678a5cdc2c.tar.gz
split TokenRegion into Region and Impl
We have a many-to-one region-to-impl mapping because the ignore-only state machine is used by more than one region.
-rw-r--r--colm/compiler.cc28
-rw-r--r--colm/parsedata.h1
-rw-r--r--colm/parser.cc41
-rw-r--r--colm/parser.h2
-rw-r--r--colm/parsetree.cc14
-rw-r--r--colm/parsetree.h97
-rw-r--r--colm/pdabuild.cc6
-rw-r--r--colm/redbuild.cc4
8 files changed, 118 insertions, 75 deletions
diff --git a/colm/compiler.cc b/colm/compiler.cc
index bace6ec6..8a89f211 100644
--- a/colm/compiler.cc
+++ b/colm/compiler.cc
@@ -540,7 +540,7 @@ Action *Compiler::newAction( const String &name, InlineList *inlineList )
void Compiler::initLongestMatchData()
{
- if ( regionList.length() > 0 ) {
+ if ( regionSetList.length() > 0 ) {
/* The initActId action gives act a default value. */
InlineList *il4 = InlineList::cons();
il4->append( InlineItem::cons( InputLoc(), InlineItem::LmInitAct ) );
@@ -610,7 +610,7 @@ NameInst *Compiler::makeNameTree()
nextNameId = 1;
/* First make the name tree. */
- for ( RegionList::Iter rel = regionList; rel.lte(); rel++ ) {
+ for ( RegionImplList::Iter rel = regionImplList; rel.lte(); rel++ ) {
/* Recurse on the instance. */
rel->makeNameTree( rel->loc, this );
}
@@ -625,10 +625,10 @@ FsmGraph *Compiler::makeAllRegions()
NameInst **nameIndex = makeNameIndex();
int numGraphs = 0;
- FsmGraph **graphs = new FsmGraph*[regionList.length()];
+ FsmGraph **graphs = new FsmGraph*[regionImplList.length()];
/* Make all the instantiations, we know that main exists in this list. */
- for ( RegionList::Iter rel = regionList; rel.lte(); rel++ ) {
+ for ( RegionImplList::Iter rel = regionImplList; rel.lte(); rel++ ) {
/* Build the graph from a walk of the parse tree. */
FsmGraph *newGraph = rel->walk( this );
@@ -649,7 +649,7 @@ FsmGraph *Compiler::makeAllRegions()
delete[] graphs;
/* Go through all the token regions and check for lmRequiresErrorState. */
- for ( RegionList::Iter reg = regionList; reg.lte(); reg++ ) {
+ for ( RegionImplList::Iter reg = regionImplList; reg.lte(); reg++ ) {
if ( reg->lmSwitchHandlesError )
all->lmRequiresErrorState = true;
}
@@ -668,7 +668,7 @@ void Compiler::analyzeAction( Action *action, InlineList *inlineList )
/* Need to recurse into longest match items. */
if ( item->type == InlineItem::LmSwitch ) {
- TokenRegion *lm = item->tokenRegion;
+ RegionImpl *lm = item->tokenRegion;
for ( TokenInstanceListReg::Iter lmi = lm->tokenInstanceList; lmi.lte(); lmi++ ) {
if ( lmi->action != 0 )
analyzeAction( action, lmi->action->inlineList );
@@ -757,11 +757,12 @@ void Compiler::createDefaultScanner()
/* Create a scanner which will be used when no other scanner can be
* figured out. It returns single characters. */
- defaultRegion = new TokenRegion( InputLoc(),
- regionList.length() );
+ RegionImpl *impl = new RegionImpl;
+ regionImplList.append( impl );
+ defaultRegion = new TokenRegion( internal, regionList.length(), impl );
regionList.append( defaultRegion );
- RegionSet *regionSet = new RegionSet( defaultRegion, 0, 0, 0 );
+ RegionSet *regionSet = new RegionSet( impl, 0, 0, defaultRegion, 0, 0, 0 );
regionSetList.append( regionSet );
LexJoin *join = LexJoin::cons( LexExpression::cons( BT_Any ) );
@@ -773,7 +774,7 @@ void Compiler::createDefaultScanner()
join, loc, nextTokenId++,
rootNamespace, defaultRegion );
- defaultRegion->tokenInstanceList.append( tokenInstance );
+ defaultRegion->impl->tokenInstanceList.append( tokenInstance );
/* Now create the one and only token -> "<chr>" / any / */
name = "___DEFAULT_SCANNER_CHR";
@@ -969,8 +970,8 @@ Namespace *NamespaceQual::getQual( Compiler *pd )
void Compiler::initEmptyScanner( RegionSet *regionSet, TokenRegion *reg )
{
- if ( reg != 0 && reg->tokenInstanceList.length() == 0 ) {
- reg->wasEmpty = true;
+ if ( reg != 0 && reg->impl->tokenInstanceList.length() == 0 ) {
+ reg->impl->wasEmpty = true;
static int def = 1;
String name( 64, "__%p_DEF_PAT_%d", reg, def++ );
@@ -985,7 +986,7 @@ void Compiler::initEmptyScanner( RegionSet *regionSet, TokenRegion *reg )
join, internal, nextTokenId++,
rootNamespace, reg );
- reg->tokenInstanceList.append( tokenInstance );
+ reg->impl->tokenInstanceList.append( tokenInstance );
/* These do not go in the namespace so so they cannot get declared
* in the declare pass. */
@@ -1094,7 +1095,6 @@ void Compiler::collectParserEls( BstSet<LangEl*> &parserEls )
}
}
-
void Compiler::generateOutput( long activeRealm )
{
FsmCodeGen *fsmGen = new FsmCodeGen( *outStream, redFsm, fsmTables );
diff --git a/colm/parsedata.h b/colm/parsedata.h
index 8fd634d5..b6907134 100644
--- a/colm/parsedata.h
+++ b/colm/parsedata.h
@@ -610,6 +610,7 @@ struct Compiler
/* Counter for assigning ids to longest match items. */
int nextTokenId;
+ RegionImplList regionImplList;
RegionList regionList;
RegionSetList regionSetList;
diff --git a/colm/parser.cc b/colm/parser.cc
index bd3f6abc..eb244c65 100644
--- a/colm/parser.cc
+++ b/colm/parser.cc
@@ -41,9 +41,11 @@ void BaseParser::init()
internal, String("___ROOT_NAMESPACE") );
pd->rootNamespace = rootNamespace;
- TokenRegion *rootRegion = createRegion( internal );
+ RegionImpl *rootImpl = new RegionImpl;
+ pd->regionImplList.append( rootImpl );
+ TokenRegion *rootRegion = createRegion( internal, rootImpl );
- RegionSet *rootRegionSet = new RegionSet( rootRegion, 0, 0, 0 );
+ RegionSet *rootRegionSet = new RegionSet( rootImpl, 0, 0, rootRegion, 0, 0, 0 );
pd->regionSetList.append( rootRegionSet );
regionStack.push( rootRegionSet );
@@ -92,10 +94,10 @@ void BaseParser::addRegularDef( const InputLoc &loc, Namespace *nspace,
}
}
-TokenRegion *BaseParser::createRegion( const InputLoc &loc )
+TokenRegion *BaseParser::createRegion( const InputLoc &loc, RegionImpl *impl )
{
TokenRegion *tokenRegion = new TokenRegion( loc,
- pd->regionList.length() );
+ pd->regionList.length(), impl );
pd->regionList.append( tokenRegion );
@@ -104,13 +106,22 @@ TokenRegion *BaseParser::createRegion( const InputLoc &loc )
void BaseParser::pushRegionSet( const InputLoc &loc )
{
- TokenRegion *tokenIgnore = createRegion( loc );
- TokenRegion *tokenOnly = createRegion( loc );
- TokenRegion *ignoreOnly = createRegion( loc );
- TokenRegion *collectIgnore = createRegion( loc );
+ RegionImpl *implTokenIgnore = new RegionImpl;
+ RegionImpl *implTokenOnly = new RegionImpl;
+ RegionImpl *implIgnoreOnly = new RegionImpl;
- RegionSet *regionSet = new RegionSet( tokenIgnore,
- tokenOnly, ignoreOnly, collectIgnore );
+ pd->regionImplList.append( implTokenIgnore );
+ pd->regionImplList.append( implTokenOnly );
+ pd->regionImplList.append( implIgnoreOnly );
+
+ TokenRegion *tokenIgnore = createRegion( loc, implTokenIgnore );
+ TokenRegion *tokenOnly = createRegion( loc, implTokenOnly );
+ TokenRegion *ignoreOnly = createRegion( loc, implIgnoreOnly );
+ TokenRegion *collectIgnore = createRegion( loc, implIgnoreOnly );
+
+ RegionSet *regionSet = new RegionSet(
+ implTokenIgnore, implTokenIgnore, implIgnoreOnly,
+ tokenIgnore, tokenOnly, ignoreOnly, collectIgnore );
collectIgnore->ignoreOnly = ignoreOnly;
@@ -190,7 +201,7 @@ void BaseParser::defineToken( const InputLoc &loc, String name, LexJoin *join, O
join, loc, pd->nextTokenId++, nspace,
regionSet->tokenIgnore );
- regionSet->tokenIgnore->tokenInstanceList.append( tokenInstance );
+ regionSet->tokenIgnore->impl->tokenInstanceList.append( tokenInstance );
tokenDef->noPreIgnore = noPreIgnore;
tokenDef->noPostIgnore = noPostIgnore;
@@ -202,7 +213,7 @@ void BaseParser::defineToken( const InputLoc &loc, String name, LexJoin *join, O
tokenInstanceIgn->dupOf = tokenInstance;
- regionSet->ignoreOnly->tokenInstanceList.append( tokenInstanceIgn );
+ regionSet->ignoreOnly->impl->tokenInstanceList.append( tokenInstanceIgn );
}
else {
/* The instance for the token-only. */
@@ -211,7 +222,7 @@ void BaseParser::defineToken( const InputLoc &loc, String name, LexJoin *join, O
tokenInstanceTok->dupOf = tokenInstance;
- regionSet->tokenOnly->tokenInstanceList.append( tokenInstanceTok );
+ regionSet->tokenOnly->impl->tokenInstanceList.append( tokenInstanceTok );
}
/* This is created and pushed in the name. */
@@ -299,7 +310,7 @@ void BaseParser::literalDef( const InputLoc &loc, const String &data,
TokenInstance *tokenInstance = TokenInstance::cons( tokenDef, join,
loc, pd->nextTokenId++, nspace, regionSet->tokenIgnore );
- regionSet->tokenIgnore->tokenInstanceList.append( tokenInstance );
+ regionSet->tokenIgnore->impl->tokenInstanceList.append( tokenInstance );
ldel = nspace->literalDict.insert( interp, tokenInstance );
@@ -314,7 +325,7 @@ void BaseParser::literalDef( const InputLoc &loc, const String &data,
tokenInstanceTok->dupOf = tokenInstance;
- regionSet->tokenOnly->tokenInstanceList.append( tokenInstanceTok );
+ regionSet->tokenOnly->impl->tokenInstanceList.append( tokenInstanceTok );
if ( pushedRegion )
popRegionSet();
diff --git a/colm/parser.h b/colm/parser.h
index 497a968a..a7a71c96 100644
--- a/colm/parser.h
+++ b/colm/parser.h
@@ -53,7 +53,7 @@ struct BaseParser
void init();
void addRegularDef( const InputLoc &loc, Namespace *nspace,
const String &name, LexJoin *join );
- TokenRegion *createRegion( const InputLoc &loc );
+ TokenRegion *createRegion( const InputLoc &loc, RegionImpl *impl );
Namespace *createNamespace( const InputLoc &loc, const String &name );
void pushRegionSet( const InputLoc &loc );
void popRegionSet();
diff --git a/colm/parsetree.cc b/colm/parsetree.cc
index 97bbcdaf..08cf0dec 100644
--- a/colm/parsetree.cc
+++ b/colm/parsetree.cc
@@ -195,7 +195,7 @@ FsmGraph *LexDefinition::walk( Compiler *pd )
return rtnVal;
}
-void TokenRegion::makeNameTree( const InputLoc &loc, Compiler *pd )
+void RegionImpl::makeNameTree( const InputLoc &loc, Compiler *pd )
{
NameInst *nameInst = new NameInst( pd->nextNameId++ );
pd->nameInstList.append( nameInst );
@@ -226,7 +226,7 @@ InputLoc TokenInstance::getLoc()
* 4. start state of all longest match routines.
*/
-Action *TokenRegion::newAction( Compiler *pd, const InputLoc &loc,
+Action *RegionImpl::newAction( Compiler *pd, const InputLoc &loc,
const String &name, InlineList *inlineList )
{
Action *action = Action::cons( loc, name, inlineList );
@@ -235,7 +235,7 @@ Action *TokenRegion::newAction( Compiler *pd, const InputLoc &loc,
return action;
}
-void TokenRegion::makeActions( Compiler *pd )
+void RegionImpl::makeActions( Compiler *pd )
{
/* Make actions that set the action id. */
for ( TokenInstanceListReg::Iter lmi = tokenInstanceList; lmi.lte(); lmi++ ) {
@@ -298,14 +298,14 @@ void TokenRegion::makeActions( Compiler *pd )
lmActSelect = newAction( pd, loc, "lagsel", il6 );
}
-void TokenRegion::restart( FsmGraph *graph, FsmTrans *trans )
+void RegionImpl::restart( FsmGraph *graph, FsmTrans *trans )
{
FsmState *fromState = trans->fromState;
graph->detachTrans( fromState, trans->toState, trans );
graph->attachTrans( fromState, graph->startState, trans );
}
-void TokenRegion::runLongestMatch( Compiler *pd, FsmGraph *graph )
+void RegionImpl::runLongestMatch( Compiler *pd, FsmGraph *graph )
{
graph->markReachableFromHereStopFinal( graph->startState );
for ( StateList::Iter ms = graph->stateList; ms.lte(); ms++ ) {
@@ -478,7 +478,7 @@ void TokenRegion::runLongestMatch( Compiler *pd, FsmGraph *graph )
graph->setFinState( graph->startState );
}
-void TokenRegion::transferScannerLeavingActions( FsmGraph *graph )
+void RegionImpl::transferScannerLeavingActions( FsmGraph *graph )
{
for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) {
if ( st->outActionTable.length() > 0 )
@@ -486,7 +486,7 @@ void TokenRegion::transferScannerLeavingActions( FsmGraph *graph )
}
}
-FsmGraph *TokenRegion::walk( Compiler *pd )
+FsmGraph *RegionImpl::walk( Compiler *pd )
{
/* Make each part of the longest match. */
int numParts = 0;
diff --git a/colm/parsetree.h b/colm/parsetree.h
index 9ecbc790..a8414249 100644
--- a/colm/parsetree.h
+++ b/colm/parsetree.h
@@ -568,74 +568,102 @@ typedef AvlTree< TypeMapEl, String, CmpStr > TypeMap;
typedef Vector<TokenRegion*> RegionVect;
-struct TokenRegion
+struct RegionImpl
{
- /* Construct with a list of joins */
- TokenRegion( const InputLoc &loc, int id )
- :
- loc(loc),
- id(id),
- lmSwitchHandlesError(false),
+ RegionImpl()
+ :
regionNameInst(0),
+ lmActSelect(0),
+ lmSwitchHandlesError(false),
defaultTokenInstance(0),
- preEofBlock(0),
- wasEmpty(false),
- zeroLel(0),
- ignoreOnly(0)
- { }
-
- /* Tree traversal. */
- FsmGraph *walk( Compiler *pd );
- void makeNameTree( const InputLoc &loc, Compiler *pd );
- void runLongestMatch( Compiler *pd, FsmGraph *graph );
- void transferScannerLeavingActions( FsmGraph *graph );
- Action *newAction( Compiler *pd, const InputLoc &loc, const String &name,
- InlineList *inlineList );
- void makeActions( Compiler *pd );
- void findName( Compiler *pd );
- void restart( FsmGraph *graph, FsmTrans *trans );
+ wasEmpty(false)
+ {}
InputLoc loc;
- TokenInstanceListReg tokenInstanceList;
- int id;
-
- Action *lmActSelect;
- bool lmSwitchHandlesError;
/* This gets saved off during the name walk. Can save it off because token
* regions are referenced once only. */
NameInst *regionNameInst;
+ TokenInstanceListReg tokenInstanceList;
+ Action *lmActSelect;
+ bool lmSwitchHandlesError;
TokenInstance *defaultTokenInstance;
- CodeBlock *preEofBlock;
-
/* We alway init empty scanners with a single token. If we had to do this
* then wasEmpty is true. */
bool wasEmpty;
+ RegionImpl *prev, *next;
+
+ void runLongestMatch( Compiler *pd, FsmGraph *graph );
+ void transferScannerLeavingActions( FsmGraph *graph );
+ FsmGraph *walk( Compiler *pd );
+
+ void restart( FsmGraph *graph, FsmTrans *trans );
+ void makeNameTree( const InputLoc &loc, Compiler *pd );
+ void makeActions( Compiler *pd );
+ Action *newAction( Compiler *pd, const InputLoc &loc,
+ const String &name, InlineList *inlineList );
+};
+
+struct TokenRegion
+{
+ /* Construct with a list of joins */
+ TokenRegion( const InputLoc &loc, int id, RegionImpl *impl )
+ :
+ loc(loc),
+ id(id),
+ preEofBlock(0),
+ zeroLel(0),
+ ignoreOnly(0),
+ impl(impl)
+ { }
+
+ InputLoc loc;
+ int id;
+
+ CodeBlock *preEofBlock;
+
LangEl *zeroLel;
TokenRegion *ignoreOnly;
+ RegionImpl *impl;
+
TokenRegion *next, *prev;
+
+ /* Tree traversal. */
+ void findName( Compiler *pd );
};
struct RegionSet
{
- RegionSet( TokenRegion *tokenIgnore, TokenRegion *tokenOnly,
- TokenRegion *ignoreOnly, TokenRegion *collectIgnore )
+ RegionSet( RegionImpl *implTokenIgnore, RegionImpl *implTokenOnly,
+ RegionImpl *implIgnoreOnly, TokenRegion *tokenIgnore,
+ TokenRegion *tokenOnly, TokenRegion *ignoreOnly,
+ TokenRegion *collectIgnore )
:
+ implTokenIgnore(implTokenIgnore),
+ implTokenOnly(implTokenOnly),
+ implIgnoreOnly(implIgnoreOnly),
+
tokenIgnore(tokenIgnore),
tokenOnly(tokenOnly),
ignoreOnly(ignoreOnly),
collectIgnore(collectIgnore)
{}
+ /* Provides the scanner state machines. We reuse ignore-only. */
+ RegionImpl *implTokenIgnore;
+ RegionImpl *implTokenOnly;
+ RegionImpl *implIgnoreOnly;
+
TokenRegion *tokenIgnore;
TokenRegion *tokenOnly;
TokenRegion *ignoreOnly;
TokenRegion *collectIgnore;
+
TokenDefListReg tokenDefList;
RegionSet *next, *prev;
@@ -645,6 +673,7 @@ typedef Vector<RegionSet*> RegionSetVect;
typedef DList<RegionSet> RegionSetList;
typedef DList<TokenRegion> RegionList;
+typedef DList<RegionImpl> RegionImplList;
typedef Vector<Namespace*> NamespaceVect;
@@ -1377,7 +1406,7 @@ struct InlineItem
return i;
}
- static InlineItem *cons( const InputLoc &loc, TokenRegion *tokenRegion,
+ static InlineItem *cons( const InputLoc &loc, RegionImpl *tokenRegion,
TokenInstance *longestMatchPart, Type type )
{
InlineItem *i = new InlineItem;
@@ -1416,7 +1445,7 @@ struct InlineItem
NameRef *nameRef;
NameInst *nameTarg;
InlineList *children;
- TokenRegion *tokenRegion;
+ RegionImpl *tokenRegion;
TokenInstance *longestMatchPart;
Type type;
diff --git a/colm/pdabuild.cc b/colm/pdabuild.cc
index 526c3b53..703c9c5f 100644
--- a/colm/pdabuild.cc
+++ b/colm/pdabuild.cc
@@ -1350,7 +1350,9 @@ void Compiler::makeRuntimeData()
for ( RegionList::Iter reg = regionList; reg.lte(); reg++ ) {
long regId = reg->id+1;
runtimeData->regionInfo[regId].defaultToken =
- reg->defaultTokenInstance == 0 ? -1 : reg->defaultTokenInstance->tokenDef->tdLangEl->id;
+ reg->impl->defaultTokenInstance == 0 ?
+ -1 :
+ reg->impl->defaultTokenInstance->tokenDef->tdLangEl->id;
runtimeData->regionInfo[regId].eofFrameId = -1;
runtimeData->regionInfo[regId].ciLelId = reg->zeroLel != 0 ? reg->zeroLel->id : 0;
@@ -2051,7 +2053,7 @@ PdaTables *Compiler::makePdaTables( PdaGraph *pdaGraph )
for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) {
for ( RegionVect::Iter reg = state->regions; reg.lte(); reg++ ) {
assert( state->preRegions.length() <= 1 );
- if ( state->preRegions.length() == 0 || state->preRegions[0]->wasEmpty )
+ if ( state->preRegions.length() == 0 || state->preRegions[0]->impl->wasEmpty )
pdaTables->tokenPreRegions[count++] = -1;
else
pdaTables->tokenPreRegions[count++] = state->preRegions[0]->id + 1;
diff --git a/colm/redbuild.cc b/colm/redbuild.cc
index 3372c19e..257bc929 100644
--- a/colm/redbuild.cc
+++ b/colm/redbuild.cc
@@ -487,14 +487,14 @@ void RedFsmBuild::makeEntryPoints()
}
for ( RegionList::Iter reg = pd->regionList; reg.lte(); reg++ ) {
- assert( reg->regionNameInst != 0 );
+ assert( reg->impl->regionNameInst != 0 );
TokenRegion *use = reg;
if ( use->zeroLel != 0 )
use = use->ignoreOnly;
- NameInst *regionName = use->regionNameInst;
+ NameInst *regionName = use->impl->regionNameInst;
addRegionToEntry( reg->id, regionName->id );
}
}