summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAdrian Thurston <thurston@complang.org>2012-05-28 15:53:39 +0000
committerAdrian Thurston <thurston@complang.org>2012-05-28 15:53:39 +0000
commit24f39e9b43a591701cbaba4845fcc14edb585864 (patch)
treea71f1c93e201751eaf8dda96b624ab2b5718526a
parent3d6ef0d073b8cab086261be954300164f217b584 (diff)
downloadcolm-24f39e9b43a591701cbaba4845fcc14edb585864.tar.gz
specializing graph dicts and lists for regions and regular language defs
Previously used a single graph dictionary for regions and regular language defs because we were derived from ragel. Splitting these The split goes down to VarDef and JoinOrLm.
-rw-r--r--colm/compiler.cc12
-rw-r--r--colm/lmparse.kh4
-rw-r--r--colm/lmparse.kl60
-rw-r--r--colm/parsedata.h2
-rw-r--r--colm/parsetree.cc103
-rw-r--r--colm/parsetree.h67
6 files changed, 171 insertions, 77 deletions
diff --git a/colm/compiler.cc b/colm/compiler.cc
index 086fb2bc..d757c08c 100644
--- a/colm/compiler.cc
+++ b/colm/compiler.cc
@@ -903,7 +903,7 @@ NameInst *Compiler::makeNameTree()
/* First make the name tree. */
initNameWalk( rootName );
- for ( GraphList::Iter glel = instanceList; glel.lte(); glel++ ) {
+ for ( RegionGraphList::Iter glel = instanceList; glel.lte(); glel++ ) {
/* Recurse on the instance. */
glel->value->makeNameTree( glel->loc, this );
}
@@ -945,7 +945,7 @@ FsmGraph *Compiler::makeAllRegions()
/* Resove name references in the tree. */
initNameWalk( rootName );
- for ( GraphList::Iter glel = instanceList; glel.lte(); glel++ )
+ for ( RegionGraphList::Iter glel = instanceList; glel.lte(); glel++ )
glel->value->resolveNameRefs( this );
/* Resovle the implicit name references to the nfa instantiations. */
@@ -956,7 +956,7 @@ FsmGraph *Compiler::makeAllRegions()
/* Make all the instantiations, we know that main exists in this list. */
initNameWalk( rootName );
- for ( GraphList::Iter glel = instanceList; glel.lte(); glel++ ) {
+ for ( RegionGraphList::Iter glel = instanceList; glel.lte(); glel++ ) {
/* Build the graph from a walk of the parse tree. */
FsmGraph *newGraph = glel->value->walk( this );
@@ -1094,12 +1094,12 @@ void Compiler::createDefaultScanner()
defaultRegion = new TokenRegion( InputLoc(), name,
regionList.length(), 0 );
regionList.append( defaultRegion );
- JoinOrLm *joinOrLm = new JoinOrLm( defaultRegion );
+ RegionJoinOrLm *joinOrLm = new RegionJoinOrLm( defaultRegion );
/* Insert the machine definition into the graph dictionary. */
- GraphDictEl *newEl = rootNamespace->graphDict.insert( name );
+ RegionGraphDictEl *newEl = rootNamespace->graphDict.insert( name );
assert( newEl != 0 );
- newEl->value = new VarDef( name, joinOrLm );
+ newEl->value = new RegionVarDef( name, joinOrLm );
newEl->isInstance = true;
instanceList.append( newEl );
diff --git a/colm/lmparse.kh b/colm/lmparse.kh
index 6774c7c6..8e82e844 100644
--- a/colm/lmparse.kh
+++ b/colm/lmparse.kh
@@ -79,8 +79,8 @@ struct ColmParser
int token( InputLoc &loc, int tokId, char *tokstart, int toklen );
void addRegularDef( const InputLoc &loc, Namespace *nspace,
const String &name, JoinOrLm *joinOrLm );
- void addLexicalRegionDef( const InputLoc &loc, Namespace *nspace,
- const String &name, JoinOrLm *joinOrLm );
+ void addRegionDef( const InputLoc &loc, Namespace *nspace,
+ const String &name, RegionJoinOrLm *joinOrLm );
void addProduction( const InputLoc &loc, const String &name,
ProdElList *prodElList, bool commit, CodeBlock *redBlock, LangEl *predOf );
void addArgvList();
diff --git a/colm/lmparse.kl b/colm/lmparse.kl
index 299d173e..356acf71 100644
--- a/colm/lmparse.kl
+++ b/colm/lmparse.kl
@@ -486,36 +486,36 @@ region_head:
pd->regionList.length(), regionStack.top() );
regionStack.top()->childRegions.append( tokenRegionIgn );
pd->regionList.append( tokenRegionIgn );
- JoinOrLm *joinOrLmIgn = new JoinOrLm( tokenRegionIgn );
+ RegionJoinOrLm *joinOrLmIgn = new RegionJoinOrLm( tokenRegionIgn );
String scannerNameIgn( $2->data.length() + 2, "<%s>-ign", $2->data.data );
- addLexicalRegionDef( InputLoc(), namespaceStack.top(), scannerNameIgn, joinOrLmIgn );
+ addRegionDef( InputLoc(), namespaceStack.top(), scannerNameIgn, joinOrLmIgn );
/* Just for collect ignores. Will use the ignore-only start state. */
TokenRegion *tokenRegionCi = new TokenRegion( InputLoc(), $2->data + "_ci" ,
pd->regionList.length(), regionStack.top() );
regionStack.top()->childRegions.append( tokenRegionCi );
pd->regionList.append( tokenRegionCi );
- JoinOrLm *joinOrLmCi = new JoinOrLm( tokenRegionCi );
+ RegionJoinOrLm *joinOrLmCi = new RegionJoinOrLm( tokenRegionCi );
String scannerNameCi( $2->data.length() + 2, "<%s>-ci", $2->data.data );
- addLexicalRegionDef( InputLoc(), namespaceStack.top(), scannerNameCi, joinOrLmCi );
+ addRegionDef( InputLoc(), namespaceStack.top(), scannerNameCi, joinOrLmCi );
/* Just for tokens. */
TokenRegion *tokenRegionTok = new TokenRegion( InputLoc(), $2->data + "_tok" ,
pd->regionList.length(), regionStack.top() );
regionStack.top()->childRegions.append( tokenRegionTok );
pd->regionList.append( tokenRegionTok );
- JoinOrLm *joinOrLmTok = new JoinOrLm( tokenRegionTok );
+ RegionJoinOrLm *joinOrLmTok = new RegionJoinOrLm( tokenRegionTok );
String scannerNameTok( $2->data.length() + 2, "<%s>-tok", $2->data.data );
- addLexicalRegionDef( InputLoc(), namespaceStack.top(), scannerNameTok, joinOrLmTok );
+ addRegionDef( InputLoc(), namespaceStack.top(), scannerNameTok, joinOrLmTok );
/* Make the new token region. */
TokenRegion *tokenRegion = new TokenRegion( InputLoc(), $2->data,
pd->regionList.length(), regionStack.top() );
regionStack.top()->childRegions.append( tokenRegion );
pd->regionList.append( tokenRegion );
- JoinOrLm *joinOrLm = new JoinOrLm( tokenRegion );
+ RegionJoinOrLm *joinOrLm = new RegionJoinOrLm( tokenRegion );
String scannerName( $2->data.length() + 2, "<%s>", $2->data.data );
- addLexicalRegionDef( InputLoc(), namespaceStack.top(), scannerName, joinOrLm );
+ addRegionDef( InputLoc(), namespaceStack.top(), scannerName, joinOrLm );
regionStack.push( tokenRegion );
tokenRegion->ignoreOnlyRegion = tokenRegionIgn;
@@ -956,36 +956,36 @@ literal_item: opt_no_ignore TK_Literal opt_no_ignore
pd->regionList.length(), regionStack.top() );
regionStack.top()->childRegions.append( tokenRegionIgn );
pd->regionList.append( tokenRegionIgn );
- JoinOrLm *joinOrLmIgn = new JoinOrLm( tokenRegionIgn );
+ RegionJoinOrLm *joinOrLmIgn = new RegionJoinOrLm( tokenRegionIgn );
String scannerNameIgn( name.length() + 2, "<%s>-ign", name.data );
- addLexicalRegionDef( InputLoc(), namespaceStack.top(), scannerNameIgn, joinOrLmIgn );
+ addRegionDef( InputLoc(), namespaceStack.top(), scannerNameIgn, joinOrLmIgn );
/* Just for collect ignores. Will use the ignore-only start state. */
TokenRegion *tokenRegionCi = new TokenRegion( InputLoc(), name + "_ci",
pd->regionList.length(), regionStack.top() );
regionStack.top()->childRegions.append( tokenRegionCi );
pd->regionList.append( tokenRegionCi );
- JoinOrLm *joinOrLmCi = new JoinOrLm( tokenRegionCi );
+ RegionJoinOrLm *joinOrLmCi = new RegionJoinOrLm( tokenRegionCi );
String scannerNameCi( name.length() + 2, "<%s>-ci", name.data );
- addLexicalRegionDef( InputLoc(), namespaceStack.top(), scannerNameCi, joinOrLmCi );
+ addRegionDef( InputLoc(), namespaceStack.top(), scannerNameCi, joinOrLmCi );
/* Just for tokens. */
TokenRegion *tokenRegionTok = new TokenRegion( InputLoc(), name + "_tok",
pd->regionList.length(), regionStack.top() );
regionStack.top()->childRegions.append( tokenRegionTok );
pd->regionList.append( tokenRegionTok );
- JoinOrLm *joinOrLmTok = new JoinOrLm( tokenRegionTok );
+ RegionJoinOrLm *joinOrLmTok = new RegionJoinOrLm( tokenRegionTok );
String scannerNameTok( name.length() + 2, "<%s>-tok", name.data );
- addLexicalRegionDef( InputLoc(), namespaceStack.top(), scannerNameTok, joinOrLmTok );
+ addRegionDef( InputLoc(), namespaceStack.top(), scannerNameTok, joinOrLmTok );
/* Make a new token region just for the token. */
TokenRegion *tokenRegion = new TokenRegion( InputLoc(), $2->data,
pd->regionList.length(), regionStack.top() );
regionStack.top()->childRegions.append( tokenRegion );
pd->regionList.append( tokenRegion );
- JoinOrLm *joinOrLm = new JoinOrLm( tokenRegion );
+ RegionJoinOrLm *joinOrLm = new RegionJoinOrLm( tokenRegion );
String scannerName( name.length() + 2, "<%s>", name.data );
- addLexicalRegionDef( InputLoc(), namespaceStack.top(), scannerName, joinOrLm );
+ addRegionDef( InputLoc(), namespaceStack.top(), scannerName, joinOrLm );
regionStack.push( tokenRegion );
tokenRegion->ignoreOnlyRegion = tokenRegionIgn;
@@ -1187,36 +1187,36 @@ token_def_name:
pd->regionList.length(), regionStack.top() );
regionStack.top()->childRegions.append( tokenRegionIgn );
pd->regionList.append( tokenRegionIgn );
- JoinOrLm *joinOrLmIgn = new JoinOrLm( tokenRegionIgn );
+ RegionJoinOrLm *joinOrLmIgn = new RegionJoinOrLm( tokenRegionIgn );
String scannerNameIgn( name.length() + 2, "<%s>-ign", name.data );
- addLexicalRegionDef( InputLoc(), namespaceStack.top(), scannerNameIgn, joinOrLmIgn );
+ addRegionDef( InputLoc(), namespaceStack.top(), scannerNameIgn, joinOrLmIgn );
/* Just for explicitly collecting ignores. */
TokenRegion *tokenRegionCi = new TokenRegion( InputLoc(), name + "_ci",
pd->regionList.length(), regionStack.top() );
regionStack.top()->childRegions.append( tokenRegionCi );
pd->regionList.append( tokenRegionCi );
- JoinOrLm *joinOrLmCi = new JoinOrLm( tokenRegionCi );
+ RegionJoinOrLm *joinOrLmCi = new RegionJoinOrLm( tokenRegionCi );
String scannerNameCi( name.length() + 2, "<%s>-ci", name.data );
- addLexicalRegionDef( InputLoc(), namespaceStack.top(), scannerNameCi, joinOrLmCi );
+ addRegionDef( InputLoc(), namespaceStack.top(), scannerNameCi, joinOrLmCi );
/* Just for tokens. */
TokenRegion *tokenRegionTok = new TokenRegion( InputLoc(), name + "_tok",
pd->regionList.length(), regionStack.top() );
regionStack.top()->childRegions.append( tokenRegionTok );
pd->regionList.append( tokenRegionTok );
- JoinOrLm *joinOrLmTok = new JoinOrLm( tokenRegionTok );
+ RegionJoinOrLm *joinOrLmTok = new RegionJoinOrLm( tokenRegionTok );
String scannerNameTok( name.length() + 2, "<%s>-tok", name.data );
- addLexicalRegionDef( InputLoc(), namespaceStack.top(), scannerNameTok, joinOrLmTok );
+ addRegionDef( InputLoc(), namespaceStack.top(), scannerNameTok, joinOrLmTok );
/* If not inside a region, make one for the token. */
TokenRegion *tokenRegion = new TokenRegion( InputLoc(), name,
pd->regionList.length(), regionStack.top() );
regionStack.top()->childRegions.append( tokenRegion );
pd->regionList.append( tokenRegion );
- JoinOrLm *joinOrLm = new JoinOrLm( tokenRegion );
+ RegionJoinOrLm *joinOrLm = new RegionJoinOrLm( tokenRegion );
String scannerName( name.length() + 2, "<%s>", name.data );
- addLexicalRegionDef( InputLoc(), namespaceStack.top(), scannerName, joinOrLm );
+ addRegionDef( InputLoc(), namespaceStack.top(), scannerName, joinOrLm );
regionStack.push( tokenRegion );
tokenRegion->ignoreOnlyRegion = tokenRegionIgn;
@@ -2602,8 +2602,8 @@ void ColmParser::init()
TokenRegion *rootRegion = new TokenRegion( InputLoc(), rootRegionName,
pd->regionList.length(), 0 );
pd->regionList.append( rootRegion );
- JoinOrLm *joinOrLm = new JoinOrLm( rootRegion );
- addLexicalRegionDef( InputLoc(), namespaceStack.top(), rootRegionName, joinOrLm );
+ RegionJoinOrLm *joinOrLm = new RegionJoinOrLm( rootRegion );
+ addRegionDef( InputLoc(), namespaceStack.top(), rootRegionName, joinOrLm );
regionStack.push( rootRegion );
pd->rootRegion = rootRegion;
@@ -2665,13 +2665,13 @@ void ColmParser::addRegularDef( const InputLoc &loc, Namespace *nspace,
}
}
-void ColmParser::addLexicalRegionDef( const InputLoc &loc, Namespace *nspace,
- const String &name, JoinOrLm *joinOrLm )
+void ColmParser::addRegionDef( const InputLoc &loc, Namespace *nspace,
+ const String &name, RegionJoinOrLm *joinOrLm )
{
- GraphDictEl *newEl = nspace->graphDict.insert( name );
+ RegionGraphDictEl *newEl = nspace->graphDict.insert( name );
if ( newEl != 0 ) {
/* New element in the dict, all good. */
- newEl->value = new VarDef( name, joinOrLm );
+ newEl->value = new RegionVarDef( name, joinOrLm );
newEl->isInstance = true;
newEl->loc = loc;
diff --git a/colm/parsedata.h b/colm/parsedata.h
index 63ca1245..5e3a59c2 100644
--- a/colm/parsedata.h
+++ b/colm/parsedata.h
@@ -591,7 +591,7 @@ struct Compiler
*/
/* The list of instances. */
- GraphList instanceList;
+ RegionGraphList instanceList;
/* Dictionary of actions. Lets actions be defined and then referenced. */
ActionDict actionDict;
diff --git a/colm/parsetree.cc b/colm/parsetree.cc
index a195f1b7..5a7b6003 100644
--- a/colm/parsetree.cc
+++ b/colm/parsetree.cc
@@ -198,7 +198,7 @@ FsmGraph *VarDef::walk( Compiler *pd )
/* If the expression below is a join operation with multiple expressions
* then it just had epsilon transisions resolved. If it is a join
* with only a single expression then run the epsilon op now. */
- if ( joinOrLm->type == JoinOrLm::JoinType && joinOrLm->join->exprList.length() == 1 )
+ if ( joinOrLm->join->exprList.length() == 1 )
rtnVal->epsilonOp();
/* We can now unset entry points that are not longer used. */
@@ -220,9 +220,6 @@ void VarDef::makeNameTree( const InputLoc &loc, Compiler *pd )
NameInst *prevNameInst = pd->curNameInst;
pd->curNameInst = pd->addNameInst( loc, name, false );
- if ( joinOrLm->type == JoinOrLm::LongestMatchType )
- pd->curNameInst->isLongestMatch = true;
-
/* Recurse. */
joinOrLm->makeNameTree( pd );
@@ -242,6 +239,61 @@ void VarDef::resolveNameRefs( Compiler *pd )
pd->popNameScope( nameFrame );
}
+FsmGraph *RegionVarDef::walk( Compiler *pd )
+{
+ /* We enter into a new name scope. */
+ NameFrame nameFrame = pd->enterNameScope( true, 1 );
+
+ /* Recurse on the expression. */
+ FsmGraph *rtnVal = joinOrLm->walk( pd );
+
+ /* Do the tranfer of local error actions. */
+ LocalErrDictEl *localErrDictEl = pd->localErrDict.find( name );
+ if ( localErrDictEl != 0 ) {
+ for ( StateList::Iter state = rtnVal->stateList; state.lte(); state++ )
+ rtnVal->transferErrorActions( state, localErrDictEl->value );
+ }
+
+ /* We can now unset entry points that are not longer used. */
+ pd->unsetObsoleteEntries( rtnVal );
+
+ /* If the name of the variable is referenced then add the entry point to
+ * the graph. */
+ if ( pd->curNameInst->numRefs > 0 )
+ rtnVal->setEntry( pd->curNameInst->id, rtnVal->startState );
+
+ /* Pop the name scope. */
+ pd->popNameScope( nameFrame );
+ return rtnVal;
+}
+
+void RegionVarDef::makeNameTree( const InputLoc &loc, Compiler *pd )
+{
+ /* The variable definition enters a new scope. */
+ NameInst *prevNameInst = pd->curNameInst;
+ pd->curNameInst = pd->addNameInst( loc, name, false );
+
+ pd->curNameInst->isLongestMatch = true;
+
+ /* Recurse. */
+ joinOrLm->makeNameTree( pd );
+
+ /* The name scope ends, pop the name instantiation. */
+ pd->curNameInst = prevNameInst;
+}
+
+void RegionVarDef::resolveNameRefs( Compiler *pd )
+{
+ /* Entering into a new scope. */
+ NameFrame nameFrame = pd->enterNameScope( true, 1 );
+
+ /* Recurse. */
+ joinOrLm->resolveNameRefs( pd );
+
+ /* The name scope ends, pop the name instantiation. */
+ pd->popNameScope( nameFrame );
+}
+
InputLoc TokenDef::getLoc()
{
return action != 0 ? action->loc : semiLoc;
@@ -637,41 +689,36 @@ FsmGraph *TokenRegion::walk( Compiler *pd )
FsmGraph *JoinOrLm::walk( Compiler *pd )
{
FsmGraph *rtnVal = 0;
- switch ( type ) {
- case JoinType:
- rtnVal = join->walk( pd );
- break;
- case LongestMatchType:
- rtnVal = tokenRegion->walk( pd );
- break;
- }
+ rtnVal = join->walk( pd );
return rtnVal;
}
void JoinOrLm::makeNameTree( Compiler *pd )
{
- switch ( type ) {
- case JoinType:
- join->makeNameTree( pd );
- break;
- case LongestMatchType:
- tokenRegion->makeNameTree( pd );
- break;
- }
+ join->makeNameTree( pd );
}
void JoinOrLm::resolveNameRefs( Compiler *pd )
{
- switch ( type ) {
- case JoinType:
- join->resolveNameRefs( pd );
- break;
- case LongestMatchType:
- tokenRegion->resolveNameRefs( pd );
- break;
- }
+ join->resolveNameRefs( pd );
}
+FsmGraph *RegionJoinOrLm::walk( Compiler *pd )
+{
+ FsmGraph *rtnVal = 0;
+ rtnVal = tokenRegion->walk( pd );
+ return rtnVal;
+}
+
+void RegionJoinOrLm::makeNameTree( Compiler *pd )
+{
+ tokenRegion->makeNameTree( pd );
+}
+
+void RegionJoinOrLm::resolveNameRefs( Compiler *pd )
+{
+ tokenRegion->resolveNameRefs( pd );
+}
/* Construct with a location and the first expression. */
Join::Join( Expression *expr )
diff --git a/colm/parsetree.h b/colm/parsetree.h
index 965085c6..f451d789 100644
--- a/colm/parsetree.h
+++ b/colm/parsetree.h
@@ -167,6 +167,7 @@ struct Factor;
struct Expression;
struct Join;
struct JoinOrLm;
+struct RegionJoinOrLm;
struct TokenRegion;
struct Namespace;
struct Context;
@@ -328,6 +329,23 @@ struct VarDef
JoinOrLm *joinOrLm;
};
+/*
+ * A Variable Definition
+ */
+struct RegionVarDef
+{
+ RegionVarDef( const String &name, RegionJoinOrLm *joinOrLm )
+ : name(name), joinOrLm(joinOrLm) { }
+
+ /* Parse tree traversal. */
+ FsmGraph *walk( Compiler *pd );
+ void makeNameTree( const InputLoc &loc, Compiler *pd );
+ void resolveNameRefs( Compiler *pd );
+
+ String name;
+ RegionJoinOrLm *joinOrLm;
+};
+
typedef Vector<String> StringVect;
typedef CmpTable<String, CmpStr> CmpStrVect;
@@ -645,6 +663,30 @@ struct GraphDictEl
typedef AvlTree<GraphDictEl, String, CmpStr> GraphDict;
typedef DList<GraphDictEl> GraphList;
+/* Graph dictionary. */
+struct RegionGraphDictEl
+:
+ public AvlTreeEl<RegionGraphDictEl>,
+ public DListEl<RegionGraphDictEl>
+{
+ RegionGraphDictEl( const String &key )
+ : key(key), value(0), isInstance(false) { }
+ RegionGraphDictEl( const String &key, RegionVarDef *value )
+ : key(key), value(value), isInstance(false) { }
+
+ const String &getKey() { return key; }
+
+ String key;
+ RegionVarDef *value;
+ bool isInstance;
+
+ /* Location info of graph definition. Points to variable name of assignment. */
+ InputLoc loc;
+};
+
+typedef AvlTree<RegionGraphDictEl, String, CmpStr> RegionGraphDict;
+typedef DList<RegionGraphDictEl> RegionGraphList;
+
struct TypeAlias
{
TypeAlias( const InputLoc &loc, Namespace *nspace,
@@ -699,7 +741,7 @@ struct Namespace
GenericList genericList;
/* Dictionary of graphs. Both instances and non-instances go here. */
- GraphDict graphDict;
+ RegionGraphDict graphDict;
/* regular language definitions. */
GraphDict rlMap;
@@ -722,23 +764,28 @@ typedef DList<Expression> ExprList;
struct JoinOrLm
{
- enum Type {
- JoinType,
- LongestMatchType
- };
-
JoinOrLm( Join *join ) :
- join(join), type(JoinType) {}
- JoinOrLm( TokenRegion *tokenRegion ) :
- tokenRegion(tokenRegion), type(LongestMatchType) {}
+ join(join) {}
FsmGraph *walk( Compiler *pd );
void makeNameTree( Compiler *pd );
void resolveNameRefs( Compiler *pd );
Join *join;
+};
+
+struct RegionJoinOrLm
+{
+ enum Type { LongestMatchType };
+
+ RegionJoinOrLm( TokenRegion *tokenRegion ) :
+ tokenRegion(tokenRegion) {}
+
+ FsmGraph *walk( Compiler *pd );
+ void makeNameTree( Compiler *pd );
+ void resolveNameRefs( Compiler *pd );
+
TokenRegion *tokenRegion;
- Type type;
};
/*