diff options
author | Adrian Thurston <thurston@complang.org> | 2012-05-28 15:53:39 +0000 |
---|---|---|
committer | Adrian Thurston <thurston@complang.org> | 2012-05-28 15:53:39 +0000 |
commit | 24f39e9b43a591701cbaba4845fcc14edb585864 (patch) | |
tree | a71f1c93e201751eaf8dda96b624ab2b5718526a | |
parent | 3d6ef0d073b8cab086261be954300164f217b584 (diff) | |
download | colm-24f39e9b43a591701cbaba4845fcc14edb585864.tar.gz |
specializing graph dicts and lists for regions and regular language defs
Previously used a single graph dictionary for regions and regular language defs
because we were derived from ragel. Splitting these The split goes down to
VarDef and JoinOrLm.
-rw-r--r-- | colm/compiler.cc | 12 | ||||
-rw-r--r-- | colm/lmparse.kh | 4 | ||||
-rw-r--r-- | colm/lmparse.kl | 60 | ||||
-rw-r--r-- | colm/parsedata.h | 2 | ||||
-rw-r--r-- | colm/parsetree.cc | 103 | ||||
-rw-r--r-- | colm/parsetree.h | 67 |
6 files changed, 171 insertions, 77 deletions
diff --git a/colm/compiler.cc b/colm/compiler.cc index 086fb2bc..d757c08c 100644 --- a/colm/compiler.cc +++ b/colm/compiler.cc @@ -903,7 +903,7 @@ NameInst *Compiler::makeNameTree() /* First make the name tree. */ initNameWalk( rootName ); - for ( GraphList::Iter glel = instanceList; glel.lte(); glel++ ) { + for ( RegionGraphList::Iter glel = instanceList; glel.lte(); glel++ ) { /* Recurse on the instance. */ glel->value->makeNameTree( glel->loc, this ); } @@ -945,7 +945,7 @@ FsmGraph *Compiler::makeAllRegions() /* Resove name references in the tree. */ initNameWalk( rootName ); - for ( GraphList::Iter glel = instanceList; glel.lte(); glel++ ) + for ( RegionGraphList::Iter glel = instanceList; glel.lte(); glel++ ) glel->value->resolveNameRefs( this ); /* Resovle the implicit name references to the nfa instantiations. */ @@ -956,7 +956,7 @@ FsmGraph *Compiler::makeAllRegions() /* Make all the instantiations, we know that main exists in this list. */ initNameWalk( rootName ); - for ( GraphList::Iter glel = instanceList; glel.lte(); glel++ ) { + for ( RegionGraphList::Iter glel = instanceList; glel.lte(); glel++ ) { /* Build the graph from a walk of the parse tree. */ FsmGraph *newGraph = glel->value->walk( this ); @@ -1094,12 +1094,12 @@ void Compiler::createDefaultScanner() defaultRegion = new TokenRegion( InputLoc(), name, regionList.length(), 0 ); regionList.append( defaultRegion ); - JoinOrLm *joinOrLm = new JoinOrLm( defaultRegion ); + RegionJoinOrLm *joinOrLm = new RegionJoinOrLm( defaultRegion ); /* Insert the machine definition into the graph dictionary. */ - GraphDictEl *newEl = rootNamespace->graphDict.insert( name ); + RegionGraphDictEl *newEl = rootNamespace->graphDict.insert( name ); assert( newEl != 0 ); - newEl->value = new VarDef( name, joinOrLm ); + newEl->value = new RegionVarDef( name, joinOrLm ); newEl->isInstance = true; instanceList.append( newEl ); diff --git a/colm/lmparse.kh b/colm/lmparse.kh index 6774c7c6..8e82e844 100644 --- a/colm/lmparse.kh +++ b/colm/lmparse.kh @@ -79,8 +79,8 @@ struct ColmParser int token( InputLoc &loc, int tokId, char *tokstart, int toklen ); void addRegularDef( const InputLoc &loc, Namespace *nspace, const String &name, JoinOrLm *joinOrLm ); - void addLexicalRegionDef( const InputLoc &loc, Namespace *nspace, - const String &name, JoinOrLm *joinOrLm ); + void addRegionDef( const InputLoc &loc, Namespace *nspace, + const String &name, RegionJoinOrLm *joinOrLm ); void addProduction( const InputLoc &loc, const String &name, ProdElList *prodElList, bool commit, CodeBlock *redBlock, LangEl *predOf ); void addArgvList(); diff --git a/colm/lmparse.kl b/colm/lmparse.kl index 299d173e..356acf71 100644 --- a/colm/lmparse.kl +++ b/colm/lmparse.kl @@ -486,36 +486,36 @@ region_head: pd->regionList.length(), regionStack.top() ); regionStack.top()->childRegions.append( tokenRegionIgn ); pd->regionList.append( tokenRegionIgn ); - JoinOrLm *joinOrLmIgn = new JoinOrLm( tokenRegionIgn ); + RegionJoinOrLm *joinOrLmIgn = new RegionJoinOrLm( tokenRegionIgn ); String scannerNameIgn( $2->data.length() + 2, "<%s>-ign", $2->data.data ); - addLexicalRegionDef( InputLoc(), namespaceStack.top(), scannerNameIgn, joinOrLmIgn ); + addRegionDef( InputLoc(), namespaceStack.top(), scannerNameIgn, joinOrLmIgn ); /* Just for collect ignores. Will use the ignore-only start state. */ TokenRegion *tokenRegionCi = new TokenRegion( InputLoc(), $2->data + "_ci" , pd->regionList.length(), regionStack.top() ); regionStack.top()->childRegions.append( tokenRegionCi ); pd->regionList.append( tokenRegionCi ); - JoinOrLm *joinOrLmCi = new JoinOrLm( tokenRegionCi ); + RegionJoinOrLm *joinOrLmCi = new RegionJoinOrLm( tokenRegionCi ); String scannerNameCi( $2->data.length() + 2, "<%s>-ci", $2->data.data ); - addLexicalRegionDef( InputLoc(), namespaceStack.top(), scannerNameCi, joinOrLmCi ); + addRegionDef( InputLoc(), namespaceStack.top(), scannerNameCi, joinOrLmCi ); /* Just for tokens. */ TokenRegion *tokenRegionTok = new TokenRegion( InputLoc(), $2->data + "_tok" , pd->regionList.length(), regionStack.top() ); regionStack.top()->childRegions.append( tokenRegionTok ); pd->regionList.append( tokenRegionTok ); - JoinOrLm *joinOrLmTok = new JoinOrLm( tokenRegionTok ); + RegionJoinOrLm *joinOrLmTok = new RegionJoinOrLm( tokenRegionTok ); String scannerNameTok( $2->data.length() + 2, "<%s>-tok", $2->data.data ); - addLexicalRegionDef( InputLoc(), namespaceStack.top(), scannerNameTok, joinOrLmTok ); + addRegionDef( InputLoc(), namespaceStack.top(), scannerNameTok, joinOrLmTok ); /* Make the new token region. */ TokenRegion *tokenRegion = new TokenRegion( InputLoc(), $2->data, pd->regionList.length(), regionStack.top() ); regionStack.top()->childRegions.append( tokenRegion ); pd->regionList.append( tokenRegion ); - JoinOrLm *joinOrLm = new JoinOrLm( tokenRegion ); + RegionJoinOrLm *joinOrLm = new RegionJoinOrLm( tokenRegion ); String scannerName( $2->data.length() + 2, "<%s>", $2->data.data ); - addLexicalRegionDef( InputLoc(), namespaceStack.top(), scannerName, joinOrLm ); + addRegionDef( InputLoc(), namespaceStack.top(), scannerName, joinOrLm ); regionStack.push( tokenRegion ); tokenRegion->ignoreOnlyRegion = tokenRegionIgn; @@ -956,36 +956,36 @@ literal_item: opt_no_ignore TK_Literal opt_no_ignore pd->regionList.length(), regionStack.top() ); regionStack.top()->childRegions.append( tokenRegionIgn ); pd->regionList.append( tokenRegionIgn ); - JoinOrLm *joinOrLmIgn = new JoinOrLm( tokenRegionIgn ); + RegionJoinOrLm *joinOrLmIgn = new RegionJoinOrLm( tokenRegionIgn ); String scannerNameIgn( name.length() + 2, "<%s>-ign", name.data ); - addLexicalRegionDef( InputLoc(), namespaceStack.top(), scannerNameIgn, joinOrLmIgn ); + addRegionDef( InputLoc(), namespaceStack.top(), scannerNameIgn, joinOrLmIgn ); /* Just for collect ignores. Will use the ignore-only start state. */ TokenRegion *tokenRegionCi = new TokenRegion( InputLoc(), name + "_ci", pd->regionList.length(), regionStack.top() ); regionStack.top()->childRegions.append( tokenRegionCi ); pd->regionList.append( tokenRegionCi ); - JoinOrLm *joinOrLmCi = new JoinOrLm( tokenRegionCi ); + RegionJoinOrLm *joinOrLmCi = new RegionJoinOrLm( tokenRegionCi ); String scannerNameCi( name.length() + 2, "<%s>-ci", name.data ); - addLexicalRegionDef( InputLoc(), namespaceStack.top(), scannerNameCi, joinOrLmCi ); + addRegionDef( InputLoc(), namespaceStack.top(), scannerNameCi, joinOrLmCi ); /* Just for tokens. */ TokenRegion *tokenRegionTok = new TokenRegion( InputLoc(), name + "_tok", pd->regionList.length(), regionStack.top() ); regionStack.top()->childRegions.append( tokenRegionTok ); pd->regionList.append( tokenRegionTok ); - JoinOrLm *joinOrLmTok = new JoinOrLm( tokenRegionTok ); + RegionJoinOrLm *joinOrLmTok = new RegionJoinOrLm( tokenRegionTok ); String scannerNameTok( name.length() + 2, "<%s>-tok", name.data ); - addLexicalRegionDef( InputLoc(), namespaceStack.top(), scannerNameTok, joinOrLmTok ); + addRegionDef( InputLoc(), namespaceStack.top(), scannerNameTok, joinOrLmTok ); /* Make a new token region just for the token. */ TokenRegion *tokenRegion = new TokenRegion( InputLoc(), $2->data, pd->regionList.length(), regionStack.top() ); regionStack.top()->childRegions.append( tokenRegion ); pd->regionList.append( tokenRegion ); - JoinOrLm *joinOrLm = new JoinOrLm( tokenRegion ); + RegionJoinOrLm *joinOrLm = new RegionJoinOrLm( tokenRegion ); String scannerName( name.length() + 2, "<%s>", name.data ); - addLexicalRegionDef( InputLoc(), namespaceStack.top(), scannerName, joinOrLm ); + addRegionDef( InputLoc(), namespaceStack.top(), scannerName, joinOrLm ); regionStack.push( tokenRegion ); tokenRegion->ignoreOnlyRegion = tokenRegionIgn; @@ -1187,36 +1187,36 @@ token_def_name: pd->regionList.length(), regionStack.top() ); regionStack.top()->childRegions.append( tokenRegionIgn ); pd->regionList.append( tokenRegionIgn ); - JoinOrLm *joinOrLmIgn = new JoinOrLm( tokenRegionIgn ); + RegionJoinOrLm *joinOrLmIgn = new RegionJoinOrLm( tokenRegionIgn ); String scannerNameIgn( name.length() + 2, "<%s>-ign", name.data ); - addLexicalRegionDef( InputLoc(), namespaceStack.top(), scannerNameIgn, joinOrLmIgn ); + addRegionDef( InputLoc(), namespaceStack.top(), scannerNameIgn, joinOrLmIgn ); /* Just for explicitly collecting ignores. */ TokenRegion *tokenRegionCi = new TokenRegion( InputLoc(), name + "_ci", pd->regionList.length(), regionStack.top() ); regionStack.top()->childRegions.append( tokenRegionCi ); pd->regionList.append( tokenRegionCi ); - JoinOrLm *joinOrLmCi = new JoinOrLm( tokenRegionCi ); + RegionJoinOrLm *joinOrLmCi = new RegionJoinOrLm( tokenRegionCi ); String scannerNameCi( name.length() + 2, "<%s>-ci", name.data ); - addLexicalRegionDef( InputLoc(), namespaceStack.top(), scannerNameCi, joinOrLmCi ); + addRegionDef( InputLoc(), namespaceStack.top(), scannerNameCi, joinOrLmCi ); /* Just for tokens. */ TokenRegion *tokenRegionTok = new TokenRegion( InputLoc(), name + "_tok", pd->regionList.length(), regionStack.top() ); regionStack.top()->childRegions.append( tokenRegionTok ); pd->regionList.append( tokenRegionTok ); - JoinOrLm *joinOrLmTok = new JoinOrLm( tokenRegionTok ); + RegionJoinOrLm *joinOrLmTok = new RegionJoinOrLm( tokenRegionTok ); String scannerNameTok( name.length() + 2, "<%s>-tok", name.data ); - addLexicalRegionDef( InputLoc(), namespaceStack.top(), scannerNameTok, joinOrLmTok ); + addRegionDef( InputLoc(), namespaceStack.top(), scannerNameTok, joinOrLmTok ); /* If not inside a region, make one for the token. */ TokenRegion *tokenRegion = new TokenRegion( InputLoc(), name, pd->regionList.length(), regionStack.top() ); regionStack.top()->childRegions.append( tokenRegion ); pd->regionList.append( tokenRegion ); - JoinOrLm *joinOrLm = new JoinOrLm( tokenRegion ); + RegionJoinOrLm *joinOrLm = new RegionJoinOrLm( tokenRegion ); String scannerName( name.length() + 2, "<%s>", name.data ); - addLexicalRegionDef( InputLoc(), namespaceStack.top(), scannerName, joinOrLm ); + addRegionDef( InputLoc(), namespaceStack.top(), scannerName, joinOrLm ); regionStack.push( tokenRegion ); tokenRegion->ignoreOnlyRegion = tokenRegionIgn; @@ -2602,8 +2602,8 @@ void ColmParser::init() TokenRegion *rootRegion = new TokenRegion( InputLoc(), rootRegionName, pd->regionList.length(), 0 ); pd->regionList.append( rootRegion ); - JoinOrLm *joinOrLm = new JoinOrLm( rootRegion ); - addLexicalRegionDef( InputLoc(), namespaceStack.top(), rootRegionName, joinOrLm ); + RegionJoinOrLm *joinOrLm = new RegionJoinOrLm( rootRegion ); + addRegionDef( InputLoc(), namespaceStack.top(), rootRegionName, joinOrLm ); regionStack.push( rootRegion ); pd->rootRegion = rootRegion; @@ -2665,13 +2665,13 @@ void ColmParser::addRegularDef( const InputLoc &loc, Namespace *nspace, } } -void ColmParser::addLexicalRegionDef( const InputLoc &loc, Namespace *nspace, - const String &name, JoinOrLm *joinOrLm ) +void ColmParser::addRegionDef( const InputLoc &loc, Namespace *nspace, + const String &name, RegionJoinOrLm *joinOrLm ) { - GraphDictEl *newEl = nspace->graphDict.insert( name ); + RegionGraphDictEl *newEl = nspace->graphDict.insert( name ); if ( newEl != 0 ) { /* New element in the dict, all good. */ - newEl->value = new VarDef( name, joinOrLm ); + newEl->value = new RegionVarDef( name, joinOrLm ); newEl->isInstance = true; newEl->loc = loc; diff --git a/colm/parsedata.h b/colm/parsedata.h index 63ca1245..5e3a59c2 100644 --- a/colm/parsedata.h +++ b/colm/parsedata.h @@ -591,7 +591,7 @@ struct Compiler */ /* The list of instances. */ - GraphList instanceList; + RegionGraphList instanceList; /* Dictionary of actions. Lets actions be defined and then referenced. */ ActionDict actionDict; diff --git a/colm/parsetree.cc b/colm/parsetree.cc index a195f1b7..5a7b6003 100644 --- a/colm/parsetree.cc +++ b/colm/parsetree.cc @@ -198,7 +198,7 @@ FsmGraph *VarDef::walk( Compiler *pd ) /* If the expression below is a join operation with multiple expressions * then it just had epsilon transisions resolved. If it is a join * with only a single expression then run the epsilon op now. */ - if ( joinOrLm->type == JoinOrLm::JoinType && joinOrLm->join->exprList.length() == 1 ) + if ( joinOrLm->join->exprList.length() == 1 ) rtnVal->epsilonOp(); /* We can now unset entry points that are not longer used. */ @@ -220,9 +220,6 @@ void VarDef::makeNameTree( const InputLoc &loc, Compiler *pd ) NameInst *prevNameInst = pd->curNameInst; pd->curNameInst = pd->addNameInst( loc, name, false ); - if ( joinOrLm->type == JoinOrLm::LongestMatchType ) - pd->curNameInst->isLongestMatch = true; - /* Recurse. */ joinOrLm->makeNameTree( pd ); @@ -242,6 +239,61 @@ void VarDef::resolveNameRefs( Compiler *pd ) pd->popNameScope( nameFrame ); } +FsmGraph *RegionVarDef::walk( Compiler *pd ) +{ + /* We enter into a new name scope. */ + NameFrame nameFrame = pd->enterNameScope( true, 1 ); + + /* Recurse on the expression. */ + FsmGraph *rtnVal = joinOrLm->walk( pd ); + + /* Do the tranfer of local error actions. */ + LocalErrDictEl *localErrDictEl = pd->localErrDict.find( name ); + if ( localErrDictEl != 0 ) { + for ( StateList::Iter state = rtnVal->stateList; state.lte(); state++ ) + rtnVal->transferErrorActions( state, localErrDictEl->value ); + } + + /* We can now unset entry points that are not longer used. */ + pd->unsetObsoleteEntries( rtnVal ); + + /* If the name of the variable is referenced then add the entry point to + * the graph. */ + if ( pd->curNameInst->numRefs > 0 ) + rtnVal->setEntry( pd->curNameInst->id, rtnVal->startState ); + + /* Pop the name scope. */ + pd->popNameScope( nameFrame ); + return rtnVal; +} + +void RegionVarDef::makeNameTree( const InputLoc &loc, Compiler *pd ) +{ + /* The variable definition enters a new scope. */ + NameInst *prevNameInst = pd->curNameInst; + pd->curNameInst = pd->addNameInst( loc, name, false ); + + pd->curNameInst->isLongestMatch = true; + + /* Recurse. */ + joinOrLm->makeNameTree( pd ); + + /* The name scope ends, pop the name instantiation. */ + pd->curNameInst = prevNameInst; +} + +void RegionVarDef::resolveNameRefs( Compiler *pd ) +{ + /* Entering into a new scope. */ + NameFrame nameFrame = pd->enterNameScope( true, 1 ); + + /* Recurse. */ + joinOrLm->resolveNameRefs( pd ); + + /* The name scope ends, pop the name instantiation. */ + pd->popNameScope( nameFrame ); +} + InputLoc TokenDef::getLoc() { return action != 0 ? action->loc : semiLoc; @@ -637,41 +689,36 @@ FsmGraph *TokenRegion::walk( Compiler *pd ) FsmGraph *JoinOrLm::walk( Compiler *pd ) { FsmGraph *rtnVal = 0; - switch ( type ) { - case JoinType: - rtnVal = join->walk( pd ); - break; - case LongestMatchType: - rtnVal = tokenRegion->walk( pd ); - break; - } + rtnVal = join->walk( pd ); return rtnVal; } void JoinOrLm::makeNameTree( Compiler *pd ) { - switch ( type ) { - case JoinType: - join->makeNameTree( pd ); - break; - case LongestMatchType: - tokenRegion->makeNameTree( pd ); - break; - } + join->makeNameTree( pd ); } void JoinOrLm::resolveNameRefs( Compiler *pd ) { - switch ( type ) { - case JoinType: - join->resolveNameRefs( pd ); - break; - case LongestMatchType: - tokenRegion->resolveNameRefs( pd ); - break; - } + join->resolveNameRefs( pd ); } +FsmGraph *RegionJoinOrLm::walk( Compiler *pd ) +{ + FsmGraph *rtnVal = 0; + rtnVal = tokenRegion->walk( pd ); + return rtnVal; +} + +void RegionJoinOrLm::makeNameTree( Compiler *pd ) +{ + tokenRegion->makeNameTree( pd ); +} + +void RegionJoinOrLm::resolveNameRefs( Compiler *pd ) +{ + tokenRegion->resolveNameRefs( pd ); +} /* Construct with a location and the first expression. */ Join::Join( Expression *expr ) diff --git a/colm/parsetree.h b/colm/parsetree.h index 965085c6..f451d789 100644 --- a/colm/parsetree.h +++ b/colm/parsetree.h @@ -167,6 +167,7 @@ struct Factor; struct Expression; struct Join; struct JoinOrLm; +struct RegionJoinOrLm; struct TokenRegion; struct Namespace; struct Context; @@ -328,6 +329,23 @@ struct VarDef JoinOrLm *joinOrLm; }; +/* + * A Variable Definition + */ +struct RegionVarDef +{ + RegionVarDef( const String &name, RegionJoinOrLm *joinOrLm ) + : name(name), joinOrLm(joinOrLm) { } + + /* Parse tree traversal. */ + FsmGraph *walk( Compiler *pd ); + void makeNameTree( const InputLoc &loc, Compiler *pd ); + void resolveNameRefs( Compiler *pd ); + + String name; + RegionJoinOrLm *joinOrLm; +}; + typedef Vector<String> StringVect; typedef CmpTable<String, CmpStr> CmpStrVect; @@ -645,6 +663,30 @@ struct GraphDictEl typedef AvlTree<GraphDictEl, String, CmpStr> GraphDict; typedef DList<GraphDictEl> GraphList; +/* Graph dictionary. */ +struct RegionGraphDictEl +: + public AvlTreeEl<RegionGraphDictEl>, + public DListEl<RegionGraphDictEl> +{ + RegionGraphDictEl( const String &key ) + : key(key), value(0), isInstance(false) { } + RegionGraphDictEl( const String &key, RegionVarDef *value ) + : key(key), value(value), isInstance(false) { } + + const String &getKey() { return key; } + + String key; + RegionVarDef *value; + bool isInstance; + + /* Location info of graph definition. Points to variable name of assignment. */ + InputLoc loc; +}; + +typedef AvlTree<RegionGraphDictEl, String, CmpStr> RegionGraphDict; +typedef DList<RegionGraphDictEl> RegionGraphList; + struct TypeAlias { TypeAlias( const InputLoc &loc, Namespace *nspace, @@ -699,7 +741,7 @@ struct Namespace GenericList genericList; /* Dictionary of graphs. Both instances and non-instances go here. */ - GraphDict graphDict; + RegionGraphDict graphDict; /* regular language definitions. */ GraphDict rlMap; @@ -722,23 +764,28 @@ typedef DList<Expression> ExprList; struct JoinOrLm { - enum Type { - JoinType, - LongestMatchType - }; - JoinOrLm( Join *join ) : - join(join), type(JoinType) {} - JoinOrLm( TokenRegion *tokenRegion ) : - tokenRegion(tokenRegion), type(LongestMatchType) {} + join(join) {} FsmGraph *walk( Compiler *pd ); void makeNameTree( Compiler *pd ); void resolveNameRefs( Compiler *pd ); Join *join; +}; + +struct RegionJoinOrLm +{ + enum Type { LongestMatchType }; + + RegionJoinOrLm( TokenRegion *tokenRegion ) : + tokenRegion(tokenRegion) {} + + FsmGraph *walk( Compiler *pd ); + void makeNameTree( Compiler *pd ); + void resolveNameRefs( Compiler *pd ); + TokenRegion *tokenRegion; - Type type; }; /* |