cleanup of ragel-derived code

The scanner code was derived from ragel, where the same map of names to graphs is used for regular language definitions and scanners. Some of the regular language definitions are instantiations, meaning they create states. Starting to retire this by creating a separate map for regular language defs (rlMap).
author: Adrian Thurston <thurston@complang.org> 2012-05-27 21:33:56 -0400
committer: Adrian Thurston <thurston@complang.org> 2012-05-27 21:33:56 -0400
commit: 3f5b9fc33d3e5dc77f72300289f5ce618188a26b (patch)
tree: b14d8d0782c645f8b5a7821e7247d7ff6a4ff924
parent: 46d096608cd47dc5332de53aaf9248dd9b556554 (diff)
download: colm-3f5b9fc33d3e5dc77f72300289f5ce618188a26b.tar.gz
4 files changed, 40 insertions, 15 deletions
diff --git a/colm/compiler.cc b/colm/compiler.cc
index a89465d7..086fb2bc 100644
--- a/colm/compiler.cc
+++ b/colm/compiler.cc
@@ -697,7 +697,7 @@ void Compiler::createBuiltin( const char *name, BuiltinMachine builtin )
 	JoinOrLm *joinOrLm = new JoinOrLm( join );
 	VarDef *varDef = new VarDef( name, joinOrLm );
 	GraphDictEl *graphDictEl = new GraphDictEl( name, varDef );
-	rootNamespace->graphDict.insert( graphDictEl );
+	rootNamespace->rlMap.insert( graphDictEl );
 }
 
 /* Initialize the graph dict with builtin types. */
diff --git a/colm/lmparse.kh b/colm/lmparse.kh
index 5523a656..d6dcb220 100644
--- a/colm/lmparse.kh
+++ b/colm/lmparse.kh
@@ -80,6 +80,8 @@ struct ColmParser
 	int token( InputLoc &loc, int tokId, char *tokstart, int toklen );
 	void addRegularDef( const InputLoc &loc, Namespace *nspace, 
 		const String &name, JoinOrLm *joinOrLm, bool isInstance );
+	void addRegularDef2( const InputLoc &loc, Namespace *nspace, 
+		const String &name, JoinOrLm *joinOrLm, bool isInstance );
 	void addProduction( const InputLoc &loc, const String &name, 
 		ProdElList *prodElList, bool commit, CodeBlock *redBlock, LangEl *predOf );
 	void addArgvList();
diff --git a/colm/lmparse.kl b/colm/lmparse.kl
index 24c94455..a99af72c 100644
--- a/colm/lmparse.kl
+++ b/colm/lmparse.kl
@@ -488,7 +488,7 @@ region_head:
 		pd->regionList.append( tokenRegionIgn );
 		JoinOrLm *joinOrLmIgn = new JoinOrLm( tokenRegionIgn );
 		String scannerNameIgn( $2->data.length() + 2, "<%s>-ign", $2->data.data );
-		addRegularDef( InputLoc(), namespaceStack.top(), scannerNameIgn, joinOrLmIgn, true );
+		addRegularDef2( InputLoc(), namespaceStack.top(), scannerNameIgn, joinOrLmIgn, true );
 
 		/* Just for collect ignores. Will use the ignore-only start state. */
 		TokenRegion *tokenRegionCi = new TokenRegion( InputLoc(), $2->data + "_ci" ,
@@ -497,7 +497,7 @@ region_head:
 		pd->regionList.append( tokenRegionCi );
 		JoinOrLm *joinOrLmCi = new JoinOrLm( tokenRegionCi );
 		String scannerNameCi( $2->data.length() + 2, "<%s>-ci", $2->data.data );
-		addRegularDef( InputLoc(), namespaceStack.top(), scannerNameCi, joinOrLmCi, true );
+		addRegularDef2( InputLoc(), namespaceStack.top(), scannerNameCi, joinOrLmCi, true );
 
 		/* Just for tokens. */
 		TokenRegion *tokenRegionTok = new TokenRegion( InputLoc(), $2->data + "_tok" ,
@@ -506,7 +506,7 @@ region_head:
 		pd->regionList.append( tokenRegionTok );
 		JoinOrLm *joinOrLmTok = new JoinOrLm( tokenRegionTok );
 		String scannerNameTok( $2->data.length() + 2, "<%s>-tok", $2->data.data );
-		addRegularDef( InputLoc(), namespaceStack.top(), scannerNameTok, joinOrLmTok, true );
+		addRegularDef2( InputLoc(), namespaceStack.top(), scannerNameTok, joinOrLmTok, true );
 
 		/* Make the new token region. */
 		TokenRegion *tokenRegion = new TokenRegion( InputLoc(), $2->data,
@@ -515,7 +515,7 @@ region_head:
 		pd->regionList.append( tokenRegion );
 		JoinOrLm *joinOrLm = new JoinOrLm( tokenRegion );
 		String scannerName( $2->data.length() + 2, "<%s>", $2->data.data );
-		addRegularDef( InputLoc(), namespaceStack.top(), scannerName, joinOrLm, true );
+		addRegularDef2( InputLoc(), namespaceStack.top(), scannerName, joinOrLm, true );
 		regionStack.push( tokenRegion );
 
 		tokenRegion->ignoreOnlyRegion = tokenRegionIgn;
@@ -958,7 +958,7 @@ literal_item: opt_no_ignore TK_Literal opt_no_ignore
 			pd->regionList.append( tokenRegionIgn );
 			JoinOrLm *joinOrLmIgn = new JoinOrLm( tokenRegionIgn );
 			String scannerNameIgn( name.length() + 2, "<%s>-ign", name.data );
-			addRegularDef( InputLoc(), namespaceStack.top(), scannerNameIgn, joinOrLmIgn, true );
+			addRegularDef2( InputLoc(), namespaceStack.top(), scannerNameIgn, joinOrLmIgn, true );
 
 			/* Just for collect ignores. Will use the ignore-only start state. */
 			TokenRegion *tokenRegionCi = new TokenRegion( InputLoc(), name + "_ci",
@@ -967,7 +967,7 @@ literal_item: opt_no_ignore TK_Literal opt_no_ignore
 			pd->regionList.append( tokenRegionCi );
 			JoinOrLm *joinOrLmCi = new JoinOrLm( tokenRegionCi );
 			String scannerNameCi( name.length() + 2, "<%s>-ci", name.data );
-			addRegularDef( InputLoc(), namespaceStack.top(), scannerNameCi, joinOrLmCi, true );
+			addRegularDef2( InputLoc(), namespaceStack.top(), scannerNameCi, joinOrLmCi, true );
 
 			/* Just for tokens. */
 			TokenRegion *tokenRegionTok = new TokenRegion( InputLoc(), name + "_tok",
@@ -976,7 +976,7 @@ literal_item: opt_no_ignore TK_Literal opt_no_ignore
 			pd->regionList.append( tokenRegionTok );
 			JoinOrLm *joinOrLmTok = new JoinOrLm( tokenRegionTok );
 			String scannerNameTok( name.length() + 2, "<%s>-tok", name.data );
-			addRegularDef( InputLoc(), namespaceStack.top(), scannerNameTok, joinOrLmTok, true );
+			addRegularDef2( InputLoc(), namespaceStack.top(), scannerNameTok, joinOrLmTok, true );
 
 			/* Make a new token region just for the token. */
 			TokenRegion *tokenRegion = new TokenRegion( InputLoc(), $2->data,
@@ -985,7 +985,7 @@ literal_item: opt_no_ignore TK_Literal opt_no_ignore
 			pd->regionList.append( tokenRegion );
 			JoinOrLm *joinOrLm = new JoinOrLm( tokenRegion );
 			String scannerName( name.length() + 2, "<%s>", name.data );
-			addRegularDef( InputLoc(), namespaceStack.top(), scannerName, joinOrLm, true );
+			addRegularDef2( InputLoc(), namespaceStack.top(), scannerName, joinOrLm, true );
 			regionStack.push( tokenRegion );
 
 			tokenRegion->ignoreOnlyRegion = tokenRegionIgn;
@@ -1189,7 +1189,7 @@ token_def_name:
 			pd->regionList.append( tokenRegionIgn );
 			JoinOrLm *joinOrLmIgn = new JoinOrLm( tokenRegionIgn );
 			String scannerNameIgn( name.length() + 2, "<%s>-ign", name.data );
-			addRegularDef( InputLoc(), namespaceStack.top(), scannerNameIgn, joinOrLmIgn, true );
+			addRegularDef2( InputLoc(), namespaceStack.top(), scannerNameIgn, joinOrLmIgn, true );
 
 			/* Just for explicitly collecting ignores. */
 			TokenRegion *tokenRegionCi = new TokenRegion( InputLoc(), name + "_ci",
@@ -1198,7 +1198,7 @@ token_def_name:
 			pd->regionList.append( tokenRegionCi );
 			JoinOrLm *joinOrLmCi = new JoinOrLm( tokenRegionCi );
 			String scannerNameCi( name.length() + 2, "<%s>-ci", name.data );
-			addRegularDef( InputLoc(), namespaceStack.top(), scannerNameCi, joinOrLmCi, true );
+			addRegularDef2( InputLoc(), namespaceStack.top(), scannerNameCi, joinOrLmCi, true );
 
 			/* Just for tokens. */
 			TokenRegion *tokenRegionTok = new TokenRegion( InputLoc(), name + "_tok",
@@ -1207,7 +1207,7 @@ token_def_name:
 			pd->regionList.append( tokenRegionTok );
 			JoinOrLm *joinOrLmTok = new JoinOrLm( tokenRegionTok );
 			String scannerNameTok( name.length() + 2, "<%s>-tok", name.data );
-			addRegularDef( InputLoc(), namespaceStack.top(), scannerNameTok, joinOrLmTok, true );
+			addRegularDef2( InputLoc(), namespaceStack.top(), scannerNameTok, joinOrLmTok, true );
 			
 			/* If not inside a region, make one for the token. */
 			TokenRegion *tokenRegion = new TokenRegion( InputLoc(), name,
@@ -1216,7 +1216,7 @@ token_def_name:
 			pd->regionList.append( tokenRegion );
 			JoinOrLm *joinOrLm = new JoinOrLm( tokenRegion );
 			String scannerName( name.length() + 2, "<%s>", name.data );
-			addRegularDef( InputLoc(), namespaceStack.top(), scannerName, joinOrLm, true );
+			addRegularDef2( InputLoc(), namespaceStack.top(), scannerName, joinOrLm, true );
 			regionStack.push( tokenRegion );
 
 			tokenRegion->ignoreOnlyRegion = tokenRegionIgn;
@@ -2397,7 +2397,7 @@ rl_factor:
 		Namespace *nspace = namespaceStack.top();
 
 		while ( nspace != 0 ) {
-			GraphDictEl *gdNode = nspace->graphDict.find( $1->data );
+			GraphDictEl *gdNode = nspace->rlMap.find( $1->data );
 			if ( gdNode != 0 ) {
 				if ( gdNode->isInstance ) {
 					/* Recover by retuning null as the factor node. */
@@ -2603,7 +2603,7 @@ void ColmParser::init()
 			pd->regionList.length(), 0 );
 	pd->regionList.append( rootRegion );
 	JoinOrLm *joinOrLm = new JoinOrLm( rootRegion );
-	addRegularDef( InputLoc(), namespaceStack.top(), rootRegionName, joinOrLm, true );
+	addRegularDef2( InputLoc(), namespaceStack.top(), rootRegionName, joinOrLm, true );
 	regionStack.push( rootRegion );
 	pd->rootRegion = rootRegion;
 
@@ -2652,6 +2652,26 @@ int ColmParser::parseLangEl( int type, const Token *token )
 void ColmParser::addRegularDef( const InputLoc &loc, Namespace *nspace,
 		const String &name, JoinOrLm *joinOrLm, bool isInstance )
 {
+	GraphDictEl *newEl = nspace->rlMap.insert( name );
+	if ( newEl != 0 ) {
+		/* New element in the dict, all good. */
+		newEl->value = new VarDef( name, joinOrLm );
+		newEl->isInstance = isInstance;
+		newEl->loc = loc;
+
+		/* If it is an instance, put on the instance list. */
+		if ( isInstance )
+			pd->instanceList.append( newEl );
+	}
+	else {
+		// Recover by ignoring the duplicate.
+		error(loc) << "regular definition \"" << name << "\" already exists" << endl;
+	}
+}
+
+void ColmParser::addRegularDef2( const InputLoc &loc, Namespace *nspace,
+		const String &name, JoinOrLm *joinOrLm, bool isInstance )
+{
 	GraphDictEl *newEl = nspace->graphDict.insert( name );
 	if ( newEl != 0 ) {
 		/* New element in the dict, all good. */
diff --git a/colm/parsetree.h b/colm/parsetree.h
index 46b7d4b1..965085c6 100644
--- a/colm/parsetree.h
+++ b/colm/parsetree.h
@@ -701,6 +701,9 @@ struct Namespace
 	/* Dictionary of graphs. Both instances and non-instances go here. */
 	GraphDict graphDict;
 
+	/* regular language definitions. */
+	GraphDict rlMap;
+
 	TypeAliasList typeAliasList;
 
 	Namespace *parentNamespace;
author	Adrian Thurston <thurston@complang.org>	2012-05-27 21:33:56 -0400
committer	Adrian Thurston <thurston@complang.org>	2012-05-27 21:33:56 -0400
commit	3f5b9fc33d3e5dc77f72300289f5ce618188a26b (patch)
tree	b14d8d0782c645f8b5a7821e7247d7ff6a4ff924
parent	46d096608cd47dc5332de53aaf9248dd9b556554 (diff)
download	colm-3f5b9fc33d3e5dc77f72300289f5ce618188a26b.tar.gz