improvements to ignore handling in the parser

Every region now also has a duplicate scanning region that is only for tokens. The duplicate ignores and tokens generate the original tokens through a TokenDef ignore mechanism. Can turn off post-ignore parsing and pre-ignore parsing on a token-by-token basis. Probably want to move it into the productions and specify it there. Currently don't have a specification mechanism. If an ignore is a post-token ignore it is not right-attached.
author: Adrian Thurston <thurston@complang.org> 2012-05-22 20:33:51 -0400
committer: Adrian Thurston <thurston@complang.org> 2012-05-22 20:33:51 -0400
commit: 35f086e516696d13b13d129062ae45d186e3523e (patch)
tree: f3360707423e65dbad73c203ce27deccd65d070e
parent: aa01add5ba8ea59850b4db058f6a0e53ab9c41fe (diff)
download: colm-35f086e516696d13b13d129062ae45d186e3523e.tar.gz
14 files changed, 214 insertions, 60 deletions
diff --git a/colm/bytecode.h b/colm/bytecode.h
index bb639e94..2151544d 100644
--- a/colm/bytecode.h
+++ b/colm/bytecode.h
@@ -340,20 +340,22 @@ typedef unsigned char uchar;
  */
 
 /* A tree that has been generated by a termDup. */
-#define PF_TERM_DUP      0x0001
+#define PF_TERM_DUP            0x0001
 
 /* Has been processed by the commit function. All children have also been
  * processed. */
-#define PF_COMMITTED     0x0002
+#define PF_COMMITTED           0x0002
 
 /* Created by a token generation action, not made from the input. */
-#define PF_ARTIFICIAL    0x0004
+#define PF_ARTIFICIAL          0x0004
 
 /* Named node from a pattern or constructor. */
-#define PF_NAMED         0x0008
+#define PF_NAMED               0x0008
 
 /* There is reverse code associated with this tree node. */
-#define PF_HAS_RCODE     0x0010
+#define PF_HAS_RCODE           0x0010
+
+#define PF_RIGHT_IGNORE        0x0020
 
 #define PF_LEFT_IL_ATTACHED    0x0400
 #define PF_RIGHT_IL_ATTACHED   0x0800
diff --git a/colm/declare.cc b/colm/declare.cc
index 2b2dfc70..bcd9a16c 100644
--- a/colm/declare.cc
+++ b/colm/declare.cc
@@ -228,13 +228,26 @@ void Namespace::declare( ParseData *pd )
 		g->declare( pd, this );
 
 	for ( LiteralDict::Iter l = literalDict; l.lte(); l++  ) {
-		/* Create a token for the literal. */
-		LangEl *newLangEl = declareLangEl( pd, this, l->value->name, LangEl::Term );
-		newLangEl->lit = l->value->literal;
-		newLangEl->isLiteral = true;
-		newLangEl->tokenDef = l->value;
+		if ( l->value->dupOf != 0 ) {
+			/* Duplicate of another. Use the lang el of that token. */
+			assert( l->value->dupOf->tdLangEl != 0 );
+			l->value->tdLangEl = l->value->dupOf->tdLangEl;
+		}
+		else {
+			/* Original. Create a token for the literal. */
+			LangEl *newLangEl = declareLangEl( pd, this, l->value->name, LangEl::Term );
+
+			newLangEl->lit = l->value->literal;
+			newLangEl->isLiteral = true;
+			newLangEl->tokenDef = l->value;
+
+			l->value->tdLangEl = newLangEl;
 
-		l->value->token = newLangEl;
+			if ( l->value->preNoIgnore )
+				newLangEl->preNoIgnore = true;
+			if ( l->value->postNoIgnore )
+				newLangEl->postNoIgnore = true;
+		}
 	}
 
 	for ( ContextDefList::Iter c = contextDefList; c.lte(); c++ ) {
@@ -273,15 +286,22 @@ void Namespace::declare( ParseData *pd )
 	for ( TokenDefListNs::Iter t = tokenDefList; t.lte(); t++ ) {
 		/* Literals already taken care of. */
 		if ( ! t->isLiteral ) {
-			/* Create the token. */
-			LangEl *tokEl = declareLangEl( pd, this, t->name, LangEl::Term );
-			tokEl->ignore = t->ignore;
-			tokEl->transBlock = t->codeBlock;
-			tokEl->objectDef = t->objectDef;
-			tokEl->contextIn = t->contextIn;
-			tokEl->tokenDef = t;
-
-			t->token = tokEl;
+			if ( t->dupOf != 0 ) {
+				/* Duplicate of another. Use the lang el of that token. */
+				assert( t->dupOf->tdLangEl != 0 );
+				t->tdLangEl = t->dupOf->tdLangEl;
+			}
+			else {
+				/* Create the token. */
+				LangEl *tokEl = declareLangEl( pd, this, t->name, LangEl::Term );
+				tokEl->ignore = t->ignore;
+				tokEl->transBlock = t->codeBlock;
+				tokEl->objectDef = t->objectDef;
+				tokEl->contextIn = t->contextIn;
+				tokEl->tokenDef = t;
+
+				t->tdLangEl = tokEl;
+			}
 		}
 	}
 
diff --git a/colm/fsmcodegen.cc b/colm/fsmcodegen.cc
index d98e6b0b..86302c31 100644
--- a/colm/fsmcodegen.cc
+++ b/colm/fsmcodegen.cc
@@ -216,9 +216,9 @@ void FsmCodeGen::LM_SWITCH( ostream &ret, InlineItem *item,
 
 	for ( TokenDefListReg::Iter lmi = item->tokenRegion->tokenDefList; lmi.lte(); lmi++ ) {
 		if ( lmi->inLmSelect ) {
-			assert( lmi->token != 0 );
+			assert( lmi->tdLangEl != 0 );
 			ret << "	case " << lmi->longestMatchId << ":\n";
-			EMIT_TOKEN( ret, lmi->token );
+			EMIT_TOKEN( ret, lmi->tdLangEl );
 			ret << "	break;\n";
 		}
 	}
@@ -231,27 +231,27 @@ void FsmCodeGen::LM_SWITCH( ostream &ret, InlineItem *item,
 
 void FsmCodeGen::LM_ON_LAST( ostream &ret, InlineItem *item )
 {
-	assert( item->longestMatchPart->token != 0 );
+	assert( item->longestMatchPart->tdLangEl != 0 );
 
 	ret << "	" << P() << " += 1;\n";
-	EMIT_TOKEN( ret, item->longestMatchPart->token );
+	EMIT_TOKEN( ret, item->longestMatchPart->tdLangEl );
 	ret << "	return;\n";
 }
 
 void FsmCodeGen::LM_ON_NEXT( ostream &ret, InlineItem *item )
 {
-	assert( item->longestMatchPart->token != 0 );
+	assert( item->longestMatchPart->tdLangEl != 0 );
 
-	EMIT_TOKEN( ret, item->longestMatchPart->token );
+	EMIT_TOKEN( ret, item->longestMatchPart->tdLangEl );
 	ret << "	return;\n";
 }
 
 void FsmCodeGen::LM_ON_LAG_BEHIND( ostream &ret, InlineItem *item )
 {
-	assert( item->longestMatchPart->token != 0 );
+	assert( item->longestMatchPart->tdLangEl != 0 );
 
 	ret << "	" << P() << " = " << TOKEND() << ";\n";
-	EMIT_TOKEN( ret, item->longestMatchPart->token );
+	EMIT_TOKEN( ret, item->longestMatchPart->tdLangEl );
 	ret << "	return;\n";
 }
 
diff --git a/colm/fsmexec.cc b/colm/fsmexec.cc
index 772e3146..f922c7a4 100644
--- a/colm/fsmexec.cc
+++ b/colm/fsmexec.cc
@@ -66,23 +66,23 @@ void execAction( FsmRun *fsmRun, GenAction *genAction )
 						lmi.lte(); lmi++ )
 				{
 					if ( lmi->inLmSelect && fsmRun->act == lmi->longestMatchId )
-						fsmRun->matchedToken = lmi->token->id;
+						fsmRun->matchedToken = lmi->tdLangEl->id;
 				}
 			}
 			fsmRun->returnResult = true;
 			break;
 		case InlineItem::LmOnLast:
 			fsmRun->p += 1;
-			fsmRun->matchedToken = item->longestMatchPart->token->id;
+			fsmRun->matchedToken = item->longestMatchPart->tdLangEl->id;
 			fsmRun->returnResult = true;
 			break;
 		case InlineItem::LmOnNext:
-			fsmRun->matchedToken = item->longestMatchPart->token->id;
+			fsmRun->matchedToken = item->longestMatchPart->tdLangEl->id;
 			fsmRun->returnResult = true;
 			break;
 		case InlineItem::LmOnLagBehind:
 			fsmRun->p = fsmRun->tokend;
-			fsmRun->matchedToken = item->longestMatchPart->token->id;
+			fsmRun->matchedToken = item->longestMatchPart->tdLangEl->id;
 			fsmRun->returnResult = true;
 			break;
 		}
diff --git a/colm/lmparse.kl b/colm/lmparse.kl
index 3689b922..0d3e814a 100644
--- a/colm/lmparse.kl
+++ b/colm/lmparse.kl
@@ -481,7 +481,7 @@ region_def:
 region_head: 
 	KW_Lex TK_Word 
 	final {
-		/* just for ignores. */
+		/* Just for ignores. */
 		TokenRegion *tokenRegionIgn = new TokenRegion( InputLoc(), $2->data + "_ign" ,
 				pd->regionList.length(), regionStack.top() );
 		regionStack.top()->childRegions.append( tokenRegionIgn );
@@ -490,6 +490,15 @@ region_head:
 		String scannerNameIgn( $2->data.length() + 2, "<%s>-ign", $2->data.data );
 		addRegularDef( InputLoc(), namespaceStack.top(), scannerNameIgn, joinOrLmIgn, true );
 
+		/* Just for tokens. */
+		TokenRegion *tokenRegionTok = new TokenRegion( InputLoc(), $2->data + "_tok" ,
+				pd->regionList.length(), regionStack.top() );
+		regionStack.top()->childRegions.append( tokenRegionTok );
+		pd->regionList.append( tokenRegionTok );
+		JoinOrLm *joinOrLmTok = new JoinOrLm( tokenRegionTok );
+		String scannerNameTok( $2->data.length() + 2, "<%s>-tok", $2->data.data );
+		addRegularDef( InputLoc(), namespaceStack.top(), scannerNameTok, joinOrLmTok, true );
+
 		/* Make the new token region. */
 		TokenRegion *tokenRegion = new TokenRegion( InputLoc(), $2->data,
 				pd->regionList.length(), regionStack.top() );
@@ -499,7 +508,11 @@ region_head:
 		String scannerName( $2->data.length() + 2, "<%s>", $2->data.data );
 		addRegularDef( InputLoc(), namespaceStack.top(), scannerName, joinOrLm, true );
 		regionStack.push( tokenRegion );
-		tokenRegion->ignoreRegion = tokenRegionIgn;
+
+		tokenRegion->ignoreOnlyRegion = tokenRegionIgn;
+		tokenRegion->tokenOnlyRegion = tokenRegionTok;
+
+		tokenRegionIgn->isIgnoreOnly = true;
 	};
 
 namespace_def: 
@@ -916,6 +929,22 @@ literal_item: TK_Literal
 
 		bool insideRegion = regionStack.top() != pd->rootRegion;
 		if ( !insideRegion ) {
+			TokenRegion *tokenRegionIgn = new TokenRegion( InputLoc(), name + "_ign",
+					pd->regionList.length(), regionStack.top() );
+			regionStack.top()->childRegions.append( tokenRegionIgn );
+			pd->regionList.append( tokenRegionIgn );
+			JoinOrLm *joinOrLmIgn = new JoinOrLm( tokenRegionIgn );
+			String scannerNameIgn( name.length() + 2, "<%s>-ign", name.data );
+			addRegularDef( InputLoc(), namespaceStack.top(), scannerNameIgn, joinOrLmIgn, true );
+
+			TokenRegion *tokenRegionTok = new TokenRegion( InputLoc(), name + "_tok",
+					pd->regionList.length(), regionStack.top() );
+			regionStack.top()->childRegions.append( tokenRegionTok );
+			pd->regionList.append( tokenRegionTok );
+			JoinOrLm *joinOrLmTok = new JoinOrLm( tokenRegionTok );
+			String scannerNameTok( name.length() + 2, "<%s>-tok", name.data );
+			addRegularDef( InputLoc(), namespaceStack.top(), scannerNameTok, joinOrLmTok, true );
+
 			/* Make a new token region just for the token. */
 			TokenRegion *tokenRegion = new TokenRegion( InputLoc(), $1->data,
 					pd->regionList.length(), regionStack.top() );
@@ -925,6 +954,11 @@ literal_item: TK_Literal
 			String scannerName( name.length() + 2, "<%s>", name.data );
 			addRegularDef( InputLoc(), namespaceStack.top(), scannerName, joinOrLm, true );
 			regionStack.push( tokenRegion );
+
+			tokenRegion->ignoreOnlyRegion = tokenRegionIgn;
+			tokenRegion->tokenOnlyRegion = tokenRegionTok;
+
+			tokenRegionIgn->isIgnoreOnly = true;
 		}
 
 		bool unusedCI;
@@ -945,13 +979,27 @@ literal_item: TK_Literal
 					Literal::LitString ) ) ) ) ) ) ) );
 			
 			TokenDef *tokenDef = new TokenDef( name, $1->data, true, false, join, 
-					0, $1->loc, pd->nextTokenId, nspace, region, 0, 0, 0 );
-
+					0, $1->loc, pd->nextTokenId++, nspace, region, 0, 0, 0 );
 			region->tokenDefList.append( tokenDef );
 			ldel = nspace->literalDict.insert( interp, tokenDef );
-			pd->nextTokenId += 1;
-
 			nspace->tokenDefList.append( tokenDef );
+
+//			if ( strcmp( interp, "%%" ) == 0 ) {
+//				tokenDef->preNoIgnore = true;
+//			}
+//			if ( strcmp( interp, ")" ) == 0 ) {
+//				tokenDef->preNoIgnore = true;
+//			}
+//			if ( strcmp( interp, "(" ) == 0 ) {
+//				tokenDef->postNoIgnore = true;
+//			}
+
+			TokenDef *tokenDefTok = new TokenDef( name + "_tok", $1->data, true, false, join, 
+					0, $1->loc, pd->nextTokenId++, nspace, region->tokenOnlyRegion, 0, 0, 0 );
+			tokenDefTok->dupOf = tokenDef;
+			region->tokenOnlyRegion->tokenDefList.append( tokenDefTok );
+			ldel = nspace->literalDict.insert( "|" + interp + "_tok", tokenDefTok );
+			nspace->tokenDefList.append( tokenDefTok );
 		}
 
 		if ( !insideRegion ) {
@@ -1013,13 +1061,26 @@ token_def:
 		/* All again for the ignore. */
 		if ( ignore ) {
 			TokenDef *tokenDefIgn = new TokenDef( name + "_ign", String(), false, ignore, join, 
-					0, $1->loc, pd->nextTokenId++, nspace, region->ignoreRegion,
+					0, $1->loc, pd->nextTokenId++, nspace, region->ignoreOnlyRegion,
 					&reCaptureVect, pd->objectDef,
 					contextStack.length() > 0 ? contextStack.top() : 0 );
 
-			region->ignoreRegion->tokenDefList.append( tokenDefIgn );
+			tokenDefIgn->dupOf = tokenDef;
+
+			region->ignoreOnlyRegion->tokenDefList.append( tokenDefIgn );
 			nspace->tokenDefList.append( tokenDefIgn );
 		}
+		else {
+			TokenDef *tokenDefTok = new TokenDef( name + "_tok", String(), false, ignore, join, 
+					0, $1->loc, pd->nextTokenId++, nspace, region->tokenOnlyRegion,
+					&reCaptureVect, pd->objectDef,
+					contextStack.length() > 0 ? contextStack.top() : 0 );
+
+			tokenDefTok->dupOf = tokenDef;
+
+			region->tokenOnlyRegion->tokenDefList.append( tokenDefTok );
+			nspace->tokenDefList.append( tokenDefTok );
+		}
 
 		/* This is created and pushed in the name. */
 		if ( !pd->insideRegion ) {
@@ -1074,6 +1135,14 @@ token_def_name:
 			String scannerNameIgn( name.length() + 2, "<%s>-ign", name.data );
 			addRegularDef( InputLoc(), namespaceStack.top(), scannerNameIgn, joinOrLmIgn, true );
 
+			TokenRegion *tokenRegionTok = new TokenRegion( InputLoc(), name + "_tok",
+					pd->regionList.length(), regionStack.top() );
+			regionStack.top()->childRegions.append( tokenRegionTok );
+			pd->regionList.append( tokenRegionTok );
+			JoinOrLm *joinOrLmTok = new JoinOrLm( tokenRegionTok );
+			String scannerNameTok( name.length() + 2, "<%s>-tok", name.data );
+			addRegularDef( InputLoc(), namespaceStack.top(), scannerNameTok, joinOrLmTok, true );
+			
 			/* If not inside a region, make one for the token. */
 			TokenRegion *tokenRegion = new TokenRegion( InputLoc(), name,
 					pd->regionList.length(), regionStack.top() );
@@ -1083,7 +1152,11 @@ token_def_name:
 			String scannerName( name.length() + 2, "<%s>", name.data );
 			addRegularDef( InputLoc(), namespaceStack.top(), scannerName, joinOrLm, true );
 			regionStack.push( tokenRegion );
-			tokenRegion->ignoreRegion = tokenRegionIgn;
+
+			tokenRegion->ignoreOnlyRegion = tokenRegionIgn;
+			tokenRegion->tokenOnlyRegion = tokenRegionTok;
+
+			tokenRegionIgn->isIgnoreOnly = true;
 		}
 
 		/* Reset the lable id counter. */
diff --git a/colm/parsedata.cc b/colm/parsedata.cc
index 81f5308d..f1017cae 100644
--- a/colm/parsedata.cc
+++ b/colm/parsedata.cc
@@ -1115,11 +1115,12 @@ void ParseData::createDefaultScanner()
 	name = "___DEFAULT_SCANNER_CHR";
 	defaultCharLangEl = addLangEl( this, defaultNamespace, name, LangEl::Term );
 
-	tokenDef->token = defaultCharLangEl;
+	tokenDef->tdLangEl = defaultCharLangEl;
 	defaultCharLangEl->tokenDef = tokenDef;
 }
 
-LangEl *ParseData::makeRepeatProd( Namespace *nspace, const String &repeatName, NamespaceQual *nspaceQual, const String &name )
+LangEl *ParseData::makeRepeatProd( Namespace *nspace, const String &repeatName, 
+		NamespaceQual *nspaceQual, const String &name )
 {
 	LangEl *prodName = addLangEl( this, nspace, repeatName, LangEl::NonTerm );
 	prodName->isRepeat = true;
@@ -1326,7 +1327,7 @@ void ParseData::initEmptyScanners()
 			 * in the declare pass. */
 			LangEl *lel = addLangEl( this, rootNamespace, name, LangEl::Term );
 
-			tokenDef->token = lel;
+			tokenDef->tdLangEl = lel;
 			lel->tokenDef = tokenDef;
 		}
 	}
diff --git a/colm/parsedata.h b/colm/parsedata.h
index 61b32753..a0705002 100644
--- a/colm/parsedata.h
+++ b/colm/parsedata.h
@@ -269,6 +269,8 @@ struct LangEl : public DListEl<LangEl>
 
 	Context *contextDef;
 	Context *contextIn;
+	bool preNoIgnore;
+	bool postNoIgnore;
 };
 
 struct ProdEl
diff --git a/colm/parsetree.cc b/colm/parsetree.cc
index 5a507704..f613ff3b 100644
--- a/colm/parsetree.cc
+++ b/colm/parsetree.cc
@@ -601,7 +601,7 @@ FsmGraph *TokenRegion::walk( ParseData *pd )
 	}
 	FsmGraph *retFsm = parts[0];
 
-	if ( defaultTokenDef != 0 && defaultTokenDef->token->ignore )
+	if ( defaultTokenDef != 0 && defaultTokenDef->tdLangEl->ignore )
 		error() << "ignore token cannot be a scanner's zero-length token" << endp;
 
 	/* The region is empty. Return the empty set. */
diff --git a/colm/parsetree.h b/colm/parsetree.h
index 852f11fe..b9736109 100644
--- a/colm/parsetree.h
+++ b/colm/parsetree.h
@@ -394,10 +394,11 @@ struct TokenDef
 		ReCaptureVect *pReCaptureVect, ObjectDef *objectDef, Context *contextIn )
 	: 
 		name(name), literal(literal), isLiteral(isLiteral), ignore(ignore), join(join), action(0),
-		codeBlock(codeBlock), token(0), semiLoc(semiLoc), 
+		codeBlock(codeBlock), tdLangEl(0), semiLoc(semiLoc), 
 		longestMatchId(longestMatchId), inLmSelect(false), 
 		nspace(nspace), tokenRegion(tokenRegion), objectDef(objectDef),
-		contextIn(contextIn)
+		contextIn(contextIn),
+		dupOf(dupOf), postNoIgnore(false), preNoIgnore(false)
 	{
 		if ( pReCaptureVect != 0 )
 			reCaptureVect = *pReCaptureVect;
@@ -412,7 +413,7 @@ struct TokenDef
 	Join *join;
 	Action *action;
 	CodeBlock *codeBlock;
-	LangEl *token;
+	LangEl *tdLangEl;
 	InputLoc semiLoc;
 
 	Action *setActId;
@@ -426,6 +427,10 @@ struct TokenDef
 	ReCaptureVect reCaptureVect;
 	ObjectDef *objectDef;
 	Context *contextIn;
+
+	TokenDef *dupOf;
+	bool postNoIgnore;
+	bool preNoIgnore;
 };
 
 struct LelDefList;
@@ -513,7 +518,9 @@ struct TokenRegion
 		loc(loc), name(name), id(id),
 		lmSwitchHandlesError(false), regionNameInst(0),
 		parentRegion(parentRegion), defaultTokenDef(0),
-		preEofBlock(0), ignoreRegion(0), wasEmpty(false) { }
+		preEofBlock(0), 
+		ignoreOnlyRegion(0), tokenOnlyRegion(0), 
+		wasEmpty(false), isIgnoreOnly(false) { }
 
 	/* Tree traversal. */
 	FsmGraph *walk( ParseData *pd );
@@ -547,12 +554,15 @@ struct TokenRegion
 	CodeBlock *preEofBlock;
 
 	/* Dupe of the region, containing only the ignore tokens. */
-	TokenRegion *ignoreRegion;
+	TokenRegion *ignoreOnlyRegion;
+	TokenRegion *tokenOnlyRegion;
 
 	/* We alway init empty scanners with a single token. If we had to do this
 	 * then wasEmpty is true. */
 	bool wasEmpty;
 
+	bool isIgnoreOnly;
+
 	TokenRegion *next, *prev;
 };
 
diff --git a/colm/pdabuild.cc b/colm/pdabuild.cc
index 66af8846..aba017f7 100644
--- a/colm/pdabuild.cc
+++ b/colm/pdabuild.cc
@@ -89,7 +89,9 @@ LangEl::LangEl( Namespace *nspace, const String &name, Type type )
 	predType(PredNone),
 	predValue(0),
 	contextDef(0),
-	contextIn(0)
+	contextIn(0), 
+	preNoIgnore(false),
+	postNoIgnore(false) 
 {
 }
  
@@ -479,11 +481,25 @@ void ParseData::addRegion( PdaState *tabState, PdaTrans *tabTrans, long pdaKey )
 			region = klangEl->tokenDef->tokenRegion;
 
 		if ( region != 0 ) {
-			if ( !regionVectHas( tabState->regions, region ) )
-				tabState->regions.append( region );
+			/* region. */
+			TokenRegion *scanRegion = region;
+
+			if ( klangEl->preNoIgnore )
+				scanRegion = region->tokenOnlyRegion;
+
+			if ( !regionVectHas( tabState->regions, scanRegion ) ) {
+				tabState->regions.append( scanRegion );
+			}
+
+			/* Pre-region of to state */
+			PdaState *toState = tabTrans->toState;
+			if ( !klangEl->postNoIgnore && 
+					region->ignoreOnlyRegion != 0 && 
+					!regionVectHas( toState->preRegions, region->ignoreOnlyRegion ) )
+			{
+				toState->preRegions.append( region->ignoreOnlyRegion );
+			}
 
-			if ( region->ignoreRegion != 0 && !regionVectHas( tabTrans->toState->preRegions, region->ignoreRegion ) )
-				tabTrans->toState->preRegions.append( region->ignoreRegion );
 		}
 	}
 }
@@ -1335,8 +1351,9 @@ void ParseData::makeRuntimeData()
 		long regId = reg->id+1;
 		runtimeData->regionInfo[regId].name = reg->name;
 		runtimeData->regionInfo[regId].defaultToken =
-			reg->defaultTokenDef == 0 ? -1 : reg->defaultTokenDef->token->id;
+			reg->defaultTokenDef == 0 ? -1 : reg->defaultTokenDef->tdLangEl->id;
 		runtimeData->regionInfo[regId].eofFrameId = -1;
+		runtimeData->regionInfo[regId].isIgnoreOnly = reg->isIgnoreOnly;
 
 		CodeBlock *block = reg->preEofBlock;
 		if ( block != 0 ) {
diff --git a/colm/pdacodegen.cc b/colm/pdacodegen.cc
index a259ce00..7132a609 100644
--- a/colm/pdacodegen.cc
+++ b/colm/pdacodegen.cc
@@ -356,6 +356,7 @@ void PdaCodeGen::writeRuntimeData( RuntimeData *runtimeData, PdaTables *pdaTable
 		escapeLiteralString( out, runtimeData->regionInfo[i].name );
 		out << "\", " << runtimeData->regionInfo[i].defaultToken <<
 			", " << runtimeData->regionInfo[i].eofFrameId <<
+			", " << runtimeData->regionInfo[i].isIgnoreOnly <<
 			" }";
 
 		if ( i < runtimeData->numRegions-1 )
diff --git a/colm/pdarun.c b/colm/pdarun.c
index 8b3d5f87..142017c5 100644
--- a/colm/pdarun.c
+++ b/colm/pdarun.c
@@ -406,7 +406,7 @@ void setRegion( PdaRun *pdaRun, int emptyIgnore, ParseTree *tree )
 	}
 }
 
-void ignoreTree( Program *prg, PdaRun *pdaRun, Tree *tree )
+void ignoreTree( Program *prg, FsmRun *fsmRun, PdaRun *pdaRun, Tree *tree )
 {
 	int emptyIgnore = pdaRun->accumIgnore == 0;
 
@@ -421,6 +421,9 @@ void ignoreTree( Program *prg, PdaRun *pdaRun, Tree *tree )
 
 	transferReverseCode( pdaRun, parseTree );
 
+	if ( fsmRun->preRegion >= 0 )
+		parseTree->flags |= PF_RIGHT_IGNORE;
+
 	setRegion( pdaRun, emptyIgnore, pdaRun->accumIgnore );
 }
 
@@ -536,6 +539,9 @@ static void reportParseError( Program *prg, Tree **sp, PdaRun *pdaRun )
 
 static void attachRightIgnore( Program *prg, Tree **sp, PdaRun *pdaRun, ParseTree *parseTree )
 {
+	if ( pdaRun->accumIgnore == 0 )
+		return;
+
 	if ( pdaRun->stackTop->id > 0 && pdaRun->stackTop->id < prg->rtd->firstNonTermId ) {
 		/* OK, do it */
 		debug( REALM_PARSE, "attaching right ignore\n" );
@@ -544,7 +550,28 @@ static void attachRightIgnore( Program *prg, Tree **sp, PdaRun *pdaRun, ParseTre
 		assert( ! ( parseTree->flags & PF_RIGHT_IL_ATTACHED ) );
 
 		ParseTree *accum = pdaRun->accumIgnore;
-		pdaRun->accumIgnore = 0;
+		ParseTree *lasta = 0, *use = accum;
+		while ( use != 0 && use->flags & PF_RIGHT_IGNORE ) {
+			lasta = use;
+			use = use->next;
+		}
+
+		if ( use == 0 ) {
+			/* Use it all. Note accum != 0 so non-empty. */
+			pdaRun->accumIgnore = 0;
+		}
+		else {
+			/* Got stopped. */
+			if ( lasta == 0 ) {
+				/* Use none. */
+				accum = 0;
+			}
+			else {
+				/* Use some. */
+				lasta->next = 0;
+				pdaRun->accumIgnore = use;
+			}
+		}
 
 		/* The data list needs to be extracted and reversed. The parse tree list
 		 * can remain in stack order. */
@@ -785,7 +812,7 @@ void sendIgnore( Program *prg, Tree **sp, InputStream *inputStream, FsmRun *fsmR
 	tree->tokdata = ignoreStr;
 
 	/* Send it to the pdaRun. */
-	ignoreTree( prg, pdaRun, tree );
+	ignoreTree( prg, fsmRun, pdaRun, tree );
 }
 
 
diff --git a/colm/pdarun.h b/colm/pdarun.h
index 236f8793..b8204d97 100644
--- a/colm/pdarun.h
+++ b/colm/pdarun.h
@@ -218,6 +218,7 @@ typedef struct _RegionInfo
 	const char *name;
 	long defaultToken;
 	long eofFrameId;
+	int isIgnoreOnly;;
 } RegionInfo;
 
 typedef struct _CaptureAttr
diff --git a/colm/resolve.cc b/colm/resolve.cc
index 526d3588..de548fdb 100644
--- a/colm/resolve.cc
+++ b/colm/resolve.cc
@@ -77,7 +77,7 @@ UniqueType *TypeRef::lookupTypeLiteral( ParseData *pd )
 		LiteralDictEl *ldel = nspace->literalDict.find( interp );
 
 		if ( ldel != 0 )
-			return pd->findUniqueType( TYPE_TREE, ldel->value->token );
+			return pd->findUniqueType( TYPE_TREE, ldel->value->tdLangEl );
 
 		nspace = nspace->parentNamespace;
 	}
author	Adrian Thurston <thurston@complang.org>	2012-05-22 20:33:51 -0400
committer	Adrian Thurston <thurston@complang.org>	2012-05-22 20:33:51 -0400
commit	35f086e516696d13b13d129062ae45d186e3523e (patch)
tree	f3360707423e65dbad73c203ce27deccd65d070e
parent	aa01add5ba8ea59850b4db058f6a0e53ab9c41fe (diff)
download	colm-35f086e516696d13b13d129062ae45d186e3523e.tar.gz