added a syntax for specifying no ignores

Added the keyword 'ni' (no-ignore), which can go before or after a token pattern (literal or usual). Placed before the pattern it sets the token's noPreIgnore bit; placed after, it sets the noPostIgnore bit. These bits affect the ignore scanning and attaching.
author: Adrian Thurston <thurston@complang.org> 2012-05-23 13:52:19 +0000
committer: Adrian Thurston <thurston@complang.org> 2012-05-23 13:52:19 +0000
commit: b628bd6037bea333bccadc0850707616a730275b (patch)
tree: 75795189a3707a4818471e626ce93a28941383b8
parent: 77aac5d2c534e3206fd150b4e2b29688f6230ee1 (diff)
download: colm-b628bd6037bea333bccadc0850707616a730275b.tar.gz
12 files changed, 83 insertions, 49 deletions
diff --git a/colm.vim b/colm.vim
index d65ae0c2..d8eb44a6 100644
--- a/colm.vim
+++ b/colm.vim
@@ -64,7 +64,7 @@ syntax keyword typeKeywords
 syntax keyword Keyword
 	\ reject else elsif return yield for while if
 	\ typeid in break 
-	\ new deref
+	\ new deref ni
 
 syntax match tokenName "[a-zA-Z_][a-zA-Z_0-9]*" contained
 syntax match varCapture "[a-zA-Z_][a-zA-Z_0-9]*:" 
diff --git a/colm/declare.cc b/colm/declare.cc
index bcd9a16c..28221c63 100644
--- a/colm/declare.cc
+++ b/colm/declare.cc
@@ -243,10 +243,10 @@ void Namespace::declare( ParseData *pd )
 
 			l->value->tdLangEl = newLangEl;
 
-			if ( l->value->preNoIgnore )
-				newLangEl->preNoIgnore = true;
-			if ( l->value->postNoIgnore )
-				newLangEl->postNoIgnore = true;
+			if ( l->value->noPreIgnore )
+				newLangEl->noPreIgnore = true;
+			if ( l->value->noPostIgnore )
+				newLangEl->noPostIgnore = true;
 		}
 	}
 
@@ -300,6 +300,11 @@ void Namespace::declare( ParseData *pd )
 				tokEl->contextIn = t->contextIn;
 				tokEl->tokenDef = t;
 
+				if ( t->noPreIgnore )
+					tokEl->noPreIgnore = true;
+				if ( t->noPostIgnore )
+					tokEl->noPostIgnore = true;
+
 				t->tdLangEl = tokEl;
 			}
 		}
diff --git a/colm/lmparse.kh b/colm/lmparse.kh
index 529dfcc6..469e7ec3 100644
--- a/colm/lmparse.kh
+++ b/colm/lmparse.kh
@@ -58,7 +58,7 @@ struct ColmParser
 
 	# Patterns.
 	token KW_Match, KW_Construct, KW_Parse, KW_ParseStop, KW_New, KW_MakeToken,
-		KW_MakeTree, KW_TypeId, KW_Alias, KW_Send;
+		KW_MakeTree, KW_TypeId, KW_Alias, KW_Send, KW_Ni;
 
 	token KW_Include, KW_Preeof;
 
diff --git a/colm/lmparse.kl b/colm/lmparse.kl
index 0d3e814a..17e3da81 100644
--- a/colm/lmparse.kl
+++ b/colm/lmparse.kl
@@ -838,6 +838,15 @@ prod_el_list:
 prod_el_list: 
 	final { curProdElList = new ProdElList; };
 
+nonterm opt_no_pre_ignore { bool value; };
+nonterm opt_no_post_ignore { bool value; };
+
+opt_no_pre_ignore: KW_Ni final { $$->value = true; };
+opt_no_pre_ignore:       final { $$->value = false; };
+
+opt_no_post_ignore: KW_Ni final { $$->value = true; };
+opt_no_post_ignore:       final { $$->value = false; };
+
 nonterm prod_el
 {
 	ProdEl *factor;
@@ -922,7 +931,7 @@ literal_def: KW_Literal literal_list;
 literal_list: literal_list ',' literal_item;
 literal_list: literal_item;
 
-literal_item: TK_Literal
+literal_item: opt_no_pre_ignore TK_Literal opt_no_post_ignore
 	final {
 		/* Create a name for the literal. */
 		String name( 32, "_literal_%.4x", pd->nextTokenId );
@@ -946,7 +955,7 @@ literal_item: TK_Literal
 			addRegularDef( InputLoc(), namespaceStack.top(), scannerNameTok, joinOrLmTok, true );
 
 			/* Make a new token region just for the token. */
-			TokenRegion *tokenRegion = new TokenRegion( InputLoc(), $1->data,
+			TokenRegion *tokenRegion = new TokenRegion( InputLoc(), $2->data,
 					pd->regionList.length(), regionStack.top() );
 			regionStack.top()->childRegions.append( tokenRegion );
 			pd->regionList.append( tokenRegion );
@@ -963,7 +972,7 @@ literal_item: TK_Literal
 
 		bool unusedCI;
 		String interp;
-		prepareLitString( interp, unusedCI, $1->data, $1->loc );
+		prepareLitString( interp, unusedCI, $2->data, $2->loc );
 
 		/* Look for the production's associated region. */
 		Namespace *nspace = namespaceStack.top();
@@ -971,31 +980,26 @@ literal_item: TK_Literal
 
 		LiteralDictEl *ldel = nspace->literalDict.find( interp );
 		if ( ldel != 0 )
-			error( $1->loc ) << "literal already defined in this namespace" << endp;
+			error( $2->loc ) << "literal already defined in this namespace" << endp;
 		else {
 			Join *join = new Join( new Expression( new Term( new FactorWithAug(
-				new FactorWithRep( $1->loc, new FactorWithNeg( $1->loc, new Factor(
-				new Literal( $1->loc, $1->data, 
+				new FactorWithRep( $2->loc, new FactorWithNeg( $2->loc, new Factor(
+				new Literal( $2->loc, $2->data, 
 					Literal::LitString ) ) ) ) ) ) ) );
 			
-			TokenDef *tokenDef = new TokenDef( name, $1->data, true, false, join, 
-					0, $1->loc, pd->nextTokenId++, nspace, region, 0, 0, 0 );
+			TokenDef *tokenDef = new TokenDef( name, $2->data, true, false, join, 
+					0, $2->loc, pd->nextTokenId++, nspace, region, 0, 0, 0 );
 			region->tokenDefList.append( tokenDef );
 			ldel = nspace->literalDict.insert( interp, tokenDef );
 			nspace->tokenDefList.append( tokenDef );
 
-//			if ( strcmp( interp, "%%" ) == 0 ) {
-//				tokenDef->preNoIgnore = true;
-//			}
-//			if ( strcmp( interp, ")" ) == 0 ) {
-//				tokenDef->preNoIgnore = true;
-//			}
-//			if ( strcmp( interp, "(" ) == 0 ) {
-//				tokenDef->postNoIgnore = true;
-//			}
-
-			TokenDef *tokenDefTok = new TokenDef( name + "_tok", $1->data, true, false, join, 
-					0, $1->loc, pd->nextTokenId++, nspace, region->tokenOnlyRegion, 0, 0, 0 );
+			if ( $1->value )
+				tokenDef->noPreIgnore = true;
+			if ( $3->value )
+				tokenDef->noPostIgnore = true;
+
+			TokenDef *tokenDefTok = new TokenDef( name + "_tok", $2->data, true, false, join, 
+					0, $2->loc, pd->nextTokenId++, nspace, region->tokenOnlyRegion, 0, 0, 0 );
 			tokenDefTok->dupOf = tokenDef;
 			region->tokenOnlyRegion->tokenDefList.append( tokenDefTok );
 			ldel = nspace->literalDict.insert( "|" + interp + "_tok", tokenDefTok );
@@ -1028,12 +1032,13 @@ leave_rl:
 
 token_def: 
 	token_or_ignore token_def_name obj_var_list
-	enter_rl '/' opt_rl_join leave_rl '/' opt_translate
+	enter_rl opt_no_pre_ignore '/' opt_rl_join leave_rl '/' opt_no_post_ignore  
+	opt_translate
 	final {
 		bool ignore = $1->ignore;
 		String name = $2->name;
-		Join *join = $6->join;
-		CodeBlock *transBlock = $9->transBlock;
+		Join *join = $7->join;
+		CodeBlock *transBlock = $11->transBlock;
 
 		/* Check the region if this is for an ignore. */
 		if ( ignore && !pd->insideRegion )
@@ -1058,6 +1063,11 @@ token_def:
 		region->tokenDefList.append( tokenDef );
 		nspace->tokenDefList.append( tokenDef );
 
+		if ( $5->value )
+			tokenDef->noPreIgnore = true;
+		if ( $10->value ) {cerr << "foo" << endl; 
+			tokenDef->noPostIgnore = true;}
+
 		/* All again for the ignore. */
 		if ( ignore ) {
 			TokenDef *tokenDefIgn = new TokenDef( name + "_ign", String(), false, ignore, join, 
diff --git a/colm/lmscan.rl b/colm/lmscan.rl
index cfb5530d..dbe79ca0 100644
--- a/colm/lmscan.rl
+++ b/colm/lmscan.rl
@@ -476,6 +476,7 @@ void Scanner::endSection( )
 		'context' => {token( KW_Context ); };
 		'alias' => {token( KW_Alias ); };
 		'send' => {token( KW_Send ); };
+		'ni' => {token( KW_Ni ); };
 
 		# Identifiers.
 		ident => { token( TK_Word, ts, te ); } ;
diff --git a/colm/parsedata.h b/colm/parsedata.h
index a0705002..3513d274 100644
--- a/colm/parsedata.h
+++ b/colm/parsedata.h
@@ -269,8 +269,8 @@ struct LangEl : public DListEl<LangEl>
 
 	Context *contextDef;
 	Context *contextIn;
-	bool preNoIgnore;
-	bool postNoIgnore;
+	bool noPreIgnore;
+	bool noPostIgnore;
 };
 
 struct ProdEl
@@ -290,7 +290,8 @@ struct ProdEl
 		langEl(0),
 		priorVal(priorVal),
 		type(type), 
-		objField(0) {}
+		objField(0)
+	{}
 
 	ProdEl( const InputLoc &loc, TypeRef *typeRef )
 	:
@@ -299,7 +300,9 @@ struct ProdEl
 		typeRef(typeRef), 
 		langEl(0), 
 		priorVal(0), 
-		type(ReferenceType), objField(0) {}
+		type(ReferenceType), 
+		objField(0)
+	{}
 
 	ObjField *captureField;
 	bool commit;
@@ -733,7 +736,8 @@ struct ParseData
 
 	int findIndexOff( PdaTables *pdaTables, PdaGraph *pdaGraph, PdaState *state, int &currLen );
 	void trySetTime( PdaTrans *trans, long code, long &time );
-	void addRegion( PdaState *tabState, PdaTrans *pdaTrans, long pdaKey );
+	void addRegion( PdaState *tabState, PdaTrans *pdaTrans, long pdaKey,
+			bool noPreIgnore, bool noPostIgnore );
 	PdaState *followProd( PdaState *tabState, PdaState *prodState );
 	void findFollow( AlphSet &result, PdaState *overTab, 
 			PdaState *overSrc, Definition *parentDef );
diff --git a/colm/parsetree.h b/colm/parsetree.h
index 2d39b4ee..fa0e5ab7 100644
--- a/colm/parsetree.h
+++ b/colm/parsetree.h
@@ -398,7 +398,7 @@ struct TokenDef
 		longestMatchId(longestMatchId), inLmSelect(false), 
 		nspace(nspace), tokenRegion(tokenRegion), objectDef(objectDef),
 		contextIn(contextIn),
-		dupOf(0), postNoIgnore(false), preNoIgnore(false)
+		dupOf(0), noPostIgnore(false), noPreIgnore(false)
 	{
 		if ( pReCaptureVect != 0 )
 			reCaptureVect = *pReCaptureVect;
@@ -429,8 +429,8 @@ struct TokenDef
 	Context *contextIn;
 
 	TokenDef *dupOf;
-	bool postNoIgnore;
-	bool preNoIgnore;
+	bool noPostIgnore;
+	bool noPreIgnore;
 };
 
 struct LelDefList;
diff --git a/colm/pdabuild.cc b/colm/pdabuild.cc
index aba017f7..eadbcb06 100644
--- a/colm/pdabuild.cc
+++ b/colm/pdabuild.cc
@@ -90,8 +90,8 @@ LangEl::LangEl( Namespace *nspace, const String &name, Type type )
 	predValue(0),
 	contextDef(0),
 	contextIn(0), 
-	preNoIgnore(false),
-	postNoIgnore(false) 
+	noPreIgnore(false),
+	noPostIgnore(false) 
 {
 }
  
@@ -455,7 +455,8 @@ void ParseData::pdaOrderFollow( LangEl *rootEl, PdaState *tabState,
 			trySetTime( tt->value, redCode, time );
 	
 			/* If the items token region is not recorded in the state, do it now. */
-			addRegion( expandToState, tt->value, tt->key );
+			addRegion( expandToState, tt->value, tt->key, 
+					tt->value->noPreIgnore, tt->value->noPostIgnore );
 		}
 	}
 }
@@ -469,7 +470,8 @@ bool regionVectHas( RegionVect &regVect, TokenRegion *region )
 	return false;
 }
 
-void ParseData::addRegion( PdaState *tabState, PdaTrans *tabTrans, long pdaKey )
+void ParseData::addRegion( PdaState *tabState, PdaTrans *tabTrans,
+		long pdaKey, bool noPreIgnore, bool noPostIgnore )
 {
 	LangEl *klangEl = langElIndex[pdaKey];
 	if ( klangEl != 0 && klangEl->type == LangEl::Term ) {
@@ -484,7 +486,7 @@ void ParseData::addRegion( PdaState *tabState, PdaTrans *tabTrans, long pdaKey )
 			/* region. */
 			TokenRegion *scanRegion = region;
 
-			if ( klangEl->preNoIgnore )
+			if ( klangEl->noPreIgnore )
 				scanRegion = region->tokenOnlyRegion;
 
 			if ( !regionVectHas( tabState->regions, scanRegion ) ) {
@@ -493,13 +495,12 @@ void ParseData::addRegion( PdaState *tabState, PdaTrans *tabTrans, long pdaKey )
 
 			/* Pre-region of to state */
 			PdaState *toState = tabTrans->toState;
-			if ( !klangEl->postNoIgnore && 
+			if ( !klangEl->noPostIgnore && 
 					region->ignoreOnlyRegion != 0 && 
 					!regionVectHas( toState->preRegions, region->ignoreOnlyRegion ) )
 			{
 				toState->preRegions.append( region->ignoreOnlyRegion );
 			}
-
 		}
 	}
 }
@@ -589,7 +590,8 @@ void ParseData::pdaOrderProd( LangEl *rootEl, PdaState *tabState,
 		}
 
 		/* If the items token region is not recorded in the state, do it now. */
-		addRegion( tabState, tabTrans, srcTrans->key );
+		addRegion( tabState, tabTrans, srcTrans->key, 
+				srcTrans->value->noPreIgnore, srcTrans->value->noPostIgnore );
 
 		/* Go over one in the production. */
 		pdaOrderProd( rootEl, tabTrans->toState, 
diff --git a/colm/pdagraph.cc b/colm/pdagraph.cc
index 191b7581..8f17b7a5 100644
--- a/colm/pdagraph.cc
+++ b/colm/pdagraph.cc
@@ -429,6 +429,11 @@ void PdaGraph::addInTrans( PdaTrans *destTrans, PdaTrans *srcTrans )
 
 		if ( srcTrans->toState->advanceReductions )
 			destTrans->toState->advanceReductions = true;
+
+		if ( srcTrans->noPreIgnore )
+			destTrans->noPreIgnore = true;
+		if ( srcTrans->noPostIgnore )
+			destTrans->noPostIgnore = true;
 	}
 }
 
diff --git a/colm/pdagraph.h b/colm/pdagraph.h
index c021ddde..dc11b3e1 100644
--- a/colm/pdagraph.h
+++ b/colm/pdagraph.h
@@ -192,7 +192,9 @@ struct PdaTrans
 		toState(0), 
 		isShift(false), 
 		isShiftReduce(false),
-		shiftPrior(0)
+		shiftPrior(0),
+		noPreIgnore(false),
+		noPostIgnore(false)
 	{ }
 
 	PdaTrans( const PdaTrans &other ) :
@@ -202,7 +204,9 @@ struct PdaTrans
 		isShiftReduce(other.isShiftReduce),
 		shiftPrior(other.shiftPrior),
 		reductions(other.reductions),
-		commits(other.commits)
+		commits(other.commits),
+		noPreIgnore(false),
+		noPostIgnore(false)
 	{ }
 
 	long lowKey;
@@ -231,6 +235,9 @@ struct PdaTrans
 
 	LongSet commits;
 	LongSet afterShiftCommits;
+
+	bool noPreIgnore;
+	bool noPostIgnore;
 };
 
 /* In transition list. Like DList except only has head pointers, which is all
diff --git a/test/ignore3.exp b/test/ignore3.exp
index 3bba001a..e5d19d3b 100644
--- a/test/ignore3.exp
+++ b/test/ignore3.exp
@@ -2,7 +2,7 @@ item: .a .
 item: .b .
 item: .c .
 item: .(   d   ) .
-innr: .(   .d   .) .
+innr: .(.   d   .) .
 item: .e .
 item: .f .
 item: .g.
diff --git a/test/ignore3.lm b/test/ignore3.lm
index 3286e9e3..df3ba687 100644
--- a/test/ignore3.lm
+++ b/test/ignore3.lm
@@ -1,7 +1,7 @@
 lex start
 {
 	ignore /space+/
-	literal '*', '(', ')', '!', ';'
+	literal '*', '(' ni, ni ')', '!', ';'
 	token id /[a-zA-Z_0-9]+/ 
 }
author	Adrian Thurston <thurston@complang.org>	2012-05-23 13:52:19 +0000
committer	Adrian Thurston <thurston@complang.org>	2012-05-23 13:52:19 +0000
commit	b628bd6037bea333bccadc0850707616a730275b (patch)
tree	75795189a3707a4818471e626ce93a28941383b8
parent	77aac5d2c534e3206fd150b4e2b29688f6230ee1 (diff)
download	colm-b628bd6037bea333bccadc0850707616a730275b.tar.gz