experimenting with use of a nonterm for collecting ignores.

A production can now declare that it should collect ignores from a region. A collect-ignore region is created for this, but the states from the ignore-only version of the region are used. When the scanner fails to produce a token from the collect-ignore region, the collect-ignore token is generated and accepted by the fsm. Still needed: take it out of the data tree on reductions and put it into an ignore list, then reverse this during unparsing.
author: Adrian Thurston <thurston@complang.org> 2012-05-24 20:14:06 +0000
committer: Adrian Thurston <thurston@complang.org> 2012-05-24 20:14:06 +0000
commit: 64c59bd0dd83af9f329efeaf4e444c28caec1353 (patch)
tree: 949f55bace0eedf82c2952f2dc6c1bebaf1cf082
parent: f55263936dd172463e047ab52c0717ef8b964198 (diff)
download: colm-64c59bd0dd83af9f329efeaf4e444c28caec1353.tar.gz
13 files changed, 233 insertions, 33 deletions
diff --git a/colm/analysis.cc b/colm/analysis.cc
index a4b4f409..09bd7dbe 100644
--- a/colm/analysis.cc
+++ b/colm/analysis.cc
@@ -77,6 +77,7 @@ void ParseData::semanticAnalysis()
 
 	makeTerminalWrappers();
 	makeEofElements();
+	makeIgnoreCollectors();
 
 	/*
 	 * Parsers
diff --git a/colm/bytecode.h b/colm/bytecode.h
index 2151544d..aa395d6f 100644
--- a/colm/bytecode.h
+++ b/colm/bytecode.h
@@ -360,6 +360,8 @@ typedef unsigned char uchar;
 #define PF_LEFT_IL_ATTACHED    0x0400
 #define PF_RIGHT_IL_ATTACHED   0x0800
 
+#define PF_CI                  0x1000  /* Parse tree node is a collect-ignore token. */
+
 #define AF_LEFT_IGNORE   0x0100
 #define AF_RIGHT_IGNORE  0x0200
 
diff --git a/colm/lmparse.kl b/colm/lmparse.kl
index 6c652595..b58821f4 100644
--- a/colm/lmparse.kl
+++ b/colm/lmparse.kl
@@ -369,14 +369,14 @@ nonterm define_prod
 	Definition *definition;
 };
 
-define_prod: '[' prod_el_list ']' opt_commit opt_reduce_code opt_prec
+define_prod: opt_collect_ignore '[' prod_el_list ']' opt_commit opt_reduce_code opt_prec
 	final {
-		const InputLoc &loc = $1->loc;
+		const InputLoc &loc = $2->loc;
 		//const String &name = curDefineId;
 		ProdElList *prodElList = curProdElList;
-		bool commit = $4->commit;
-		CodeBlock *redBlock = $5->codeBlock;
-		LangEl *predOf = $6->predOf;
+		bool commit = $5->commit;
+		CodeBlock *redBlock = $6->codeBlock;
+		LangEl *predOf = $7->predOf;
 
 		//Namespace *nspace = namespaceStack.top();
 
@@ -388,6 +388,15 @@ define_prod: '[' prod_el_list ']' opt_commit opt_reduce_code opt_prec
 		pd->prodList.append( newDef );
 
 		$$->definition = newDef;
+
+		if ( $1->value ) {
+			for ( RegionList::Iter r = pd->regionList; r.lte(); r++ ) {
+				if ( strcmp( r->name.data, $1->region.data ) == 0 ) {
+					cerr << "assigning collect ignore " << r->name << endl;
+					newDef->collectIgnoreRegion = r;
+				}
+			}
+		}
 	};
 
 obj_var_list: obj_var_list var_def
@@ -490,6 +499,15 @@ region_head:
 		String scannerNameIgn( $2->data.length() + 2, "<%s>-ign", $2->data.data );
 		addRegularDef( InputLoc(), namespaceStack.top(), scannerNameIgn, joinOrLmIgn, true );
 
+		/* Just for collect ignores. Will use the ignore-only start state. */
+		TokenRegion *tokenRegionCi = new TokenRegion( InputLoc(), $2->data + "_ci" ,
+				pd->regionList.length(), regionStack.top() );
+		regionStack.top()->childRegions.append( tokenRegionCi );
+		pd->regionList.append( tokenRegionCi );
+		JoinOrLm *joinOrLmCi = new JoinOrLm( tokenRegionCi );
+		String scannerNameCi( $2->data.length() + 2, "<%s>-ci", $2->data.data );
+		addRegularDef( InputLoc(), namespaceStack.top(), scannerNameCi, joinOrLmCi, true );
+
 		/* Just for tokens. */
 		TokenRegion *tokenRegionTok = new TokenRegion( InputLoc(), $2->data + "_tok" ,
 				pd->regionList.length(), regionStack.top() );
@@ -511,8 +529,16 @@ region_head:
 
 		tokenRegion->ignoreOnlyRegion = tokenRegionIgn;
 		tokenRegion->tokenOnlyRegion = tokenRegionTok;
+		tokenRegion->ciRegion = tokenRegionCi;
 
+		tokenRegion->isFullRegion = true;
 		tokenRegionIgn->isIgnoreOnly = true;
+		tokenRegionCi->isCiOnly = true;
+		tokenRegionTok->isTokenOnly = true;
+
+		tokenRegionIgn->derivedFrom = tokenRegion;
+		tokenRegionCi->derivedFrom = tokenRegion;
+		tokenRegionTok->derivedFrom = tokenRegion;
 	};
 
 namespace_def: 
@@ -847,6 +873,15 @@ opt_no_pre_ignore:       final { $$->value = false; };
 opt_no_post_ignore: KW_Ni final { $$->value = true; };
 opt_no_post_ignore:       final { $$->value = false; };
 
+nonterm class opt_collect_ignore
+{
+	bool value;     /* True when the "KW_Ni TK_Word" prefix was present. */
+	String region;  /* Text of that TK_Word; only assigned when value is true. */
+};
+
+opt_collect_ignore: KW_Ni TK_Word final { $$->value = true; $$->region = $2->data; };
+opt_collect_ignore:               final { $$->value = false; };
+
 nonterm prod_el
 {
 	ProdEl *factor;
@@ -938,6 +973,7 @@ literal_item: opt_no_pre_ignore TK_Literal opt_no_post_ignore
 
 		bool insideRegion = regionStack.top() != pd->rootRegion;
 		if ( !insideRegion ) {
+			/* Just for ignores. */
 			TokenRegion *tokenRegionIgn = new TokenRegion( InputLoc(), name + "_ign",
 					pd->regionList.length(), regionStack.top() );
 			regionStack.top()->childRegions.append( tokenRegionIgn );
@@ -946,6 +982,16 @@ literal_item: opt_no_pre_ignore TK_Literal opt_no_post_ignore
 			String scannerNameIgn( name.length() + 2, "<%s>-ign", name.data );
 			addRegularDef( InputLoc(), namespaceStack.top(), scannerNameIgn, joinOrLmIgn, true );
 
+			/* Just for collect ignores. Will use the ignore-only start state. */
+			TokenRegion *tokenRegionCi = new TokenRegion( InputLoc(), name + "_ci",
+					pd->regionList.length(), regionStack.top() );
+			regionStack.top()->childRegions.append( tokenRegionCi );
+			pd->regionList.append( tokenRegionCi );
+			JoinOrLm *joinOrLmCi = new JoinOrLm( tokenRegionCi );
+			String scannerNameCi( name.length() + 2, "<%s>-ci", name.data );
+			addRegularDef( InputLoc(), namespaceStack.top(), scannerNameCi, joinOrLmCi, true );
+
+			/* Just for tokens. */
 			TokenRegion *tokenRegionTok = new TokenRegion( InputLoc(), name + "_tok",
 					pd->regionList.length(), regionStack.top() );
 			regionStack.top()->childRegions.append( tokenRegionTok );
@@ -966,8 +1012,16 @@ literal_item: opt_no_pre_ignore TK_Literal opt_no_post_ignore
 
 			tokenRegion->ignoreOnlyRegion = tokenRegionIgn;
 			tokenRegion->tokenOnlyRegion = tokenRegionTok;
+			tokenRegion->ciRegion = tokenRegionCi;
 
+			tokenRegion->isFullRegion = true;
 			tokenRegionIgn->isIgnoreOnly = true;
+			tokenRegionCi->isCiOnly = true;
+			tokenRegionTok->isTokenOnly = true;
+
+			tokenRegionIgn->derivedFrom = tokenRegion;
+			tokenRegionCi->derivedFrom = tokenRegion;
+			tokenRegionTok->derivedFrom = tokenRegion;
 		}
 
 		bool unusedCI;
@@ -1145,6 +1199,16 @@ token_def_name:
 			String scannerNameIgn( name.length() + 2, "<%s>-ign", name.data );
 			addRegularDef( InputLoc(), namespaceStack.top(), scannerNameIgn, joinOrLmIgn, true );
 
+			/* Just for explicitly collecting ignores. */
+			TokenRegion *tokenRegionCi = new TokenRegion( InputLoc(), name + "_ci",
+					pd->regionList.length(), regionStack.top() );
+			regionStack.top()->childRegions.append( tokenRegionCi );
+			pd->regionList.append( tokenRegionCi );
+			JoinOrLm *joinOrLmCi = new JoinOrLm( tokenRegionCi );
+			String scannerNameCi( name.length() + 2, "<%s>-ci", name.data );
+			addRegularDef( InputLoc(), namespaceStack.top(), scannerNameCi, joinOrLmCi, true );
+
+			/* Just for tokens. */
 			TokenRegion *tokenRegionTok = new TokenRegion( InputLoc(), name + "_tok",
 					pd->regionList.length(), regionStack.top() );
 			regionStack.top()->childRegions.append( tokenRegionTok );
@@ -1165,8 +1229,16 @@ token_def_name:
 
 			tokenRegion->ignoreOnlyRegion = tokenRegionIgn;
 			tokenRegion->tokenOnlyRegion = tokenRegionTok;
+			tokenRegion->ciRegion = tokenRegionCi;
 
+			tokenRegion->isFullRegion = true;
 			tokenRegionIgn->isIgnoreOnly = true;
+			tokenRegionCi->isCiOnly = true;
+			tokenRegionTok->isTokenOnly = true;
+
+			tokenRegionIgn->derivedFrom = tokenRegion;
+			tokenRegionCi->derivedFrom = tokenRegion;
+			tokenRegionTok->derivedFrom = tokenRegion;
 		}
 
 		/* Reset the lable id counter. */
diff --git a/colm/parsedata.h b/colm/parsedata.h
index 3513d274..8d79e662 100644
--- a/colm/parsedata.h
+++ b/colm/parsedata.h
@@ -117,11 +117,13 @@ struct Definition
 	enum Type { Production };
 
 	Definition( const InputLoc &loc, LangEl *prodName, ProdElList *prodElList, 
-			bool prodCommit, CodeBlock *redBlock, int prodId, int prodNum, Type type ) : 
+			bool prodCommit, CodeBlock *redBlock, int prodId, int prodNum, Type type )
+	: 
 		loc(loc), prodName(prodName), prodElList(prodElList), 
 		prodCommit(prodCommit), redBlock(redBlock), prodId(prodId), prodNum(prodNum),
 		type(type), fsm(0), fsmLength(0), uniqueEmptyLeader(0), 
-		isLeftRec(false), localFrame(0), lhsField(0), predOf(0) {}
+		isLeftRec(false), localFrame(0), lhsField(0), predOf(0),
+		collectIgnoreRegion(0) {}
 
 	InputLoc loc;
 	LangEl *prodName;
@@ -151,6 +153,8 @@ struct Definition
 	LangEl *predOf;
 
 	UnsignedCharVect copy;
+
+	TokenRegion *collectIgnoreRegion;
 };
 
 struct CmpDefById
@@ -271,6 +275,8 @@ struct LangEl : public DListEl<LangEl>
 	Context *contextIn;
 	bool noPreIgnore;
 	bool noPostIgnore;
+	bool isCI;
+	TokenRegion *ciRegion;
 };
 
 struct ProdEl
@@ -318,7 +324,7 @@ struct ProdEl
 
 struct ProdElList : public DList<ProdEl>
 {
-	PdaGraph *walk( ParseData *pd );
+	PdaGraph *walk( ParseData *pd, Definition *prod );
 };
 
 /* This should be renamed. It is a literal string in a type reference. */
@@ -697,6 +703,7 @@ struct ParseData
 	void makeLangElNames();
 	void makeTerminalWrappers();
 	void makeEofElements();
+	void makeIgnoreCollectors();
 	void setPrecedence();
 
 	void typeDeclaration();
diff --git a/colm/parsetree.cc b/colm/parsetree.cc
index f613ff3b..aca5b179 100644
--- a/colm/parsetree.cc
+++ b/colm/parsetree.cc
@@ -206,8 +206,10 @@ FsmGraph *VarDef::walk( ParseData *pd )
 
 	/* If the name of the variable is referenced then add the entry point to
 	 * the graph. */
-	if ( pd->curNameInst->numRefs > 0 )
+	if ( pd->curNameInst->numRefs > 0 ) {
+		std::cerr << "entry: " << pd->curNameInst->name << " " << pd->curNameInst->id << endl;
 		rtnVal->setEntry( pd->curNameInst->id, rtnVal->startState );
+	}
 	
 	/* Pop the name scope. */
 	pd->popNameScope( nameFrame );
diff --git a/colm/parsetree.h b/colm/parsetree.h
index fa0e5ab7..9320f412 100644
--- a/colm/parsetree.h
+++ b/colm/parsetree.h
@@ -519,8 +519,15 @@ struct TokenRegion
 		lmSwitchHandlesError(false), regionNameInst(0),
 		parentRegion(parentRegion), defaultTokenDef(0),
 		preEofBlock(0), 
-		ignoreOnlyRegion(0), tokenOnlyRegion(0), 
-		wasEmpty(false), isIgnoreOnly(false) { }
+		ignoreOnlyRegion(0), tokenOnlyRegion(0), ciRegion(0),
+		wasEmpty(false), 
+		isFullRegion(false),
+		isIgnoreOnly(false), 
+		isTokenOnly(false), 
+		isCiOnly(false),
+		ciLel(0),
+		derivedFrom(0)
+	{ }
 
 	/* Tree traversal. */
 	FsmGraph *walk( ParseData *pd );
@@ -556,12 +563,19 @@ struct TokenRegion
 	/* Dupe of the region, containing only the ignore tokens. */
 	TokenRegion *ignoreOnlyRegion;
 	TokenRegion *tokenOnlyRegion;
+	TokenRegion *ciRegion;
 
 	/* We alway init empty scanners with a single token. If we had to do this
 	 * then wasEmpty is true. */
 	bool wasEmpty;
 
+	bool isFullRegion;
 	bool isIgnoreOnly;
+	bool isTokenOnly;
+	bool isCiOnly;
+
+	LangEl *ciLel;
+	TokenRegion *derivedFrom;
 
 	TokenRegion *next, *prev;
 };
diff --git a/colm/pdabuild.cc b/colm/pdabuild.cc
index eadbcb06..de0bbac8 100644
--- a/colm/pdabuild.cc
+++ b/colm/pdabuild.cc
@@ -91,22 +91,37 @@ LangEl::LangEl( Namespace *nspace, const String &name, Type type )
 	contextDef(0),
 	contextIn(0), 
 	noPreIgnore(false),
-	noPostIgnore(false) 
+	noPostIgnore(false),
+	isCI(false),
+	ciRegion(0)
 {
 }
  
-PdaGraph *ProdElList::walk( ParseData *pd )
+PdaGraph *ProdElList::walk( ParseData *pd, Definition *prod )
 {
 	PdaGraph *prodFsm = new PdaGraph();
 	PdaState *last = prodFsm->addState();
 	prodFsm->setStartState( last );
 
+	if ( prod->collectIgnoreRegion != 0 ) {
+		cerr << "production " << prod->data << " has collect ignore region " << 
+				prod->collectIgnoreRegion->name << endl;
+
+		/* Use the IGNORE TOKEN lang el for the region. */
+		long value = prod->collectIgnoreRegion->ciLel->id;
+
+		PdaState *newState = prodFsm->addState();
+		PdaTrans *newTrans = prodFsm->appendNewTrans( last, newState, value, value );
+
+		newTrans->isShift = true;
+		newTrans->shiftPrior = 0; // WAT
+		last = newState;
+	}
+
 	int prodLength = 0;
 	for ( Iter prodEl = first(); prodEl.lte(); prodEl++, prodLength++ ) {
 		//PdaGraph *itemFsm = prodEl->walk( pd );
-		long value = 0;
-
-		value = prodEl->langEl->id;
+		long value = prodEl->langEl->id;
 
 		PdaState *newState = prodFsm->addState();
 		PdaTrans *newTrans = prodFsm->appendNewTrans( last, newState, value, value );
@@ -282,7 +297,7 @@ void ParseData::makeProdFsms()
 
 	/* Build FSMs for all production language elements. */
 	for ( DefList::Iter prod = prodList; prod.lte(); prod++ )
-		prod->fsm = prod->prodElList->walk( this );
+		prod->fsm = prod->prodElList->walk( this, prod );
 
 	makeNonTermFirstSets();
 	makeFirstSets();
@@ -473,20 +488,25 @@ bool regionVectHas( RegionVect &regVect, TokenRegion *region )
 void ParseData::addRegion( PdaState *tabState, PdaTrans *tabTrans,
 		long pdaKey, bool noPreIgnore, bool noPostIgnore )
 {
-	LangEl *klangEl = langElIndex[pdaKey];
-	if ( klangEl != 0 && klangEl->type == LangEl::Term ) {
+	LangEl *langEl = langElIndex[pdaKey];
+	if ( langEl != 0 && langEl->type == LangEl::Term ) {
 		TokenRegion *region = 0;
 
 		/* If it is not the eof, then use the region associated 
 		 * with the token definition. */
-		if ( !klangEl->isEOF && klangEl->tokenDef != 0 )
-			region = klangEl->tokenDef->tokenRegion;
+		if ( langEl->isCI ) {
+			cerr << "isCI" << endl;
+			region = langEl->ciRegion->ciRegion;
+		}
+		else if ( !langEl->isEOF && langEl->tokenDef != 0 ) {
+			region = langEl->tokenDef->tokenRegion;
+		}
 
 		if ( region != 0 ) {
 			/* region. */
 			TokenRegion *scanRegion = region;
 
-			if ( klangEl->noPreIgnore )
+			if ( langEl->noPreIgnore )
 				scanRegion = region->tokenOnlyRegion;
 
 			if ( !regionVectHas( tabState->regions, scanRegion ) ) {
@@ -495,7 +515,7 @@ void ParseData::addRegion( PdaState *tabState, PdaTrans *tabTrans,
 
 			/* Pre-region of to state */
 			PdaState *toState = tabTrans->toState;
-			if ( !klangEl->noPostIgnore && 
+			if ( !langEl->noPostIgnore && 
 					region->ignoreOnlyRegion != 0 && 
 					!regionVectHas( toState->preRegions, region->ignoreOnlyRegion ) )
 			{
@@ -859,7 +879,6 @@ void ParseData::computeAdvanceReductions( LangEl *langEl, PdaGraph *pdaGraph )
 	}
 }
 
-
 void ParseData::verifyParseStopGrammar( LangEl *langEl, PdaGraph *pdaGraph )
 {
 	/* Get the entry into the graph and traverse over the root. The resulting
@@ -1356,6 +1375,8 @@ void ParseData::makeRuntimeData()
 			reg->defaultTokenDef == 0 ? -1 : reg->defaultTokenDef->tdLangEl->id;
 		runtimeData->regionInfo[regId].eofFrameId = -1;
 		runtimeData->regionInfo[regId].isIgnoreOnly = reg->isIgnoreOnly;
+		runtimeData->regionInfo[regId].isCiOnly = reg->isCiOnly;
+		runtimeData->regionInfo[regId].ciLelId = reg->isCiOnly ? reg->derivedFrom->ciLel->id : 0;
 
 		CodeBlock *block = reg->preEofBlock;
 		if ( block != 0 ) {
diff --git a/colm/pdacodegen.cc b/colm/pdacodegen.cc
index 7132a609..9e3dca47 100644
--- a/colm/pdacodegen.cc
+++ b/colm/pdacodegen.cc
@@ -357,6 +357,8 @@ void PdaCodeGen::writeRuntimeData( RuntimeData *runtimeData, PdaTables *pdaTable
 		out << "\", " << runtimeData->regionInfo[i].defaultToken <<
 			", " << runtimeData->regionInfo[i].eofFrameId <<
 			", " << runtimeData->regionInfo[i].isIgnoreOnly <<
+			", " << runtimeData->regionInfo[i].isCiOnly <<
+			", " << runtimeData->regionInfo[i].ciLelId <<
 			" }";
 
 		if ( i < runtimeData->numRegions-1 )
diff --git a/colm/pdarun.c b/colm/pdarun.c
index e21ca5fa..d6886dc5 100644
--- a/colm/pdarun.c
+++ b/colm/pdarun.c
@@ -893,6 +893,44 @@ static void sendIgnoreTree( Program *prg, Tree **sp, PdaRun *pdaRun, FsmRun *fsm
 	ignoreTree2( prg, pdaRun, tree );
 }
 
+/* Send a collect-ignore (CI) token: build a token for lang el `id` located at the
+ * current input position, flag its parse tree PF_CI and make it the parse input. */
+static void sendCi( Program *prg, Tree **sp, InputStream *inputStream, FsmRun *fsmRun, PdaRun *pdaRun, int id )
+{
+	debug( REALM_PARSE, "token: CI\n" );
+
+	int emptyIgnore = pdaRun->accumIgnore == 0;
+
+	/* Make the token data. */
+	Head *tokdata = headAllocate( prg );
+	tokdata->location = locationAllocate( prg );
+	tokdata->location->line = inputStream->line;
+	tokdata->location->column = inputStream->column;
+	tokdata->location->byte = inputStream->byte;
+
+	debug( REALM_PARSE, "token: %s  text: %.*s\n",
+		prg->rtd->lelInfo[id].name,
+		stringLength(tokdata), stringData(tokdata) );
+
+	updatePosition( inputStream, fsmRun->tokstart, tokdata->length );
+
+	Kid *input = makeTokenWithData( prg, pdaRun, fsmRun, inputStream, id, tokdata );
+
+	incrementSteps( pdaRun );
+
+	ParseTree *parseTree = parseTreeAllocate( prg );
+	parseTree->id = input->tree->id;
+	parseTree->shadow = input;
+
+	parseTree->flags |= PF_CI;
+	pdaRun->parseInput = parseTree;
+
+	/* Store any alternate scanning region. NOTE(review): input has already been
+	 * dereferenced above, so the null test below cannot guard it -- confirm. */
+	if ( input != 0 && pdaRun->cs >= 0 )
+		setRegion( pdaRun, emptyIgnore, parseTree );
+}
+
 
 static void sendEof( Program *prg, Tree **sp, InputStream *inputStream, FsmRun *fsmRun, PdaRun *pdaRun )
 {
@@ -1158,12 +1196,22 @@ case PcrStart:
 		 * */
 		pdaRun->tokenId = scanToken( prg, pdaRun, fsmRun, inputStream );
 
-		if ( pdaRun->tokenId == SCAN_ERROR && fsmRun->preRegion >= 0 ) {
-			fsmRun->preRegion = -1;
-			fsmRun->cs = fsmRun->ncs;
-			debug( REALM_PARSE,  "moving from pre region to main region: %s\n",
-				prg->rtd->regionInfo[fsmRun->region].name );
-			continue;
+		if ( pdaRun->tokenId == SCAN_ERROR ) {
+			if ( fsmRun->preRegion >= 0 ) {
+				fsmRun->preRegion = -1;
+				fsmRun->cs = fsmRun->ncs;
+				debug( REALM_PARSE,  "moving from pre region to main region: %s\n",
+					prg->rtd->regionInfo[fsmRun->region].name );
+				continue;
+			}
+		}
+
+		if ( ( pdaRun->tokenId == SCAN_ERROR /*|| pdaRun->tokenId == SCAN_LANG_EL*/ ) &&
+				( prg->rtd->regionInfo[fsmRun->region].ciLelId > 0 ) )
+		{
+			debug( REALM_PARSE, "sending a collect ignore\n" );
+			sendCi( prg, sp, inputStream, fsmRun, pdaRun, prg->rtd->regionInfo[fsmRun->region].ciLelId );
+			goto yes;
 		}
 
 		if ( pdaRun->tokenId == SCAN_TRY_AGAIN_LATER ) {
@@ -1285,6 +1333,7 @@ case PcrGeneration:
 			/* Is a plain token. */
 			sendToken( prg, sp, inputStream, fsmRun, pdaRun, pdaRun->tokenId );
 		}
+yes:
 
 		if ( pdaRun->parseInput != 0 )
 			transferReverseCode( pdaRun, pdaRun->parseInput );
@@ -1858,6 +1907,7 @@ again:
 		child = last = 0;
 		dataChild = dataLast = 0;
 		for ( r = 0; r < rhsLen; r++ ) {
+
 			/* The child. */
 			child = pdaRun->stackTop;
 			dataChild = child->shadow;
@@ -1872,6 +1922,11 @@ again:
 			child->next = last;
 			dataChild->next = dataLast;
 
+//			if ( child->flags & PF_CI ) {
+//				debug( REALM_PARSE, "advancing over CI\n" );
+//				dataChild = dataChild->next;
+//			}
+
 			/* Track last for reversal. */
 			last = child;
 			dataLast = dataChild;
diff --git a/colm/pdarun.h b/colm/pdarun.h
index b8204d97..4ab648a6 100644
--- a/colm/pdarun.h
+++ b/colm/pdarun.h
@@ -218,7 +218,9 @@ typedef struct _RegionInfo
 	const char *name;
 	long defaultToken;
 	long eofFrameId;
-	int isIgnoreOnly;;
+	int isIgnoreOnly;
+	int isCiOnly;
+	int ciLelId;
 } RegionInfo;
 
 typedef struct _CaptureAttr
diff --git a/colm/redbuild.cc b/colm/redbuild.cc
index eaad21fa..9a196316 100644
--- a/colm/redbuild.cc
+++ b/colm/redbuild.cc
@@ -559,7 +559,12 @@ void RedFsmBuild::makeEntryPoints()
 		if ( reg->regionNameInst == 0 )
 			addRegionToEntry( reg->id, pd->defaultRegion->id );
 		else {
-			NameInst *regionName = reg->regionNameInst->parent;
+			TokenRegion *use = reg;
+
+			if ( use->isCiOnly )
+				use = use->derivedFrom->ignoreOnlyRegion;
+
+			NameInst *regionName = use->regionNameInst->parent;
 			addRegionToEntry( reg->id, regionName->id );
 		}
 	}
diff --git a/colm/redfsm.cc b/colm/redfsm.cc
index d3a65b7c..911488ad 100644
--- a/colm/redfsm.cc
+++ b/colm/redfsm.cc
@@ -1098,6 +1098,7 @@ FsmTables *RedFsm::makeFsmTables()
 
 	pos = 1;
 	for ( RegionToEntry::Iter en = regionToEntry; en.lte(); en++ ) {
+		std::cerr << "en: " << *en << std::endl;
 		/* Find the entry state from the entry id. */
 		RedEntryMapEl *entryMapEl = redEntryMap.find( *en );
 		
diff --git a/colm/resolve.cc b/colm/resolve.cc
index de548fdb..0cc59dcd 100644
--- a/colm/resolve.cc
+++ b/colm/resolve.cc
@@ -761,6 +761,23 @@ void ParseData::makeEofElements()
 	}
 }
 
+/* Create a collect-ignore terminal, named "_ign_<region>", for every full
+ * region and cross-link the two. The trace is written to stderr. */
+void ParseData::makeIgnoreCollectors()
+{
+	for ( RegionList::Iter region = regionList; region.lte(); region++ ) {
+		if ( region->isFullRegion ) {
+			std::cerr << "region: " << region->name << endl;
+			String name( region->name.length() + 5, "_ign_%s", region->name.data );
+			LangEl *ignLel = new LangEl( rootNamespace, name, LangEl::Term );
+			langEls.append( ignLel );
+			ignLel->isCI = true;
+			ignLel->ciRegion = region;
+			region->ciLel = ignLel;
+		}
+	}
+}
+
 void ParseData::typeResolve()
 {
 	/*
@@ -786,5 +803,4 @@ void ParseData::typeResolve()
 	 * productions. They get tacked onto the end of the list of productions.
 	 * Doing it at the end results processing a growing list. */
 	resolveProductionEls();
-
 }
author	Adrian Thurston <thurston@complang.org>	2012-05-24 20:14:06 +0000
committer	Adrian Thurston <thurston@complang.org>	2012-05-24 20:14:06 +0000
commit	64c59bd0dd83af9f329efeaf4e444c28caec1353 (patch)
tree	949f55bace0eedf82c2952f2dc6c1bebaf1cf082
parent	f55263936dd172463e047ab52c0717ef8b964198 (diff)
download	colm-64c59bd0dd83af9f329efeaf4e444c28caec1353.tar.gz