summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAdrian Thurston <thurston@complang.org>2012-05-22 20:33:51 -0400
committerAdrian Thurston <thurston@complang.org>2012-05-22 20:33:51 -0400
commit35f086e516696d13b13d129062ae45d186e3523e (patch)
treef3360707423e65dbad73c203ce27deccd65d070e
parentaa01add5ba8ea59850b4db058f6a0e53ab9c41fe (diff)
downloadcolm-35f086e516696d13b13d129062ae45d186e3523e.tar.gz
improvements to ignore handling in the parser
Every region now also has a duplicate scanning region that is only for tokens. The duplicate ignores and tokens generate the original tokens through a TokenDef ignore mechanism. Can turn off post-ignore parsing and pre-ignore parsing on a token-by-token basis. Probably want to move it into the productions and specify it there. Currently don't have a specification mechanism. If an ignore is a post-token ignore it is not right-attached.
-rw-r--r--colm/bytecode.h12
-rw-r--r--colm/declare.cc50
-rw-r--r--colm/fsmcodegen.cc16
-rw-r--r--colm/fsmexec.cc8
-rw-r--r--colm/lmparse.kl91
-rw-r--r--colm/parsedata.cc7
-rw-r--r--colm/parsedata.h2
-rw-r--r--colm/parsetree.cc2
-rw-r--r--colm/parsetree.h20
-rw-r--r--colm/pdabuild.cc29
-rw-r--r--colm/pdacodegen.cc1
-rw-r--r--colm/pdarun.c33
-rw-r--r--colm/pdarun.h1
-rw-r--r--colm/resolve.cc2
14 files changed, 214 insertions, 60 deletions
diff --git a/colm/bytecode.h b/colm/bytecode.h
index bb639e94..2151544d 100644
--- a/colm/bytecode.h
+++ b/colm/bytecode.h
@@ -340,20 +340,22 @@ typedef unsigned char uchar;
*/
/* A tree that has been generated by a termDup. */
-#define PF_TERM_DUP 0x0001
+#define PF_TERM_DUP 0x0001
/* Has been processed by the commit function. All children have also been
* processed. */
-#define PF_COMMITTED 0x0002
+#define PF_COMMITTED 0x0002
/* Created by a token generation action, not made from the input. */
-#define PF_ARTIFICIAL 0x0004
+#define PF_ARTIFICIAL 0x0004
/* Named node from a pattern or constructor. */
-#define PF_NAMED 0x0008
+#define PF_NAMED 0x0008
/* There is reverse code associated with this tree node. */
-#define PF_HAS_RCODE 0x0010
+#define PF_HAS_RCODE 0x0010
+
+#define PF_RIGHT_IGNORE 0x0020
#define PF_LEFT_IL_ATTACHED 0x0400
#define PF_RIGHT_IL_ATTACHED 0x0800
diff --git a/colm/declare.cc b/colm/declare.cc
index 2b2dfc70..bcd9a16c 100644
--- a/colm/declare.cc
+++ b/colm/declare.cc
@@ -228,13 +228,26 @@ void Namespace::declare( ParseData *pd )
g->declare( pd, this );
for ( LiteralDict::Iter l = literalDict; l.lte(); l++ ) {
- /* Create a token for the literal. */
- LangEl *newLangEl = declareLangEl( pd, this, l->value->name, LangEl::Term );
- newLangEl->lit = l->value->literal;
- newLangEl->isLiteral = true;
- newLangEl->tokenDef = l->value;
+ if ( l->value->dupOf != 0 ) {
+ /* Duplicate of another. Use the lang el of that token. */
+ assert( l->value->dupOf->tdLangEl != 0 );
+ l->value->tdLangEl = l->value->dupOf->tdLangEl;
+ }
+ else {
+ /* Original. Create a token for the literal. */
+ LangEl *newLangEl = declareLangEl( pd, this, l->value->name, LangEl::Term );
+
+ newLangEl->lit = l->value->literal;
+ newLangEl->isLiteral = true;
+ newLangEl->tokenDef = l->value;
+
+ l->value->tdLangEl = newLangEl;
- l->value->token = newLangEl;
+ if ( l->value->preNoIgnore )
+ newLangEl->preNoIgnore = true;
+ if ( l->value->postNoIgnore )
+ newLangEl->postNoIgnore = true;
+ }
}
for ( ContextDefList::Iter c = contextDefList; c.lte(); c++ ) {
@@ -273,15 +286,22 @@ void Namespace::declare( ParseData *pd )
for ( TokenDefListNs::Iter t = tokenDefList; t.lte(); t++ ) {
/* Literals already taken care of. */
if ( ! t->isLiteral ) {
- /* Create the token. */
- LangEl *tokEl = declareLangEl( pd, this, t->name, LangEl::Term );
- tokEl->ignore = t->ignore;
- tokEl->transBlock = t->codeBlock;
- tokEl->objectDef = t->objectDef;
- tokEl->contextIn = t->contextIn;
- tokEl->tokenDef = t;
-
- t->token = tokEl;
+ if ( t->dupOf != 0 ) {
+ /* Duplicate of another. Use the lang el of that token. */
+ assert( t->dupOf->tdLangEl != 0 );
+ t->tdLangEl = t->dupOf->tdLangEl;
+ }
+ else {
+ /* Create the token. */
+ LangEl *tokEl = declareLangEl( pd, this, t->name, LangEl::Term );
+ tokEl->ignore = t->ignore;
+ tokEl->transBlock = t->codeBlock;
+ tokEl->objectDef = t->objectDef;
+ tokEl->contextIn = t->contextIn;
+ tokEl->tokenDef = t;
+
+ t->tdLangEl = tokEl;
+ }
}
}
diff --git a/colm/fsmcodegen.cc b/colm/fsmcodegen.cc
index d98e6b0b..86302c31 100644
--- a/colm/fsmcodegen.cc
+++ b/colm/fsmcodegen.cc
@@ -216,9 +216,9 @@ void FsmCodeGen::LM_SWITCH( ostream &ret, InlineItem *item,
for ( TokenDefListReg::Iter lmi = item->tokenRegion->tokenDefList; lmi.lte(); lmi++ ) {
if ( lmi->inLmSelect ) {
- assert( lmi->token != 0 );
+ assert( lmi->tdLangEl != 0 );
ret << " case " << lmi->longestMatchId << ":\n";
- EMIT_TOKEN( ret, lmi->token );
+ EMIT_TOKEN( ret, lmi->tdLangEl );
ret << " break;\n";
}
}
@@ -231,27 +231,27 @@ void FsmCodeGen::LM_SWITCH( ostream &ret, InlineItem *item,
void FsmCodeGen::LM_ON_LAST( ostream &ret, InlineItem *item )
{
- assert( item->longestMatchPart->token != 0 );
+ assert( item->longestMatchPart->tdLangEl != 0 );
ret << " " << P() << " += 1;\n";
- EMIT_TOKEN( ret, item->longestMatchPart->token );
+ EMIT_TOKEN( ret, item->longestMatchPart->tdLangEl );
ret << " return;\n";
}
void FsmCodeGen::LM_ON_NEXT( ostream &ret, InlineItem *item )
{
- assert( item->longestMatchPart->token != 0 );
+ assert( item->longestMatchPart->tdLangEl != 0 );
- EMIT_TOKEN( ret, item->longestMatchPart->token );
+ EMIT_TOKEN( ret, item->longestMatchPart->tdLangEl );
ret << " return;\n";
}
void FsmCodeGen::LM_ON_LAG_BEHIND( ostream &ret, InlineItem *item )
{
- assert( item->longestMatchPart->token != 0 );
+ assert( item->longestMatchPart->tdLangEl != 0 );
ret << " " << P() << " = " << TOKEND() << ";\n";
- EMIT_TOKEN( ret, item->longestMatchPart->token );
+ EMIT_TOKEN( ret, item->longestMatchPart->tdLangEl );
ret << " return;\n";
}
diff --git a/colm/fsmexec.cc b/colm/fsmexec.cc
index 772e3146..f922c7a4 100644
--- a/colm/fsmexec.cc
+++ b/colm/fsmexec.cc
@@ -66,23 +66,23 @@ void execAction( FsmRun *fsmRun, GenAction *genAction )
lmi.lte(); lmi++ )
{
if ( lmi->inLmSelect && fsmRun->act == lmi->longestMatchId )
- fsmRun->matchedToken = lmi->token->id;
+ fsmRun->matchedToken = lmi->tdLangEl->id;
}
}
fsmRun->returnResult = true;
break;
case InlineItem::LmOnLast:
fsmRun->p += 1;
- fsmRun->matchedToken = item->longestMatchPart->token->id;
+ fsmRun->matchedToken = item->longestMatchPart->tdLangEl->id;
fsmRun->returnResult = true;
break;
case InlineItem::LmOnNext:
- fsmRun->matchedToken = item->longestMatchPart->token->id;
+ fsmRun->matchedToken = item->longestMatchPart->tdLangEl->id;
fsmRun->returnResult = true;
break;
case InlineItem::LmOnLagBehind:
fsmRun->p = fsmRun->tokend;
- fsmRun->matchedToken = item->longestMatchPart->token->id;
+ fsmRun->matchedToken = item->longestMatchPart->tdLangEl->id;
fsmRun->returnResult = true;
break;
}
diff --git a/colm/lmparse.kl b/colm/lmparse.kl
index 3689b922..0d3e814a 100644
--- a/colm/lmparse.kl
+++ b/colm/lmparse.kl
@@ -481,7 +481,7 @@ region_def:
region_head:
KW_Lex TK_Word
final {
- /* just for ignores. */
+ /* Just for ignores. */
TokenRegion *tokenRegionIgn = new TokenRegion( InputLoc(), $2->data + "_ign" ,
pd->regionList.length(), regionStack.top() );
regionStack.top()->childRegions.append( tokenRegionIgn );
@@ -490,6 +490,15 @@ region_head:
String scannerNameIgn( $2->data.length() + 2, "<%s>-ign", $2->data.data );
addRegularDef( InputLoc(), namespaceStack.top(), scannerNameIgn, joinOrLmIgn, true );
+ /* Just for tokens. */
+ TokenRegion *tokenRegionTok = new TokenRegion( InputLoc(), $2->data + "_tok" ,
+ pd->regionList.length(), regionStack.top() );
+ regionStack.top()->childRegions.append( tokenRegionTok );
+ pd->regionList.append( tokenRegionTok );
+ JoinOrLm *joinOrLmTok = new JoinOrLm( tokenRegionTok );
+ String scannerNameTok( $2->data.length() + 2, "<%s>-tok", $2->data.data );
+ addRegularDef( InputLoc(), namespaceStack.top(), scannerNameTok, joinOrLmTok, true );
+
/* Make the new token region. */
TokenRegion *tokenRegion = new TokenRegion( InputLoc(), $2->data,
pd->regionList.length(), regionStack.top() );
@@ -499,7 +508,11 @@ region_head:
String scannerName( $2->data.length() + 2, "<%s>", $2->data.data );
addRegularDef( InputLoc(), namespaceStack.top(), scannerName, joinOrLm, true );
regionStack.push( tokenRegion );
- tokenRegion->ignoreRegion = tokenRegionIgn;
+
+ tokenRegion->ignoreOnlyRegion = tokenRegionIgn;
+ tokenRegion->tokenOnlyRegion = tokenRegionTok;
+
+ tokenRegionIgn->isIgnoreOnly = true;
};
namespace_def:
@@ -916,6 +929,22 @@ literal_item: TK_Literal
bool insideRegion = regionStack.top() != pd->rootRegion;
if ( !insideRegion ) {
+ TokenRegion *tokenRegionIgn = new TokenRegion( InputLoc(), name + "_ign",
+ pd->regionList.length(), regionStack.top() );
+ regionStack.top()->childRegions.append( tokenRegionIgn );
+ pd->regionList.append( tokenRegionIgn );
+ JoinOrLm *joinOrLmIgn = new JoinOrLm( tokenRegionIgn );
+ String scannerNameIgn( name.length() + 2, "<%s>-ign", name.data );
+ addRegularDef( InputLoc(), namespaceStack.top(), scannerNameIgn, joinOrLmIgn, true );
+
+ TokenRegion *tokenRegionTok = new TokenRegion( InputLoc(), name + "_tok",
+ pd->regionList.length(), regionStack.top() );
+ regionStack.top()->childRegions.append( tokenRegionTok );
+ pd->regionList.append( tokenRegionTok );
+ JoinOrLm *joinOrLmTok = new JoinOrLm( tokenRegionTok );
+ String scannerNameTok( name.length() + 2, "<%s>-tok", name.data );
+ addRegularDef( InputLoc(), namespaceStack.top(), scannerNameTok, joinOrLmTok, true );
+
/* Make a new token region just for the token. */
TokenRegion *tokenRegion = new TokenRegion( InputLoc(), $1->data,
pd->regionList.length(), regionStack.top() );
@@ -925,6 +954,11 @@ literal_item: TK_Literal
String scannerName( name.length() + 2, "<%s>", name.data );
addRegularDef( InputLoc(), namespaceStack.top(), scannerName, joinOrLm, true );
regionStack.push( tokenRegion );
+
+ tokenRegion->ignoreOnlyRegion = tokenRegionIgn;
+ tokenRegion->tokenOnlyRegion = tokenRegionTok;
+
+ tokenRegionIgn->isIgnoreOnly = true;
}
bool unusedCI;
@@ -945,13 +979,27 @@ literal_item: TK_Literal
Literal::LitString ) ) ) ) ) ) ) );
TokenDef *tokenDef = new TokenDef( name, $1->data, true, false, join,
- 0, $1->loc, pd->nextTokenId, nspace, region, 0, 0, 0 );
-
+ 0, $1->loc, pd->nextTokenId++, nspace, region, 0, 0, 0 );
region->tokenDefList.append( tokenDef );
ldel = nspace->literalDict.insert( interp, tokenDef );
- pd->nextTokenId += 1;
-
nspace->tokenDefList.append( tokenDef );
+
+// if ( strcmp( interp, "%%" ) == 0 ) {
+// tokenDef->preNoIgnore = true;
+// }
+// if ( strcmp( interp, ")" ) == 0 ) {
+// tokenDef->preNoIgnore = true;
+// }
+// if ( strcmp( interp, "(" ) == 0 ) {
+// tokenDef->postNoIgnore = true;
+// }
+
+ TokenDef *tokenDefTok = new TokenDef( name + "_tok", $1->data, true, false, join,
+ 0, $1->loc, pd->nextTokenId++, nspace, region->tokenOnlyRegion, 0, 0, 0 );
+ tokenDefTok->dupOf = tokenDef;
+ region->tokenOnlyRegion->tokenDefList.append( tokenDefTok );
+ ldel = nspace->literalDict.insert( "|" + interp + "_tok", tokenDefTok );
+ nspace->tokenDefList.append( tokenDefTok );
}
if ( !insideRegion ) {
@@ -1013,13 +1061,26 @@ token_def:
/* All again for the ignore. */
if ( ignore ) {
TokenDef *tokenDefIgn = new TokenDef( name + "_ign", String(), false, ignore, join,
- 0, $1->loc, pd->nextTokenId++, nspace, region->ignoreRegion,
+ 0, $1->loc, pd->nextTokenId++, nspace, region->ignoreOnlyRegion,
&reCaptureVect, pd->objectDef,
contextStack.length() > 0 ? contextStack.top() : 0 );
- region->ignoreRegion->tokenDefList.append( tokenDefIgn );
+ tokenDefIgn->dupOf = tokenDef;
+
+ region->ignoreOnlyRegion->tokenDefList.append( tokenDefIgn );
nspace->tokenDefList.append( tokenDefIgn );
}
+ else {
+ TokenDef *tokenDefTok = new TokenDef( name + "_tok", String(), false, ignore, join,
+ 0, $1->loc, pd->nextTokenId++, nspace, region->tokenOnlyRegion,
+ &reCaptureVect, pd->objectDef,
+ contextStack.length() > 0 ? contextStack.top() : 0 );
+
+ tokenDefTok->dupOf = tokenDef;
+
+ region->tokenOnlyRegion->tokenDefList.append( tokenDefTok );
+ nspace->tokenDefList.append( tokenDefTok );
+ }
/* This is created and pushed in the name. */
if ( !pd->insideRegion ) {
@@ -1074,6 +1135,14 @@ token_def_name:
String scannerNameIgn( name.length() + 2, "<%s>-ign", name.data );
addRegularDef( InputLoc(), namespaceStack.top(), scannerNameIgn, joinOrLmIgn, true );
+ TokenRegion *tokenRegionTok = new TokenRegion( InputLoc(), name + "_tok",
+ pd->regionList.length(), regionStack.top() );
+ regionStack.top()->childRegions.append( tokenRegionTok );
+ pd->regionList.append( tokenRegionTok );
+ JoinOrLm *joinOrLmTok = new JoinOrLm( tokenRegionTok );
+ String scannerNameTok( name.length() + 2, "<%s>-tok", name.data );
+ addRegularDef( InputLoc(), namespaceStack.top(), scannerNameTok, joinOrLmTok, true );
+
/* If not inside a region, make one for the token. */
TokenRegion *tokenRegion = new TokenRegion( InputLoc(), name,
pd->regionList.length(), regionStack.top() );
@@ -1083,7 +1152,11 @@ token_def_name:
String scannerName( name.length() + 2, "<%s>", name.data );
addRegularDef( InputLoc(), namespaceStack.top(), scannerName, joinOrLm, true );
regionStack.push( tokenRegion );
- tokenRegion->ignoreRegion = tokenRegionIgn;
+
+ tokenRegion->ignoreOnlyRegion = tokenRegionIgn;
+ tokenRegion->tokenOnlyRegion = tokenRegionTok;
+
+ tokenRegionIgn->isIgnoreOnly = true;
}
/* Reset the lable id counter. */
diff --git a/colm/parsedata.cc b/colm/parsedata.cc
index 81f5308d..f1017cae 100644
--- a/colm/parsedata.cc
+++ b/colm/parsedata.cc
@@ -1115,11 +1115,12 @@ void ParseData::createDefaultScanner()
name = "___DEFAULT_SCANNER_CHR";
defaultCharLangEl = addLangEl( this, defaultNamespace, name, LangEl::Term );
- tokenDef->token = defaultCharLangEl;
+ tokenDef->tdLangEl = defaultCharLangEl;
defaultCharLangEl->tokenDef = tokenDef;
}
-LangEl *ParseData::makeRepeatProd( Namespace *nspace, const String &repeatName, NamespaceQual *nspaceQual, const String &name )
+LangEl *ParseData::makeRepeatProd( Namespace *nspace, const String &repeatName,
+ NamespaceQual *nspaceQual, const String &name )
{
LangEl *prodName = addLangEl( this, nspace, repeatName, LangEl::NonTerm );
prodName->isRepeat = true;
@@ -1326,7 +1327,7 @@ void ParseData::initEmptyScanners()
* in the declare pass. */
LangEl *lel = addLangEl( this, rootNamespace, name, LangEl::Term );
- tokenDef->token = lel;
+ tokenDef->tdLangEl = lel;
lel->tokenDef = tokenDef;
}
}
diff --git a/colm/parsedata.h b/colm/parsedata.h
index 61b32753..a0705002 100644
--- a/colm/parsedata.h
+++ b/colm/parsedata.h
@@ -269,6 +269,8 @@ struct LangEl : public DListEl<LangEl>
Context *contextDef;
Context *contextIn;
+ bool preNoIgnore;
+ bool postNoIgnore;
};
struct ProdEl
diff --git a/colm/parsetree.cc b/colm/parsetree.cc
index 5a507704..f613ff3b 100644
--- a/colm/parsetree.cc
+++ b/colm/parsetree.cc
@@ -601,7 +601,7 @@ FsmGraph *TokenRegion::walk( ParseData *pd )
}
FsmGraph *retFsm = parts[0];
- if ( defaultTokenDef != 0 && defaultTokenDef->token->ignore )
+ if ( defaultTokenDef != 0 && defaultTokenDef->tdLangEl->ignore )
error() << "ignore token cannot be a scanner's zero-length token" << endp;
/* The region is empty. Return the empty set. */
diff --git a/colm/parsetree.h b/colm/parsetree.h
index 852f11fe..b9736109 100644
--- a/colm/parsetree.h
+++ b/colm/parsetree.h
@@ -394,10 +394,11 @@ struct TokenDef
ReCaptureVect *pReCaptureVect, ObjectDef *objectDef, Context *contextIn )
:
name(name), literal(literal), isLiteral(isLiteral), ignore(ignore), join(join), action(0),
- codeBlock(codeBlock), token(0), semiLoc(semiLoc),
+ codeBlock(codeBlock), tdLangEl(0), semiLoc(semiLoc),
longestMatchId(longestMatchId), inLmSelect(false),
nspace(nspace), tokenRegion(tokenRegion), objectDef(objectDef),
- contextIn(contextIn)
+ contextIn(contextIn),
+ dupOf(dupOf), postNoIgnore(false), preNoIgnore(false)
{
if ( pReCaptureVect != 0 )
reCaptureVect = *pReCaptureVect;
@@ -412,7 +413,7 @@ struct TokenDef
Join *join;
Action *action;
CodeBlock *codeBlock;
- LangEl *token;
+ LangEl *tdLangEl;
InputLoc semiLoc;
Action *setActId;
@@ -426,6 +427,10 @@ struct TokenDef
ReCaptureVect reCaptureVect;
ObjectDef *objectDef;
Context *contextIn;
+
+ TokenDef *dupOf;
+ bool postNoIgnore;
+ bool preNoIgnore;
};
struct LelDefList;
@@ -513,7 +518,9 @@ struct TokenRegion
loc(loc), name(name), id(id),
lmSwitchHandlesError(false), regionNameInst(0),
parentRegion(parentRegion), defaultTokenDef(0),
- preEofBlock(0), ignoreRegion(0), wasEmpty(false) { }
+ preEofBlock(0),
+ ignoreOnlyRegion(0), tokenOnlyRegion(0),
+ wasEmpty(false), isIgnoreOnly(false) { }
/* Tree traversal. */
FsmGraph *walk( ParseData *pd );
@@ -547,12 +554,15 @@ struct TokenRegion
CodeBlock *preEofBlock;
/* Dupe of the region, containing only the ignore tokens. */
- TokenRegion *ignoreRegion;
+ TokenRegion *ignoreOnlyRegion;
+ TokenRegion *tokenOnlyRegion;
/* We alway init empty scanners with a single token. If we had to do this
* then wasEmpty is true. */
bool wasEmpty;
+ bool isIgnoreOnly;
+
TokenRegion *next, *prev;
};
diff --git a/colm/pdabuild.cc b/colm/pdabuild.cc
index 66af8846..aba017f7 100644
--- a/colm/pdabuild.cc
+++ b/colm/pdabuild.cc
@@ -89,7 +89,9 @@ LangEl::LangEl( Namespace *nspace, const String &name, Type type )
predType(PredNone),
predValue(0),
contextDef(0),
- contextIn(0)
+ contextIn(0),
+ preNoIgnore(false),
+ postNoIgnore(false)
{
}
@@ -479,11 +481,25 @@ void ParseData::addRegion( PdaState *tabState, PdaTrans *tabTrans, long pdaKey )
region = klangEl->tokenDef->tokenRegion;
if ( region != 0 ) {
- if ( !regionVectHas( tabState->regions, region ) )
- tabState->regions.append( region );
+ /* region. */
+ TokenRegion *scanRegion = region;
+
+ if ( klangEl->preNoIgnore )
+ scanRegion = region->tokenOnlyRegion;
+
+ if ( !regionVectHas( tabState->regions, scanRegion ) ) {
+ tabState->regions.append( scanRegion );
+ }
+
+ /* Pre-region of to state */
+ PdaState *toState = tabTrans->toState;
+ if ( !klangEl->postNoIgnore &&
+ region->ignoreOnlyRegion != 0 &&
+ !regionVectHas( toState->preRegions, region->ignoreOnlyRegion ) )
+ {
+ toState->preRegions.append( region->ignoreOnlyRegion );
+ }
- if ( region->ignoreRegion != 0 && !regionVectHas( tabTrans->toState->preRegions, region->ignoreRegion ) )
- tabTrans->toState->preRegions.append( region->ignoreRegion );
}
}
}
@@ -1335,8 +1351,9 @@ void ParseData::makeRuntimeData()
long regId = reg->id+1;
runtimeData->regionInfo[regId].name = reg->name;
runtimeData->regionInfo[regId].defaultToken =
- reg->defaultTokenDef == 0 ? -1 : reg->defaultTokenDef->token->id;
+ reg->defaultTokenDef == 0 ? -1 : reg->defaultTokenDef->tdLangEl->id;
runtimeData->regionInfo[regId].eofFrameId = -1;
+ runtimeData->regionInfo[regId].isIgnoreOnly = reg->isIgnoreOnly;
CodeBlock *block = reg->preEofBlock;
if ( block != 0 ) {
diff --git a/colm/pdacodegen.cc b/colm/pdacodegen.cc
index a259ce00..7132a609 100644
--- a/colm/pdacodegen.cc
+++ b/colm/pdacodegen.cc
@@ -356,6 +356,7 @@ void PdaCodeGen::writeRuntimeData( RuntimeData *runtimeData, PdaTables *pdaTable
escapeLiteralString( out, runtimeData->regionInfo[i].name );
out << "\", " << runtimeData->regionInfo[i].defaultToken <<
", " << runtimeData->regionInfo[i].eofFrameId <<
+ ", " << runtimeData->regionInfo[i].isIgnoreOnly <<
" }";
if ( i < runtimeData->numRegions-1 )
diff --git a/colm/pdarun.c b/colm/pdarun.c
index 8b3d5f87..142017c5 100644
--- a/colm/pdarun.c
+++ b/colm/pdarun.c
@@ -406,7 +406,7 @@ void setRegion( PdaRun *pdaRun, int emptyIgnore, ParseTree *tree )
}
}
-void ignoreTree( Program *prg, PdaRun *pdaRun, Tree *tree )
+void ignoreTree( Program *prg, FsmRun *fsmRun, PdaRun *pdaRun, Tree *tree )
{
int emptyIgnore = pdaRun->accumIgnore == 0;
@@ -421,6 +421,9 @@ void ignoreTree( Program *prg, PdaRun *pdaRun, Tree *tree )
transferReverseCode( pdaRun, parseTree );
+ if ( fsmRun->preRegion >= 0 )
+ parseTree->flags |= PF_RIGHT_IGNORE;
+
setRegion( pdaRun, emptyIgnore, pdaRun->accumIgnore );
}
@@ -536,6 +539,9 @@ static void reportParseError( Program *prg, Tree **sp, PdaRun *pdaRun )
static void attachRightIgnore( Program *prg, Tree **sp, PdaRun *pdaRun, ParseTree *parseTree )
{
+ if ( pdaRun->accumIgnore == 0 )
+ return;
+
if ( pdaRun->stackTop->id > 0 && pdaRun->stackTop->id < prg->rtd->firstNonTermId ) {
/* OK, do it */
debug( REALM_PARSE, "attaching right ignore\n" );
@@ -544,7 +550,28 @@ static void attachRightIgnore( Program *prg, Tree **sp, PdaRun *pdaRun, ParseTre
assert( ! ( parseTree->flags & PF_RIGHT_IL_ATTACHED ) );
ParseTree *accum = pdaRun->accumIgnore;
- pdaRun->accumIgnore = 0;
+ ParseTree *lasta = 0, *use = accum;
+ while ( use != 0 && use->flags & PF_RIGHT_IGNORE ) {
+ lasta = use;
+ use = use->next;
+ }
+
+ if ( use == 0 ) {
+ /* Use it all. Note accum != 0 so non-empty. */
+ pdaRun->accumIgnore = 0;
+ }
+ else {
+ /* Got stopped. */
+ if ( lasta == 0 ) {
+ /* Use none. */
+ accum = 0;
+ }
+ else {
+ /* Use some. */
+ lasta->next = 0;
+ pdaRun->accumIgnore = use;
+ }
+ }
/* The data list needs to be extracted and reversed. The parse tree list
* can remain in stack order. */
@@ -785,7 +812,7 @@ void sendIgnore( Program *prg, Tree **sp, InputStream *inputStream, FsmRun *fsmR
tree->tokdata = ignoreStr;
/* Send it to the pdaRun. */
- ignoreTree( prg, pdaRun, tree );
+ ignoreTree( prg, fsmRun, pdaRun, tree );
}
diff --git a/colm/pdarun.h b/colm/pdarun.h
index 236f8793..b8204d97 100644
--- a/colm/pdarun.h
+++ b/colm/pdarun.h
@@ -218,6 +218,7 @@ typedef struct _RegionInfo
const char *name;
long defaultToken;
long eofFrameId;
+ int isIgnoreOnly;;
} RegionInfo;
typedef struct _CaptureAttr
diff --git a/colm/resolve.cc b/colm/resolve.cc
index 526d3588..de548fdb 100644
--- a/colm/resolve.cc
+++ b/colm/resolve.cc
@@ -77,7 +77,7 @@ UniqueType *TypeRef::lookupTypeLiteral( ParseData *pd )
LiteralDictEl *ldel = nspace->literalDict.find( interp );
if ( ldel != 0 )
- return pd->findUniqueType( TYPE_TREE, ldel->value->token );
+ return pd->findUniqueType( TYPE_TREE, ldel->value->tdLangEl );
nspace = nspace->parentNamespace;
}