summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAdrian Thurston <thurston@complang.org>2012-05-23 13:52:19 +0000
committerAdrian Thurston <thurston@complang.org>2012-05-23 13:52:19 +0000
commitb628bd6037bea333bccadc0850707616a730275b (patch)
tree75795189a3707a4818471e626ce93a28941383b8
parent77aac5d2c534e3206fd150b4e2b29688f6230ee1 (diff)
downloadcolm-b628bd6037bea333bccadc0850707616a730275b.tar.gz
added a syntax for specifying no ignores
Added the keyword 'ni' (no-ignore), which can go before or after a token pattern, in either a literal or an ordinary token definition. Placed before the pattern it sets the token's noPreIgnore bit; placed after, it sets the noPostIgnore bit. These bits affect the ignore scanning and attaching.
-rw-r--r--colm.vim2
-rw-r--r--colm/declare.cc13
-rw-r--r--colm/lmparse.kh2
-rw-r--r--colm/lmparse.kl56
-rw-r--r--colm/lmscan.rl1
-rw-r--r--colm/parsedata.h14
-rw-r--r--colm/parsetree.h6
-rw-r--r--colm/pdabuild.cc18
-rw-r--r--colm/pdagraph.cc5
-rw-r--r--colm/pdagraph.h11
-rw-r--r--test/ignore3.exp2
-rw-r--r--test/ignore3.lm2
12 files changed, 83 insertions, 49 deletions
diff --git a/colm.vim b/colm.vim
index d65ae0c2..d8eb44a6 100644
--- a/colm.vim
+++ b/colm.vim
@@ -64,7 +64,7 @@ syntax keyword typeKeywords
syntax keyword Keyword
\ reject else elsif return yield for while if
\ typeid in break
- \ new deref
+ \ new deref ni
syntax match tokenName "[a-zA-Z_][a-zA-Z_0-9]*" contained
syntax match varCapture "[a-zA-Z_][a-zA-Z_0-9]*:"
diff --git a/colm/declare.cc b/colm/declare.cc
index bcd9a16c..28221c63 100644
--- a/colm/declare.cc
+++ b/colm/declare.cc
@@ -243,10 +243,10 @@ void Namespace::declare( ParseData *pd )
l->value->tdLangEl = newLangEl;
- if ( l->value->preNoIgnore )
- newLangEl->preNoIgnore = true;
- if ( l->value->postNoIgnore )
- newLangEl->postNoIgnore = true;
+ if ( l->value->noPreIgnore )
+ newLangEl->noPreIgnore = true;
+ if ( l->value->noPostIgnore )
+ newLangEl->noPostIgnore = true;
}
}
@@ -300,6 +300,11 @@ void Namespace::declare( ParseData *pd )
tokEl->contextIn = t->contextIn;
tokEl->tokenDef = t;
+ if ( t->noPreIgnore )
+ tokEl->noPreIgnore = true;
+ if ( t->noPostIgnore )
+ tokEl->noPostIgnore = true;
+
t->tdLangEl = tokEl;
}
}
diff --git a/colm/lmparse.kh b/colm/lmparse.kh
index 529dfcc6..469e7ec3 100644
--- a/colm/lmparse.kh
+++ b/colm/lmparse.kh
@@ -58,7 +58,7 @@ struct ColmParser
# Patterns.
token KW_Match, KW_Construct, KW_Parse, KW_ParseStop, KW_New, KW_MakeToken,
- KW_MakeTree, KW_TypeId, KW_Alias, KW_Send;
+ KW_MakeTree, KW_TypeId, KW_Alias, KW_Send, KW_Ni;
token KW_Include, KW_Preeof;
diff --git a/colm/lmparse.kl b/colm/lmparse.kl
index 0d3e814a..17e3da81 100644
--- a/colm/lmparse.kl
+++ b/colm/lmparse.kl
@@ -838,6 +838,15 @@ prod_el_list:
prod_el_list:
final { curProdElList = new ProdElList; };
+nonterm opt_no_pre_ignore { bool value; };
+nonterm opt_no_post_ignore { bool value; };
+
+opt_no_pre_ignore: KW_Ni final { $$->value = true; };
+opt_no_pre_ignore: final { $$->value = false; };
+
+opt_no_post_ignore: KW_Ni final { $$->value = true; };
+opt_no_post_ignore: final { $$->value = false; };
+
nonterm prod_el
{
ProdEl *factor;
@@ -922,7 +931,7 @@ literal_def: KW_Literal literal_list;
literal_list: literal_list ',' literal_item;
literal_list: literal_item;
-literal_item: TK_Literal
+literal_item: opt_no_pre_ignore TK_Literal opt_no_post_ignore
final {
/* Create a name for the literal. */
String name( 32, "_literal_%.4x", pd->nextTokenId );
@@ -946,7 +955,7 @@ literal_item: TK_Literal
addRegularDef( InputLoc(), namespaceStack.top(), scannerNameTok, joinOrLmTok, true );
/* Make a new token region just for the token. */
- TokenRegion *tokenRegion = new TokenRegion( InputLoc(), $1->data,
+ TokenRegion *tokenRegion = new TokenRegion( InputLoc(), $2->data,
pd->regionList.length(), regionStack.top() );
regionStack.top()->childRegions.append( tokenRegion );
pd->regionList.append( tokenRegion );
@@ -963,7 +972,7 @@ literal_item: TK_Literal
bool unusedCI;
String interp;
- prepareLitString( interp, unusedCI, $1->data, $1->loc );
+ prepareLitString( interp, unusedCI, $2->data, $2->loc );
/* Look for the production's associated region. */
Namespace *nspace = namespaceStack.top();
@@ -971,31 +980,26 @@ literal_item: TK_Literal
LiteralDictEl *ldel = nspace->literalDict.find( interp );
if ( ldel != 0 )
- error( $1->loc ) << "literal already defined in this namespace" << endp;
+ error( $2->loc ) << "literal already defined in this namespace" << endp;
else {
Join *join = new Join( new Expression( new Term( new FactorWithAug(
- new FactorWithRep( $1->loc, new FactorWithNeg( $1->loc, new Factor(
- new Literal( $1->loc, $1->data,
+ new FactorWithRep( $2->loc, new FactorWithNeg( $2->loc, new Factor(
+ new Literal( $2->loc, $2->data,
Literal::LitString ) ) ) ) ) ) ) );
- TokenDef *tokenDef = new TokenDef( name, $1->data, true, false, join,
- 0, $1->loc, pd->nextTokenId++, nspace, region, 0, 0, 0 );
+ TokenDef *tokenDef = new TokenDef( name, $2->data, true, false, join,
+ 0, $2->loc, pd->nextTokenId++, nspace, region, 0, 0, 0 );
region->tokenDefList.append( tokenDef );
ldel = nspace->literalDict.insert( interp, tokenDef );
nspace->tokenDefList.append( tokenDef );
-// if ( strcmp( interp, "%%" ) == 0 ) {
-// tokenDef->preNoIgnore = true;
-// }
-// if ( strcmp( interp, ")" ) == 0 ) {
-// tokenDef->preNoIgnore = true;
-// }
-// if ( strcmp( interp, "(" ) == 0 ) {
-// tokenDef->postNoIgnore = true;
-// }
-
- TokenDef *tokenDefTok = new TokenDef( name + "_tok", $1->data, true, false, join,
- 0, $1->loc, pd->nextTokenId++, nspace, region->tokenOnlyRegion, 0, 0, 0 );
+ if ( $1->value )
+ tokenDef->noPreIgnore = true;
+ if ( $3->value )
+ tokenDef->noPostIgnore = true;
+
+ TokenDef *tokenDefTok = new TokenDef( name + "_tok", $2->data, true, false, join,
+ 0, $2->loc, pd->nextTokenId++, nspace, region->tokenOnlyRegion, 0, 0, 0 );
tokenDefTok->dupOf = tokenDef;
region->tokenOnlyRegion->tokenDefList.append( tokenDefTok );
ldel = nspace->literalDict.insert( "|" + interp + "_tok", tokenDefTok );
@@ -1028,12 +1032,13 @@ leave_rl:
token_def:
token_or_ignore token_def_name obj_var_list
- enter_rl '/' opt_rl_join leave_rl '/' opt_translate
+ enter_rl opt_no_pre_ignore '/' opt_rl_join leave_rl '/' opt_no_post_ignore
+ opt_translate
final {
bool ignore = $1->ignore;
String name = $2->name;
- Join *join = $6->join;
- CodeBlock *transBlock = $9->transBlock;
+ Join *join = $7->join;
+ CodeBlock *transBlock = $11->transBlock;
/* Check the region if this is for an ignore. */
if ( ignore && !pd->insideRegion )
@@ -1058,6 +1063,11 @@ token_def:
region->tokenDefList.append( tokenDef );
nspace->tokenDefList.append( tokenDef );
+ if ( $5->value )
+ tokenDef->noPreIgnore = true;
+ if ( $10->value ) {cerr << "foo" << endl;
+ tokenDef->noPostIgnore = true;}
+
/* All again for the ignore. */
if ( ignore ) {
TokenDef *tokenDefIgn = new TokenDef( name + "_ign", String(), false, ignore, join,
diff --git a/colm/lmscan.rl b/colm/lmscan.rl
index cfb5530d..dbe79ca0 100644
--- a/colm/lmscan.rl
+++ b/colm/lmscan.rl
@@ -476,6 +476,7 @@ void Scanner::endSection( )
'context' => {token( KW_Context ); };
'alias' => {token( KW_Alias ); };
'send' => {token( KW_Send ); };
+ 'ni' => {token( KW_Ni ); };
# Identifiers.
ident => { token( TK_Word, ts, te ); } ;
diff --git a/colm/parsedata.h b/colm/parsedata.h
index a0705002..3513d274 100644
--- a/colm/parsedata.h
+++ b/colm/parsedata.h
@@ -269,8 +269,8 @@ struct LangEl : public DListEl<LangEl>
Context *contextDef;
Context *contextIn;
- bool preNoIgnore;
- bool postNoIgnore;
+ bool noPreIgnore;
+ bool noPostIgnore;
};
struct ProdEl
@@ -290,7 +290,8 @@ struct ProdEl
langEl(0),
priorVal(priorVal),
type(type),
- objField(0) {}
+ objField(0)
+ {}
ProdEl( const InputLoc &loc, TypeRef *typeRef )
:
@@ -299,7 +300,9 @@ struct ProdEl
typeRef(typeRef),
langEl(0),
priorVal(0),
- type(ReferenceType), objField(0) {}
+ type(ReferenceType),
+ objField(0)
+ {}
ObjField *captureField;
bool commit;
@@ -733,7 +736,8 @@ struct ParseData
int findIndexOff( PdaTables *pdaTables, PdaGraph *pdaGraph, PdaState *state, int &currLen );
void trySetTime( PdaTrans *trans, long code, long &time );
- void addRegion( PdaState *tabState, PdaTrans *pdaTrans, long pdaKey );
+ void addRegion( PdaState *tabState, PdaTrans *pdaTrans, long pdaKey,
+ bool noPreIgnore, bool noPostIgnore );
PdaState *followProd( PdaState *tabState, PdaState *prodState );
void findFollow( AlphSet &result, PdaState *overTab,
PdaState *overSrc, Definition *parentDef );
diff --git a/colm/parsetree.h b/colm/parsetree.h
index 2d39b4ee..fa0e5ab7 100644
--- a/colm/parsetree.h
+++ b/colm/parsetree.h
@@ -398,7 +398,7 @@ struct TokenDef
longestMatchId(longestMatchId), inLmSelect(false),
nspace(nspace), tokenRegion(tokenRegion), objectDef(objectDef),
contextIn(contextIn),
- dupOf(0), postNoIgnore(false), preNoIgnore(false)
+ dupOf(0), noPostIgnore(false), noPreIgnore(false)
{
if ( pReCaptureVect != 0 )
reCaptureVect = *pReCaptureVect;
@@ -429,8 +429,8 @@ struct TokenDef
Context *contextIn;
TokenDef *dupOf;
- bool postNoIgnore;
- bool preNoIgnore;
+ bool noPostIgnore;
+ bool noPreIgnore;
};
struct LelDefList;
diff --git a/colm/pdabuild.cc b/colm/pdabuild.cc
index aba017f7..eadbcb06 100644
--- a/colm/pdabuild.cc
+++ b/colm/pdabuild.cc
@@ -90,8 +90,8 @@ LangEl::LangEl( Namespace *nspace, const String &name, Type type )
predValue(0),
contextDef(0),
contextIn(0),
- preNoIgnore(false),
- postNoIgnore(false)
+ noPreIgnore(false),
+ noPostIgnore(false)
{
}
@@ -455,7 +455,8 @@ void ParseData::pdaOrderFollow( LangEl *rootEl, PdaState *tabState,
trySetTime( tt->value, redCode, time );
/* If the items token region is not recorded in the state, do it now. */
- addRegion( expandToState, tt->value, tt->key );
+ addRegion( expandToState, tt->value, tt->key,
+ tt->value->noPreIgnore, tt->value->noPostIgnore );
}
}
}
@@ -469,7 +470,8 @@ bool regionVectHas( RegionVect &regVect, TokenRegion *region )
return false;
}
-void ParseData::addRegion( PdaState *tabState, PdaTrans *tabTrans, long pdaKey )
+void ParseData::addRegion( PdaState *tabState, PdaTrans *tabTrans,
+ long pdaKey, bool noPreIgnore, bool noPostIgnore )
{
LangEl *klangEl = langElIndex[pdaKey];
if ( klangEl != 0 && klangEl->type == LangEl::Term ) {
@@ -484,7 +486,7 @@ void ParseData::addRegion( PdaState *tabState, PdaTrans *tabTrans, long pdaKey )
/* region. */
TokenRegion *scanRegion = region;
- if ( klangEl->preNoIgnore )
+ if ( klangEl->noPreIgnore )
scanRegion = region->tokenOnlyRegion;
if ( !regionVectHas( tabState->regions, scanRegion ) ) {
@@ -493,13 +495,12 @@ void ParseData::addRegion( PdaState *tabState, PdaTrans *tabTrans, long pdaKey )
/* Pre-region of to state */
PdaState *toState = tabTrans->toState;
- if ( !klangEl->postNoIgnore &&
+ if ( !klangEl->noPostIgnore &&
region->ignoreOnlyRegion != 0 &&
!regionVectHas( toState->preRegions, region->ignoreOnlyRegion ) )
{
toState->preRegions.append( region->ignoreOnlyRegion );
}
-
}
}
}
@@ -589,7 +590,8 @@ void ParseData::pdaOrderProd( LangEl *rootEl, PdaState *tabState,
}
/* If the items token region is not recorded in the state, do it now. */
- addRegion( tabState, tabTrans, srcTrans->key );
+ addRegion( tabState, tabTrans, srcTrans->key,
+ srcTrans->value->noPreIgnore, srcTrans->value->noPostIgnore );
/* Go over one in the production. */
pdaOrderProd( rootEl, tabTrans->toState,
diff --git a/colm/pdagraph.cc b/colm/pdagraph.cc
index 191b7581..8f17b7a5 100644
--- a/colm/pdagraph.cc
+++ b/colm/pdagraph.cc
@@ -429,6 +429,11 @@ void PdaGraph::addInTrans( PdaTrans *destTrans, PdaTrans *srcTrans )
if ( srcTrans->toState->advanceReductions )
destTrans->toState->advanceReductions = true;
+
+ if ( srcTrans->noPreIgnore )
+ destTrans->noPreIgnore = true;
+ if ( srcTrans->noPostIgnore )
+ destTrans->noPostIgnore = true;
}
}
diff --git a/colm/pdagraph.h b/colm/pdagraph.h
index c021ddde..dc11b3e1 100644
--- a/colm/pdagraph.h
+++ b/colm/pdagraph.h
@@ -192,7 +192,9 @@ struct PdaTrans
toState(0),
isShift(false),
isShiftReduce(false),
- shiftPrior(0)
+ shiftPrior(0),
+ noPreIgnore(false),
+ noPostIgnore(false)
{ }
PdaTrans( const PdaTrans &other ) :
@@ -202,7 +204,9 @@ struct PdaTrans
isShiftReduce(other.isShiftReduce),
shiftPrior(other.shiftPrior),
reductions(other.reductions),
- commits(other.commits)
+ commits(other.commits),
+ noPreIgnore(false),
+ noPostIgnore(false)
{ }
long lowKey;
@@ -231,6 +235,9 @@ struct PdaTrans
LongSet commits;
LongSet afterShiftCommits;
+
+ bool noPreIgnore;
+ bool noPostIgnore;
};
/* In transition list. Like DList except only has head pointers, which is all
diff --git a/test/ignore3.exp b/test/ignore3.exp
index 3bba001a..e5d19d3b 100644
--- a/test/ignore3.exp
+++ b/test/ignore3.exp
@@ -2,7 +2,7 @@ item: .a .
item: .b .
item: .c .
item: .( d ) .
-innr: .( .d .) .
+innr: .(. d .) .
item: .e .
item: .f .
item: .g.
diff --git a/test/ignore3.lm b/test/ignore3.lm
index 3286e9e3..df3ba687 100644
--- a/test/ignore3.lm
+++ b/test/ignore3.lm
@@ -1,7 +1,7 @@
lex start
{
ignore /space+/
- literal '*', '(', ')', '!', ';'
+ literal '*', '(' ni, ni ')', '!', ';'
token id /[a-zA-Z_0-9]+/
}