diff options
author | Adrian Thurston <thurston@complang.org> | 2012-05-23 13:52:19 +0000 |
---|---|---|
committer | Adrian Thurston <thurston@complang.org> | 2012-05-23 13:52:19 +0000 |
commit | b628bd6037bea333bccadc0850707616a730275b (patch) | |
tree | 75795189a3707a4818471e626ce93a28941383b8 | |
parent | 77aac5d2c534e3206fd150b4e2b29688f6230ee1 (diff) | |
download | colm-b628bd6037bea333bccadc0850707616a730275b.tar.gz |
added a syntax for specifying no ignores
Added the keyword 'ni', which can go before or after a token pattern
(in a literal or in an ordinary token definition) and means no-ignore. A
leading 'ni' sets the noPreIgnore bit and a trailing 'ni' sets the
noPostIgnore bit in the token; these bits affect ignore scanning and attaching.
-rw-r--r-- | colm.vim | 2 | ||||
-rw-r--r-- | colm/declare.cc | 13 | ||||
-rw-r--r-- | colm/lmparse.kh | 2 | ||||
-rw-r--r-- | colm/lmparse.kl | 56 | ||||
-rw-r--r-- | colm/lmscan.rl | 1 | ||||
-rw-r--r-- | colm/parsedata.h | 14 | ||||
-rw-r--r-- | colm/parsetree.h | 6 | ||||
-rw-r--r-- | colm/pdabuild.cc | 18 | ||||
-rw-r--r-- | colm/pdagraph.cc | 5 | ||||
-rw-r--r-- | colm/pdagraph.h | 11 | ||||
-rw-r--r-- | test/ignore3.exp | 2 | ||||
-rw-r--r-- | test/ignore3.lm | 2 |
12 files changed, 83 insertions, 49 deletions
@@ -64,7 +64,7 @@ syntax keyword typeKeywords syntax keyword Keyword \ reject else elsif return yield for while if \ typeid in break - \ new deref + \ new deref ni syntax match tokenName "[a-zA-Z_][a-zA-Z_0-9]*" contained syntax match varCapture "[a-zA-Z_][a-zA-Z_0-9]*:" diff --git a/colm/declare.cc b/colm/declare.cc index bcd9a16c..28221c63 100644 --- a/colm/declare.cc +++ b/colm/declare.cc @@ -243,10 +243,10 @@ void Namespace::declare( ParseData *pd ) l->value->tdLangEl = newLangEl; - if ( l->value->preNoIgnore ) - newLangEl->preNoIgnore = true; - if ( l->value->postNoIgnore ) - newLangEl->postNoIgnore = true; + if ( l->value->noPreIgnore ) + newLangEl->noPreIgnore = true; + if ( l->value->noPostIgnore ) + newLangEl->noPostIgnore = true; } } @@ -300,6 +300,11 @@ void Namespace::declare( ParseData *pd ) tokEl->contextIn = t->contextIn; tokEl->tokenDef = t; + if ( t->noPreIgnore ) + tokEl->noPreIgnore = true; + if ( t->noPostIgnore ) + tokEl->noPostIgnore = true; + t->tdLangEl = tokEl; } } diff --git a/colm/lmparse.kh b/colm/lmparse.kh index 529dfcc6..469e7ec3 100644 --- a/colm/lmparse.kh +++ b/colm/lmparse.kh @@ -58,7 +58,7 @@ struct ColmParser # Patterns. 
token KW_Match, KW_Construct, KW_Parse, KW_ParseStop, KW_New, KW_MakeToken, - KW_MakeTree, KW_TypeId, KW_Alias, KW_Send; + KW_MakeTree, KW_TypeId, KW_Alias, KW_Send, KW_Ni; token KW_Include, KW_Preeof; diff --git a/colm/lmparse.kl b/colm/lmparse.kl index 0d3e814a..17e3da81 100644 --- a/colm/lmparse.kl +++ b/colm/lmparse.kl @@ -838,6 +838,15 @@ prod_el_list: prod_el_list: final { curProdElList = new ProdElList; }; +nonterm opt_no_pre_ignore { bool value; }; +nonterm opt_no_post_ignore { bool value; }; + +opt_no_pre_ignore: KW_Ni final { $$->value = true; }; +opt_no_pre_ignore: final { $$->value = false; }; + +opt_no_post_ignore: KW_Ni final { $$->value = true; }; +opt_no_post_ignore: final { $$->value = false; }; + nonterm prod_el { ProdEl *factor; @@ -922,7 +931,7 @@ literal_def: KW_Literal literal_list; literal_list: literal_list ',' literal_item; literal_list: literal_item; -literal_item: TK_Literal +literal_item: opt_no_pre_ignore TK_Literal opt_no_post_ignore final { /* Create a name for the literal. */ String name( 32, "_literal_%.4x", pd->nextTokenId ); @@ -946,7 +955,7 @@ literal_item: TK_Literal addRegularDef( InputLoc(), namespaceStack.top(), scannerNameTok, joinOrLmTok, true ); /* Make a new token region just for the token. */ - TokenRegion *tokenRegion = new TokenRegion( InputLoc(), $1->data, + TokenRegion *tokenRegion = new TokenRegion( InputLoc(), $2->data, pd->regionList.length(), regionStack.top() ); regionStack.top()->childRegions.append( tokenRegion ); pd->regionList.append( tokenRegion ); @@ -963,7 +972,7 @@ literal_item: TK_Literal bool unusedCI; String interp; - prepareLitString( interp, unusedCI, $1->data, $1->loc ); + prepareLitString( interp, unusedCI, $2->data, $2->loc ); /* Look for the production's associated region. 
*/ Namespace *nspace = namespaceStack.top(); @@ -971,31 +980,26 @@ literal_item: TK_Literal LiteralDictEl *ldel = nspace->literalDict.find( interp ); if ( ldel != 0 ) - error( $1->loc ) << "literal already defined in this namespace" << endp; + error( $2->loc ) << "literal already defined in this namespace" << endp; else { Join *join = new Join( new Expression( new Term( new FactorWithAug( - new FactorWithRep( $1->loc, new FactorWithNeg( $1->loc, new Factor( - new Literal( $1->loc, $1->data, + new FactorWithRep( $2->loc, new FactorWithNeg( $2->loc, new Factor( + new Literal( $2->loc, $2->data, Literal::LitString ) ) ) ) ) ) ) ); - TokenDef *tokenDef = new TokenDef( name, $1->data, true, false, join, - 0, $1->loc, pd->nextTokenId++, nspace, region, 0, 0, 0 ); + TokenDef *tokenDef = new TokenDef( name, $2->data, true, false, join, + 0, $2->loc, pd->nextTokenId++, nspace, region, 0, 0, 0 ); region->tokenDefList.append( tokenDef ); ldel = nspace->literalDict.insert( interp, tokenDef ); nspace->tokenDefList.append( tokenDef ); -// if ( strcmp( interp, "%%" ) == 0 ) { -// tokenDef->preNoIgnore = true; -// } -// if ( strcmp( interp, ")" ) == 0 ) { -// tokenDef->preNoIgnore = true; -// } -// if ( strcmp( interp, "(" ) == 0 ) { -// tokenDef->postNoIgnore = true; -// } - - TokenDef *tokenDefTok = new TokenDef( name + "_tok", $1->data, true, false, join, - 0, $1->loc, pd->nextTokenId++, nspace, region->tokenOnlyRegion, 0, 0, 0 ); + if ( $1->value ) + tokenDef->noPreIgnore = true; + if ( $3->value ) + tokenDef->noPostIgnore = true; + + TokenDef *tokenDefTok = new TokenDef( name + "_tok", $2->data, true, false, join, + 0, $2->loc, pd->nextTokenId++, nspace, region->tokenOnlyRegion, 0, 0, 0 ); tokenDefTok->dupOf = tokenDef; region->tokenOnlyRegion->tokenDefList.append( tokenDefTok ); ldel = nspace->literalDict.insert( "|" + interp + "_tok", tokenDefTok ); @@ -1028,12 +1032,13 @@ leave_rl: token_def: token_or_ignore token_def_name obj_var_list - enter_rl '/' opt_rl_join leave_rl 
'/' opt_translate + enter_rl opt_no_pre_ignore '/' opt_rl_join leave_rl '/' opt_no_post_ignore + opt_translate final { bool ignore = $1->ignore; String name = $2->name; - Join *join = $6->join; - CodeBlock *transBlock = $9->transBlock; + Join *join = $7->join; + CodeBlock *transBlock = $11->transBlock; /* Check the region if this is for an ignore. */ if ( ignore && !pd->insideRegion ) @@ -1058,6 +1063,11 @@ token_def: region->tokenDefList.append( tokenDef ); nspace->tokenDefList.append( tokenDef ); + if ( $5->value ) + tokenDef->noPreIgnore = true; + if ( $10->value ) {cerr << "foo" << endl; + tokenDef->noPostIgnore = true;} + /* All again for the ignore. */ if ( ignore ) { TokenDef *tokenDefIgn = new TokenDef( name + "_ign", String(), false, ignore, join, diff --git a/colm/lmscan.rl b/colm/lmscan.rl index cfb5530d..dbe79ca0 100644 --- a/colm/lmscan.rl +++ b/colm/lmscan.rl @@ -476,6 +476,7 @@ void Scanner::endSection( ) 'context' => {token( KW_Context ); }; 'alias' => {token( KW_Alias ); }; 'send' => {token( KW_Send ); }; + 'ni' => {token( KW_Ni ); }; # Identifiers. 
ident => { token( TK_Word, ts, te ); } ; diff --git a/colm/parsedata.h b/colm/parsedata.h index a0705002..3513d274 100644 --- a/colm/parsedata.h +++ b/colm/parsedata.h @@ -269,8 +269,8 @@ struct LangEl : public DListEl<LangEl> Context *contextDef; Context *contextIn; - bool preNoIgnore; - bool postNoIgnore; + bool noPreIgnore; + bool noPostIgnore; }; struct ProdEl @@ -290,7 +290,8 @@ struct ProdEl langEl(0), priorVal(priorVal), type(type), - objField(0) {} + objField(0) + {} ProdEl( const InputLoc &loc, TypeRef *typeRef ) : @@ -299,7 +300,9 @@ struct ProdEl typeRef(typeRef), langEl(0), priorVal(0), - type(ReferenceType), objField(0) {} + type(ReferenceType), + objField(0) + {} ObjField *captureField; bool commit; @@ -733,7 +736,8 @@ struct ParseData int findIndexOff( PdaTables *pdaTables, PdaGraph *pdaGraph, PdaState *state, int &currLen ); void trySetTime( PdaTrans *trans, long code, long &time ); - void addRegion( PdaState *tabState, PdaTrans *pdaTrans, long pdaKey ); + void addRegion( PdaState *tabState, PdaTrans *pdaTrans, long pdaKey, + bool noPreIgnore, bool noPostIgnore ); PdaState *followProd( PdaState *tabState, PdaState *prodState ); void findFollow( AlphSet &result, PdaState *overTab, PdaState *overSrc, Definition *parentDef ); diff --git a/colm/parsetree.h b/colm/parsetree.h index 2d39b4ee..fa0e5ab7 100644 --- a/colm/parsetree.h +++ b/colm/parsetree.h @@ -398,7 +398,7 @@ struct TokenDef longestMatchId(longestMatchId), inLmSelect(false), nspace(nspace), tokenRegion(tokenRegion), objectDef(objectDef), contextIn(contextIn), - dupOf(0), postNoIgnore(false), preNoIgnore(false) + dupOf(0), noPostIgnore(false), noPreIgnore(false) { if ( pReCaptureVect != 0 ) reCaptureVect = *pReCaptureVect; @@ -429,8 +429,8 @@ struct TokenDef Context *contextIn; TokenDef *dupOf; - bool postNoIgnore; - bool preNoIgnore; + bool noPostIgnore; + bool noPreIgnore; }; struct LelDefList; diff --git a/colm/pdabuild.cc b/colm/pdabuild.cc index aba017f7..eadbcb06 100644 --- 
a/colm/pdabuild.cc +++ b/colm/pdabuild.cc @@ -90,8 +90,8 @@ LangEl::LangEl( Namespace *nspace, const String &name, Type type ) predValue(0), contextDef(0), contextIn(0), - preNoIgnore(false), - postNoIgnore(false) + noPreIgnore(false), + noPostIgnore(false) { } @@ -455,7 +455,8 @@ void ParseData::pdaOrderFollow( LangEl *rootEl, PdaState *tabState, trySetTime( tt->value, redCode, time ); /* If the items token region is not recorded in the state, do it now. */ - addRegion( expandToState, tt->value, tt->key ); + addRegion( expandToState, tt->value, tt->key, + tt->value->noPreIgnore, tt->value->noPostIgnore ); } } } @@ -469,7 +470,8 @@ bool regionVectHas( RegionVect ®Vect, TokenRegion *region ) return false; } -void ParseData::addRegion( PdaState *tabState, PdaTrans *tabTrans, long pdaKey ) +void ParseData::addRegion( PdaState *tabState, PdaTrans *tabTrans, + long pdaKey, bool noPreIgnore, bool noPostIgnore ) { LangEl *klangEl = langElIndex[pdaKey]; if ( klangEl != 0 && klangEl->type == LangEl::Term ) { @@ -484,7 +486,7 @@ void ParseData::addRegion( PdaState *tabState, PdaTrans *tabTrans, long pdaKey ) /* region. */ TokenRegion *scanRegion = region; - if ( klangEl->preNoIgnore ) + if ( klangEl->noPreIgnore ) scanRegion = region->tokenOnlyRegion; if ( !regionVectHas( tabState->regions, scanRegion ) ) { @@ -493,13 +495,12 @@ void ParseData::addRegion( PdaState *tabState, PdaTrans *tabTrans, long pdaKey ) /* Pre-region of to state */ PdaState *toState = tabTrans->toState; - if ( !klangEl->postNoIgnore && + if ( !klangEl->noPostIgnore && region->ignoreOnlyRegion != 0 && !regionVectHas( toState->preRegions, region->ignoreOnlyRegion ) ) { toState->preRegions.append( region->ignoreOnlyRegion ); } - } } } @@ -589,7 +590,8 @@ void ParseData::pdaOrderProd( LangEl *rootEl, PdaState *tabState, } /* If the items token region is not recorded in the state, do it now. 
*/ - addRegion( tabState, tabTrans, srcTrans->key ); + addRegion( tabState, tabTrans, srcTrans->key, + srcTrans->value->noPreIgnore, srcTrans->value->noPostIgnore ); /* Go over one in the production. */ pdaOrderProd( rootEl, tabTrans->toState, diff --git a/colm/pdagraph.cc b/colm/pdagraph.cc index 191b7581..8f17b7a5 100644 --- a/colm/pdagraph.cc +++ b/colm/pdagraph.cc @@ -429,6 +429,11 @@ void PdaGraph::addInTrans( PdaTrans *destTrans, PdaTrans *srcTrans ) if ( srcTrans->toState->advanceReductions ) destTrans->toState->advanceReductions = true; + + if ( srcTrans->noPreIgnore ) + destTrans->noPreIgnore = true; + if ( srcTrans->noPostIgnore ) + destTrans->noPostIgnore = true; } } diff --git a/colm/pdagraph.h b/colm/pdagraph.h index c021ddde..dc11b3e1 100644 --- a/colm/pdagraph.h +++ b/colm/pdagraph.h @@ -192,7 +192,9 @@ struct PdaTrans toState(0), isShift(false), isShiftReduce(false), - shiftPrior(0) + shiftPrior(0), + noPreIgnore(false), + noPostIgnore(false) { } PdaTrans( const PdaTrans &other ) : @@ -202,7 +204,9 @@ struct PdaTrans isShiftReduce(other.isShiftReduce), shiftPrior(other.shiftPrior), reductions(other.reductions), - commits(other.commits) + commits(other.commits), + noPreIgnore(false), + noPostIgnore(false) { } long lowKey; @@ -231,6 +235,9 @@ struct PdaTrans LongSet commits; LongSet afterShiftCommits; + + bool noPreIgnore; + bool noPostIgnore; }; /* In transition list. Like DList except only has head pointers, which is all diff --git a/test/ignore3.exp b/test/ignore3.exp index 3bba001a..e5d19d3b 100644 --- a/test/ignore3.exp +++ b/test/ignore3.exp @@ -2,7 +2,7 @@ item: .a . item: .b . item: .c . item: .( d ) . -innr: .( .d .) . +innr: .(. d .) . item: .e . item: .f . item: .g. diff --git a/test/ignore3.lm b/test/ignore3.lm index 3286e9e3..df3ba687 100644 --- a/test/ignore3.lm +++ b/test/ignore3.lm @@ -1,7 +1,7 @@ lex start { ignore /space+/ - literal '*', '(', ')', '!', ';' + literal '*', '(' ni, ni ')', '!', ';' token id /[a-zA-Z_0-9]+/ } |