diff options
-rw-r--r-- | colm/fsmcodegen.cpp | 12 | ||||
-rw-r--r-- | colm/fsmexec.cpp | 12 | ||||
-rw-r--r-- | colm/fsmrun.cpp | 52 | ||||
-rw-r--r-- | colm/fsmrun.h | 6 | ||||
-rw-r--r-- | colm/keyops.h | 4 | ||||
-rw-r--r-- | colm/lmparse.kh | 1 | ||||
-rw-r--r-- | colm/lmparse.kl | 15 | ||||
-rw-r--r-- | colm/parsedata.h | 11 | ||||
-rw-r--r-- | colm/parsetree.h | 13 | ||||
-rw-r--r-- | colm/pdabuild.cpp | 9 | ||||
-rw-r--r-- | colm/pdacodegen.cpp | 2 | ||||
-rw-r--r-- | colm/pdarun.h | 11 | ||||
-rw-r--r-- | colm/redbuild.cpp | 2 | ||||
-rw-r--r-- | colm/redfsm.h | 4 |
14 files changed, 87 insertions, 67 deletions
diff --git a/colm/fsmcodegen.cpp b/colm/fsmcodegen.cpp index 6d819970..fe5a6fb3 100644 --- a/colm/fsmcodegen.cpp +++ b/colm/fsmcodegen.cpp @@ -321,16 +321,8 @@ void FsmCodeGen::ACTION( ostream &ret, GenAction *action, int targState, bool in ret << "\t{"; INLINE_LIST( ret, action->inlineList, targState, inFinish ); - if ( action->objField ) { - ObjField *field = action->objField; - if ( action->markType == MarkEnter ) - ret << "mark_enter[" << field->offset << "] = " << P() << ";\n"; - else if ( action->markType == MarkLeave ) - ret << "mark_leave[" << field->offset << "] = " << P() << ";\n"; - } - - if ( action->markType == MarkMatchEnd ) - ret << "mark_match_end[" << action->matchEndNum << "] = " << P() << ";\n"; + if ( action->markId >= 0 ) + ret << "mark[" << action->markId << "] = " << P() << ";\n"; ret << "}\n"; diff --git a/colm/fsmexec.cpp b/colm/fsmexec.cpp index ea1af928..8f5d7600 100644 --- a/colm/fsmexec.cpp +++ b/colm/fsmexec.cpp @@ -87,16 +87,8 @@ void FsmRun::execAction( GenAction *genAction ) } } - if ( genAction->objField ) { - ObjField *field = genAction->objField; - if ( genAction->markType == MarkEnter ) - mark_enter[field->offset] = p; - else if ( genAction->markType == MarkLeave ) - mark_leave[field->offset] = p; - } - - if ( genAction->markType == MarkMatchEnd ) - mark_match_end[genAction->matchEndNum] = p; + if ( genAction->markType == MarkMark ) + mark[genAction->markId] = p; } void FsmRun::execute() diff --git a/colm/fsmrun.cpp b/colm/fsmrun.cpp index bfbc898d..0f20b8cc 100644 --- a/colm/fsmrun.cpp +++ b/colm/fsmrun.cpp @@ -511,15 +511,15 @@ Kid *FsmRun::makeToken( int id, Head *tokdata, bool namedLangEl, int bindId ) /* No children and ignores get added later. */ input->tree->child = attrs; - /* Set attributes for the labelled components. */ - for ( int i = 0; i < 32; i++ ) { - if ( mark_leave[i] != 0 ) { - Head *data = string_alloc_new( prg, - mark_enter[i], mark_leave[i] - mark_enter[i] ); - set_attr( input->tree, i, construct_string( prg, data ) ); - tree_upref( get_attr( input->tree, i ) ); - } - } +// /* Set attributes for the labelled components. */ +// for ( int i = 0; i < 32; i++ ) { +// if ( mark_leave[i] != 0 ) { +// Head *data = string_alloc_new( prg, +// mark_enter[i], mark_leave[i] - mark_enter[i] ); +// set_attr( input->tree, i, construct_string( prg, data ) ); +// tree_upref( get_attr( input->tree, i ) ); +// } +// } /* If the item is bound then store it in the bindings array. */ if ( bindId > 0 ) { @@ -621,8 +621,8 @@ void FsmRun::execGen( long id ) #endif LangElInfo *lelInfo = parser->tables->rtd->lelInfo; - if ( lelInfo[id].matchEnd >= 0 ) - p = mark_match_end[lelInfo[id].matchEnd]; + if ( lelInfo[id].markId >= 0 ) + p = mark[lelInfo[id].markId]; /* Make the token data. */ long length = p - tokstart; @@ -636,8 +636,7 @@ void FsmRun::execGen( long id ) generationAction( id, tokdata, false, 0 ); - memset( mark_leave, 0, sizeof(mark_leave) ); - memset( mark_match_end, 0, sizeof(mark_match_end) ); + memset( mark, 0, sizeof(mark) ); } void FsmRun::sendIgnore( long id ) @@ -649,8 +648,8 @@ void FsmRun::sendIgnore( long id ) #endif LangElInfo *lelInfo = parser->tables->rtd->lelInfo; - if ( lelInfo[id].matchEnd >= 0 ) - p = mark_match_end[lelInfo[id].matchEnd]; + if ( lelInfo[id].markId >= 0 ) + p = mark[lelInfo[id].markId]; /* Make the ignore string. */ int length = p - tokstart; @@ -670,8 +669,7 @@ void FsmRun::sendIgnore( long id ) region = parser->getNextRegion(); cs = tables->entryByRegion[region]; - memset( mark_leave, 0, sizeof(mark_leave) ); - memset( mark_match_end, 0, sizeof(mark_match_end) ); + memset( mark, 0, sizeof(mark) ); } void FsmRun::sendToken( long id ) @@ -683,8 +681,8 @@ void FsmRun::sendToken( long id ) #endif LangElInfo *lelInfo = parser->tables->rtd->lelInfo; - if ( lelInfo[id].matchEnd >= 0 ) - p = mark_match_end[lelInfo[id].matchEnd]; + if ( lelInfo[id].markId >= 0 ) + p = mark[lelInfo[id].markId]; /* Make the token data. */ long length = p - tokstart; @@ -698,8 +696,7 @@ void FsmRun::sendToken( long id ) Kid *input = makeToken( id, tokdata, false, 0 ); send_handle_error( this, parser, input ); - memset( mark_leave, 0, sizeof(mark_leave) ); - memset( mark_match_end, 0, sizeof(mark_match_end) ); + memset( mark, 0, sizeof(mark) ); } void FsmRun::emitToken( KlangEl *token ) @@ -850,8 +847,7 @@ long FsmRun::run( PdaRun *destParser ) tokend = 0; region = parser->getNextRegion(); cs = tables->entryByRegion[region]; - memset( mark_leave, 0, sizeof(mark_leave) ); - memset( mark_match_end, 0, sizeof(mark_match_end) ); + memset( mark, 0, sizeof(mark) ); /* Start with the EOF test. The pattern and replacement input sources can * be EOF from the start. */ @@ -998,8 +994,16 @@ long FsmRun::run( PdaRun *destParser ) runBuf->next->length = FSM_BUFSIZE - have; /* Compute tokstart and tokend. */ - tokend = runBuf->buf + (tokend - tokstart); + long dist = tokstart - runBuf->buf; + + tokend -= dist; tokstart = runBuf->buf; + + /* Shift any markers. */ + for ( int i = 0; i < MARK_SLOTS; i++ ) { + if ( mark[i] != 0 ) + mark[i] -= dist; + } } p = pe = runBuf->buf + have; peof = 0; diff --git a/colm/fsmrun.h b/colm/fsmrun.h index 07930b49..ed25c829 100644 --- a/colm/fsmrun.h +++ b/colm/fsmrun.h @@ -81,6 +81,8 @@ struct RunBuf RunBuf *next; }; +#define MARK_SLOTS 32 + struct FsmRun { FsmRun( Program *prg ); @@ -122,9 +124,7 @@ struct FsmRun bool eofSent; RunBuf *runBuf; bool gotoResume; - char *mark_enter[32]; - char *mark_leave[32]; - char *mark_match_end[32]; + char *mark[MARK_SLOTS]; }; void send_queued_tokens( FsmRun *fsmRun, PdaRun *parser ); diff --git a/colm/keyops.h b/colm/keyops.h index 791495ee..b5af65e7 100644 --- a/colm/keyops.h +++ b/colm/keyops.h @@ -28,9 +28,7 @@ enum MarkType { MarkNone, - MarkEnter, - MarkLeave, - MarkMatchEnd + MarkMark }; typedef unsigned long long Size; diff --git a/colm/lmparse.kh b/colm/lmparse.kh index b51d0e3d..2cc3e99a 100644 --- a/colm/lmparse.kh +++ b/colm/lmparse.kh @@ -103,6 +103,7 @@ struct Parser ProdElList *curProdElList; PredType predType; + ReCaptureVect reCaptureVect; }; %% write token_defs; diff --git a/colm/lmparse.kl b/colm/lmparse.kl index fa46c9fc..b796318e 100644 --- a/colm/lmparse.kl +++ b/colm/lmparse.kl @@ -734,6 +734,9 @@ token_def: region->tokenDefList.append( tokenDef ); tokEl->tokenDef = tokenDef; + tokenDef->reCaptureVect = reCaptureVect; + reCaptureVect.empty(); + /* Create the object def for the token. */ ObjectDef *objectDef = new ObjectDef( ObjectDef::UserType, name, pd->objFieldMap, new ObjMethodMap(), pd->nextObjectId++ ); @@ -841,6 +844,9 @@ rl_def: /* Generic creation of machine for instantiation and assignment. */ JoinOrLm *joinOrLm = new JoinOrLm( $4->join ); addRegularDef( $2->loc, namespaceStack.top(), $2->data, joinOrLm, false ); + + if ( reCaptureVect.length() > 0 ) + error($1->loc) << "rl definitions cannot capture vars" << endl; }; type class token_data @@ -1497,8 +1503,7 @@ opt_rl_join: rl_join opt_context if ( $2->context != 0 ) { /* Create the enter and leaving actions that will mark the substring. */ - Action *mark = new Action( MarkMatchEnd, 0 ); - mark->matchEndNum = pd->nextMatchEndNum++; + Action *mark = new Action( MarkMark, pd->nextMatchEndNum++ ); pd->actionList.append( mark ); $$->join->context = $2->context; @@ -1636,14 +1641,16 @@ factor_with_label: pd->objFieldMap->insert( $1->data, objField ); /* Create the enter and leaving actions that will mark the substring. */ - Action *enter = new Action( MarkEnter, objField ); - Action *leave = new Action( MarkLeave, objField ); + Action *enter = new Action( MarkMark, pd->nextMatchEndNum++ ); + Action *leave = new Action( MarkMark, pd->nextMatchEndNum++ ); pd->actionList.append( enter ); pd->actionList.append( leave ); /* Add entering and leaving actions. */ $$->factorWithAug->actions.append( ParserAction( $1->loc, at_start, 0, enter ) ); $$->factorWithAug->actions.append( ParserAction( $1->loc, at_leave, 0, leave ) ); + + reCaptureVect.append( ReCapture( objField, enter, leave ) ); }; nonterm factor_with_ep diff --git a/colm/parsedata.h b/colm/parsedata.h index 855855fd..93fe2de0 100644 --- a/colm/parsedata.h +++ b/colm/parsedata.h @@ -279,6 +279,7 @@ struct PdaLiteral long value; }; + /* Forwards. */ using std::ostream; @@ -299,7 +300,7 @@ public: name(name), markType(MarkNone), objField(0), - matchEndNum(0), + markId(-1), inlineList(inlineList), actionId(-1), numTransRefs(0), @@ -312,12 +313,12 @@ public: { } - Action( MarkType markType, ObjField *objField ) + Action( MarkType markType, long markId ) : name("mark"), markType(markType), - objField(objField), - matchEndNum(0), + objField(0), + markId(markId), inlineList(new InlineList), actionId(-1), numTransRefs(0), @@ -339,7 +340,7 @@ public: MarkType markType; ObjField *objField; - long matchEndNum; + long markId; InlineList *inlineList; int actionId; diff --git a/colm/parsetree.h b/colm/parsetree.h index 86324848..ece6ac5f 100644 --- a/colm/parsetree.h +++ b/colm/parsetree.h @@ -271,6 +271,18 @@ struct NamespaceQual Namespace *getQual( ParseData *pd ); }; +struct ReCapture +{ + ReCapture( ObjField *objField, Action *markEnter, Action *markLeave ) + : objField(objField), markEnter(markEnter), markLeave(markLeave) {} + + ObjField *objField; + Action *markEnter; + Action *markLeave; +}; + +typedef Vector<ReCapture> ReCaptureVect; + struct TokenDef { TokenDef( Join *join, KlangEl *token, InputLoc &semiLoc, @@ -295,6 +307,7 @@ struct TokenDef bool inLmSelect; Namespace *nspace; TokenRegion *tokenRegion; + ReCaptureVect reCaptureVect; TokenDef *prev, *next; }; diff --git a/colm/pdabuild.cpp b/colm/pdabuild.cpp index 99c20bad..07449330 100644 --- a/colm/pdabuild.cpp +++ b/colm/pdabuild.cpp @@ -1411,10 +1411,11 @@ void ParseData::makeRuntimeData() runtimeData->lelInfo[i].termDupId = lel->termDup == 0 ? 0 : lel->termDup->id; runtimeData->lelInfo[i].genericId = lel->generic == 0 ? 0 : lel->generic->id; - if ( lel->tokenDef != 0 && lel->tokenDef->join != 0 && lel->tokenDef->join->context != 0 ) - runtimeData->lelInfo[i].matchEnd = lel->tokenDef->join->mark->matchEndNum; + if ( lel->tokenDef != 0 && lel->tokenDef->join != 0 && + lel->tokenDef->join->context != 0 ) + runtimeData->lelInfo[i].markId = lel->tokenDef->join->mark->markId; else - runtimeData->lelInfo[i].matchEnd = -1; + runtimeData->lelInfo[i].markId = -1; } else { memset(&runtimeData->lelInfo[i], 0, sizeof(LangElInfo) ); @@ -1505,6 +1506,8 @@ void ParseData::makeRuntimeData() runtimeData->litlen[el->value] = el->key.length(); } + /* FIXME: Captured attributes go here. */ + runtimeData->fsmTables = fsmTables; runtimeData->pdaTables = pdaTables; diff --git a/colm/pdacodegen.cpp b/colm/pdacodegen.cpp index 62a8b8df..34d79061 100644 --- a/colm/pdacodegen.cpp +++ b/colm/pdacodegen.cpp @@ -193,7 +193,7 @@ void PdaCodeGen::writeRuntimeData( RuntimeData *runtimeData, PdaTables *pdaTable out << runtimeData->lelInfo[i].genericId << ", "; - out << runtimeData->lelInfo[i].matchEnd; + out << runtimeData->lelInfo[i].markId; out << " }"; diff --git a/colm/pdarun.h b/colm/pdarun.h index a52edfbc..528f7073 100644 --- a/colm/pdarun.h +++ b/colm/pdarun.h @@ -353,7 +353,7 @@ struct LangElInfo long termDupId; long genericId; - long matchEnd; + long markId; }; struct ObjFieldInfo @@ -387,6 +387,13 @@ struct RegionInfo long eofFrameId; }; +struct CaptureAttr +{ + long mark_enter; + long mark_leave; + long offset; +}; + struct RuntimeData { LangElInfo *lelInfo; @@ -421,6 +428,8 @@ struct RuntimeData Head **literals; long numLiterals; +// CaptureAttr *captureAttr; + FsmTables *fsmTables; PdaTables *pdaTables; int *startStates; diff --git a/colm/redbuild.cpp b/colm/redbuild.cpp index 2df68aea..47b8c60d 100644 --- a/colm/redbuild.cpp +++ b/colm/redbuild.cpp @@ -432,7 +432,7 @@ void RedFsmBuild::newAction( int anum, char *name, int line, int col, Action *ac redFsm->allActions[anum].inlineList = action->inlineList; redFsm->allActions[anum].objField = action->objField; redFsm->allActions[anum].markType = action->markType; - redFsm->allActions[anum].matchEndNum = action->matchEndNum; + redFsm->allActions[anum].markId = action->markId; } void RedFsmBuild::makeAction( Action *action ) diff --git a/colm/redfsm.h b/colm/redfsm.h index 4ddb0dbf..305b67f9 100644 --- a/colm/redfsm.h +++ b/colm/redfsm.h @@ -67,7 +67,7 @@ struct GenAction actionId(0), markType(MarkNone), objField(0), - matchEndNum(0), + markId(-1), numTransRefs(0), numToStateRefs(0), numFromStateRefs(0), @@ -82,7 +82,7 @@ struct GenAction int actionId; MarkType markType; ObjField *objField; - long matchEndNum; + long markId; string nameOrLoc(); |