summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--colm/fsmcodegen.cpp12
-rw-r--r--colm/fsmexec.cpp12
-rw-r--r--colm/fsmrun.cpp52
-rw-r--r--colm/fsmrun.h6
-rw-r--r--colm/keyops.h4
-rw-r--r--colm/lmparse.kh1
-rw-r--r--colm/lmparse.kl15
-rw-r--r--colm/parsedata.h11
-rw-r--r--colm/parsetree.h13
-rw-r--r--colm/pdabuild.cpp9
-rw-r--r--colm/pdacodegen.cpp2
-rw-r--r--colm/pdarun.h11
-rw-r--r--colm/redbuild.cpp2
-rw-r--r--colm/redfsm.h4
14 files changed, 87 insertions, 67 deletions
diff --git a/colm/fsmcodegen.cpp b/colm/fsmcodegen.cpp
index 6d819970..fe5a6fb3 100644
--- a/colm/fsmcodegen.cpp
+++ b/colm/fsmcodegen.cpp
@@ -321,16 +321,8 @@ void FsmCodeGen::ACTION( ostream &ret, GenAction *action, int targState, bool in
ret << "\t{";
INLINE_LIST( ret, action->inlineList, targState, inFinish );
- if ( action->objField ) {
- ObjField *field = action->objField;
- if ( action->markType == MarkEnter )
- ret << "mark_enter[" << field->offset << "] = " << P() << ";\n";
- else if ( action->markType == MarkLeave )
- ret << "mark_leave[" << field->offset << "] = " << P() << ";\n";
- }
-
- if ( action->markType == MarkMatchEnd )
- ret << "mark_match_end[" << action->matchEndNum << "] = " << P() << ";\n";
+ if ( action->markId >= 0 )
+ ret << "mark[" << action->markId << "] = " << P() << ";\n";
ret << "}\n";
diff --git a/colm/fsmexec.cpp b/colm/fsmexec.cpp
index ea1af928..8f5d7600 100644
--- a/colm/fsmexec.cpp
+++ b/colm/fsmexec.cpp
@@ -87,16 +87,8 @@ void FsmRun::execAction( GenAction *genAction )
}
}
- if ( genAction->objField ) {
- ObjField *field = genAction->objField;
- if ( genAction->markType == MarkEnter )
- mark_enter[field->offset] = p;
- else if ( genAction->markType == MarkLeave )
- mark_leave[field->offset] = p;
- }
-
- if ( genAction->markType == MarkMatchEnd )
- mark_match_end[genAction->matchEndNum] = p;
+ if ( genAction->markType == MarkMark )
+ mark[genAction->markId] = p;
}
void FsmRun::execute()
diff --git a/colm/fsmrun.cpp b/colm/fsmrun.cpp
index bfbc898d..0f20b8cc 100644
--- a/colm/fsmrun.cpp
+++ b/colm/fsmrun.cpp
@@ -511,15 +511,15 @@ Kid *FsmRun::makeToken( int id, Head *tokdata, bool namedLangEl, int bindId )
/* No children and ignores get added later. */
input->tree->child = attrs;
- /* Set attributes for the labelled components. */
- for ( int i = 0; i < 32; i++ ) {
- if ( mark_leave[i] != 0 ) {
- Head *data = string_alloc_new( prg,
- mark_enter[i], mark_leave[i] - mark_enter[i] );
- set_attr( input->tree, i, construct_string( prg, data ) );
- tree_upref( get_attr( input->tree, i ) );
- }
- }
+// /* Set attributes for the labelled components. */
+// for ( int i = 0; i < 32; i++ ) {
+// if ( mark_leave[i] != 0 ) {
+// Head *data = string_alloc_new( prg,
+// mark_enter[i], mark_leave[i] - mark_enter[i] );
+// set_attr( input->tree, i, construct_string( prg, data ) );
+// tree_upref( get_attr( input->tree, i ) );
+// }
+// }
/* If the item is bound then store it in the bindings array. */
if ( bindId > 0 ) {
@@ -621,8 +621,8 @@ void FsmRun::execGen( long id )
#endif
LangElInfo *lelInfo = parser->tables->rtd->lelInfo;
- if ( lelInfo[id].matchEnd >= 0 )
- p = mark_match_end[lelInfo[id].matchEnd];
+ if ( lelInfo[id].markId >= 0 )
+ p = mark[lelInfo[id].markId];
/* Make the token data. */
long length = p - tokstart;
@@ -636,8 +636,7 @@ void FsmRun::execGen( long id )
generationAction( id, tokdata, false, 0 );
- memset( mark_leave, 0, sizeof(mark_leave) );
- memset( mark_match_end, 0, sizeof(mark_match_end) );
+ memset( mark, 0, sizeof(mark) );
}
void FsmRun::sendIgnore( long id )
@@ -649,8 +648,8 @@ void FsmRun::sendIgnore( long id )
#endif
LangElInfo *lelInfo = parser->tables->rtd->lelInfo;
- if ( lelInfo[id].matchEnd >= 0 )
- p = mark_match_end[lelInfo[id].matchEnd];
+ if ( lelInfo[id].markId >= 0 )
+ p = mark[lelInfo[id].markId];
/* Make the ignore string. */
int length = p - tokstart;
@@ -670,8 +669,7 @@ void FsmRun::sendIgnore( long id )
region = parser->getNextRegion();
cs = tables->entryByRegion[region];
- memset( mark_leave, 0, sizeof(mark_leave) );
- memset( mark_match_end, 0, sizeof(mark_match_end) );
+ memset( mark, 0, sizeof(mark) );
}
void FsmRun::sendToken( long id )
@@ -683,8 +681,8 @@ void FsmRun::sendToken( long id )
#endif
LangElInfo *lelInfo = parser->tables->rtd->lelInfo;
- if ( lelInfo[id].matchEnd >= 0 )
- p = mark_match_end[lelInfo[id].matchEnd];
+ if ( lelInfo[id].markId >= 0 )
+ p = mark[lelInfo[id].markId];
/* Make the token data. */
long length = p - tokstart;
@@ -698,8 +696,7 @@ void FsmRun::sendToken( long id )
Kid *input = makeToken( id, tokdata, false, 0 );
send_handle_error( this, parser, input );
- memset( mark_leave, 0, sizeof(mark_leave) );
- memset( mark_match_end, 0, sizeof(mark_match_end) );
+ memset( mark, 0, sizeof(mark) );
}
void FsmRun::emitToken( KlangEl *token )
@@ -850,8 +847,7 @@ long FsmRun::run( PdaRun *destParser )
tokend = 0;
region = parser->getNextRegion();
cs = tables->entryByRegion[region];
- memset( mark_leave, 0, sizeof(mark_leave) );
- memset( mark_match_end, 0, sizeof(mark_match_end) );
+ memset( mark, 0, sizeof(mark) );
/* Start with the EOF test. The pattern and replacement input sources can
* be EOF from the start. */
@@ -998,8 +994,16 @@ long FsmRun::run( PdaRun *destParser )
runBuf->next->length = FSM_BUFSIZE - have;
/* Compute tokstart and tokend. */
- tokend = runBuf->buf + (tokend - tokstart);
+ long dist = tokstart - runBuf->buf;
+
+ tokend -= dist;
tokstart = runBuf->buf;
+
+ /* Shift any markers. */
+ for ( int i = 0; i < MARK_SLOTS; i++ ) {
+ if ( mark[i] != 0 )
+ mark[i] -= dist;
+ }
}
p = pe = runBuf->buf + have;
peof = 0;
diff --git a/colm/fsmrun.h b/colm/fsmrun.h
index 07930b49..ed25c829 100644
--- a/colm/fsmrun.h
+++ b/colm/fsmrun.h
@@ -81,6 +81,8 @@ struct RunBuf
RunBuf *next;
};
+#define MARK_SLOTS 32 /* Number of entries in the FsmRun::mark array of saved scan positions. */
+
struct FsmRun
{
FsmRun( Program *prg );
@@ -122,9 +124,7 @@ struct FsmRun
bool eofSent;
RunBuf *runBuf;
bool gotoResume;
- char *mark_enter[32];
- char *mark_leave[32];
- char *mark_match_end[32];
+ char *mark[MARK_SLOTS];
};
void send_queued_tokens( FsmRun *fsmRun, PdaRun *parser );
diff --git a/colm/keyops.h b/colm/keyops.h
index 791495ee..b5af65e7 100644
--- a/colm/keyops.h
+++ b/colm/keyops.h
@@ -28,9 +28,7 @@
enum MarkType /* Tells whether an action also records the current scan position. */
{
MarkNone, /* Default for ordinary actions; they carry no mark slot. */
- MarkEnter,
- MarkLeave,
- MarkMatchEnd
+ MarkMark /* The action saves the scan position into the mark slot named by its markId. */
};
typedef unsigned long long Size;
diff --git a/colm/lmparse.kh b/colm/lmparse.kh
index b51d0e3d..2cc3e99a 100644
--- a/colm/lmparse.kh
+++ b/colm/lmparse.kh
@@ -103,6 +103,7 @@ struct Parser
ProdElList *curProdElList;
PredType predType;
+ ReCaptureVect reCaptureVect;
};
%% write token_defs;
diff --git a/colm/lmparse.kl b/colm/lmparse.kl
index fa46c9fc..b796318e 100644
--- a/colm/lmparse.kl
+++ b/colm/lmparse.kl
@@ -734,6 +734,9 @@ token_def:
region->tokenDefList.append( tokenDef );
tokEl->tokenDef = tokenDef;
+ tokenDef->reCaptureVect = reCaptureVect;
+ reCaptureVect.empty();
+
/* Create the object def for the token. */
ObjectDef *objectDef = new ObjectDef( ObjectDef::UserType, name,
pd->objFieldMap, new ObjMethodMap(), pd->nextObjectId++ );
@@ -841,6 +844,9 @@ rl_def:
/* Generic creation of machine for instantiation and assignment. */
JoinOrLm *joinOrLm = new JoinOrLm( $4->join );
addRegularDef( $2->loc, namespaceStack.top(), $2->data, joinOrLm, false );
+
+ if ( reCaptureVect.length() > 0 )
+ error($1->loc) << "rl definitions cannot capture vars" << endl;
};
type class token_data
@@ -1497,8 +1503,7 @@ opt_rl_join: rl_join opt_context
if ( $2->context != 0 ) {
/* Create the action that marks where the match ends, ahead of the trailing context. */
- Action *mark = new Action( MarkMatchEnd, 0 );
- mark->matchEndNum = pd->nextMatchEndNum++;
+ Action *mark = new Action( MarkMark, pd->nextMatchEndNum++ );
pd->actionList.append( mark );
$$->join->context = $2->context;
@@ -1636,14 +1641,16 @@ factor_with_label:
pd->objFieldMap->insert( $1->data, objField );
/* Create the enter and leaving actions that will mark the substring. */
- Action *enter = new Action( MarkEnter, objField );
- Action *leave = new Action( MarkLeave, objField );
+ Action *enter = new Action( MarkMark, pd->nextMatchEndNum++ );
+ Action *leave = new Action( MarkMark, pd->nextMatchEndNum++ );
pd->actionList.append( enter );
pd->actionList.append( leave );
/* Add entering and leaving actions. */
$$->factorWithAug->actions.append( ParserAction( $1->loc, at_start, 0, enter ) );
$$->factorWithAug->actions.append( ParserAction( $1->loc, at_leave, 0, leave ) );
+
+ reCaptureVect.append( ReCapture( objField, enter, leave ) );
};
nonterm factor_with_ep
diff --git a/colm/parsedata.h b/colm/parsedata.h
index 855855fd..93fe2de0 100644
--- a/colm/parsedata.h
+++ b/colm/parsedata.h
@@ -279,6 +279,7 @@ struct PdaLiteral
long value;
};
+
/* Forwards. */
using std::ostream;
@@ -299,7 +300,7 @@ public:
name(name),
markType(MarkNone),
objField(0),
- matchEndNum(0),
+ markId(-1),
inlineList(inlineList),
actionId(-1),
numTransRefs(0),
@@ -312,12 +313,12 @@ public:
{
}
- Action( MarkType markType, ObjField *objField )
+ Action( MarkType markType, long markId )
:
name("mark"),
markType(markType),
- objField(objField),
- matchEndNum(0),
+ objField(0),
+ markId(markId),
inlineList(new InlineList),
actionId(-1),
numTransRefs(0),
@@ -339,7 +340,7 @@ public:
MarkType markType;
ObjField *objField;
- long matchEndNum;
+ long markId;
InlineList *inlineList;
int actionId;
diff --git a/colm/parsetree.h b/colm/parsetree.h
index 86324848..ece6ac5f 100644
--- a/colm/parsetree.h
+++ b/colm/parsetree.h
@@ -271,6 +271,18 @@ struct NamespaceQual
Namespace *getQual( ParseData *pd );
};
+struct ReCapture /* One labelled capture: the object field plus the two mark actions that bracket its text. */
+{
+ ReCapture( ObjField *objField, Action *markEnter, Action *markLeave )
+ : objField(objField), markEnter(markEnter), markLeave(markLeave) {}
+
+ ObjField *objField; /* Object field created for the label. */
+ Action *markEnter; /* Mark action attached at the start of the labelled factor. */
+ Action *markLeave; /* Mark action attached on leaving the labelled factor. */
+};
+
+typedef Vector<ReCapture> ReCaptureVect; /* Captures gathered by the parser and copied into a TokenDef. */
+
struct TokenDef
{
TokenDef( Join *join, KlangEl *token, InputLoc &semiLoc,
@@ -295,6 +307,7 @@ struct TokenDef
bool inLmSelect;
Namespace *nspace;
TokenRegion *tokenRegion;
+ ReCaptureVect reCaptureVect;
TokenDef *prev, *next;
};
diff --git a/colm/pdabuild.cpp b/colm/pdabuild.cpp
index 99c20bad..07449330 100644
--- a/colm/pdabuild.cpp
+++ b/colm/pdabuild.cpp
@@ -1411,10 +1411,11 @@ void ParseData::makeRuntimeData()
runtimeData->lelInfo[i].termDupId = lel->termDup == 0 ? 0 : lel->termDup->id;
runtimeData->lelInfo[i].genericId = lel->generic == 0 ? 0 : lel->generic->id;
- if ( lel->tokenDef != 0 && lel->tokenDef->join != 0 && lel->tokenDef->join->context != 0 )
- runtimeData->lelInfo[i].matchEnd = lel->tokenDef->join->mark->matchEndNum;
+ if ( lel->tokenDef != 0 && lel->tokenDef->join != 0 &&
+ lel->tokenDef->join->context != 0 )
+ runtimeData->lelInfo[i].markId = lel->tokenDef->join->mark->markId;
else
- runtimeData->lelInfo[i].matchEnd = -1;
+ runtimeData->lelInfo[i].markId = -1;
}
else {
memset(&runtimeData->lelInfo[i], 0, sizeof(LangElInfo) );
@@ -1505,6 +1506,8 @@ void ParseData::makeRuntimeData()
runtimeData->litlen[el->value] = el->key.length();
}
+ /* FIXME: Captured attributes go here. */
+
runtimeData->fsmTables = fsmTables;
runtimeData->pdaTables = pdaTables;
diff --git a/colm/pdacodegen.cpp b/colm/pdacodegen.cpp
index 62a8b8df..34d79061 100644
--- a/colm/pdacodegen.cpp
+++ b/colm/pdacodegen.cpp
@@ -193,7 +193,7 @@ void PdaCodeGen::writeRuntimeData( RuntimeData *runtimeData, PdaTables *pdaTable
out << runtimeData->lelInfo[i].genericId << ", ";
- out << runtimeData->lelInfo[i].matchEnd;
+ out << runtimeData->lelInfo[i].markId;
out << " }";
diff --git a/colm/pdarun.h b/colm/pdarun.h
index a52edfbc..528f7073 100644
--- a/colm/pdarun.h
+++ b/colm/pdarun.h
@@ -353,7 +353,7 @@ struct LangElInfo
long termDupId;
long genericId;
- long matchEnd;
+ long markId;
};
struct ObjFieldInfo
@@ -387,6 +387,13 @@ struct RegionInfo
long eofFrameId;
};
+struct CaptureAttr /* NOTE(review): not referenced by any visible code yet; looks like the runtime record for a captured attribute -- confirm. */
+{
+ long mark_enter; /* presumably the mark slot written when the capture is entered -- TODO confirm */
+ long mark_leave; /* presumably the mark slot written when the capture is left -- TODO confirm */
+ long offset; /* presumably the attribute offset in the token's tree -- TODO confirm */
+};
+
struct RuntimeData
{
LangElInfo *lelInfo;
@@ -421,6 +428,8 @@ struct RuntimeData
Head **literals;
long numLiterals;
+// CaptureAttr *captureAttr;
+
FsmTables *fsmTables;
PdaTables *pdaTables;
int *startStates;
diff --git a/colm/redbuild.cpp b/colm/redbuild.cpp
index 2df68aea..47b8c60d 100644
--- a/colm/redbuild.cpp
+++ b/colm/redbuild.cpp
@@ -432,7 +432,7 @@ void RedFsmBuild::newAction( int anum, char *name, int line, int col, Action *ac
redFsm->allActions[anum].inlineList = action->inlineList;
redFsm->allActions[anum].objField = action->objField;
redFsm->allActions[anum].markType = action->markType;
- redFsm->allActions[anum].matchEndNum = action->matchEndNum;
+ redFsm->allActions[anum].markId = action->markId;
}
void RedFsmBuild::makeAction( Action *action )
diff --git a/colm/redfsm.h b/colm/redfsm.h
index 4ddb0dbf..305b67f9 100644
--- a/colm/redfsm.h
+++ b/colm/redfsm.h
@@ -67,7 +67,7 @@ struct GenAction
actionId(0),
markType(MarkNone),
objField(0),
- matchEndNum(0),
+ markId(-1),
numTransRefs(0),
numToStateRefs(0),
numFromStateRefs(0),
@@ -82,7 +82,7 @@ struct GenAction
int actionId;
MarkType markType;
ObjField *objField;
- long matchEndNum;
+ long markId;
string nameOrLoc();