summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAdrian Thurston <thurston@complang.org>2009-02-27 03:42:08 +0000
committerAdrian Thurston <thurston@complang.org>2009-02-27 03:42:08 +0000
commit2c21e2cd0340f2a777530da692bc0669bc8dc1bd (patch)
tree4fde6cf88d1a7d460b120b069ebd2b09f1ef8777
parent1b9131c53d32c10e2f69186f7795a5d764f2eab2 (diff)
downloadcolm-2c21e2cd0340f2a777530da692bc0669bc8dc1bd.tar.gz
Some work on unifying the marking facilities for trailing context and sub-regex
capture. Now using the same marking array for the trailing context and the capture. The set markers are now shifted along with partial token matches. But not yet propagating the list of captures associated with a token to the runtime data.
-rw-r--r--colm/fsmcodegen.cpp12
-rw-r--r--colm/fsmexec.cpp12
-rw-r--r--colm/fsmrun.cpp52
-rw-r--r--colm/fsmrun.h6
-rw-r--r--colm/keyops.h4
-rw-r--r--colm/lmparse.kh1
-rw-r--r--colm/lmparse.kl15
-rw-r--r--colm/parsedata.h11
-rw-r--r--colm/parsetree.h13
-rw-r--r--colm/pdabuild.cpp9
-rw-r--r--colm/pdacodegen.cpp2
-rw-r--r--colm/pdarun.h11
-rw-r--r--colm/redbuild.cpp2
-rw-r--r--colm/redfsm.h4
14 files changed, 87 insertions, 67 deletions
diff --git a/colm/fsmcodegen.cpp b/colm/fsmcodegen.cpp
index 6d819970..fe5a6fb3 100644
--- a/colm/fsmcodegen.cpp
+++ b/colm/fsmcodegen.cpp
@@ -321,16 +321,8 @@ void FsmCodeGen::ACTION( ostream &ret, GenAction *action, int targState, bool in
ret << "\t{";
INLINE_LIST( ret, action->inlineList, targState, inFinish );
- if ( action->objField ) {
- ObjField *field = action->objField;
- if ( action->markType == MarkEnter )
- ret << "mark_enter[" << field->offset << "] = " << P() << ";\n";
- else if ( action->markType == MarkLeave )
- ret << "mark_leave[" << field->offset << "] = " << P() << ";\n";
- }
-
- if ( action->markType == MarkMatchEnd )
- ret << "mark_match_end[" << action->matchEndNum << "] = " << P() << ";\n";
+ if ( action->markId >= 0 )
+ ret << "mark[" << action->markId << "] = " << P() << ";\n";
ret << "}\n";
diff --git a/colm/fsmexec.cpp b/colm/fsmexec.cpp
index ea1af928..8f5d7600 100644
--- a/colm/fsmexec.cpp
+++ b/colm/fsmexec.cpp
@@ -87,16 +87,8 @@ void FsmRun::execAction( GenAction *genAction )
}
}
- if ( genAction->objField ) {
- ObjField *field = genAction->objField;
- if ( genAction->markType == MarkEnter )
- mark_enter[field->offset] = p;
- else if ( genAction->markType == MarkLeave )
- mark_leave[field->offset] = p;
- }
-
- if ( genAction->markType == MarkMatchEnd )
- mark_match_end[genAction->matchEndNum] = p;
+ if ( genAction->markType == MarkMark )
+ mark[genAction->markId] = p;
}
void FsmRun::execute()
diff --git a/colm/fsmrun.cpp b/colm/fsmrun.cpp
index bfbc898d..0f20b8cc 100644
--- a/colm/fsmrun.cpp
+++ b/colm/fsmrun.cpp
@@ -511,15 +511,15 @@ Kid *FsmRun::makeToken( int id, Head *tokdata, bool namedLangEl, int bindId )
/* No children and ignores get added later. */
input->tree->child = attrs;
- /* Set attributes for the labelled components. */
- for ( int i = 0; i < 32; i++ ) {
- if ( mark_leave[i] != 0 ) {
- Head *data = string_alloc_new( prg,
- mark_enter[i], mark_leave[i] - mark_enter[i] );
- set_attr( input->tree, i, construct_string( prg, data ) );
- tree_upref( get_attr( input->tree, i ) );
- }
- }
+// /* Set attributes for the labelled components. */
+// for ( int i = 0; i < 32; i++ ) {
+// if ( mark_leave[i] != 0 ) {
+// Head *data = string_alloc_new( prg,
+// mark_enter[i], mark_leave[i] - mark_enter[i] );
+// set_attr( input->tree, i, construct_string( prg, data ) );
+// tree_upref( get_attr( input->tree, i ) );
+// }
+// }
/* If the item is bound then store it in the bindings array. */
if ( bindId > 0 ) {
@@ -621,8 +621,8 @@ void FsmRun::execGen( long id )
#endif
LangElInfo *lelInfo = parser->tables->rtd->lelInfo;
- if ( lelInfo[id].matchEnd >= 0 )
- p = mark_match_end[lelInfo[id].matchEnd];
+ if ( lelInfo[id].markId >= 0 )
+ p = mark[lelInfo[id].markId];
/* Make the token data. */
long length = p - tokstart;
@@ -636,8 +636,7 @@ void FsmRun::execGen( long id )
generationAction( id, tokdata, false, 0 );
- memset( mark_leave, 0, sizeof(mark_leave) );
- memset( mark_match_end, 0, sizeof(mark_match_end) );
+ memset( mark, 0, sizeof(mark) );
}
void FsmRun::sendIgnore( long id )
@@ -649,8 +648,8 @@ void FsmRun::sendIgnore( long id )
#endif
LangElInfo *lelInfo = parser->tables->rtd->lelInfo;
- if ( lelInfo[id].matchEnd >= 0 )
- p = mark_match_end[lelInfo[id].matchEnd];
+ if ( lelInfo[id].markId >= 0 )
+ p = mark[lelInfo[id].markId];
/* Make the ignore string. */
int length = p - tokstart;
@@ -670,8 +669,7 @@ void FsmRun::sendIgnore( long id )
region = parser->getNextRegion();
cs = tables->entryByRegion[region];
- memset( mark_leave, 0, sizeof(mark_leave) );
- memset( mark_match_end, 0, sizeof(mark_match_end) );
+ memset( mark, 0, sizeof(mark) );
}
void FsmRun::sendToken( long id )
@@ -683,8 +681,8 @@ void FsmRun::sendToken( long id )
#endif
LangElInfo *lelInfo = parser->tables->rtd->lelInfo;
- if ( lelInfo[id].matchEnd >= 0 )
- p = mark_match_end[lelInfo[id].matchEnd];
+ if ( lelInfo[id].markId >= 0 )
+ p = mark[lelInfo[id].markId];
/* Make the token data. */
long length = p - tokstart;
@@ -698,8 +696,7 @@ void FsmRun::sendToken( long id )
Kid *input = makeToken( id, tokdata, false, 0 );
send_handle_error( this, parser, input );
- memset( mark_leave, 0, sizeof(mark_leave) );
- memset( mark_match_end, 0, sizeof(mark_match_end) );
+ memset( mark, 0, sizeof(mark) );
}
void FsmRun::emitToken( KlangEl *token )
@@ -850,8 +847,7 @@ long FsmRun::run( PdaRun *destParser )
tokend = 0;
region = parser->getNextRegion();
cs = tables->entryByRegion[region];
- memset( mark_leave, 0, sizeof(mark_leave) );
- memset( mark_match_end, 0, sizeof(mark_match_end) );
+ memset( mark, 0, sizeof(mark) );
/* Start with the EOF test. The pattern and replacement input sources can
* be EOF from the start. */
@@ -998,8 +994,16 @@ long FsmRun::run( PdaRun *destParser )
runBuf->next->length = FSM_BUFSIZE - have;
/* Compute tokstart and tokend. */
- tokend = runBuf->buf + (tokend - tokstart);
+ long dist = tokstart - runBuf->buf;
+
+ tokend -= dist;
tokstart = runBuf->buf;
+
+ /* Shift any markers. */
+ for ( int i = 0; i < MARK_SLOTS; i++ ) {
+ if ( mark[i] != 0 )
+ mark[i] -= dist;
+ }
}
p = pe = runBuf->buf + have;
peof = 0;
diff --git a/colm/fsmrun.h b/colm/fsmrun.h
index 07930b49..ed25c829 100644
--- a/colm/fsmrun.h
+++ b/colm/fsmrun.h
@@ -81,6 +81,8 @@ struct RunBuf
RunBuf *next;
};
+#define MARK_SLOTS 32
+
struct FsmRun
{
FsmRun( Program *prg );
@@ -122,9 +124,7 @@ struct FsmRun
bool eofSent;
RunBuf *runBuf;
bool gotoResume;
- char *mark_enter[32];
- char *mark_leave[32];
- char *mark_match_end[32];
+ char *mark[MARK_SLOTS];
};
void send_queued_tokens( FsmRun *fsmRun, PdaRun *parser );
diff --git a/colm/keyops.h b/colm/keyops.h
index 791495ee..b5af65e7 100644
--- a/colm/keyops.h
+++ b/colm/keyops.h
@@ -28,9 +28,7 @@
enum MarkType
{
MarkNone,
- MarkEnter,
- MarkLeave,
- MarkMatchEnd
+ MarkMark
};
typedef unsigned long long Size;
diff --git a/colm/lmparse.kh b/colm/lmparse.kh
index b51d0e3d..2cc3e99a 100644
--- a/colm/lmparse.kh
+++ b/colm/lmparse.kh
@@ -103,6 +103,7 @@ struct Parser
ProdElList *curProdElList;
PredType predType;
+ ReCaptureVect reCaptureVect;
};
%% write token_defs;
diff --git a/colm/lmparse.kl b/colm/lmparse.kl
index fa46c9fc..b796318e 100644
--- a/colm/lmparse.kl
+++ b/colm/lmparse.kl
@@ -734,6 +734,9 @@ token_def:
region->tokenDefList.append( tokenDef );
tokEl->tokenDef = tokenDef;
+ tokenDef->reCaptureVect = reCaptureVect;
+ reCaptureVect.empty();
+
/* Create the object def for the token. */
ObjectDef *objectDef = new ObjectDef( ObjectDef::UserType, name,
pd->objFieldMap, new ObjMethodMap(), pd->nextObjectId++ );
@@ -841,6 +844,9 @@ rl_def:
/* Generic creation of machine for instantiation and assignment. */
JoinOrLm *joinOrLm = new JoinOrLm( $4->join );
addRegularDef( $2->loc, namespaceStack.top(), $2->data, joinOrLm, false );
+
+ if ( reCaptureVect.length() > 0 )
+ error($1->loc) << "rl definitions cannot capture vars" << endl;
};
type class token_data
@@ -1497,8 +1503,7 @@ opt_rl_join: rl_join opt_context
if ( $2->context != 0 ) {
/* Create the enter and leaving actions that will mark the substring. */
- Action *mark = new Action( MarkMatchEnd, 0 );
- mark->matchEndNum = pd->nextMatchEndNum++;
+ Action *mark = new Action( MarkMark, pd->nextMatchEndNum++ );
pd->actionList.append( mark );
$$->join->context = $2->context;
@@ -1636,14 +1641,16 @@ factor_with_label:
pd->objFieldMap->insert( $1->data, objField );
/* Create the enter and leaving actions that will mark the substring. */
- Action *enter = new Action( MarkEnter, objField );
- Action *leave = new Action( MarkLeave, objField );
+ Action *enter = new Action( MarkMark, pd->nextMatchEndNum++ );
+ Action *leave = new Action( MarkMark, pd->nextMatchEndNum++ );
pd->actionList.append( enter );
pd->actionList.append( leave );
/* Add entering and leaving actions. */
$$->factorWithAug->actions.append( ParserAction( $1->loc, at_start, 0, enter ) );
$$->factorWithAug->actions.append( ParserAction( $1->loc, at_leave, 0, leave ) );
+
+ reCaptureVect.append( ReCapture( objField, enter, leave ) );
};
nonterm factor_with_ep
diff --git a/colm/parsedata.h b/colm/parsedata.h
index 855855fd..93fe2de0 100644
--- a/colm/parsedata.h
+++ b/colm/parsedata.h
@@ -279,6 +279,7 @@ struct PdaLiteral
long value;
};
+
/* Forwards. */
using std::ostream;
@@ -299,7 +300,7 @@ public:
name(name),
markType(MarkNone),
objField(0),
- matchEndNum(0),
+ markId(-1),
inlineList(inlineList),
actionId(-1),
numTransRefs(0),
@@ -312,12 +313,12 @@ public:
{
}
- Action( MarkType markType, ObjField *objField )
+ Action( MarkType markType, long markId )
:
name("mark"),
markType(markType),
- objField(objField),
- matchEndNum(0),
+ objField(0),
+ markId(markId),
inlineList(new InlineList),
actionId(-1),
numTransRefs(0),
@@ -339,7 +340,7 @@ public:
MarkType markType;
ObjField *objField;
- long matchEndNum;
+ long markId;
InlineList *inlineList;
int actionId;
diff --git a/colm/parsetree.h b/colm/parsetree.h
index 86324848..ece6ac5f 100644
--- a/colm/parsetree.h
+++ b/colm/parsetree.h
@@ -271,6 +271,18 @@ struct NamespaceQual
Namespace *getQual( ParseData *pd );
};
+struct ReCapture
+{
+ ReCapture( ObjField *objField, Action *markEnter, Action *markLeave )
+ : objField(objField), markEnter(markEnter), markLeave(markLeave) {}
+
+ ObjField *objField;
+ Action *markEnter;
+ Action *markLeave;
+};
+
+typedef Vector<ReCapture> ReCaptureVect;
+
struct TokenDef
{
TokenDef( Join *join, KlangEl *token, InputLoc &semiLoc,
@@ -295,6 +307,7 @@ struct TokenDef
bool inLmSelect;
Namespace *nspace;
TokenRegion *tokenRegion;
+ ReCaptureVect reCaptureVect;
TokenDef *prev, *next;
};
diff --git a/colm/pdabuild.cpp b/colm/pdabuild.cpp
index 99c20bad..07449330 100644
--- a/colm/pdabuild.cpp
+++ b/colm/pdabuild.cpp
@@ -1411,10 +1411,11 @@ void ParseData::makeRuntimeData()
runtimeData->lelInfo[i].termDupId = lel->termDup == 0 ? 0 : lel->termDup->id;
runtimeData->lelInfo[i].genericId = lel->generic == 0 ? 0 : lel->generic->id;
- if ( lel->tokenDef != 0 && lel->tokenDef->join != 0 && lel->tokenDef->join->context != 0 )
- runtimeData->lelInfo[i].matchEnd = lel->tokenDef->join->mark->matchEndNum;
+ if ( lel->tokenDef != 0 && lel->tokenDef->join != 0 &&
+ lel->tokenDef->join->context != 0 )
+ runtimeData->lelInfo[i].markId = lel->tokenDef->join->mark->markId;
else
- runtimeData->lelInfo[i].matchEnd = -1;
+ runtimeData->lelInfo[i].markId = -1;
}
else {
memset(&runtimeData->lelInfo[i], 0, sizeof(LangElInfo) );
@@ -1505,6 +1506,8 @@ void ParseData::makeRuntimeData()
runtimeData->litlen[el->value] = el->key.length();
}
+ /* FIXME: Captured attributes go here. */
+
runtimeData->fsmTables = fsmTables;
runtimeData->pdaTables = pdaTables;
diff --git a/colm/pdacodegen.cpp b/colm/pdacodegen.cpp
index 62a8b8df..34d79061 100644
--- a/colm/pdacodegen.cpp
+++ b/colm/pdacodegen.cpp
@@ -193,7 +193,7 @@ void PdaCodeGen::writeRuntimeData( RuntimeData *runtimeData, PdaTables *pdaTable
out << runtimeData->lelInfo[i].genericId << ", ";
- out << runtimeData->lelInfo[i].matchEnd;
+ out << runtimeData->lelInfo[i].markId;
out << " }";
diff --git a/colm/pdarun.h b/colm/pdarun.h
index a52edfbc..528f7073 100644
--- a/colm/pdarun.h
+++ b/colm/pdarun.h
@@ -353,7 +353,7 @@ struct LangElInfo
long termDupId;
long genericId;
- long matchEnd;
+ long markId;
};
struct ObjFieldInfo
@@ -387,6 +387,13 @@ struct RegionInfo
long eofFrameId;
};
+struct CaptureAttr
+{
+ long mark_enter;
+ long mark_leave;
+ long offset;
+};
+
struct RuntimeData
{
LangElInfo *lelInfo;
@@ -421,6 +428,8 @@ struct RuntimeData
Head **literals;
long numLiterals;
+// CaptureAttr *captureAttr;
+
FsmTables *fsmTables;
PdaTables *pdaTables;
int *startStates;
diff --git a/colm/redbuild.cpp b/colm/redbuild.cpp
index 2df68aea..47b8c60d 100644
--- a/colm/redbuild.cpp
+++ b/colm/redbuild.cpp
@@ -432,7 +432,7 @@ void RedFsmBuild::newAction( int anum, char *name, int line, int col, Action *ac
redFsm->allActions[anum].inlineList = action->inlineList;
redFsm->allActions[anum].objField = action->objField;
redFsm->allActions[anum].markType = action->markType;
- redFsm->allActions[anum].matchEndNum = action->matchEndNum;
+ redFsm->allActions[anum].markId = action->markId;
}
void RedFsmBuild::makeAction( Action *action )
diff --git a/colm/redfsm.h b/colm/redfsm.h
index 4ddb0dbf..305b67f9 100644
--- a/colm/redfsm.h
+++ b/colm/redfsm.h
@@ -67,7 +67,7 @@ struct GenAction
actionId(0),
markType(MarkNone),
objField(0),
- matchEndNum(0),
+ markId(-1),
numTransRefs(0),
numToStateRefs(0),
numFromStateRefs(0),
@@ -82,7 +82,7 @@ struct GenAction
int actionId;
MarkType markType;
ObjField *objField;
- long matchEndNum;
+ long markId;
string nameOrLoc();