summary refs log tree commit diff
diff options
context:
space:
mode:
author Adrian Thurston <thurston@complang.org> 2009-03-02 03:46:28 +0000
committer Adrian Thurston <thurston@complang.org> 2009-03-02 03:46:28 +0000
commit db51647c541b6a0827d2791e82232a55dc140aed (patch)
tree 8288206bcad263d2d322216f0dd22487bc4fb4ab
parent d7884f1080497f0432dbe7b6226c5294c5a98f9b (diff)
download colm-db51647c541b6a0827d2791e82232a55dc140aed.tar.gz
Regex subexpression captures are now bound as local variables in token generation actions.
-rw-r--r-- colm/bytecode.cpp 30
-rw-r--r-- colm/bytecode.h 5
-rw-r--r-- colm/compile.cpp 9
-rw-r--r-- colm/fsmrun.cpp 14
-rw-r--r-- colm/lmparse.kl 15
-rw-r--r-- colm/pdarun.cpp 4
6 files changed, 64 insertions, 13 deletions
diff --git a/colm/bytecode.cpp b/colm/bytecode.cpp
index 08a1ac98..75417880 100644
--- a/colm/bytecode.cpp
+++ b/colm/bytecode.cpp
@@ -402,7 +402,7 @@ void Program::run()
if ( rtd->rootCodeLen > 0 ) {
CodeVect reverseCode;
- Execution execution( this, reverseCode, 0, rtd->rootCode, 0, 0 );
+ Execution execution( this, reverseCode, 0, rtd->rootCode, 0, 0, 0 );
execution.execute( root );
/* Pull out the reverse code and free it. */
@@ -422,7 +422,7 @@ void Program::run()
}
Execution::Execution( Program *prg, CodeVect &reverseCode,
- PdaRun *parser, Code *code, Tree *lhs, Head *matchText )
+ PdaRun *parser, Code *code, Tree *lhs, long genId, Head *matchText )
:
prg(prg),
parser(parser),
@@ -430,6 +430,7 @@ Execution::Execution( Program *prg, CodeVect &reverseCode,
frame(0), iframe(0),
lhs(lhs),
parsed(0),
+ genId(genId),
matchText(matchText),
reject(false),
reverseCode(reverseCode),
@@ -945,6 +946,31 @@ again:
push( prg->global );
break;
}
+ case IN_INIT_CAPTURES: {
+ uchar ncaps;
+ read_byte( ncaps );
+
+ #ifdef COLM_LOG_BYTECODE
+ if ( colm_log_bytecode ) {
+ cerr << "IN_INIT_CAPTURES " << ncaps << endl;
+ }
+ #endif
+
+ /* If there are captures (this is a translate block) then copy them into
+ * the local frame now. */
+ LangElInfo *lelInfo = prg->rtd->lelInfo;
+ char **mark = parser->fsmRun->mark;
+
+ for ( int i = 0; i < lelInfo[genId].numCaptureAttr; i++ ) {
+ CaptureAttr *ca = &prg->rtd->captureAttr[lelInfo[genId].captureAttr + i];
+ Head *data = string_alloc_new( prg,
+ mark[ca->mark_enter], mark[ca->mark_leave] - mark[ca->mark_enter] );
+ Tree *string = construct_string( prg, data );
+ tree_upref( string );
+ set_local( frame, -1 - i, string );
+ }
+ break;
+ }
case IN_INIT_RHS_EL: {
Half position;
short field;
diff --git a/colm/bytecode.h b/colm/bytecode.h
index 1b3eb02a..d7dc0d00 100644
--- a/colm/bytecode.h
+++ b/colm/bytecode.h
@@ -110,6 +110,7 @@ typedef unsigned char uchar;
#define IN_GET_TOKEN_POS_R 0x35
#define IN_INIT_RHS_EL 0x3b
+#define IN_INIT_CAPTURES 0xaa
#define IN_TRITER_FROM_REF 0x3c
#define IN_TRITER_ADVANCE 0x3d
@@ -688,7 +689,8 @@ struct Program
struct Execution
{
Execution( Program *prg, CodeVect &reverseCode,
- PdaRun *parser, Code *code, Tree *lhs, Head *matchText );
+ PdaRun *parser, Code *code, Tree *lhs,
+ long genId, Head *matchText );
Program *prg;
PdaTables *pdaTables;
@@ -702,6 +704,7 @@ struct Execution
Tree *lhs;
Tree *parsed;
+ long genId;
Head *matchText;
bool reject;
diff --git a/colm/compile.cpp b/colm/compile.cpp
index 5fce43ce..d4d2f503 100644
--- a/colm/compile.cpp
+++ b/colm/compile.cpp
@@ -2208,6 +2208,15 @@ void ParseData::compileTranslateBlock( KlangEl *langEl )
code.append( IN_INIT_LOCALS );
code.appendHalf( 0 );
+ if ( langEl->tokenDef->reCaptureVect.length() > 0 ) {
+ code.append( IN_INIT_CAPTURES );
+ code.append( langEl->tokenDef->reCaptureVect.length() );
+
+ ObjFieldList::Iter f = *curLocalFrame->objFieldList;
+ for ( int i = 0; i < langEl->tokenDef->reCaptureVect.length(); i++, f++ )
+ curLocalFrame->referenceField( this, f->value );
+ }
+
/* Set the local frame and compile the reduce block. */
block->compile( this, code );
diff --git a/colm/fsmrun.cpp b/colm/fsmrun.cpp
index fc9c45d2..6f184b35 100644
--- a/colm/fsmrun.cpp
+++ b/colm/fsmrun.cpp
@@ -265,7 +265,7 @@ void FsmRun::sendBackIgnore( Kid *ignore )
/* Check for reverse code. */
if ( ignore->tree->flags & AF_HAS_RCODE ) {
Execution execution( prg, parser->reverseCode,
- parser, 0, 0, 0 );
+ parser, 0, 0, 0, 0 );
/* Do the reverse exeuction. */
execution.rexecute( parser->root, parser->allReverseCode );
@@ -307,7 +307,7 @@ void FsmRun::sendBack( Kid *input )
/* Check for reverse code. */
if ( input->tree->flags & AF_HAS_RCODE ) {
Execution execution( prg, parser->reverseCode,
- parser, 0, 0, 0 );
+ parser, 0, 0, 0, 0 );
/* Do the reverse exeuction. */
execution.rexecute( parser->root, parser->allReverseCode );
@@ -441,10 +441,10 @@ void FsmRun::sendNamedLangEl()
send_handle_error( this, parser, input );
}
-void execute_generation_action( Program *prg, PdaRun *parser, Code *code, Head *tokdata )
+void execute_generation_action( Program *prg, PdaRun *parser, Code *code, long id, Head *tokdata )
{
/* Execute the translation. */
- Execution execution( prg, parser->reverseCode, parser, code, 0, tokdata );
+ Execution execution( prg, parser->reverseCode, parser, code, 0, id, tokdata );
execution.execute( parser->root );
/* If there is revese code but nothing generated we need a noToken. */
@@ -481,7 +481,7 @@ void FsmRun::generationAction( int id, Head *tokdata, bool namedLangEl, int bind
parser->tables->rtd->lelInfo[id].frameId].codeWV;
/* Execute the action and process the queue. */
- execute_generation_action( prg, parser, code, tokdata );
+ execute_generation_action( prg, parser, code, id, tokdata );
/* Finished with the match text. */
string_free( prg, tokdata );
@@ -518,8 +518,8 @@ Kid *FsmRun::makeToken( int id, Head *tokdata, bool namedLangEl, int bindId )
Head *data = string_alloc_new( prg,
mark[ca->mark_enter], mark[ca->mark_leave] - mark[ca->mark_enter] );
Tree *string = construct_string( prg, data );
- set_attr( input->tree, ca->offset, string );
tree_upref( string );
+ set_attr( input->tree, ca->offset, string );
}
}
@@ -772,7 +772,7 @@ void FsmRun::sendEOF( )
Code *code = parser->tables->rtd->frameInfo[frameId].codeWV;
/* Execute the action and process the queue. */
- execute_generation_action( prg, parser, code, 0 );
+ execute_generation_action( prg, parser, code, input->tree->id, 0 );
/* Send the generated tokens. */
send_queued_tokens( this, parser );
diff --git a/colm/lmparse.kl b/colm/lmparse.kl
index 6f974c21..d4618c00 100644
--- a/colm/lmparse.kl
+++ b/colm/lmparse.kl
@@ -91,6 +91,20 @@ block_open: '{'
"local", new ObjFieldMap(), new ObjFieldList,
new ObjMethodMap(), pd->nextObjectId++ );
pd->curLocalFrame = $$->localFrame;
+
+ /* Add captures to the local frame. We Depend on these becoming the
+ * first local variables so we can compute their location. */
+
+ /* Make local variables corresponding to the local capture vector. */
+ for ( ReCaptureVect::Iter c = reCaptureVect; c.lte(); c++ )
+ {
+ ObjField *objField = new ObjField( c->objField->loc,
+ c->objField->typeRef, c->objField->name );
+
+ /* Insert it into the field map. */
+ pd->curLocalFrame->objFieldMap->insert( objField->name, objField );
+ pd->curLocalFrame->objFieldList->append( objField );
+ }
};
block_close: '}'
@@ -1987,7 +2001,6 @@ opt_commit: KW_Commit final { $$->commit = true; };
void Parser::init()
{
-
/* Set up the root namespace. */
const char *rootNamespaceName = "___ROOT_NAMESPACE";
Namespace *rootNamespace = new Namespace( InputLoc(),
diff --git a/colm/pdarun.cpp b/colm/pdarun.cpp
index 07bd8d09..b85a32c0 100644
--- a/colm/pdarun.cpp
+++ b/colm/pdarun.cpp
@@ -463,7 +463,7 @@ again:
/* Execution environment for the reduction code. */
Execution execution( prg, reverseCode,
- this, fi->codeWV, redLel->tree, 0 );
+ this, fi->codeWV, redLel->tree, 0, 0 );
/* Execute it. */
execution.execute( root );
@@ -629,7 +629,7 @@ parseError:
/* Check for an execution environment. */
if ( undoLel->tree->flags & AF_HAS_RCODE ) {
- Execution execution( prg, reverseCode, this, 0, 0, 0 );
+ Execution execution( prg, reverseCode, this, 0, 0, 0, 0 );
/* Do the reverse exeuction. */
execution.rexecute( root, allReverseCode );