diff options
author | Adrian Thurston <thurston@complang.org> | 2009-03-02 03:46:28 +0000 |
---|---|---|
committer | Adrian Thurston <thurston@complang.org> | 2009-03-02 03:46:28 +0000 |
commit | 9920839cf5cd47ee8cfc16ea12a64a8980b48813 (patch) | |
tree | febe0d6f69e7d3f5fc2f69d557146805cdb9d5d7 | |
parent | c05e3ea99909528aa4e962cae94c7aeb3453b614 (diff) | |
download | colm-9920839cf5cd47ee8cfc16ea12a64a8980b48813.tar.gz |
Regex subexpression captures are now bound as local variables in token generation actions.
-rw-r--r-- | bytecode.cpp | 30 | ||||
-rw-r--r-- | bytecode.h | 5 | ||||
-rw-r--r-- | compile.cpp | 9 | ||||
-rw-r--r-- | fsmrun.cpp | 14 | ||||
-rw-r--r-- | lmparse.kl | 15 | ||||
-rw-r--r-- | pdarun.cpp | 4 |
6 files changed, 64 insertions, 13 deletions
diff --git a/bytecode.cpp b/bytecode.cpp index 08a1ac98..75417880 100644 --- a/bytecode.cpp +++ b/bytecode.cpp @@ -402,7 +402,7 @@ void Program::run() if ( rtd->rootCodeLen > 0 ) { CodeVect reverseCode; - Execution execution( this, reverseCode, 0, rtd->rootCode, 0, 0 ); + Execution execution( this, reverseCode, 0, rtd->rootCode, 0, 0, 0 ); execution.execute( root ); /* Pull out the reverse code and free it. */ @@ -422,7 +422,7 @@ void Program::run() } Execution::Execution( Program *prg, CodeVect &reverseCode, - PdaRun *parser, Code *code, Tree *lhs, Head *matchText ) + PdaRun *parser, Code *code, Tree *lhs, long genId, Head *matchText ) : prg(prg), parser(parser), @@ -430,6 +430,7 @@ Execution::Execution( Program *prg, CodeVect &reverseCode, frame(0), iframe(0), lhs(lhs), parsed(0), + genId(genId), matchText(matchText), reject(false), reverseCode(reverseCode), @@ -945,6 +946,31 @@ again: push( prg->global ); break; } + case IN_INIT_CAPTURES: { + uchar ncaps; + read_byte( ncaps ); + + #ifdef COLM_LOG_BYTECODE + if ( colm_log_bytecode ) { + cerr << "IN_INIT_CAPTURES " << ncaps << endl; + } + #endif + + /* If there are captures (this is a translate block) then copy them into + * the local frame now. */ + LangElInfo *lelInfo = prg->rtd->lelInfo; + char **mark = parser->fsmRun->mark; + + for ( int i = 0; i < lelInfo[genId].numCaptureAttr; i++ ) { + CaptureAttr *ca = &prg->rtd->captureAttr[lelInfo[genId].captureAttr + i]; + Head *data = string_alloc_new( prg, + mark[ca->mark_enter], mark[ca->mark_leave] - mark[ca->mark_enter] ); + Tree *string = construct_string( prg, data ); + tree_upref( string ); + set_local( frame, -1 - i, string ); + } + break; + } case IN_INIT_RHS_EL: { Half position; short field; @@ -110,6 +110,7 @@ typedef unsigned char uchar; #define IN_GET_TOKEN_POS_R 0x35 #define IN_INIT_RHS_EL 0x3b +#define IN_INIT_CAPTURES 0xaa #define IN_TRITER_FROM_REF 0x3c #define IN_TRITER_ADVANCE 0x3d @@ -688,7 +689,8 @@ struct Program struct Execution { Execution( Program *prg, CodeVect &reverseCode, - PdaRun *parser, Code *code, Tree *lhs, Head *matchText ); + PdaRun *parser, Code *code, Tree *lhs, + long genId, Head *matchText ); Program *prg; PdaTables *pdaTables; @@ -702,6 +704,7 @@ struct Execution Tree *lhs; Tree *parsed; + long genId; Head *matchText; bool reject; diff --git a/compile.cpp b/compile.cpp index 5fce43ce..d4d2f503 100644 --- a/compile.cpp +++ b/compile.cpp @@ -2208,6 +2208,15 @@ void ParseData::compileTranslateBlock( KlangEl *langEl ) code.append( IN_INIT_LOCALS ); code.appendHalf( 0 ); + if ( langEl->tokenDef->reCaptureVect.length() > 0 ) { + code.append( IN_INIT_CAPTURES ); + code.append( langEl->tokenDef->reCaptureVect.length() ); + + ObjFieldList::Iter f = *curLocalFrame->objFieldList; + for ( int i = 0; i < langEl->tokenDef->reCaptureVect.length(); i++, f++ ) + curLocalFrame->referenceField( this, f->value ); + } + /* Set the local frame and compile the reduce block. */ block->compile( this, code ); @@ -265,7 +265,7 @@ void FsmRun::sendBackIgnore( Kid *ignore ) /* Check for reverse code. */ if ( ignore->tree->flags & AF_HAS_RCODE ) { Execution execution( prg, parser->reverseCode, - parser, 0, 0, 0 ); + parser, 0, 0, 0, 0 ); /* Do the reverse exeuction. */ execution.rexecute( parser->root, parser->allReverseCode ); @@ -307,7 +307,7 @@ void FsmRun::sendBack( Kid *input ) /* Check for reverse code. */ if ( input->tree->flags & AF_HAS_RCODE ) { Execution execution( prg, parser->reverseCode, - parser, 0, 0, 0 ); + parser, 0, 0, 0, 0 ); /* Do the reverse exeuction. */ execution.rexecute( parser->root, parser->allReverseCode ); @@ -441,10 +441,10 @@ void FsmRun::sendNamedLangEl() send_handle_error( this, parser, input ); } -void execute_generation_action( Program *prg, PdaRun *parser, Code *code, Head *tokdata ) +void execute_generation_action( Program *prg, PdaRun *parser, Code *code, long id, Head *tokdata ) { /* Execute the translation. */ - Execution execution( prg, parser->reverseCode, parser, code, 0, tokdata ); + Execution execution( prg, parser->reverseCode, parser, code, 0, id, tokdata ); execution.execute( parser->root ); /* If there is revese code but nothing generated we need a noToken. */ @@ -481,7 +481,7 @@ void FsmRun::generationAction( int id, Head *tokdata, bool namedLangEl, int bind parser->tables->rtd->lelInfo[id].frameId].codeWV; /* Execute the action and process the queue. */ - execute_generation_action( prg, parser, code, tokdata ); + execute_generation_action( prg, parser, code, id, tokdata ); /* Finished with the match text. */ string_free( prg, tokdata ); @@ -518,8 +518,8 @@ Kid *FsmRun::makeToken( int id, Head *tokdata, bool namedLangEl, int bindId ) Head *data = string_alloc_new( prg, mark[ca->mark_enter], mark[ca->mark_leave] - mark[ca->mark_enter] ); Tree *string = construct_string( prg, data ); - set_attr( input->tree, ca->offset, string ); tree_upref( string ); + set_attr( input->tree, ca->offset, string ); } } @@ -772,7 +772,7 @@ void FsmRun::sendEOF( ) Code *code = parser->tables->rtd->frameInfo[frameId].codeWV; /* Execute the action and process the queue. */ - execute_generation_action( prg, parser, code, 0 ); + execute_generation_action( prg, parser, code, input->tree->id, 0 ); /* Send the generated tokens. */ send_queued_tokens( this, parser ); @@ -91,6 +91,20 @@ block_open: '{' "local", new ObjFieldMap(), new ObjFieldList, new ObjMethodMap(), pd->nextObjectId++ ); pd->curLocalFrame = $$->localFrame; + + /* Add captures to the local frame. We Depend on these becoming the + * first local variables so we can compute their location. */ + + /* Make local variables corresponding to the local capture vector. */ + for ( ReCaptureVect::Iter c = reCaptureVect; c.lte(); c++ ) + { + ObjField *objField = new ObjField( c->objField->loc, + c->objField->typeRef, c->objField->name ); + + /* Insert it into the field map. */ + pd->curLocalFrame->objFieldMap->insert( objField->name, objField ); + pd->curLocalFrame->objFieldList->append( objField ); + } }; block_close: '}' @@ -1987,7 +2001,6 @@ opt_commit: KW_Commit final { $$->commit = true; }; void Parser::init() { - /* Set up the root namespace. */ const char *rootNamespaceName = "___ROOT_NAMESPACE"; Namespace *rootNamespace = new Namespace( InputLoc(), @@ -463,7 +463,7 @@ again: /* Execution environment for the reduction code. */ Execution execution( prg, reverseCode, - this, fi->codeWV, redLel->tree, 0 ); + this, fi->codeWV, redLel->tree, 0, 0 ); /* Execute it. */ execution.execute( root ); @@ -629,7 +629,7 @@ parseError: /* Check for an execution environment. */ if ( undoLel->tree->flags & AF_HAS_RCODE ) { - Execution execution( prg, reverseCode, this, 0, 0, 0 ); + Execution execution( prg, reverseCode, this, 0, 0, 0, 0 ); /* Do the reverse exeuction. */ execution.rexecute( root, allReverseCode ); |