diff options
author | Adrian Thurston <thurston@complang.org> | 2011-12-31 20:55:10 +0000 |
---|---|---|
committer | Adrian Thurston <thurston@complang.org> | 2011-12-31 20:55:10 +0000 |
commit | 863bead07680b832a07221f08291f19266a2397f (patch) | |
tree | 982e4ae2ddf04dfc4f09b88c71f29c07bd385434 | |
parent | 127782c33740f49cac09317d1136dcc77a02d97c (diff) | |
download | colm-863bead07680b832a07221f08291f19266a2397f.tar.gz |
Bringing back the connection between fsm runs and input streams. This allows us
to clear the buffered data only when we need to. refs #341.
-rw-r--r-- | colm/bytecode.c | 20 | ||||
-rw-r--r-- | colm/input.c | 24 | ||||
-rw-r--r-- | colm/input.h | 8 | ||||
-rw-r--r-- | colm/pdarun.c | 73 | ||||
-rw-r--r-- | colm/pdarun.h | 9 |
5 files changed, 102 insertions, 32 deletions
diff --git a/colm/bytecode.c b/colm/bytecode.c index 6d50134d..8de480f0 100644 --- a/colm/bytecode.c +++ b/colm/bytecode.c @@ -228,7 +228,6 @@ case PcrStart: long pcr = parseLoop( prg, sp, accum->pdaRun, accum->fsmRun, accum->accumStream->in, entry ); while ( pcr != PcrDone ) { - clearBuffered( accum->fsmRun ); return pcr; case PcrReduction: @@ -248,8 +247,6 @@ case PcrRevReduction2: case PcrDone: break; } - clearBuffered( accum->fsmRun ); - return PcrDone; } @@ -267,8 +264,6 @@ case PcrStart: while ( pcr != PcrDone ) { - clearBuffered( accum->fsmRun ); - return pcr; case PcrReduction: case PcrGeneration: @@ -300,8 +295,6 @@ case PcrRevReduction2: case PcrDone: break; } - clearBuffered( accum->fsmRun ); - return PcrDone; } @@ -313,14 +306,7 @@ long undoParseFrag( Program *prg, Tree **sp, Accum *accum, long steps, long entr debug( REALM_PARSE, "undo parse frag, target steps: %ld, pdarun steps: %ld\n", steps, pdaRun->steps ); - /* If there is a token started, but never finished for a lack of data, we - * must first backup over it. */ - if ( fsmRun->tokstart != 0 ) { - fsmRun->p = fsmRun->tokstart; - fsmRun->tokstart = 0; - } - -// sendBackBuffered( inputStream ); + resetToken( fsmRun ); switch ( entry ) { case PcrStart: @@ -336,8 +322,6 @@ case PcrStart: long pcr = parseLoop( prg, sp, pdaRun, fsmRun, inputStream, entry ); while ( pcr != PcrDone ) { -// sendBackBuffered( accum->fsmRun, accum->accumStream->in ); - return pcr; case PcrReduction: case PcrGeneration: @@ -361,8 +345,6 @@ case PcrRevReduction2: case PcrDone: break; } -// sendBackBuffered( accum->fsmRun, accum->accumStream->in ); - return PcrDone; } diff --git a/colm/input.c b/colm/input.c index 20a8944b..250c7d7d 100644 --- a/colm/input.c +++ b/colm/input.c @@ -525,7 +525,7 @@ void unsetEof( InputStream *is ) } } -int getData( InputStream *is, int skip, char *dest, int length, int *copied ) +int getData( FsmRun *fsmRun, InputStream *is, int skip, char *dest, int length, int *copied ) { int ret = 0; *copied = 0; @@ -543,6 +543,8 @@ int getData( InputStream *is, int skip, char *dest, int length, int *copied ) Stream *stream = (Stream*)buf->tree; int type = stream->in->funcs->getData( stream->in, skip, dest, length, copied ); + attachInput2( fsmRun, stream->in ); + if ( type == INPUT_EOD && is->eof ) { ret = INPUT_EOF; break; @@ -592,6 +594,8 @@ int getData( InputStream *is, int skip, char *dest, int length, int *copied ) buf = buf->next; } + attachInput1( fsmRun, is ); + #if DEBUG switch ( ret ) { case INPUT_DATA: @@ -663,13 +667,18 @@ int consumeData( InputStream *is, int length ) return consumed; } -int undoConsumeData( InputStream *is, const char *data, int length ) +int undoConsumeData( FsmRun *fsmRun, InputStream *is, const char *data, int length ) { debug( REALM_INPUT, "undoing consume of %ld bytes\n", length ); if ( isSourceStream( is ) ) { Stream *stream = (Stream*)is->queue->tree; - return stream->in->funcs->undoConsumeData( stream->in, data, length ); + int len = stream->in->funcs->undoConsumeData( stream->in, data, length ); + + if ( stream->in->attached2 != 0 ) + detachInput2( stream->in->attached2, stream->in ); + + return len; } else { RunBuf *newBuf = newRunBuf(); @@ -677,6 +686,9 @@ int undoConsumeData( InputStream *is, const char *data, int length ) memcpy( newBuf->data, data, length ); inputStreamPrepend2( is, newBuf ); + if ( is->attached1 != 0 ) + detachInput1( is->attached1, is ); + return length; } } @@ -718,7 +730,7 @@ struct LangEl *consumeLangEl( InputStream *is, long *bindId, char **data, long * return stream->in->funcs->consumeLangEl( stream->in, bindId, data, length ); } else { - return 0; + assert( false ); } } @@ -729,7 +741,7 @@ void undoConsumeLangEl( InputStream *is ) return stream->in->funcs->undoConsumeLangEl( stream->in ); } else { - assert(false); + assert( false ); } } @@ -774,7 +786,7 @@ Tree *undoPrependData( InputStream *is, int length ) int have = 0; while ( have < length ) { int res = 0; - getData( is, 0, tmp, length-have, &res ); + getData( 0, is, 0, tmp, length-have, &res ); have += res; } return 0; diff --git a/colm/input.h b/colm/input.h index fec76374..d267c866 100644 --- a/colm/input.h +++ b/colm/input.h @@ -151,6 +151,8 @@ struct _SourceStream struct PatternItem *patItem; struct Replacement *replacement; struct ReplItem *replItem; + + struct _FsmRun *attached2; }; SourceStream *newInputStreamPattern( struct Pattern *pattern ); @@ -192,15 +194,17 @@ struct _InputStream struct PatternItem *patItem; struct Replacement *replacement; struct ReplItem *replItem; + + struct _FsmRun *attached1; }; typedef struct _InputStream InputStream; /* The input stream interface. */ -int getData( InputStream *in, int offset, char *dest, int length, int *copied ); +int getData( struct _FsmRun *fsmRun, InputStream *in, int offset, char *dest, int length, int *copied ); int consumeData( InputStream *in, int length ); -int undoConsumeData( InputStream *is, const char *data, int length ); +int undoConsumeData( struct _FsmRun *fsmRun, InputStream *is, const char *data, int length ); struct ColmTree *consumeTree( InputStream *in ); void undoConsumeTree( InputStream *in, struct ColmTree *tree, int ignore ); diff --git a/colm/pdarun.c b/colm/pdarun.c index b6958896..9a2c8053 100644 --- a/colm/pdarun.c +++ b/colm/pdarun.c @@ -70,6 +70,9 @@ void initFsmRun( FsmRun *fsmRun, Program *prg ) fsmRun->p = fsmRun->pe = fsmRun->runBuf->data; fsmRun->peof = 0; + + fsmRun->attached1 = 0; + fsmRun->attached2 = 0; } void clearFsmRun( Program *prg, FsmRun *fsmRun ) @@ -144,7 +147,7 @@ Head *streamPull( Program *prg, FsmRun *fsmRun, InputStream *inputStream, long l fsmRun->runBuf = runBuf; int len = 0; - getData( inputStream, 0, runBuf->data, length, &len ); + getData( fsmRun, inputStream, 0, runBuf->data, length, &len ); consumeData( inputStream, length ); fsmRun->p = fsmRun->pe = runBuf->data + length; @@ -250,11 +253,11 @@ static void sendBackText( FsmRun *fsmRun, InputStream *inputStream, const char * /* If there is data in the current buffer then send the whole send back * should be in this buffer. */ - clearBuffered( fsmRun ); + //clearBuffered( fsmRun ); /* slide data back. */ // fsmRun->p = fsmRun->pe = fsmRun->runBuf->data; - undoConsumeData( inputStream, data, length ); + undoConsumeData( fsmRun, inputStream, data, length ); // #if COLM_LOG // if ( memcmp( data, fsmRun->p, length ) != 0 ) @@ -386,6 +389,57 @@ void detachIgnores( Program *prg, Tree **sp, PdaRun *pdaRun, FsmRun *fsmRun, Kid treeDownref( prg, sp, leftIgnore ); } +void attachInput1( FsmRun *fsmRun, InputStream *is ) +{ + if ( is->attached1 != 0 && is->attached1 != fsmRun ) + detachInput1( is->attached1, is ); + + debug( REALM_INPUT, "attaching fsm run to input stream: %p %p\n", fsmRun, is ); + fsmRun->attached1 = is; + is->attached1 = fsmRun; +} + +void attachInput2( FsmRun *fsmRun, SourceStream *is ) +{ + if ( is->attached2 != 0 && is->attached2 != fsmRun ) + detachInput2( is->attached2, is ); + + debug( REALM_INPUT, "attaching fsm run to source stream: %p %p\n", fsmRun, is ); + fsmRun->attached2 = is; + is->attached2 = fsmRun; +} + +void detachInput1( FsmRun *fsmRun, InputStream *is ) +{ + debug( REALM_INPUT, "detaching fsm run from input stream: %p %p\n", fsmRun, is ); + + fsmRun->attached1 = 0; + is->attached1 = 0; + + clearBuffered( fsmRun ); + + if ( fsmRun->attached2 != 0 ) { + fsmRun->attached2->attached2 = 0; + fsmRun->attached2 = 0; + } +} + +void detachInput2( FsmRun *fsmRun, SourceStream *is ) +{ + debug( REALM_INPUT, "detaching fsm run from source stream: %p %p\n", fsmRun, is ); + + fsmRun->attached2 = 0; + is->attached2 = 0; + + clearBuffered( fsmRun ); + + if ( fsmRun->attached1 != 0 ) { + fsmRun->attached1->attached1 = 0; + fsmRun->attached1 = 0; + } +} + + void clearBuffered( FsmRun *fsmRun ) { /* If there is data in the current buffer then send the whole send back @@ -399,6 +453,16 @@ void clearBuffered( FsmRun *fsmRun ) } } +void resetToken( FsmRun *fsmRun ) +{ + /* If there is a token started, but never finished for a lack of data, we + * must first backup over it. */ + if ( fsmRun->tokstart != 0 ) { + fsmRun->p = fsmRun->tokstart; + fsmRun->tokstart = 0; + } +} + /* Stops on: * PcrRevToken */ @@ -873,7 +937,6 @@ void newToken( Program *prg, PdaRun *pdaRun, FsmRun *fsmRun ) fsmRun->tokstart = 0; fsmRun->tokend = 0; fsmRun->matchedToken = 0; - fsmRun->tokstart = 0; /* Set the state using the state of the parser. */ fsmRun->region = pdaRunGetNextRegion( pdaRun, 0 ); @@ -1020,7 +1083,7 @@ long scanToken( Program *prg, PdaRun *pdaRun, FsmRun *fsmRun, InputStream *input int have = fsmRun->tokstart != 0 ? fsmRun->p - fsmRun->tokstart : 0; int len = 0; debug( REALM_SCAN, "fetching data: have: %d space: %d\n", have, space ); - int type = getData( inputStream, have, fsmRun->p, space, &len ); + int type = getData( fsmRun, inputStream, have, fsmRun->p, space, &len ); switch ( type ) { case INPUT_DATA: diff --git a/colm/pdarun.h b/colm/pdarun.h index 79b679e6..3c593018 100644 --- a/colm/pdarun.h +++ b/colm/pdarun.h @@ -82,6 +82,9 @@ typedef struct _FsmRun int returnResult; char *mark[MARK_SLOTS]; long matchedToken; + + InputStream *attached1; + SourceStream *attached2; } FsmRun; void initFsmRun( FsmRun *fsmRun, struct ColmProgram *prg ); @@ -450,6 +453,12 @@ void attachIgnore( struct ColmProgram *prg, Tree **sp, PdaRun *pdaRun, Kid *inpu void detachIgnores( struct ColmProgram *prg, Tree **sp, PdaRun *pdaRun, FsmRun *fsmRun, Kid *input ); void clearBuffered( FsmRun *fsmRun ); +void resetToken( FsmRun *fsmRun ); + +void detachInput1( FsmRun *fsmRun, InputStream *is ); +void attachInput1( FsmRun *fsmRun, InputStream *is ); +void detachInput2( FsmRun *fsmRun, SourceStream *is ); +void attachInput2( FsmRun *fsmRun, SourceStream *is ); #ifdef __cplusplus } |