summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Adrian Thurston <thurston@complang.org> 2011-12-31 20:55:10 +0000
committer: Adrian Thurston <thurston@complang.org> 2011-12-31 20:55:10 +0000
commit: 863bead07680b832a07221f08291f19266a2397f (patch)
tree: 982e4ae2ddf04dfc4f09b88c71f29c07bd385434
parent: 127782c33740f49cac09317d1136dcc77a02d97c (diff)
download: colm-863bead07680b832a07221f08291f19266a2397f.tar.gz
Bringing back the connection between fsm runs and input streams. This allows us
to clear the buffered data only when we need to. refs #341.
-rw-r--r-- colm/bytecode.c 20
-rw-r--r-- colm/input.c 24
-rw-r--r-- colm/input.h 8
-rw-r--r-- colm/pdarun.c 73
-rw-r--r-- colm/pdarun.h 9
5 files changed, 102 insertions, 32 deletions
diff --git a/colm/bytecode.c b/colm/bytecode.c
index 6d50134d..8de480f0 100644
--- a/colm/bytecode.c
+++ b/colm/bytecode.c
@@ -228,7 +228,6 @@ case PcrStart:
long pcr = parseLoop( prg, sp, accum->pdaRun, accum->fsmRun, accum->accumStream->in, entry );
while ( pcr != PcrDone ) {
- clearBuffered( accum->fsmRun );
return pcr;
case PcrReduction:
@@ -248,8 +247,6 @@ case PcrRevReduction2:
case PcrDone:
break; }
- clearBuffered( accum->fsmRun );
-
return PcrDone;
}
@@ -267,8 +264,6 @@ case PcrStart:
while ( pcr != PcrDone ) {
- clearBuffered( accum->fsmRun );
-
return pcr;
case PcrReduction:
case PcrGeneration:
@@ -300,8 +295,6 @@ case PcrRevReduction2:
case PcrDone:
break; }
- clearBuffered( accum->fsmRun );
-
return PcrDone;
}
@@ -313,14 +306,7 @@ long undoParseFrag( Program *prg, Tree **sp, Accum *accum, long steps, long entr
debug( REALM_PARSE, "undo parse frag, target steps: %ld, pdarun steps: %ld\n", steps, pdaRun->steps );
- /* If there is a token started, but never finished for a lack of data, we
- * must first backup over it. */
- if ( fsmRun->tokstart != 0 ) {
- fsmRun->p = fsmRun->tokstart;
- fsmRun->tokstart = 0;
- }
-
-// sendBackBuffered( inputStream );
+ resetToken( fsmRun );
switch ( entry ) {
case PcrStart:
@@ -336,8 +322,6 @@ case PcrStart:
long pcr = parseLoop( prg, sp, pdaRun, fsmRun, inputStream, entry );
while ( pcr != PcrDone ) {
-// sendBackBuffered( accum->fsmRun, accum->accumStream->in );
-
return pcr;
case PcrReduction:
case PcrGeneration:
@@ -361,8 +345,6 @@ case PcrRevReduction2:
case PcrDone:
break; }
-// sendBackBuffered( accum->fsmRun, accum->accumStream->in );
-
return PcrDone;
}
diff --git a/colm/input.c b/colm/input.c
index 20a8944b..250c7d7d 100644
--- a/colm/input.c
+++ b/colm/input.c
@@ -525,7 +525,7 @@ void unsetEof( InputStream *is )
}
}
-int getData( InputStream *is, int skip, char *dest, int length, int *copied )
+int getData( FsmRun *fsmRun, InputStream *is, int skip, char *dest, int length, int *copied )
{
int ret = 0;
*copied = 0;
@@ -543,6 +543,8 @@ int getData( InputStream *is, int skip, char *dest, int length, int *copied )
Stream *stream = (Stream*)buf->tree;
int type = stream->in->funcs->getData( stream->in, skip, dest, length, copied );
+ attachInput2( fsmRun, stream->in );
+
if ( type == INPUT_EOD && is->eof ) {
ret = INPUT_EOF;
break;
@@ -592,6 +594,8 @@ int getData( InputStream *is, int skip, char *dest, int length, int *copied )
buf = buf->next;
}
+ attachInput1( fsmRun, is );
+
#if DEBUG
switch ( ret ) {
case INPUT_DATA:
@@ -663,13 +667,18 @@ int consumeData( InputStream *is, int length )
return consumed;
}
-int undoConsumeData( InputStream *is, const char *data, int length )
+int undoConsumeData( FsmRun *fsmRun, InputStream *is, const char *data, int length )
{
debug( REALM_INPUT, "undoing consume of %ld bytes\n", length );
if ( isSourceStream( is ) ) {
Stream *stream = (Stream*)is->queue->tree;
- return stream->in->funcs->undoConsumeData( stream->in, data, length );
+ int len = stream->in->funcs->undoConsumeData( stream->in, data, length );
+
+ if ( stream->in->attached2 != 0 )
+ detachInput2( stream->in->attached2, stream->in );
+
+ return len;
}
else {
RunBuf *newBuf = newRunBuf();
@@ -677,6 +686,9 @@ int undoConsumeData( InputStream *is, const char *data, int length )
memcpy( newBuf->data, data, length );
inputStreamPrepend2( is, newBuf );
+ if ( is->attached1 != 0 )
+ detachInput1( is->attached1, is );
+
return length;
}
}
@@ -718,7 +730,7 @@ struct LangEl *consumeLangEl( InputStream *is, long *bindId, char **data, long *
return stream->in->funcs->consumeLangEl( stream->in, bindId, data, length );
}
else {
- return 0;
+ assert( false );
}
}
@@ -729,7 +741,7 @@ void undoConsumeLangEl( InputStream *is )
return stream->in->funcs->undoConsumeLangEl( stream->in );
}
else {
- assert(false);
+ assert( false );
}
}
@@ -774,7 +786,7 @@ Tree *undoPrependData( InputStream *is, int length )
int have = 0;
while ( have < length ) {
int res = 0;
- getData( is, 0, tmp, length-have, &res );
+ getData( 0, is, 0, tmp, length-have, &res );
have += res;
}
return 0;
diff --git a/colm/input.h b/colm/input.h
index fec76374..d267c866 100644
--- a/colm/input.h
+++ b/colm/input.h
@@ -151,6 +151,8 @@ struct _SourceStream
struct PatternItem *patItem;
struct Replacement *replacement;
struct ReplItem *replItem;
+
+ struct _FsmRun *attached2;
};
SourceStream *newInputStreamPattern( struct Pattern *pattern );
@@ -192,15 +194,17 @@ struct _InputStream
struct PatternItem *patItem;
struct Replacement *replacement;
struct ReplItem *replItem;
+
+ struct _FsmRun *attached1;
};
typedef struct _InputStream InputStream;
/* The input stream interface. */
-int getData( InputStream *in, int offset, char *dest, int length, int *copied );
+int getData( struct _FsmRun *fsmRun, InputStream *in, int offset, char *dest, int length, int *copied );
int consumeData( InputStream *in, int length );
-int undoConsumeData( InputStream *is, const char *data, int length );
+int undoConsumeData( struct _FsmRun *fsmRun, InputStream *is, const char *data, int length );
struct ColmTree *consumeTree( InputStream *in );
void undoConsumeTree( InputStream *in, struct ColmTree *tree, int ignore );
diff --git a/colm/pdarun.c b/colm/pdarun.c
index b6958896..9a2c8053 100644
--- a/colm/pdarun.c
+++ b/colm/pdarun.c
@@ -70,6 +70,9 @@ void initFsmRun( FsmRun *fsmRun, Program *prg )
fsmRun->p = fsmRun->pe = fsmRun->runBuf->data;
fsmRun->peof = 0;
+
+ fsmRun->attached1 = 0;
+ fsmRun->attached2 = 0;
}
void clearFsmRun( Program *prg, FsmRun *fsmRun )
@@ -144,7 +147,7 @@ Head *streamPull( Program *prg, FsmRun *fsmRun, InputStream *inputStream, long l
fsmRun->runBuf = runBuf;
int len = 0;
- getData( inputStream, 0, runBuf->data, length, &len );
+ getData( fsmRun, inputStream, 0, runBuf->data, length, &len );
consumeData( inputStream, length );
fsmRun->p = fsmRun->pe = runBuf->data + length;
@@ -250,11 +253,11 @@ static void sendBackText( FsmRun *fsmRun, InputStream *inputStream, const char *
/* If there is data in the current buffer then send the whole send back
* should be in this buffer. */
- clearBuffered( fsmRun );
+ //clearBuffered( fsmRun );
/* slide data back. */
// fsmRun->p = fsmRun->pe = fsmRun->runBuf->data;
- undoConsumeData( inputStream, data, length );
+ undoConsumeData( fsmRun, inputStream, data, length );
// #if COLM_LOG
// if ( memcmp( data, fsmRun->p, length ) != 0 )
@@ -386,6 +389,57 @@ void detachIgnores( Program *prg, Tree **sp, PdaRun *pdaRun, FsmRun *fsmRun, Kid
treeDownref( prg, sp, leftIgnore );
}
+/*
+ * Link fsmRun and the input stream is to each other through their attached1
+ * fields. If a different fsm run is currently attached to the stream it is
+ * detached first via detachInput1(), which also calls clearBuffered() on it.
+ *
+ * fsmRun may be 0: getData() forwards its fsmRun argument here and
+ * undoPrependData() calls getData() with 0. In that case any other attached
+ * run is still detached, but no new link is recorded.
+ */
+void attachInput1( FsmRun *fsmRun, InputStream *is )
+{
+	if ( is->attached1 != 0 && is->attached1 != fsmRun )
+		detachInput1( is->attached1, is );
+
+	/* No fsm run is doing the reading, so there is nothing to link; writing
+	 * through fsmRun here would dereference a null pointer. */
+	if ( fsmRun == 0 )
+		return;
+
+	debug( REALM_INPUT, "attaching fsm run to input stream: %p %p\n", fsmRun, is );
+	fsmRun->attached1 = is;
+	is->attached1 = fsmRun;
+}
+
+/*
+ * Link fsmRun and the source stream is to each other through their attached2
+ * fields. If a different fsm run is currently attached to the stream it is
+ * detached first via detachInput2(), which also calls clearBuffered() on it.
+ *
+ * fsmRun may be 0: getData() forwards its fsmRun argument here and
+ * undoPrependData() calls getData() with 0. In that case any other attached
+ * run is still detached, but no new link is recorded.
+ */
+void attachInput2( FsmRun *fsmRun, SourceStream *is )
+{
+	if ( is->attached2 != 0 && is->attached2 != fsmRun )
+		detachInput2( is->attached2, is );
+
+	/* No fsm run is doing the reading, so there is nothing to link; writing
+	 * through fsmRun here would dereference a null pointer. */
+	if ( fsmRun == 0 )
+		return;
+
+	debug( REALM_INPUT, "attaching fsm run to source stream: %p %p\n", fsmRun, is );
+	fsmRun->attached2 = is;
+	is->attached2 = fsmRun;
+}
+
+/*
+ * Unlink fsmRun from the input stream is, then call clearBuffered() on the
+ * run. If the run is also linked to a source stream through attached2, that
+ * link is cleared in both directions as well.
+ */
+void detachInput1( FsmRun *fsmRun, InputStream *is )
+{
+	debug( REALM_INPUT, "detaching fsm run from input stream: %p %p\n", fsmRun, is );
+
+	/* Clear both directions of the input stream link. */
+	is->attached1 = 0;
+	fsmRun->attached1 = 0;
+
+	clearBuffered( fsmRun );
+
+	/* Nothing more to do unless a source stream is attached too. */
+	SourceStream *source = fsmRun->attached2;
+	if ( source == 0 )
+		return;
+
+	source->attached2 = 0;
+	fsmRun->attached2 = 0;
+}
+
+/*
+ * Unlink fsmRun from the source stream is, then call clearBuffered() on the
+ * run. If the run is also linked to an input stream through attached1, that
+ * link is cleared in both directions as well.
+ */
+void detachInput2( FsmRun *fsmRun, SourceStream *is )
+{
+	debug( REALM_INPUT, "detaching fsm run from source stream: %p %p\n", fsmRun, is );
+
+	/* Clear both directions of the source stream link. */
+	is->attached2 = 0;
+	fsmRun->attached2 = 0;
+
+	clearBuffered( fsmRun );
+
+	/* Nothing more to do unless an input stream is attached too. */
+	InputStream *input = fsmRun->attached1;
+	if ( input == 0 )
+		return;
+
+	input->attached1 = 0;
+	fsmRun->attached1 = 0;
+}
+
+
void clearBuffered( FsmRun *fsmRun )
{
/* If there is data in the current buffer then send the whole send back
@@ -399,6 +453,16 @@ void clearBuffered( FsmRun *fsmRun )
}
}
+/*
+ * Back up over a partially scanned token: when a token was started but never
+ * finished for lack of data, move the scan pointer p back to the token start
+ * and forget the token start.
+ */
+void resetToken( FsmRun *fsmRun )
+{
+	/* No token in progress, nothing to back up over. */
+	if ( fsmRun->tokstart == 0 )
+		return;
+
+	fsmRun->p = fsmRun->tokstart;
+	fsmRun->tokstart = 0;
+}
+
/* Stops on:
* PcrRevToken
*/
@@ -873,7 +937,6 @@ void newToken( Program *prg, PdaRun *pdaRun, FsmRun *fsmRun )
fsmRun->tokstart = 0;
fsmRun->tokend = 0;
fsmRun->matchedToken = 0;
- fsmRun->tokstart = 0;
/* Set the state using the state of the parser. */
fsmRun->region = pdaRunGetNextRegion( pdaRun, 0 );
@@ -1020,7 +1083,7 @@ long scanToken( Program *prg, PdaRun *pdaRun, FsmRun *fsmRun, InputStream *input
int have = fsmRun->tokstart != 0 ? fsmRun->p - fsmRun->tokstart : 0;
int len = 0;
debug( REALM_SCAN, "fetching data: have: %d space: %d\n", have, space );
- int type = getData( inputStream, have, fsmRun->p, space, &len );
+ int type = getData( fsmRun, inputStream, have, fsmRun->p, space, &len );
switch ( type ) {
case INPUT_DATA:
diff --git a/colm/pdarun.h b/colm/pdarun.h
index 79b679e6..3c593018 100644
--- a/colm/pdarun.h
+++ b/colm/pdarun.h
@@ -82,6 +82,9 @@ typedef struct _FsmRun
int returnResult;
char *mark[MARK_SLOTS];
long matchedToken;
+
+ InputStream *attached1;
+ SourceStream *attached2;
} FsmRun;
void initFsmRun( FsmRun *fsmRun, struct ColmProgram *prg );
@@ -450,6 +453,12 @@ void attachIgnore( struct ColmProgram *prg, Tree **sp, PdaRun *pdaRun, Kid *inpu
void detachIgnores( struct ColmProgram *prg, Tree **sp, PdaRun *pdaRun, FsmRun *fsmRun, Kid *input );
void clearBuffered( FsmRun *fsmRun );
+void resetToken( FsmRun *fsmRun );
+
+void detachInput1( FsmRun *fsmRun, InputStream *is );
+void attachInput1( FsmRun *fsmRun, InputStream *is );
+void detachInput2( FsmRun *fsmRun, SourceStream *is );
+void attachInput2( FsmRun *fsmRun, SourceStream *is );
#ifdef __cplusplus
}