From 31789197f8e9dc3829790e3ddc9ce36087634172 Mon Sep 17 00:00:00 2001 From: Adrian Thurston Date: Sun, 27 Jan 2013 09:21:43 -0500 Subject: copy data into fsm run just ahead of the consume calls These are the first changes for scanning data in input streams and eliminating the holding of data in FsmRun. --- colm/pdarun.c | 34 +++++++++++++++++++++++++--------- 1 file changed, 25 insertions(+), 9 deletions(-) diff --git a/colm/pdarun.c b/colm/pdarun.c index 1a14c43c..5bd0553a 100644 --- a/colm/pdarun.c +++ b/colm/pdarun.c @@ -129,8 +129,6 @@ void decrementSteps( PdaRun *pdaRun ) debug( REALM_PARSE, "steps down to %ld\n", pdaRun->steps ); } -/* Load up a token, starting from tokstart if it is set. If not set then - * start it at data. */ Head *streamPull( Program *prg, FsmRun *fsmRun, StreamImpl *is, long length ) { /* We should not be in the midst of getting a token. */ @@ -140,8 +138,8 @@ Head *streamPull( Program *prg, FsmRun *fsmRun, StreamImpl *is, long length ) runBuf->next = fsmRun->runBuf; fsmRun->runBuf = runBuf; - int len = 0; - is->funcs->getData( fsmRun, is, 0, runBuf->data, length, &len ); + int lenCopied = 0; + is->funcs->getData( fsmRun, is, 0, runBuf->data, length, &lenCopied ); is->funcs->consumeData( is, length ); fsmRun->p = fsmRun->pe = runBuf->data + length; @@ -250,7 +248,6 @@ static void sendBackIgnore( Program *prg, Tree **sp, PdaRun *pdaRun, FsmRun *fsm debug( REALM_PARSE, "trigger parse stop, steps = target = %d\n", pdaRun->targetSteps ); pdaRun->stop = true; } - } void attachStream( FsmRun *fsmRun, StreamImpl *is ) @@ -786,7 +783,17 @@ void sendIgnore( Program *prg, Tree **sp, StreamImpl *is, FsmRun *fsmRun, PdaRun Head *peekMatch( Program *prg, FsmRun *fsmRun, StreamImpl *is ) { long length = fsmRun->p - fsmRun->tokstart; - Head *head = stringAllocPointer( prg, fsmRun->tokstart, length ); + + RunBuf *runBuf = newRunBuf(); + runBuf->next = fsmRun->runBuf; + fsmRun->runBuf = runBuf; + + int lenCopied = 0; + is->funcs->getData( fsmRun, is, 0, 
runBuf->data, length, &lenCopied ); + fsmRun->p = fsmRun->pe = runBuf->data + length; + + Head *head = stringAllocPointer( prg, runBuf->data, length ); + head->location = locationAllocate( prg ); head->location->line = is->line; head->location->column = is->column; @@ -801,7 +808,18 @@ Head *peekMatch( Program *prg, FsmRun *fsmRun, StreamImpl *is ) Head *extractMatch( Program *prg, FsmRun *fsmRun, StreamImpl *is ) { long length = fsmRun->p - fsmRun->tokstart; - Head *head = stringAllocPointer( prg, fsmRun->tokstart, length ); + + RunBuf *runBuf = newRunBuf(); + runBuf->next = fsmRun->runBuf; + fsmRun->runBuf = runBuf; + + int lenCopied = 0; + is->funcs->getData( fsmRun, is, 0, runBuf->data, length, &lenCopied ); + is->funcs->consumeData( is, length ); + fsmRun->p = fsmRun->pe = runBuf->data + length; + + Head *head = stringAllocPointer( prg, runBuf->data, length ); + head->location = locationAllocate( prg ); head->location->line = is->line; head->location->column = is->column; @@ -809,8 +827,6 @@ Head *extractMatch( Program *prg, FsmRun *fsmRun, StreamImpl *is ) debug( REALM_PARSE, "location byte: %d\n", is->byte ); - is->funcs->consumeData( is, length ); - return head; } -- cgit v1.2.1 From 46259a9a5538b49706d7264ce9884bc475ef5eaf Mon Sep 17 00:00:00 2001 From: Adrian Thurston Date: Sun, 27 Jan 2013 09:38:04 -0500 Subject: experimenting with EOS, very rough --- colm/input.c | 18 ++++++++++++++---- colm/input.h | 8 +++++--- colm/pdarun.c | 15 +++++++++++++-- 3 files changed, 32 insertions(+), 9 deletions(-) diff --git a/colm/input.c b/colm/input.c index ba144d94..6aef03c2 100644 --- a/colm/input.c +++ b/colm/input.c @@ -467,9 +467,16 @@ int _getData( FsmRun *fsmRun, StreamImpl *is, int skip, char *dest, int length, Stream *stream = (Stream*)buf->tree; int type = stream->in->funcs->getData( fsmRun, stream->in, skip, dest, length, copied ); - if ( type == INPUT_EOD && is->eof ) { - ret = INPUT_EOF; - break; +// if ( type == INPUT_EOD && !stream->in->eosSent ) { +// 
stream->in->eosSent = 1; +// ret = INPUT_EOS; +// continue; +// } + + if ( type == INPUT_EOD || type == INPUT_EOF ) { + debug( REALM_INPUT, "skipping over input\n" ); + buf = buf->next; + continue; } ret = type; @@ -558,6 +565,7 @@ int _consumeData( StreamImpl *is, int length ) if ( buf->type == RunBufSourceType ) { Stream *stream = (Stream*)buf->tree; int slen = stream->in->funcs->consumeData( stream->in, length ); + debug( REALM_INPUT, " got %d bytes from source\n", slen ); consumed += slen; length -= slen; @@ -578,8 +586,10 @@ int _consumeData( StreamImpl *is, int length ) } } - if ( length == 0 ) + if ( length == 0 ) { + debug( REALM_INPUT, "exiting consume\n", length ); break; + } RunBuf *runBuf = inputStreamPopHead( is ); free( runBuf ); diff --git a/colm/input.h b/colm/input.h index 3bfc43c5..7a8007c2 100644 --- a/colm/input.h +++ b/colm/input.h @@ -35,9 +35,10 @@ extern "C" { /* This is for data sources to return, not for the wrapper. */ #define INPUT_EOD 2 #define INPUT_EOF 3 -#define INPUT_LANG_EL 4 -#define INPUT_TREE 5 -#define INPUT_IGNORE 6 +#define INPUT_EOS 4 +#define INPUT_LANG_EL 5 +#define INPUT_TREE 6 +#define INPUT_IGNORE 7 /* * pdaRun <- fsmRun <- stream @@ -131,6 +132,7 @@ struct _StreamImpl char eofSent; char eof; + char eosSent; RunBuf *queue; RunBuf *queueTail; diff --git a/colm/pdarun.c b/colm/pdarun.c index 5bd0553a..68fae8d9 100644 --- a/colm/pdarun.c +++ b/colm/pdarun.c @@ -280,8 +280,10 @@ void detachSource( FsmRun *fsmRun, StreamImpl *is ) void clearBuffered( FsmRun *fsmRun ) { if ( fsmRun->tokstart != 0 ) { - fsmRun->p = fsmRun->pe = fsmRun->tokstart; - fsmRun->tokstart = 0; + //fsmRun->p = fsmRun->pe = fsmRun->tokstart; + //fsmRun->tokstart = 0; + + fsmRun->pe = fsmRun->p; } else { fsmRun->pe = fsmRun->p; @@ -1116,6 +1118,15 @@ long scanToken( Program *prg, PdaRun *pdaRun, FsmRun *fsmRun, StreamImpl *is ) fsmRun->pe = fsmRun->p + len; break; + case INPUT_EOS: + if ( fsmRun->tokstart != 0 ) + fsmRun->peof = fsmRun->pe; + debug( 
REALM_SCAN, "EOS *******************\n" ); +// else { +// return SCAN_EOS; +// } + break; + case INPUT_EOF: if ( fsmRun->tokstart != 0 ) fsmRun->peof = fsmRun->pe; -- cgit v1.2.1 From 9e1fd9d153df3cca0810f30fce0797b7bce12002 Mon Sep 17 00:00:00 2001 From: Adrian Thurston Date: Sun, 27 Jan 2013 11:59:12 -0500 Subject: separate scanner buf from the token consume buf --- colm/pdarun.c | 55 ++++++++++++++++++++++++++++++------------------------- colm/pdarun.h | 3 ++- 2 files changed, 32 insertions(+), 26 deletions(-) diff --git a/colm/pdarun.c b/colm/pdarun.c index 5bd0553a..a334f358 100644 --- a/colm/pdarun.c +++ b/colm/pdarun.c @@ -60,14 +60,13 @@ void initFsmRun( FsmRun *fsmRun, Program *prg ) { fsmRun->tables = prg->rtd->fsmTables; - fsmRun->runBuf = 0; - /* Run buffers need to stick around because - * token strings point into them. */ - fsmRun->runBuf = newRunBuf(); - fsmRun->runBuf->next = 0; + fsmRun->scanBuf = newRunBuf(); + fsmRun->scanBuf->next = 0; - fsmRun->p = fsmRun->pe = fsmRun->runBuf->data; + fsmRun->consumeBuf = 0; + + fsmRun->p = fsmRun->pe = fsmRun->scanBuf->data; fsmRun->peof = 0; fsmRun->preRegion = -1; @@ -75,9 +74,9 @@ void initFsmRun( FsmRun *fsmRun, Program *prg ) void clearFsmRun( Program *prg, FsmRun *fsmRun ) { - if ( fsmRun->runBuf != 0 ) { + if ( fsmRun->consumeBuf != 0 ) { /* Transfer the run buf list to the program */ - RunBuf *head = fsmRun->runBuf; + RunBuf *head = fsmRun->consumeBuf; RunBuf *tail = head; while ( tail->next != 0 ) tail = tail->next; @@ -135,13 +134,15 @@ Head *streamPull( Program *prg, FsmRun *fsmRun, StreamImpl *is, long length ) assert( fsmRun->tokstart == 0 ); RunBuf *runBuf = newRunBuf(); - runBuf->next = fsmRun->runBuf; - fsmRun->runBuf = runBuf; + runBuf->next = fsmRun->consumeBuf; + fsmRun->consumeBuf = runBuf; int lenCopied = 0; is->funcs->getData( fsmRun, is, 0, runBuf->data, length, &lenCopied ); is->funcs->consumeData( is, length ); - fsmRun->p = fsmRun->pe = runBuf->data + length; + + fsmRun->p = 
fsmRun->pe = fsmRun->scanBuf->data; + //fsmRun->peof = 0; Head *tokdata = stringAllocPointer( prg, runBuf->data, length ); updatePosition( is, runBuf->data, length ); @@ -785,12 +786,14 @@ Head *peekMatch( Program *prg, FsmRun *fsmRun, StreamImpl *is ) long length = fsmRun->p - fsmRun->tokstart; RunBuf *runBuf = newRunBuf(); - runBuf->next = fsmRun->runBuf; - fsmRun->runBuf = runBuf; + runBuf->next = fsmRun->consumeBuf; + fsmRun->consumeBuf = runBuf; int lenCopied = 0; is->funcs->getData( fsmRun, is, 0, runBuf->data, length, &lenCopied ); - fsmRun->p = fsmRun->pe = runBuf->data + length; + + fsmRun->p = fsmRun->pe = fsmRun->scanBuf->data; + //fsmRun->peof = 0; Head *head = stringAllocPointer( prg, runBuf->data, length ); @@ -810,13 +813,15 @@ Head *extractMatch( Program *prg, FsmRun *fsmRun, StreamImpl *is ) long length = fsmRun->p - fsmRun->tokstart; RunBuf *runBuf = newRunBuf(); - runBuf->next = fsmRun->runBuf; - fsmRun->runBuf = runBuf; + runBuf->next = fsmRun->consumeBuf; + fsmRun->consumeBuf = runBuf; int lenCopied = 0; is->funcs->getData( fsmRun, is, 0, runBuf->data, length, &lenCopied ); is->funcs->consumeData( is, length ); - fsmRun->p = fsmRun->pe = runBuf->data + length; + + fsmRun->p = fsmRun->pe = fsmRun->scanBuf->data; + //fsmRun->peof = 0; Head *head = stringAllocPointer( prg, runBuf->data, length ); @@ -1049,7 +1054,7 @@ long scanToken( Program *prg, PdaRun *pdaRun, FsmRun *fsmRun, StreamImpl *is ) /* There may be space left in the current buffer. If not then we need * to make some. */ - long space = fsmRun->runBuf->data + FSM_BUFSIZE - fsmRun->pe; + long space = fsmRun->scanBuf->data + FSM_BUFSIZE - fsmRun->pe; if ( space == 0 ) { /* Create a new run buf. */ RunBuf *newBuf = newRunBuf(); @@ -1059,15 +1064,15 @@ long scanToken( Program *prg, PdaRun *pdaRun, FsmRun *fsmRun, StreamImpl *is ) if ( fsmRun->tokstart == 0 ) { /* No prefix. We filled the previous buffer. 
*/ - fsmRun->runBuf->length = FSM_BUFSIZE; + fsmRun->scanBuf->length = FSM_BUFSIZE; } else { int i; debug( REALM_SCAN, "copying data over to new buffer\n" ); - assert( fsmRun->runBuf->offset == 0 ); + assert( fsmRun->scanBuf->offset == 0 ); - if ( fsmRun->tokstart == fsmRun->runBuf->data ) { + if ( fsmRun->tokstart == fsmRun->scanBuf->data ) { /* A token is started and it is already at the beginning * of the current buffer. This means buffer is full and it * must be grown. Probably need to do this sooner. */ @@ -1079,7 +1084,7 @@ long scanToken( Program *prg, PdaRun *pdaRun, FsmRun *fsmRun, StreamImpl *is ) memcpy( newBuf->data, fsmRun->tokstart, have ); /* Compute the length of the previous buffer. */ - fsmRun->runBuf->length = FSM_BUFSIZE - have; + fsmRun->scanBuf->length = FSM_BUFSIZE - have; /* Compute tokstart and tokend. */ long dist = fsmRun->tokstart - newBuf->data; @@ -1097,12 +1102,12 @@ long scanToken( Program *prg, PdaRun *pdaRun, FsmRun *fsmRun, StreamImpl *is ) fsmRun->p = fsmRun->pe = newBuf->data + have; fsmRun->peof = 0; - newBuf->next = fsmRun->runBuf; - fsmRun->runBuf = newBuf; + newBuf->next = fsmRun->scanBuf; + fsmRun->scanBuf = newBuf; } /* We don't have any data. What is next in the input inputStream? */ - space = fsmRun->runBuf->data + FSM_BUFSIZE - fsmRun->pe; + space = fsmRun->scanBuf->data + FSM_BUFSIZE - fsmRun->pe; assert( space > 0 ); /* Get more data. */ diff --git a/colm/pdarun.h b/colm/pdarun.h index 4b37c5cd..67e55dbb 100644 --- a/colm/pdarun.h +++ b/colm/pdarun.h @@ -73,7 +73,8 @@ typedef struct _FsmRun { FsmTables *tables; - RunBuf *runBuf; + RunBuf *scanBuf; + RunBuf *consumeBuf; /* FsmRun State. 
*/ long region, preRegion; -- cgit v1.2.1 From 73d0bdf8c771116da360cf8cf4b075f4d8431db6 Mon Sep 17 00:00:00 2001 From: Adrian Thurston Date: Sun, 27 Jan 2013 12:48:45 -0500 Subject: test case for nested streams --- test/include1.in | 14 ++++++++ test/include1.lm | 98 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ test/include1a.in | 2 ++ test/include1b.in | 2 ++ test/include1c.in | 2 ++ 5 files changed, 118 insertions(+) create mode 100644 test/include1.in create mode 100644 test/include1.lm create mode 100644 test/include1a.in create mode 100644 test/include1b.in create mode 100644 test/include1c.in diff --git a/test/include1.in b/test/include1.in new file mode 100644 index 00000000..9c7aa806 --- /dev/null +++ b/test/include1.in @@ -0,0 +1,14 @@ + +hello; + +#include "include1a.in" + +there; + +#include "include1b.in" + +dude; + +#include "include1c.in" + +and dudettes; diff --git a/test/include1.lm b/test/include1.lm new file mode 100644 index 00000000..03a3b046 --- /dev/null +++ b/test/include1.lm @@ -0,0 +1,98 @@ +namespace string + lex + literal '"' + token data /[^"\\]+/ + token escape /'\\' any/ + end + + def string_data + [data] + | [escape] + + def string + ['"' string_data* '"'] + + str unquote( S: string ) + { + match S ['"' DL: string_data* '"'] + for E: escape in DL + E.data = 'x' + return $DL + } + +end string + +namespace hash + + lex + literal 'define', 'include' + literal '#', '\n' ni + + token id /[a-zA-Z_][a-zA-Z_0-9]*/ + token number /[0-9]+/ + + ignore /[ \t]/ + end + + def hash + ['#' 'define' Id: id number '\n'] + | ['#' 'include' Inc: string::string '\n'] + +end hash + +token rest_of_line /[^\n]* '\n'/ + +namespace lang + + lex + ignore /space/ + literal '*', '(', ')', ';' + token id /[a-zA-Z_][a-zA-Z_0-9]*/ + token number /[0-9]+/ + + token hash /'#'/ { + parse_stop H: hash::hash[ input ] + if ( H.tree ) { + if ( H.tree.Inc ) { + FN: str = unquote( H.tree.Inc ) + print( 'opening ' FN '\n' ) + IS: stream = open( FN 'r' ) + if ( ! 
IS ) { + print( 'ERROR: failed to open ' FN '\n' ) + exit(1) + } + input.push( IS ) + } + } + else { + parse_stop L: rest_of_line[ input ] + if ! L.tree { + print( "ERROR: stuck: " L.error ) + exit(1) + } + print( "ERROR: failed to parse # directive: " L.tree ) + } + } + end + + def item + [id] + | ['(' item* ')'] + + def statement + [item* ';'] + + def start + [statement*] + +end lang + +parse Input: lang::start[ stdin ] + +if ! Input.tree + print( Input.error '\n' ) +else { + #print( Input.tree '\n' ) + S: lang::start = Input.tree + print( Input.tree '\n' ) +} diff --git a/test/include1a.in b/test/include1a.in new file mode 100644 index 00000000..26da0afa --- /dev/null +++ b/test/include1a.in @@ -0,0 +1,2 @@ +a; +b; diff --git a/test/include1b.in b/test/include1b.in new file mode 100644 index 00000000..6c574323 --- /dev/null +++ b/test/include1b.in @@ -0,0 +1,2 @@ +c; +d; diff --git a/test/include1c.in b/test/include1c.in new file mode 100644 index 00000000..5373832d --- /dev/null +++ b/test/include1c.in @@ -0,0 +1,2 @@ +e; +f; -- cgit v1.2.1 From d2d780618386860d7fb6260579ca705f8bb902e8 Mon Sep 17 00:00:00 2001 From: Adrian Thurston Date: Sun, 27 Jan 2013 14:31:14 -0500 Subject: repeatedly call getData in match extraction The getData interface was originally designed to stop on buffer boundaries. Need to call it in a loop when using it in extractMatch. 
--- colm/pdarun.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/colm/pdarun.c b/colm/pdarun.c index a334f358..f762689d 100644 --- a/colm/pdarun.c +++ b/colm/pdarun.c @@ -817,7 +817,14 @@ Head *extractMatch( Program *prg, FsmRun *fsmRun, StreamImpl *is ) fsmRun->consumeBuf = runBuf; int lenCopied = 0; + int total = 0; is->funcs->getData( fsmRun, is, 0, runBuf->data, length, &lenCopied ); + total += lenCopied; + while ( total < length ) { + is->funcs->getData( fsmRun, is, total, runBuf->data+total, length-total, &lenCopied ); + total += lenCopied; + } + is->funcs->consumeData( is, length ); fsmRun->p = fsmRun->pe = fsmRun->scanBuf->data; -- cgit v1.2.1 From 3280a6ae3077e35cde9a429b4fb566a07c6a31c7 Mon Sep 17 00:00:00 2001 From: Adrian Thurston Date: Mon, 28 Jan 2013 21:45:07 -0500 Subject: removed the attach/detach calls Still need a clearBuffered call in the undoConsume, not entirely sure why. --- colm/ctinput.cc | 4 ---- colm/input.c | 34 ++-------------------------------- colm/input.h | 1 - colm/pdarun.c | 27 --------------------------- colm/pdarun.h | 3 --- 5 files changed, 2 insertions(+), 67 deletions(-) diff --git a/colm/ctinput.cc b/colm/ctinput.cc index bde9f424..a10d03f5 100644 --- a/colm/ctinput.cc +++ b/colm/ctinput.cc @@ -67,8 +67,6 @@ int inputStreamPatternGetData( FsmRun *fsmRun, StreamImpl *ss, int skip, char *d PatternItem *buf = ss->patItem; int offset = ss->offset; - attachStream( fsmRun, ss ); - while ( true ) { if ( buf == 0 ) return INPUT_EOD; @@ -244,8 +242,6 @@ int inputStreamConsGetData( FsmRun *fsmRun, StreamImpl *ss, int skip, char *dest ConsItem *buf = ss->consItem; int offset = ss->offset; - attachStream( fsmRun, ss ); - while ( true ) { if ( buf == 0 ) return INPUT_EOD; diff --git a/colm/input.c b/colm/input.c index ba144d94..bad36f2b 100644 --- a/colm/input.c +++ b/colm/input.c @@ -233,8 +233,6 @@ int fdGetData( FsmRun *fsmRun, StreamImpl *ss, int skip, char *dest, int length, buf = buf->next; } - attachStream( fsmRun, ss ); 
- return ret; } @@ -452,8 +450,6 @@ int _getData( FsmRun *fsmRun, StreamImpl *is, int skip, char *dest, int length, int ret = 0; *copied = 0; - attachStream( fsmRun, is ); - /* Move over skip bytes. */ RunBuf *buf = is->queue; while ( true ) { @@ -596,8 +592,7 @@ int _undoConsumeData( FsmRun *fsmRun, StreamImpl *is, const char *data, int leng Stream *stream = (Stream*)is->queue->tree; int len = stream->in->funcs->undoConsumeData( fsmRun, stream->in, data, length ); - if ( stream->in->attached != 0 ) - detachStream( stream->in->attached, stream->in ); + clearBuffered( fsmRun ); return len; } @@ -607,8 +602,7 @@ int _undoConsumeData( FsmRun *fsmRun, StreamImpl *is, const char *data, int leng memcpy( newBuf->data, data, length ); inputStreamPrepend( is, newBuf ); - if ( is->attached != 0 ) - detachStream( is->attached, is ); + clearBuffered( fsmRun ); return length; } @@ -635,9 +629,6 @@ Tree *_consumeTree( StreamImpl *is ) void _undoConsumeTree( StreamImpl *is, Tree *tree, int ignore ) { - if ( is->attached != 0 ) - detachStream( is->attached, is ); - /* Create a new buffer for the data. This is the easy implementation. * Something better is needed here. It puts a max on the amount of * data that can be pushed back to the inputStream. */ @@ -671,9 +662,6 @@ void _undoConsumeLangEl( StreamImpl *is ) void _prependData( StreamImpl *is, const char *data, long length ) { - if ( is->attached != 0 ) - detachStream( is->attached, is ); - if ( isSourceStream( is ) && ((Stream*)is->queue->tree)->in->funcs == &streamFuncs ) { Stream *stream = (Stream*)is->queue->tree; @@ -695,9 +683,6 @@ void _prependData( StreamImpl *is, const char *data, long length ) void _prependTree( StreamImpl *is, Tree *tree, int ignore ) { - if ( is->attached != 0 ) - detachStream( is->attached, is ); - /* Create a new buffer for the data. This is the easy implementation. * Something better is needed here. It puts a max on the amount of * data that can be pushed back to the inputStream. 
*/ @@ -720,9 +705,6 @@ void _prependStream( StreamImpl *in, struct ColmTree *tree ) int _undoPrependData( StreamImpl *is, int length ) { - if ( is->attached != 0 ) - detachStream( is->attached, is ); - debug( REALM_INPUT, "consuming %d bytes\n", length ); int consumed = 0; @@ -769,9 +751,6 @@ int _undoPrependData( StreamImpl *is, int length ) Tree *_undoPrependTree( StreamImpl *is ) { - if ( is->attached != 0 ) - detachStream( is->attached, is ); - while ( is->queue != 0 && is->queue->type == RunBufDataType && is->queue->offset == is->queue->length ) { RunBuf *runBuf = inputStreamPopHead( is ); free( runBuf ); @@ -809,9 +788,6 @@ void _appendData( StreamImpl *is, const char *data, long len ) Tree *_undoAppendData( StreamImpl *is, int length ) { - if ( is->attached != 0 ) - detachStream( is->attached, is ); - int consumed = 0; /* Move over skip bytes. */ @@ -871,9 +847,6 @@ void _appendStream( StreamImpl *in, struct ColmTree *tree ) Tree *_undoAppendStream( StreamImpl *is ) { - if ( is->attached != 0 ) - detachStream( is->attached, is ); - RunBuf *runBuf = inputStreamPopTail( is ); Tree *tree = runBuf->tree; free( runBuf ); @@ -882,9 +855,6 @@ Tree *_undoAppendStream( StreamImpl *is ) Tree *_undoAppendTree( StreamImpl *is ) { - if ( is->attached != 0 ) - detachStream( is->attached, is ); - RunBuf *runBuf = inputStreamPopTail( is ); Tree *tree = runBuf->tree; free( runBuf ); diff --git a/colm/input.h b/colm/input.h index 3bfc43c5..0da468a1 100644 --- a/colm/input.h +++ b/colm/input.h @@ -127,7 +127,6 @@ struct StreamFuncs struct _StreamImpl { struct StreamFuncs *funcs; - struct _FsmRun *attached; char eofSent; char eof; diff --git a/colm/pdarun.c b/colm/pdarun.c index f762689d..6983589a 100644 --- a/colm/pdarun.c +++ b/colm/pdarun.c @@ -251,33 +251,6 @@ static void sendBackIgnore( Program *prg, Tree **sp, PdaRun *pdaRun, FsmRun *fsm } } -void attachStream( FsmRun *fsmRun, StreamImpl *is ) -{ - if ( is->attached != 0 && is->attached != fsmRun ) - detachStream( 
is->attached, is ); - - if ( is->attached != fsmRun ) { - debug( REALM_INPUT, "attaching FsmRun to stream: %p %p\n", fsmRun, is ); - is->attached = fsmRun; - } -} - -void detachStream( FsmRun *fsmRun, StreamImpl *is ) -{ - debug( REALM_INPUT, "detaching FsmRun from stream: %p %p\n", fsmRun, is ); - - is->attached = 0; - clearBuffered( fsmRun ); -} - -void detachSource( FsmRun *fsmRun, StreamImpl *is ) -{ - debug( REALM_INPUT, "detaching fsm run from source stream: %p %p\n", fsmRun, is ); - - is->attached = 0; - clearBuffered( fsmRun ); -} - void clearBuffered( FsmRun *fsmRun ) { if ( fsmRun->tokstart != 0 ) { diff --git a/colm/pdarun.h b/colm/pdarun.h index 67e55dbb..81d376c4 100644 --- a/colm/pdarun.h +++ b/colm/pdarun.h @@ -462,9 +462,6 @@ void undoParseStream( struct ColmProgram *prg, Tree **sp, StreamImpl *inputStrea void clearBuffered( FsmRun *fsmRun ); void resetToken( FsmRun *fsmRun ); -void detachStream( FsmRun *fsmRun, StreamImpl *is ); -void attachStream( FsmRun *fsmRun, StreamImpl *is ); - #ifdef __cplusplus } #endif -- cgit v1.2.1 From 08192f44658d3294d74066262d0d62a62eaa4587 Mon Sep 17 00:00:00 2001 From: Adrian Thurston Date: Mon, 28 Jan 2013 21:47:17 -0500 Subject: added empty expected output for the yet-to-work include1.lm test --- test/include1.exp | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 test/include1.exp diff --git a/test/include1.exp b/test/include1.exp new file mode 100644 index 00000000..e69de29b -- cgit v1.2.1 From 0815d6da3c82dcb4a29da971ed49f323e8f6ac0c Mon Sep 17 00:00:00 2001 From: Adrian Thurston Date: Mon, 28 Jan 2013 21:54:08 -0500 Subject: the include1 test now functions on the eos branch --- test/include1.exp | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/test/include1.exp b/test/include1.exp index e69de29b..502194e3 100644 --- a/test/include1.exp +++ b/test/include1.exp @@ -0,0 +1,19 @@ +opening include1a.in +opening include1b.in +opening include1c.in +hello; + +a; +b; + +there; 
+ +c; +d; + +dude; + +e; +f; + +and dudettes; -- cgit v1.2.1 From 2e74066ca8fb99c6fcccf210f8b9c2400a62e821 Mon Sep 17 00:00:00 2001 From: Adrian Thurston Date: Sat, 2 Feb 2013 10:15:56 -0500 Subject: forked the getData input function into getParseBlock After forking we can rework this function to return a pointer to a parse block, instead of copying data in. --- colm/ctinput.cc | 96 +++++++++++++++++++++++++++++++++ colm/input.c | 162 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ colm/input.h | 4 +- colm/pdarun.c | 2 +- 4 files changed, 262 insertions(+), 2 deletions(-) diff --git a/colm/ctinput.cc b/colm/ctinput.cc index a10d03f5..10f89da9 100644 --- a/colm/ctinput.cc +++ b/colm/ctinput.cc @@ -60,6 +60,53 @@ LangEl *inputStreamPatternGetLangEl( StreamImpl *ss, long *bindId, char **data, return klangEl; } +int inputStreamPatternGetParseBlock( FsmRun *fsmRun, StreamImpl *ss, int skip, char *dest, int length, int *copied ) +{ + *copied = 0; + + PatternItem *buf = ss->patItem; + int offset = ss->offset; + + while ( true ) { + if ( buf == 0 ) + return INPUT_EOD; + + if ( buf->type == PatternItem::FactorType ) + return INPUT_LANG_EL; + + assert ( buf->type == PatternItem::InputText ); + int avail = buf->data.length() - offset; + + if ( avail > 0 ) { + /* The source data from the current buffer. */ + char *src = &buf->data[offset]; + int slen = avail <= length ? avail : length; + + /* Need to skip? */ + if ( skip > 0 && slen <= skip ) { + /* Skipping the the whole source. */ + skip -= slen; + } + else { + /* Either skip is zero, or less than slen. Skip goes to zero. + * Some data left over, copy it. 
*/ + src += skip; + slen -= skip; + skip = 0; + + memcpy( dest, src, slen ) ; + *copied += slen; + break; + } + } + + buf = buf->next; + offset = 0; + } + + return INPUT_DATA; +} + int inputStreamPatternGetData( FsmRun *fsmRun, StreamImpl *ss, int skip, char *dest, int length, int *copied ) { *copied = 0; @@ -187,6 +234,7 @@ extern "C" void initPatFuncs() memset( &patternFuncs, 0, sizeof(StreamFuncs) ); patternFuncs.getData = &inputStreamPatternGetData; + patternFuncs.getParseBlock = &inputStreamPatternGetParseBlock; patternFuncs.consumeData = &inputStreamPatternConsumeData; patternFuncs.undoConsumeData = &inputStreamPatternUndoConsumeData; @@ -235,6 +283,53 @@ LangEl *inputStreamConsGetLangEl( StreamImpl *ss, long *bindId, char **data, lon return klangEl; } +int inputStreamConsGetParseBlock( FsmRun *fsmRun, StreamImpl *ss, int skip, char *dest, int length, int *copied ) +{ + *copied = 0; + + ConsItem *buf = ss->consItem; + int offset = ss->offset; + + while ( true ) { + if ( buf == 0 ) + return INPUT_EOD; + + if ( buf->type == ConsItem::ExprType || buf->type == ConsItem::FactorType ) + return INPUT_LANG_EL; + + assert ( buf->type == ConsItem::InputText ); + int avail = buf->data.length() - offset; + + if ( avail > 0 ) { + /* The source data from the current buffer. */ + char *src = &buf->data[offset]; + int slen = avail <= length ? avail : length; + + /* Need to skip? */ + if ( skip > 0 && slen <= skip ) { + /* Skipping the the whole source. */ + skip -= slen; + } + else { + /* Either skip is zero, or less than slen. Skip goes to zero. + * Some data left over, copy it. 
*/ + src += skip; + slen -= skip; + skip = 0; + + memcpy( dest, src, slen ) ; + *copied += slen; + break; + } + } + + buf = buf->next; + offset = 0; + } + + return INPUT_DATA; +} + int inputStreamConsGetData( FsmRun *fsmRun, StreamImpl *ss, int skip, char *dest, int length, int *copied ) { *copied = 0; @@ -364,6 +459,7 @@ extern "C" void initConsFuncs() memset( &replFuncs, 0, sizeof(StreamFuncs) ); replFuncs.getData = &inputStreamConsGetData; + replFuncs.getParseBlock = &inputStreamConsGetParseBlock; replFuncs.consumeData = &inputStreamConsConsumeData; replFuncs.undoConsumeData = &inputStreamConsUndoConsumeData; diff --git a/colm/input.c b/colm/input.c index 14c1cb1b..ad52f56b 100644 --- a/colm/input.c +++ b/colm/input.c @@ -139,6 +139,7 @@ void initStreamFuncs() { memset( &streamFuncs, 0, sizeof(struct StreamFuncs) ); streamFuncs.getData = &_getData; + streamFuncs.getParseBlock = &_getParseBlock; streamFuncs.consumeData = &_consumeData; streamFuncs.undoConsumeData = &_undoConsumeData; streamFuncs.consumeTree = &_consumeTree; @@ -177,6 +178,65 @@ void initInputFuncs() * Base run-time input streams. */ +int fdGetParseBlock( FsmRun *fsmRun, StreamImpl *ss, int skip, char *dest, int length, int *copied ) +{ + int ret = 0; + *copied = 0; + + /* Move over skip bytes. */ + RunBuf *buf = ss->queue; + while ( true ) { + if ( buf == 0 ) { + /* Got through the in-mem buffers without copying anything. */ + RunBuf *runBuf = newRunBuf(); + sourceStreamAppend( ss, runBuf ); + int received = ss->funcs->getDataSource( ss, runBuf->data, FSM_BUFSIZE ); + if ( received == 0 ) { + ret = INPUT_EOD; + break; + } + runBuf->length = received; + + int slen = received < length ? received : length; + memcpy( dest, runBuf->data, slen ); + *copied = slen; + ret = INPUT_DATA; + break; + } + + int avail = buf->length - buf->offset; + + /* Anything available in the current buffer. */ + if ( avail > 0 ) { + /* The source data from the current buffer. 
*/ + char *src = &buf->data[buf->offset]; + + /* Need to skip? */ + if ( skip > 0 && skip >= avail ) { + /* Skipping the the whole source. */ + skip -= avail; + } + else { + /* Either skip is zero, or less than slen. Skip goes to zero. + * Some data left over, copy it. */ + src += skip; + avail -= skip; + skip = 0; + + int slen = avail < length ? avail : length; + memcpy( dest, src, slen ) ; + *copied += slen; + ret = INPUT_DATA; + break; + } + } + + buf = buf->next; + } + + return ret; +} + int fdGetData( FsmRun *fsmRun, StreamImpl *ss, int skip, char *dest, int length, int *copied ) { int ret = 0; @@ -303,6 +363,7 @@ void initFileFuncs() { memset( &fileFuncs, 0, sizeof(struct StreamFuncs) ); fileFuncs.getData = &fdGetData; + fileFuncs.getParseBlock = &fdGetParseBlock; fileFuncs.consumeData = &fdConsumeData; fileFuncs.undoConsumeData = &fdUndoConsumeData; fileFuncs.getDataSource = &fileGetDataSource; @@ -328,6 +389,7 @@ void initFdFuncs() { memset( &fdFuncs, 0, sizeof(struct StreamFuncs) ); fdFuncs.getData = &fdGetData; + fdFuncs.getParseBlock = &fdGetParseBlock; fdFuncs.consumeData = &fdConsumeData; fdFuncs.undoConsumeData = &fdUndoConsumeData; fdFuncs.getDataSource = &fdGetDataSource; @@ -445,6 +507,106 @@ void _unsetEof( StreamImpl *is ) } } +int _getParseBlock( FsmRun *fsmRun, StreamImpl *is, int skip, char *dest, int length, int *copied ) +{ + int ret = 0; + *copied = 0; + + /* Move over skip bytes. */ + RunBuf *buf = is->queue; + while ( true ) { + if ( buf == 0 ) { + /* Got through the in-mem buffers without copying anything. */ + ret = is->eof ? 
INPUT_EOF : INPUT_EOD; + break; + } + + if ( buf->type == RunBufSourceType ) { + Stream *stream = (Stream*)buf->tree; + int type = stream->in->funcs->getData( fsmRun, stream->in, skip, dest, length, copied ); + +// if ( type == INPUT_EOD && !stream->in->eosSent ) { +// stream->in->eosSent = 1; +// ret = INPUT_EOS; +// continue; +// } + + if ( type == INPUT_EOD || type == INPUT_EOF ) { + debug( REALM_INPUT, "skipping over input\n" ); + buf = buf->next; + continue; + } + + ret = type; + break; + } + + if ( buf->type == RunBufTokenType ) { + ret = INPUT_TREE; + break; + } + + if ( buf->type == RunBufIgnoreType ) { + ret = INPUT_IGNORE; + break; + } + + int avail = buf->length - buf->offset; + + /* Anything available in the current buffer. */ + if ( avail > 0 ) { + /* The source data from the current buffer. */ + char *src = &buf->data[buf->offset]; + + /* Need to skip? */ + if ( skip > 0 && skip >= avail ) { + /* Skipping the the whole source. */ + skip -= avail; + } + else { + /* Either skip is zero, or less than slen. Skip goes to zero. + * Some data left over, copy it. */ + src += skip; + avail -= skip; + skip = 0; + + int slen = avail <= length ? 
avail : length; + memcpy( dest, src, slen ) ; + *copied += slen; + ret = INPUT_DATA; + break; + } + } + + buf = buf->next; + } + +#if DEBUG + switch ( ret ) { + case INPUT_DATA: + debug( REALM_INPUT, "get data: DATA copied: %d: %.*s\n", *copied, (int)*copied, dest ); + break; + case INPUT_EOD: + debug( REALM_INPUT, "get data: EOD\n" ); + break; + case INPUT_EOF: + debug( REALM_INPUT, "get data: EOF\n" ); + break; + case INPUT_TREE: + debug( REALM_INPUT, "get data: TREE\n" ); + break; + case INPUT_IGNORE: + debug( REALM_INPUT, "get data: IGNORE\n" ); + break; + case INPUT_LANG_EL: + debug( REALM_INPUT, "get data: LANG_EL\n" ); + break; + } +#endif + + return ret; +} + int _getData( FsmRun *fsmRun, StreamImpl *is, int skip, char *dest, int length, int *copied ) { int ret = 0; diff --git a/colm/input.h b/colm/input.h index 13cd48c0..59473932 100644 --- a/colm/input.h +++ b/colm/input.h @@ -88,7 +88,8 @@ typedef struct _StreamImpl StreamImpl; struct StreamFuncs { - /* Data. */ + int (*getParseBlock)( struct _FsmRun *fsmRun, StreamImpl *ss, int offset, char *dest, int length, int *copied ); + int (*getData)( struct _FsmRun *fsmRun, StreamImpl *ss, int offset, char *dest, int length, int *copied ); int (*consumeData)( StreamImpl *ss, int length ); @@ -168,6 +169,7 @@ void initConsFuncs(); /* The input stream interface. */ int _getData( struct _FsmRun *fsmRun, StreamImpl *in, int offset, char *dest, int length, int *copied ); +int _getParseBlock( struct _FsmRun *fsmRun, StreamImpl *in, int offset, char *dest, int length, int *copied ); int _consumeData( StreamImpl *in, int length ); int _undoConsumeData( struct _FsmRun *fsmRun, StreamImpl *is, const char *data, int length ); diff --git a/colm/pdarun.c b/colm/pdarun.c index e655d9e4..5ff7921e 100644 --- a/colm/pdarun.c +++ b/colm/pdarun.c @@ -1096,7 +1096,7 @@ long scanToken( Program *prg, PdaRun *pdaRun, FsmRun *fsmRun, StreamImpl *is ) int have = fsmRun->tokstart != 0 ? 
fsmRun->p - fsmRun->tokstart : 0; int len = 0; debug( REALM_SCAN, "fetching data: have: %d space: %d\n", have, space ); - int type = is->funcs->getData( fsmRun, is, have, fsmRun->p, space, &len ); + int type = is->funcs->getParseBlock( fsmRun, is, have, fsmRun->p, space, &len ); switch ( type ) { case INPUT_DATA: -- cgit v1.2.1 From 116b212223225cc519862330d42b2426737fb4f4 Mon Sep 17 00:00:00 2001 From: Adrian Thurston Date: Sat, 2 Feb 2013 11:08:17 -0500 Subject: return pointer to data block from getParseBlock The next step is to use this pointer instead of the scanBuf to do the scanning. --- colm/ctinput.cc | 8 ++++++-- colm/input.c | 11 ++++++++--- colm/input.h | 9 ++++++--- colm/pdarun.c | 7 +++++-- 4 files changed, 25 insertions(+), 10 deletions(-) diff --git a/colm/ctinput.cc b/colm/ctinput.cc index 10f89da9..9c2f848e 100644 --- a/colm/ctinput.cc +++ b/colm/ctinput.cc @@ -60,7 +60,8 @@ LangEl *inputStreamPatternGetLangEl( StreamImpl *ss, long *bindId, char **data, return klangEl; } -int inputStreamPatternGetParseBlock( FsmRun *fsmRun, StreamImpl *ss, int skip, char *dest, int length, int *copied ) +int inputStreamPatternGetParseBlock( FsmRun *fsmRun, StreamImpl *ss, int skip, + char *dest, int length, char **pdp, int *copied ) { *copied = 0; @@ -95,6 +96,7 @@ int inputStreamPatternGetParseBlock( FsmRun *fsmRun, StreamImpl *ss, int skip, c skip = 0; memcpy( dest, src, slen ) ; + *pdp = src; *copied += slen; break; } @@ -283,7 +285,8 @@ LangEl *inputStreamConsGetLangEl( StreamImpl *ss, long *bindId, char **data, lon return klangEl; } -int inputStreamConsGetParseBlock( FsmRun *fsmRun, StreamImpl *ss, int skip, char *dest, int length, int *copied ) +int inputStreamConsGetParseBlock( FsmRun *fsmRun, StreamImpl *ss, int skip, + char *dest, int length, char **pdp, int *copied ) { *copied = 0; @@ -318,6 +321,7 @@ int inputStreamConsGetParseBlock( FsmRun *fsmRun, StreamImpl *ss, int skip, char skip = 0; memcpy( dest, src, slen ) ; + *pdp = src; *copied += slen; break; 
} diff --git a/colm/input.c b/colm/input.c index ad52f56b..5adfb35e 100644 --- a/colm/input.c +++ b/colm/input.c @@ -178,7 +178,8 @@ void initInputFuncs() * Base run-time input streams. */ -int fdGetParseBlock( FsmRun *fsmRun, StreamImpl *ss, int skip, char *dest, int length, int *copied ) +int fdGetParseBlock( FsmRun *fsmRun, StreamImpl *ss, int skip, + char *dest, int length, char **pdp, int *copied ) { int ret = 0; *copied = 0; @@ -199,6 +200,7 @@ int fdGetParseBlock( FsmRun *fsmRun, StreamImpl *ss, int skip, char *dest, int l int slen = received < length ? received : length; memcpy( dest, runBuf->data, slen ); + *pdp = runBuf->data; *copied = slen; ret = INPUT_DATA; break; @@ -225,6 +227,7 @@ int fdGetParseBlock( FsmRun *fsmRun, StreamImpl *ss, int skip, char *dest, int l int slen = avail < length ? avail : length; memcpy( dest, src, slen ) ; + *pdp = src; *copied += slen; ret = INPUT_DATA; break; @@ -507,7 +510,8 @@ void _unsetEof( StreamImpl *is ) } } -int _getParseBlock( FsmRun *fsmRun, StreamImpl *is, int skip, char *dest, int length, int *copied ) +int _getParseBlock( FsmRun *fsmRun, StreamImpl *is, int skip, + char *dest, int length, char **pdp, int *copied ) { int ret = 0; *copied = 0; @@ -523,7 +527,7 @@ int _getParseBlock( FsmRun *fsmRun, StreamImpl *is, int skip, char *dest, int le if ( buf->type == RunBufSourceType ) { Stream *stream = (Stream*)buf->tree; - int type = stream->in->funcs->getData( fsmRun, stream->in, skip, dest, length, copied ); + int type = stream->in->funcs->getParseBlock( fsmRun, stream->in, skip, dest, length, pdp, copied ); // if ( type == INPUT_EOD && !stream->in->eosSent ) { // stream->in->eosSent = 1; @@ -572,6 +576,7 @@ int _getParseBlock( FsmRun *fsmRun, StreamImpl *is, int skip, char *dest, int le int slen = avail <= length ? 
avail : length; memcpy( dest, src, slen ) ; + *pdp = src; *copied += slen; ret = INPUT_DATA; break; diff --git a/colm/input.h b/colm/input.h index 59473932..e2b2fce1 100644 --- a/colm/input.h +++ b/colm/input.h @@ -88,7 +88,8 @@ typedef struct _StreamImpl StreamImpl; struct StreamFuncs { - int (*getParseBlock)( struct _FsmRun *fsmRun, StreamImpl *ss, int offset, char *dest, int length, int *copied ); + int (*getParseBlock)( struct _FsmRun *fsmRun, StreamImpl *ss, int offset, + char *dest, int length, char **pdp, int *copied ); int (*getData)( struct _FsmRun *fsmRun, StreamImpl *ss, int offset, char *dest, int length, int *copied ); @@ -168,8 +169,10 @@ void initConsFuncs(); /* The input stream interface. */ -int _getData( struct _FsmRun *fsmRun, StreamImpl *in, int offset, char *dest, int length, int *copied ); -int _getParseBlock( struct _FsmRun *fsmRun, StreamImpl *in, int offset, char *dest, int length, int *copied ); +int _getData( struct _FsmRun *fsmRun, StreamImpl *in, int offset, + char *dest, int length, int *copied ); +int _getParseBlock( struct _FsmRun *fsmRun, StreamImpl *in, int offset, + char *dest, int length, char **pdp, int *copied ); int _consumeData( StreamImpl *in, int length ); int _undoConsumeData( struct _FsmRun *fsmRun, StreamImpl *is, const char *data, int length ); diff --git a/colm/pdarun.c b/colm/pdarun.c index 5ff7921e..ace7b019 100644 --- a/colm/pdarun.c +++ b/colm/pdarun.c @@ -1094,9 +1094,12 @@ long scanToken( Program *prg, PdaRun *pdaRun, FsmRun *fsmRun, StreamImpl *is ) /* Get more data. */ int have = fsmRun->tokstart != 0 ? 
fsmRun->p - fsmRun->tokstart : 0; - int len = 0; + debug( REALM_SCAN, "fetching data: have: %d space: %d\n", have, space ); - int type = is->funcs->getParseBlock( fsmRun, is, have, fsmRun->p, space, &len ); + + char *pd = 0; + int len = 0; + int type = is->funcs->getParseBlock( fsmRun, is, have, fsmRun->p, space, &pd, &len ); switch ( type ) { case INPUT_DATA: -- cgit v1.2.1 From d7ff62a2f5cb920dbbf1a6f7516be9cd417b892e Mon Sep 17 00:00:00 2001 From: Adrian Thurston Date: Sun, 3 Feb 2013 11:35:55 -0500 Subject: refraining from copying in getParseBlock, basics working --- colm/ctinput.cc | 14 +++---- colm/input.c | 32 +++++++--------- colm/input.h | 11 +++--- colm/pdarun.c | 111 +++++++++++++++----------------------------------------- colm/pdarun.h | 2 +- 5 files changed, 56 insertions(+), 114 deletions(-) diff --git a/colm/ctinput.cc b/colm/ctinput.cc index 9c2f848e..a445a8fc 100644 --- a/colm/ctinput.cc +++ b/colm/ctinput.cc @@ -60,8 +60,8 @@ LangEl *inputStreamPatternGetLangEl( StreamImpl *ss, long *bindId, char **data, return klangEl; } -int inputStreamPatternGetParseBlock( FsmRun *fsmRun, StreamImpl *ss, int skip, - char *dest, int length, char **pdp, int *copied ) +int inputStreamPatternGetParseBlock( FsmRun *fsmRun, StreamImpl *ss, + int skip, char **pdp, int *copied ) { *copied = 0; @@ -81,7 +81,7 @@ int inputStreamPatternGetParseBlock( FsmRun *fsmRun, StreamImpl *ss, int skip, if ( avail > 0 ) { /* The source data from the current buffer. */ char *src = &buf->data[offset]; - int slen = avail <= length ? avail : length; + int slen = avail; /* Need to skip? 
*/ if ( skip > 0 && slen <= skip ) { @@ -95,7 +95,6 @@ int inputStreamPatternGetParseBlock( FsmRun *fsmRun, StreamImpl *ss, int skip, slen -= skip; skip = 0; - memcpy( dest, src, slen ) ; *pdp = src; *copied += slen; break; @@ -285,8 +284,8 @@ LangEl *inputStreamConsGetLangEl( StreamImpl *ss, long *bindId, char **data, lon return klangEl; } -int inputStreamConsGetParseBlock( FsmRun *fsmRun, StreamImpl *ss, int skip, - char *dest, int length, char **pdp, int *copied ) +int inputStreamConsGetParseBlock( FsmRun *fsmRun, StreamImpl *ss, + int skip, char **pdp, int *copied ) { *copied = 0; @@ -306,7 +305,7 @@ int inputStreamConsGetParseBlock( FsmRun *fsmRun, StreamImpl *ss, int skip, if ( avail > 0 ) { /* The source data from the current buffer. */ char *src = &buf->data[offset]; - int slen = avail <= length ? avail : length; + int slen = avail; /* Need to skip? */ if ( skip > 0 && slen <= skip ) { @@ -320,7 +319,6 @@ int inputStreamConsGetParseBlock( FsmRun *fsmRun, StreamImpl *ss, int skip, slen -= skip; skip = 0; - memcpy( dest, src, slen ) ; *pdp = src; *copied += slen; break; diff --git a/colm/input.c b/colm/input.c index 5adfb35e..5512450f 100644 --- a/colm/input.c +++ b/colm/input.c @@ -178,8 +178,8 @@ void initInputFuncs() * Base run-time input streams. */ -int fdGetParseBlock( FsmRun *fsmRun, StreamImpl *ss, int skip, - char *dest, int length, char **pdp, int *copied ) +int fdGetParseBlock( FsmRun *fsmRun, StreamImpl *ss, + int skip, char **pdp, int *copied ) { int ret = 0; *copied = 0; @@ -198,8 +198,7 @@ int fdGetParseBlock( FsmRun *fsmRun, StreamImpl *ss, int skip, } runBuf->length = received; - int slen = received < length ? received : length; - memcpy( dest, runBuf->data, slen ); + int slen = received; *pdp = runBuf->data; *copied = slen; ret = INPUT_DATA; @@ -225,8 +224,7 @@ int fdGetParseBlock( FsmRun *fsmRun, StreamImpl *ss, int skip, avail -= skip; skip = 0; - int slen = avail < length ? 
avail : length; - memcpy( dest, src, slen ) ; + int slen = avail; *pdp = src; *copied += slen; ret = INPUT_DATA; @@ -510,8 +508,8 @@ void _unsetEof( StreamImpl *is ) } } -int _getParseBlock( FsmRun *fsmRun, StreamImpl *is, int skip, - char *dest, int length, char **pdp, int *copied ) +int _getParseBlock( FsmRun *fsmRun, StreamImpl *is, + int skip, char **pdp, int *copied ) { int ret = 0; *copied = 0; @@ -527,7 +525,7 @@ int _getParseBlock( FsmRun *fsmRun, StreamImpl *is, int skip, if ( buf->type == RunBufSourceType ) { Stream *stream = (Stream*)buf->tree; - int type = stream->in->funcs->getParseBlock( fsmRun, stream->in, skip, dest, length, pdp, copied ); + int type = stream->in->funcs->getParseBlock( fsmRun, stream->in, skip, pdp, copied ); // if ( type == INPUT_EOD && !stream->in->eosSent ) { // stream->in->eosSent = 1; @@ -574,10 +572,8 @@ int _getParseBlock( FsmRun *fsmRun, StreamImpl *is, int skip, avail -= skip; skip = 0; - int slen = avail <= length ? avail : length; - memcpy( dest, src, slen ) ; *pdp = src; - *copied += slen; + *copied += avail; ret = INPUT_DATA; break; } @@ -589,22 +585,22 @@ int _getParseBlock( FsmRun *fsmRun, StreamImpl *is, int skip, #if DEBUG switch ( ret ) { case INPUT_DATA: - debug( REALM_INPUT, "get data: DATA copied: %d: %.*s\n", *copied, (int)*copied, dest ); + debug( REALM_INPUT, "get parse block: DATA: %d\n", *copied ); break; case INPUT_EOD: - debug( REALM_INPUT, "get data: EOD\n" ); + debug( REALM_INPUT, "get parse block: EOD\n" ); break; case INPUT_EOF: - debug( REALM_INPUT, "get data: EOF\n" ); + debug( REALM_INPUT, "get parse block: EOF\n" ); break; case INPUT_TREE: - debug( REALM_INPUT, "get data: TREE\n" ); + debug( REALM_INPUT, "get parse block: TREE\n" ); break; case INPUT_IGNORE: - debug( REALM_INPUT, "get data: IGNORE\n" ); + debug( REALM_INPUT, "get parse block: IGNORE\n" ); break; case INPUT_LANG_EL: - debug( REALM_INPUT, "get data: LANG_EL\n" ); + debug( REALM_INPUT, "get parse block: LANG_EL\n" ); break; } #endif 
diff --git a/colm/input.h b/colm/input.h index e2b2fce1..e37a9383 100644 --- a/colm/input.h +++ b/colm/input.h @@ -88,10 +88,11 @@ typedef struct _StreamImpl StreamImpl; struct StreamFuncs { - int (*getParseBlock)( struct _FsmRun *fsmRun, StreamImpl *ss, int offset, - char *dest, int length, char **pdp, int *copied ); + int (*getParseBlock)( struct _FsmRun *fsmRun, StreamImpl *ss, + int skip, char **pdp, int *copied ); - int (*getData)( struct _FsmRun *fsmRun, StreamImpl *ss, int offset, char *dest, int length, int *copied ); + int (*getData)( struct _FsmRun *fsmRun, StreamImpl *ss, + int offset, char *dest, int length, int *copied ); int (*consumeData)( StreamImpl *ss, int length ); int (*undoConsumeData)( struct _FsmRun *fsmRun, StreamImpl *ss, const char *data, int length ); @@ -171,8 +172,8 @@ void initConsFuncs(); int _getData( struct _FsmRun *fsmRun, StreamImpl *in, int offset, char *dest, int length, int *copied ); -int _getParseBlock( struct _FsmRun *fsmRun, StreamImpl *in, int offset, - char *dest, int length, char **pdp, int *copied ); +int _getParseBlock( struct _FsmRun *fsmRun, StreamImpl *in, + int skip, char **pdp, int *copied ); int _consumeData( StreamImpl *in, int length ); int _undoConsumeData( struct _FsmRun *fsmRun, StreamImpl *is, const char *data, int length ); diff --git a/colm/pdarun.c b/colm/pdarun.c index ace7b019..b226f004 100644 --- a/colm/pdarun.c +++ b/colm/pdarun.c @@ -61,13 +61,11 @@ void initFsmRun( FsmRun *fsmRun, Program *prg ) { fsmRun->tables = prg->rtd->fsmTables; - fsmRun->scanBuf = newRunBuf(); - fsmRun->scanBuf->next = 0; - fsmRun->consumeBuf = 0; - fsmRun->p = fsmRun->pe = fsmRun->scanBuf->data; - fsmRun->peof = 0; + fsmRun->p = fsmRun->pe = 0; + fsmRun->have = 0; + fsmRun->peof = (char*)-1; fsmRun->preRegion = -1; } @@ -141,8 +139,9 @@ Head *streamPull( Program *prg, FsmRun *fsmRun, StreamImpl *is, long length ) is->funcs->getData( fsmRun, is, 0, runBuf->data, length, &lenCopied ); is->funcs->consumeData( is, length ); - 
fsmRun->p = fsmRun->pe = fsmRun->scanBuf->data; - //fsmRun->peof = 0; + fsmRun->p = fsmRun->pe = 0; + fsmRun->have = 0; + //fsmRun->peof = (char*)-1; Head *tokdata = stringAllocPointer( prg, runBuf->data, length ); updatePosition( is, runBuf->data, length ); @@ -767,8 +766,9 @@ Head *peekMatch( Program *prg, FsmRun *fsmRun, StreamImpl *is ) int lenCopied = 0; is->funcs->getData( fsmRun, is, 0, runBuf->data, length, &lenCopied ); - fsmRun->p = fsmRun->pe = fsmRun->scanBuf->data; - //fsmRun->peof = 0; + fsmRun->p = fsmRun->pe = 0; + fsmRun->have = 0; + //fsmRun->peof = (char*)-1; Head *head = stringAllocPointer( prg, runBuf->data, length ); @@ -802,8 +802,9 @@ Head *extractMatch( Program *prg, FsmRun *fsmRun, StreamImpl *is ) is->funcs->consumeData( is, length ); - fsmRun->p = fsmRun->pe = fsmRun->scanBuf->data; - //fsmRun->peof = 0; + fsmRun->p = fsmRun->pe = 0; + fsmRun->have = 0; + //fsmRun->peof = (char*)-1; Head *head = stringAllocPointer( prg, runBuf->data, length ); @@ -936,6 +937,10 @@ static void sendEof( Program *prg, Tree **sp, StreamImpl *is, FsmRun *fsmRun, Pd void newToken( Program *prg, PdaRun *pdaRun, FsmRun *fsmRun ) { + fsmRun->p = fsmRun->pe = 0; + fsmRun->have = 0; + fsmRun->peof = (char*)-1; + /* Init the scanner vars. */ fsmRun->act = 0; fsmRun->tokstart = 0; @@ -999,7 +1004,10 @@ long scanToken( Program *prg, PdaRun *pdaRun, FsmRun *fsmRun, StreamImpl *is ) return SCAN_UNDO; while ( true ) { + char *start = fsmRun->p; fsmExecute( fsmRun, is ); + if ( fsmRun->p != 0 ) + fsmRun->have += fsmRun->p - start; /* First check if scanning stopped because we have a token. */ if ( fsmRun->matchedToken > 0 ) { @@ -1029,93 +1037,32 @@ long scanToken( Program *prg, PdaRun *pdaRun, FsmRun *fsmRun, StreamImpl *is ) return SCAN_ERROR; } - /* Got here because the state machine didn't match a token or - * encounter an error. Must be because we got to the end of the buffer - * data. 
*/ + /* Got here because the state machine didn't match a token or encounter + * an error. Must be because we got to the end of the buffer data. */ assert( fsmRun->p == fsmRun->pe ); - /* There may be space left in the current buffer. If not then we need - * to make some. */ - long space = fsmRun->scanBuf->data + FSM_BUFSIZE - fsmRun->pe; - if ( space == 0 ) { - /* Create a new run buf. */ - RunBuf *newBuf = newRunBuf(); - - /* If partway through a token then preserve the prefix. */ - long have = 0; - - if ( fsmRun->tokstart == 0 ) { - /* No prefix. We filled the previous buffer. */ - fsmRun->scanBuf->length = FSM_BUFSIZE; - } - else { - int i; - - debug( REALM_SCAN, "copying data over to new buffer\n" ); - assert( fsmRun->scanBuf->offset == 0 ); - - if ( fsmRun->tokstart == fsmRun->scanBuf->data ) { - /* A token is started and it is already at the beginning - * of the current buffer. This means buffer is full and it - * must be grown. Probably need to do this sooner. */ - fatal( "OUT OF BUFFER SPACE\n" ); - } - - /* There is data that needs to be shifted over. */ - have = fsmRun->pe - fsmRun->tokstart; - memcpy( newBuf->data, fsmRun->tokstart, have ); - - /* Compute the length of the previous buffer. */ - fsmRun->scanBuf->length = FSM_BUFSIZE - have; - - /* Compute tokstart and tokend. */ - long dist = fsmRun->tokstart - newBuf->data; - - fsmRun->tokend -= dist; - fsmRun->tokstart = newBuf->data; - - /* Shift any markers. */ - for ( i = 0; i < MARK_SLOTS; i++ ) { - if ( fsmRun->mark[i] != 0 ) - fsmRun->mark[i] -= dist; - } - } - - fsmRun->p = fsmRun->pe = newBuf->data + have; - fsmRun->peof = 0; - - newBuf->next = fsmRun->scanBuf; - fsmRun->scanBuf = newBuf; - } - - /* We don't have any data. What is next in the input inputStream? */ - space = fsmRun->scanBuf->data + FSM_BUFSIZE - fsmRun->pe; - assert( space > 0 ); - - /* Get more data. */ - int have = fsmRun->tokstart != 0 ? 
fsmRun->p - fsmRun->tokstart : 0; - - debug( REALM_SCAN, "fetching data: have: %d space: %d\n", have, space ); - char *pd = 0; int len = 0; - int type = is->funcs->getParseBlock( fsmRun, is, have, fsmRun->p, space, &pd, &len ); + int type = is->funcs->getParseBlock( fsmRun, is, fsmRun->have, &pd, &len ); switch ( type ) { case INPUT_DATA: - fsmRun->pe = fsmRun->p + len; + fsmRun->p = pd; + fsmRun->pe = pd + len; break; case INPUT_EOS: + //fsmRun->p = fsmRun->pe = 0; if ( fsmRun->tokstart != 0 ) fsmRun->peof = fsmRun->pe; debug( REALM_SCAN, "EOS *******************\n" ); -// else { -// return SCAN_EOS; -// } + //else { + // return SCAN_EOS; + //} break; case INPUT_EOF: + //fsmRun->p = fsmRun->pe = 0; if ( fsmRun->tokstart != 0 ) fsmRun->peof = fsmRun->pe; else diff --git a/colm/pdarun.h b/colm/pdarun.h index 81d376c4..ac08889e 100644 --- a/colm/pdarun.h +++ b/colm/pdarun.h @@ -73,7 +73,6 @@ typedef struct _FsmRun { FsmTables *tables; - RunBuf *scanBuf; RunBuf *consumeBuf; /* FsmRun State. */ @@ -81,6 +80,7 @@ typedef struct _FsmRun long cs, ncs, act; char *tokstart, *tokend; char *p, *pe, *peof; + int have; int returnResult; char *mark[MARK_SLOTS]; long matchedToken; -- cgit v1.2.1 From 4c9aa13d4a84ded39bc78cbfb290abe3bdd939da Mon Sep 17 00:00:00 2001 From: Adrian Thurston Date: Sun, 3 Feb 2013 12:12:02 -0500 Subject: working out appropriate usage of p, pe with get parse block --- colm/pdarun.c | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/colm/pdarun.c b/colm/pdarun.c index b226f004..b8f68b3b 100644 --- a/colm/pdarun.c +++ b/colm/pdarun.c @@ -268,8 +268,9 @@ void resetToken( FsmRun *fsmRun ) /* If there is a token started, but never finished for a lack of data, we * must first backup over it. 
*/ if ( fsmRun->tokstart != 0 ) { - fsmRun->p = fsmRun->tokstart; - fsmRun->tokstart = 0; + fsmRun->p = fsmRun->pe = 0; + fsmRun->have = 0; + fsmRun->peof = (char*)-1; } } @@ -740,7 +741,7 @@ void sendIgnore( Program *prg, Tree **sp, StreamImpl *is, FsmRun *fsmRun, PdaRun /* Make the ignore string. */ Head *ignoreStr = extractMatch( prg, fsmRun, is ); - updatePosition( is, fsmRun->tokstart, ignoreStr->length ); + updatePosition( is, ignoreStr->data, ignoreStr->length ); debug( REALM_PARSE, "ignoring: %.*s\n", ignoreStr->length, ignoreStr->data ); @@ -757,7 +758,7 @@ void sendIgnore( Program *prg, Tree **sp, StreamImpl *is, FsmRun *fsmRun, PdaRun /* Doesn't consume. */ Head *peekMatch( Program *prg, FsmRun *fsmRun, StreamImpl *is ) { - long length = fsmRun->p - fsmRun->tokstart; + long length = fsmRun->have; RunBuf *runBuf = newRunBuf(); runBuf->next = fsmRun->consumeBuf; @@ -785,7 +786,7 @@ Head *peekMatch( Program *prg, FsmRun *fsmRun, StreamImpl *is ) /* Consumes. */ Head *extractMatch( Program *prg, FsmRun *fsmRun, StreamImpl *is ) { - long length = fsmRun->p - fsmRun->tokstart; + long length = fsmRun->have; RunBuf *runBuf = newRunBuf(); runBuf->next = fsmRun->consumeBuf; @@ -804,6 +805,7 @@ Head *extractMatch( Program *prg, FsmRun *fsmRun, StreamImpl *is ) fsmRun->p = fsmRun->pe = 0; fsmRun->have = 0; + fsmRun->tokstart = 0; //fsmRun->peof = (char*)-1; Head *head = stringAllocPointer( prg, runBuf->data, length ); @@ -829,7 +831,7 @@ static void sendToken( Program *prg, Tree **sp, StreamImpl *is, FsmRun *fsmRun, prg->rtd->lelInfo[id].name, stringLength(tokdata), stringData(tokdata) ); - updatePosition( is, fsmRun->tokstart, tokdata->length ); + updatePosition( is, stringData(tokdata), stringLength(tokdata) ); Kid *input = makeTokenWithData( prg, pdaRun, fsmRun, is, id, tokdata ); @@ -886,7 +888,7 @@ static void sendCi( Program *prg, Tree **sp, StreamImpl *is, FsmRun *fsmRun, Pda prg->rtd->lelInfo[id].name, stringLength(tokdata), stringData(tokdata) ); - 
updatePosition( is, fsmRun->tokstart, tokdata->length ); + updatePosition( is, stringData(tokdata), stringLength(tokdata) ); Kid *input = makeTokenWithData( prg, pdaRun, fsmRun, is, id, tokdata ); @@ -1052,7 +1054,8 @@ long scanToken( Program *prg, PdaRun *pdaRun, FsmRun *fsmRun, StreamImpl *is ) break; case INPUT_EOS: - //fsmRun->p = fsmRun->pe = 0; + fsmRun->p = fsmRun->pe = 0; + //fsmRun->have = 0; if ( fsmRun->tokstart != 0 ) fsmRun->peof = fsmRun->pe; debug( REALM_SCAN, "EOS *******************\n" ); @@ -1062,7 +1065,8 @@ long scanToken( Program *prg, PdaRun *pdaRun, FsmRun *fsmRun, StreamImpl *is ) break; case INPUT_EOF: - //fsmRun->p = fsmRun->pe = 0; + fsmRun->p = fsmRun->pe = 0; + //fsmRun->have = 0; if ( fsmRun->tokstart != 0 ) fsmRun->peof = fsmRun->pe; else @@ -1231,8 +1235,9 @@ case PcrPreEof: /* Note that we don't update the position now. It is done when the token * data is pulled from the inputStream. */ - fsmRun->p = fsmRun->tokstart; - fsmRun->tokstart = 0; + fsmRun->p = fsmRun->pe = 0; + fsmRun->have = 0; + fsmRun->peof = (char*)-1; pdaRun->fi = &prg->rtd->frameInfo[prg->rtd->lelInfo[pdaRun->tokenId].frameId]; pdaRun->frameId = prg->rtd->lelInfo[pdaRun->tokenId].frameId; -- cgit v1.2.1 From e3cdaaeadc86e45f7f2c365f5a61a5a060c6f10b Mon Sep 17 00:00:00 2001 From: Adrian Thurston Date: Sun, 3 Feb 2013 12:58:07 -0500 Subject: don't check tokstart in stream pull, no longer relevant --- colm/pdarun.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/colm/pdarun.c b/colm/pdarun.c index b8f68b3b..b810a1be 100644 --- a/colm/pdarun.c +++ b/colm/pdarun.c @@ -128,8 +128,8 @@ void decrementSteps( PdaRun *pdaRun ) Head *streamPull( Program *prg, FsmRun *fsmRun, StreamImpl *is, long length ) { - /* We should not be in the midst of getting a token. */ - assert( fsmRun->tokstart == 0 ); +// /* We should not be in the midst of getting a token. 
*/ +// assert( fsmRun->tokstart == 0 ); RunBuf *runBuf = newRunBuf(); runBuf->next = fsmRun->consumeBuf; -- cgit v1.2.1 From d86eb63c48139c9b5a9951137bc0bfbf6137658e Mon Sep 17 00:00:00 2001 From: Adrian Thurston Date: Sun, 3 Feb 2013 14:55:24 -0500 Subject: don't touch p in the FSM execution, wrecks the have computation --- colm/fsmcodegen.cc | 8 ++++---- colm/fsmexec.cc | 6 +++--- colm/pdarun.c | 3 +++ 3 files changed, 10 insertions(+), 7 deletions(-) diff --git a/colm/fsmcodegen.cc b/colm/fsmcodegen.cc index 212de648..30abf796 100644 --- a/colm/fsmcodegen.cc +++ b/colm/fsmcodegen.cc @@ -172,14 +172,14 @@ void FsmCodeGen::LM_SWITCH( ostream &ret, InlineItem *item, int targState, int inFinish ) { ret << - " " << P() << " = " << TOKEND() << ";\n" + //" " << P() << " = " << TOKEND() << ";\n" " switch( " << ACT() << " ) {\n"; /* If the switch handles error then we also forced the error state. It * will exist. */ if ( item->tokenRegion->lmSwitchHandlesError ) { - ret << " case 0: " << P() << " = " << TOKSTART() << - "; goto st" << redFsm->errState->id << ";\n"; + ret << " case 0: " //<< P() << " = " << TOKSTART() << ";" << + "goto st" << redFsm->errState->id << ";\n"; } for ( TokenDefListReg::Iter lmi = item->tokenRegion->tokenDefList; lmi.lte(); lmi++ ) { @@ -218,7 +218,7 @@ void FsmCodeGen::LM_ON_LAG_BEHIND( ostream &ret, InlineItem *item ) { assert( item->longestMatchPart->tdLangEl != 0 ); - ret << " " << P() << " = " << TOKEND() << ";\n"; +// ret << " " << P() << " = " << TOKEND() << ";\n"; EMIT_TOKEN( ret, item->longestMatchPart->tdLangEl ); ret << " return;\n"; } diff --git a/colm/fsmexec.cc b/colm/fsmexec.cc index 9b945374..bb24e086 100644 --- a/colm/fsmexec.cc +++ b/colm/fsmexec.cc @@ -56,9 +56,9 @@ void execAction( FsmRun *fsmRun, GenAction *genAction ) case InlineItem::LmSwitch: /* If the switch handles error then we also forced the error state. It * will exist. 
*/ - fsmRun->p = fsmRun->tokend; + //fsmRun->p = fsmRun->tokend; if ( item->tokenRegion->lmSwitchHandlesError && fsmRun->act == 0 ) { - fsmRun->p = fsmRun->tokstart; + //fsmRun->p = fsmRun->tokstart; fsmRun->cs = fsmRun->tables->errorState; } else { @@ -81,7 +81,7 @@ void execAction( FsmRun *fsmRun, GenAction *genAction ) fsmRun->returnResult = true; break; case InlineItem::LmOnLagBehind: - fsmRun->p = fsmRun->tokend; + //fsmRun->p = fsmRun->tokend; fsmRun->matchedToken = item->longestMatchPart->tdLangEl->id; fsmRun->returnResult = true; break; diff --git a/colm/pdarun.c b/colm/pdarun.c index b810a1be..2f071a3c 100644 --- a/colm/pdarun.c +++ b/colm/pdarun.c @@ -788,6 +788,8 @@ Head *extractMatch( Program *prg, FsmRun *fsmRun, StreamImpl *is ) { long length = fsmRun->have; + debug( REALM_PARSE, "extracting token of length: %ld\n", length ); + RunBuf *runBuf = newRunBuf(); runBuf->next = fsmRun->consumeBuf; fsmRun->consumeBuf = runBuf; @@ -1074,6 +1076,7 @@ long scanToken( Program *prg, PdaRun *pdaRun, FsmRun *fsmRun, StreamImpl *is ) break; case INPUT_EOD: + fsmRun->p = fsmRun->pe = 0; return SCAN_TRY_AGAIN_LATER; case INPUT_LANG_EL: -- cgit v1.2.1 From 92f7064afbba7b59604058732b743e42b6b9331f Mon Sep 17 00:00:00 2001 From: Adrian Thurston Date: Mon, 4 Feb 2013 21:05:56 -0500 Subject: renamed FsmRun "have" to toklen, compute it in fsmExecute --- colm/fsmcodegen.cc | 13 ++++++++----- colm/fsmexec.cc | 11 +++++++++-- colm/pdarun.c | 23 ++++++++++------------- colm/pdarun.h | 2 +- 4 files changed, 28 insertions(+), 21 deletions(-) diff --git a/colm/fsmcodegen.cc b/colm/fsmcodegen.cc index 30abf796..aded4148 100644 --- a/colm/fsmcodegen.cc +++ b/colm/fsmcodegen.cc @@ -194,7 +194,7 @@ void FsmCodeGen::LM_SWITCH( ostream &ret, InlineItem *item, ret << " }\n" "\t" - " return;\n"; + " goto out;\n"; } void FsmCodeGen::LM_ON_LAST( ostream &ret, InlineItem *item ) @@ -203,7 +203,7 @@ void FsmCodeGen::LM_ON_LAST( ostream &ret, InlineItem *item ) ret << " " << P() << " += 1;\n"; 
EMIT_TOKEN( ret, item->longestMatchPart->tdLangEl ); - ret << " return;\n"; + ret << " goto out;\n"; } void FsmCodeGen::LM_ON_NEXT( ostream &ret, InlineItem *item ) @@ -211,7 +211,7 @@ void FsmCodeGen::LM_ON_NEXT( ostream &ret, InlineItem *item ) assert( item->longestMatchPart->tdLangEl != 0 ); EMIT_TOKEN( ret, item->longestMatchPart->tdLangEl ); - ret << " return;\n"; + ret << " goto out;\n"; } void FsmCodeGen::LM_ON_LAG_BEHIND( ostream &ret, InlineItem *item ) @@ -220,7 +220,7 @@ void FsmCodeGen::LM_ON_LAG_BEHIND( ostream &ret, InlineItem *item ) // ret << " " << P() << " = " << TOKEND() << ";\n"; EMIT_TOKEN( ret, item->longestMatchPart->tdLangEl ); - ret << " return;\n"; + ret << " goto out;\n"; } @@ -848,6 +848,7 @@ void FsmCodeGen::writeExec() out << "void fsmExecute( FsmRun *fsmRun, StreamImpl *inputStream )\n" "{\n" + " char *start = fsmRun->p;\n" "/*_resume:*/\n"; if ( redFsm->errState != 0 ) { @@ -872,7 +873,9 @@ void FsmCodeGen::writeExec() " }\n"; out << - " out: {}\n" + " out:\n" + " if ( fsmRun->p != 0 )\n" + " fsmRun->toklen += fsmRun->p - start;\n" "}\n" "\n"; } diff --git a/colm/fsmexec.cc b/colm/fsmexec.cc index bb24e086..ac9cd2d8 100644 --- a/colm/fsmexec.cc +++ b/colm/fsmexec.cc @@ -99,6 +99,8 @@ void fsmExecute( FsmRun *fsmRun, StreamImpl *inputStream ) const long *_acts; unsigned int _nacts; const char *_keys; + + char *start = fsmRun->p; /* Init the token match to nothing (the sentinal). 
*/ fsmRun->matchedToken = 0; @@ -176,7 +178,7 @@ _match: while ( _nacts-- > 0 ) execAction( fsmRun, fsmRun->tables->actionSwitch[*_acts++] ); if ( fsmRun->returnResult ) - return; + goto final; _again: _acts = fsmRun->tables->actions + fsmRun->tables->toStateActions[fsmRun->cs]; @@ -201,8 +203,13 @@ out: while ( _nacts-- > 0 ) execAction( fsmRun, fsmRun->tables->actionSwitch[*_acts++] ); if ( fsmRun->returnResult ) - return; + goto final; } + +final: + + if ( fsmRun->p != 0 ) + fsmRun->toklen += fsmRun->p - start; } diff --git a/colm/pdarun.c b/colm/pdarun.c index 2f071a3c..cf15e2f6 100644 --- a/colm/pdarun.c +++ b/colm/pdarun.c @@ -64,7 +64,7 @@ void initFsmRun( FsmRun *fsmRun, Program *prg ) fsmRun->consumeBuf = 0; fsmRun->p = fsmRun->pe = 0; - fsmRun->have = 0; + fsmRun->toklen = 0; fsmRun->peof = (char*)-1; fsmRun->preRegion = -1; @@ -140,7 +140,7 @@ Head *streamPull( Program *prg, FsmRun *fsmRun, StreamImpl *is, long length ) is->funcs->consumeData( is, length ); fsmRun->p = fsmRun->pe = 0; - fsmRun->have = 0; + fsmRun->toklen = 0; //fsmRun->peof = (char*)-1; Head *tokdata = stringAllocPointer( prg, runBuf->data, length ); @@ -269,7 +269,7 @@ void resetToken( FsmRun *fsmRun ) * must first backup over it. */ if ( fsmRun->tokstart != 0 ) { fsmRun->p = fsmRun->pe = 0; - fsmRun->have = 0; + fsmRun->toklen = 0; fsmRun->peof = (char*)-1; } } @@ -758,7 +758,7 @@ void sendIgnore( Program *prg, Tree **sp, StreamImpl *is, FsmRun *fsmRun, PdaRun /* Doesn't consume. 
*/ Head *peekMatch( Program *prg, FsmRun *fsmRun, StreamImpl *is ) { - long length = fsmRun->have; + long length = fsmRun->toklen; RunBuf *runBuf = newRunBuf(); runBuf->next = fsmRun->consumeBuf; @@ -768,7 +768,7 @@ Head *peekMatch( Program *prg, FsmRun *fsmRun, StreamImpl *is ) is->funcs->getData( fsmRun, is, 0, runBuf->data, length, &lenCopied ); fsmRun->p = fsmRun->pe = 0; - fsmRun->have = 0; + fsmRun->toklen = 0; //fsmRun->peof = (char*)-1; Head *head = stringAllocPointer( prg, runBuf->data, length ); @@ -786,7 +786,7 @@ Head *peekMatch( Program *prg, FsmRun *fsmRun, StreamImpl *is ) /* Consumes. */ Head *extractMatch( Program *prg, FsmRun *fsmRun, StreamImpl *is ) { - long length = fsmRun->have; + long length = fsmRun->toklen; debug( REALM_PARSE, "extracting token of length: %ld\n", length ); @@ -806,7 +806,7 @@ Head *extractMatch( Program *prg, FsmRun *fsmRun, StreamImpl *is ) is->funcs->consumeData( is, length ); fsmRun->p = fsmRun->pe = 0; - fsmRun->have = 0; + fsmRun->toklen = 0; fsmRun->tokstart = 0; //fsmRun->peof = (char*)-1; @@ -942,7 +942,7 @@ static void sendEof( Program *prg, Tree **sp, StreamImpl *is, FsmRun *fsmRun, Pd void newToken( Program *prg, PdaRun *pdaRun, FsmRun *fsmRun ) { fsmRun->p = fsmRun->pe = 0; - fsmRun->have = 0; + fsmRun->toklen = 0; fsmRun->peof = (char*)-1; /* Init the scanner vars. */ @@ -1008,10 +1008,7 @@ long scanToken( Program *prg, PdaRun *pdaRun, FsmRun *fsmRun, StreamImpl *is ) return SCAN_UNDO; while ( true ) { - char *start = fsmRun->p; fsmExecute( fsmRun, is ); - if ( fsmRun->p != 0 ) - fsmRun->have += fsmRun->p - start; /* First check if scanning stopped because we have a token. 
*/ if ( fsmRun->matchedToken > 0 ) { @@ -1047,7 +1044,7 @@ long scanToken( Program *prg, PdaRun *pdaRun, FsmRun *fsmRun, StreamImpl *is ) char *pd = 0; int len = 0; - int type = is->funcs->getParseBlock( fsmRun, is, fsmRun->have, &pd, &len ); + int type = is->funcs->getParseBlock( fsmRun, is, fsmRun->toklen, &pd, &len ); switch ( type ) { case INPUT_DATA: @@ -1239,7 +1236,7 @@ case PcrPreEof: * data is pulled from the inputStream. */ fsmRun->p = fsmRun->pe = 0; - fsmRun->have = 0; + fsmRun->toklen = 0; fsmRun->peof = (char*)-1; pdaRun->fi = &prg->rtd->frameInfo[prg->rtd->lelInfo[pdaRun->tokenId].frameId]; diff --git a/colm/pdarun.h b/colm/pdarun.h index ac08889e..5d8b3cea 100644 --- a/colm/pdarun.h +++ b/colm/pdarun.h @@ -79,8 +79,8 @@ typedef struct _FsmRun long region, preRegion; long cs, ncs, act; char *tokstart, *tokend; + long toklen; char *p, *pe, *peof; - int have; int returnResult; char *mark[MARK_SLOTS]; long matchedToken; -- cgit v1.2.1 From 63e620c91ae722386b97ab812cf4f4deeef91b71 Mon Sep 17 00:00:00 2001 From: Adrian Thurston Date: Mon, 4 Feb 2013 22:28:28 -0500 Subject: keep tokend as an int, not pointer Since we are no longer maintaining a contiguous token buffer during scanning we cannot use a pointer for tokend. Turn it into an offset (toklen). --- colm/fsmcodegen.cc | 20 +++++++++++--------- colm/fsmcodegen.h | 2 ++ colm/fsmexec.cc | 29 +++++++++++++++++++---------- colm/pdarun.c | 2 +- colm/pdarun.h | 5 ++++- 5 files changed, 37 insertions(+), 21 deletions(-) diff --git a/colm/fsmcodegen.cc b/colm/fsmcodegen.cc index aded4148..708922de 100644 --- a/colm/fsmcodegen.cc +++ b/colm/fsmcodegen.cc @@ -146,7 +146,7 @@ void FsmCodeGen::SET_ACT( ostream &ret, InlineItem *item ) void FsmCodeGen::SET_TOKEND( ostream &ret, InlineItem *item ) { /* The tokend action sets tokend. 
*/ - ret << TOKEND() << " = " << P() << "+1;"; + ret << "{ " << TOKEND() << " = " << TOKLEN() << " + ( " << P() << " - " << BLOCK_START() << " ) + 1; }"; } void FsmCodeGen::INIT_TOKSTART( ostream &ret, InlineItem *item ) { @@ -172,7 +172,7 @@ void FsmCodeGen::LM_SWITCH( ostream &ret, InlineItem *item, int targState, int inFinish ) { ret << - //" " << P() << " = " << TOKEND() << ";\n" + " " << TOKLEN() << " = " << TOKEND() << ";\n" " switch( " << ACT() << " ) {\n"; /* If the switch handles error then we also forced the error state. It @@ -194,7 +194,7 @@ void FsmCodeGen::LM_SWITCH( ostream &ret, InlineItem *item, ret << " }\n" "\t" - " goto out;\n"; + " goto skip_toklen;\n"; } void FsmCodeGen::LM_ON_LAST( ostream &ret, InlineItem *item ) @@ -218,9 +218,9 @@ void FsmCodeGen::LM_ON_LAG_BEHIND( ostream &ret, InlineItem *item ) { assert( item->longestMatchPart->tdLangEl != 0 ); -// ret << " " << P() << " = " << TOKEND() << ";\n"; + ret << " " << TOKLEN() << " = " << TOKEND() << ";\n"; EMIT_TOKEN( ret, item->longestMatchPart->tdLangEl ); - ret << " goto out;\n"; + ret << " goto skip_toklen;\n"; } @@ -848,7 +848,7 @@ void FsmCodeGen::writeExec() out << "void fsmExecute( FsmRun *fsmRun, StreamImpl *inputStream )\n" "{\n" - " char *start = fsmRun->p;\n" + " " << BLOCK_START() << " = fsmRun->p;\n" "/*_resume:*/\n"; if ( redFsm->errState != 0 ) { @@ -873,9 +873,11 @@ void FsmCodeGen::writeExec() " }\n"; out << - " out:\n" - " if ( fsmRun->p != 0 )\n" - " fsmRun->toklen += fsmRun->p - start;\n" + "out:\n" + " if ( " << P() << " != 0 )\n" + " " << TOKLEN() << " += " << P() << " - " << BLOCK_START() << ";\n" + "skip_toklen:\n" + " {}\n" "}\n" "\n"; } diff --git a/colm/fsmcodegen.h b/colm/fsmcodegen.h index 1b004f5e..d1f23166 100644 --- a/colm/fsmcodegen.h +++ b/colm/fsmcodegen.h @@ -105,6 +105,8 @@ protected: string TOP() { return ACCESS() + "top"; } string TOKSTART() { return ACCESS() + "tokstart"; } string TOKEND() { return ACCESS() + "tokend"; } + string BLOCK_START() { 
return ACCESS() + "start"; } + string TOKLEN() { return ACCESS() + "toklen"; } string ACT() { return ACCESS() + "act"; } string MATCHED_TOKEN() { return ACCESS() + "matchedToken"; } diff --git a/colm/fsmexec.cc b/colm/fsmexec.cc index ac9cd2d8..94d2e18e 100644 --- a/colm/fsmexec.cc +++ b/colm/fsmexec.cc @@ -42,7 +42,7 @@ void execAction( FsmRun *fsmRun, GenAction *genAction ) fsmRun->act = item->longestMatchPart->longestMatchId; break; case InlineItem::LmSetTokEnd: - fsmRun->tokend = fsmRun->p + 1; + fsmRun->tokend = fsmRun->toklen + ( fsmRun->p - fsmRun->start ) + 1; break; case InlineItem::LmInitTokStart: assert(false); @@ -56,9 +56,8 @@ void execAction( FsmRun *fsmRun, GenAction *genAction ) case InlineItem::LmSwitch: /* If the switch handles error then we also forced the error state. It * will exist. */ - //fsmRun->p = fsmRun->tokend; + fsmRun->toklen = fsmRun->tokend; if ( item->tokenRegion->lmSwitchHandlesError && fsmRun->act == 0 ) { - //fsmRun->p = fsmRun->tokstart; fsmRun->cs = fsmRun->tables->errorState; } else { @@ -70,6 +69,7 @@ void execAction( FsmRun *fsmRun, GenAction *genAction ) } } fsmRun->returnResult = true; + fsmRun->skipToklen = true; break; case InlineItem::LmOnLast: fsmRun->p += 1; @@ -81,9 +81,10 @@ void execAction( FsmRun *fsmRun, GenAction *genAction ) fsmRun->returnResult = true; break; case InlineItem::LmOnLagBehind: - //fsmRun->p = fsmRun->tokend; + fsmRun->toklen = fsmRun->tokend; fsmRun->matchedToken = item->longestMatchPart->tdLangEl->id; fsmRun->returnResult = true; + fsmRun->skipToklen = true; break; } } @@ -100,7 +101,7 @@ void fsmExecute( FsmRun *fsmRun, StreamImpl *inputStream ) unsigned int _nacts; const char *_keys; - char *start = fsmRun->p; + fsmRun->start = fsmRun->p; /* Init the token match to nothing (the sentinal). 
*/ fsmRun->matchedToken = 0; @@ -173,12 +174,16 @@ _match: goto _again; fsmRun->returnResult = false; + fsmRun->skipToklen = false; _acts = fsmRun->tables->actions + fsmRun->tables->transActionsWI[_trans]; _nacts = (unsigned int) *_acts++; while ( _nacts-- > 0 ) execAction( fsmRun, fsmRun->tables->actionSwitch[*_acts++] ); - if ( fsmRun->returnResult ) + if ( fsmRun->returnResult ) { + if ( fsmRun->skipToklen ) + goto skip_toklen; goto final; + } _again: _acts = fsmRun->tables->actions + fsmRun->tables->toStateActions[fsmRun->cs]; @@ -194,6 +199,7 @@ _again: out: if ( fsmRun->p == fsmRun->peof ) { fsmRun->returnResult = false; + fsmRun->skipToklen = false; _acts = fsmRun->tables->actions + fsmRun->tables->eofActions[fsmRun->cs]; _nacts = (unsigned int) *_acts++; @@ -202,14 +208,17 @@ out: while ( _nacts-- > 0 ) execAction( fsmRun, fsmRun->tables->actionSwitch[*_acts++] ); - if ( fsmRun->returnResult ) + if ( fsmRun->returnResult ) { + if ( fsmRun->skipToklen ) + goto skip_toklen; goto final; + } } final: if ( fsmRun->p != 0 ) - fsmRun->toklen += fsmRun->p - start; + fsmRun->toklen += fsmRun->p - fsmRun->start; +skip_toklen: + {} } - - diff --git a/colm/pdarun.c b/colm/pdarun.c index cf15e2f6..85c9148a 100644 --- a/colm/pdarun.c +++ b/colm/pdarun.c @@ -1031,7 +1031,7 @@ long scanToken( Program *prg, PdaRun *pdaRun, FsmRun *fsmRun, StreamImpl *is ) /* Check for a default token in the region. If one is there * then send it and continue with the processing loop. */ if ( prg->rtd->regionInfo[fsmRun->region].defaultToken >= 0 ) { - fsmRun->tokstart = fsmRun->tokend = fsmRun->p; + fsmRun->toklen = 0; return prg->rtd->regionInfo[fsmRun->region].defaultToken; } diff --git a/colm/pdarun.h b/colm/pdarun.h index 5d8b3cea..350c10fb 100644 --- a/colm/pdarun.h +++ b/colm/pdarun.h @@ -78,10 +78,13 @@ typedef struct _FsmRun /* FsmRun State. 
*/ long region, preRegion; long cs, ncs, act; - char *tokstart, *tokend; + char *start; + char *tokstart; + long tokend; long toklen; char *p, *pe, *peof; int returnResult; + int skipToklen; char *mark[MARK_SLOTS]; long matchedToken; } FsmRun; -- cgit v1.2.1 From 08e102f8c7eddc8876daeb55b21c15b6403d1024 Mon Sep 17 00:00:00 2001 From: Adrian Thurston Date: Tue, 5 Feb 2013 21:54:50 -0500 Subject: return the amount copied from getData Was returning this via a pointer passed as an arg and returning the type of data block. Since getData is now used only for copying data after a successful match, we only need the length copied. --- colm/ctinput.cc | 24 +++++------ colm/input.c | 129 ++++++++++++++++++++------------------------------------ colm/input.h | 4 +- colm/pdarun.c | 17 +++----- 4 files changed, 63 insertions(+), 111 deletions(-) diff --git a/colm/ctinput.cc b/colm/ctinput.cc index a445a8fc..90e794bd 100644 --- a/colm/ctinput.cc +++ b/colm/ctinput.cc @@ -108,19 +108,19 @@ int inputStreamPatternGetParseBlock( FsmRun *fsmRun, StreamImpl *ss, return INPUT_DATA; } -int inputStreamPatternGetData( FsmRun *fsmRun, StreamImpl *ss, int skip, char *dest, int length, int *copied ) +int inputStreamPatternGetData( FsmRun *fsmRun, StreamImpl *ss, int skip, char *dest, int length ) { - *copied = 0; + int copied = 0; PatternItem *buf = ss->patItem; int offset = ss->offset; while ( true ) { if ( buf == 0 ) - return INPUT_EOD; + return 0; if ( buf->type == PatternItem::FactorType ) - return INPUT_LANG_EL; + return 0; assert ( buf->type == PatternItem::InputText ); int avail = buf->data.length() - offset; @@ -143,7 +143,7 @@ int inputStreamPatternGetData( FsmRun *fsmRun, StreamImpl *ss, int skip, char *d skip = 0; memcpy( dest, src, slen ) ; - *copied += slen; + copied += slen; break; } } @@ -152,7 +152,7 @@ int inputStreamPatternGetData( FsmRun *fsmRun, StreamImpl *ss, int skip, char *d offset = 0; } - return INPUT_DATA; + return copied; } void inputStreamPatternBackup( StreamImpl 
*ss ) @@ -332,19 +332,19 @@ int inputStreamConsGetParseBlock( FsmRun *fsmRun, StreamImpl *ss, return INPUT_DATA; } -int inputStreamConsGetData( FsmRun *fsmRun, StreamImpl *ss, int skip, char *dest, int length, int *copied ) +int inputStreamConsGetData( FsmRun *fsmRun, StreamImpl *ss, int skip, char *dest, int length ) { - *copied = 0; + int copied = 0; ConsItem *buf = ss->consItem; int offset = ss->offset; while ( true ) { if ( buf == 0 ) - return INPUT_EOD; + return 0; if ( buf->type == ConsItem::ExprType || buf->type == ConsItem::FactorType ) - return INPUT_LANG_EL; + return 0; assert ( buf->type == ConsItem::InputText ); int avail = buf->data.length() - offset; @@ -367,7 +367,7 @@ int inputStreamConsGetData( FsmRun *fsmRun, StreamImpl *ss, int skip, char *dest skip = 0; memcpy( dest, src, slen ) ; - *copied += slen; + copied += slen; break; } } @@ -376,7 +376,7 @@ int inputStreamConsGetData( FsmRun *fsmRun, StreamImpl *ss, int skip, char *dest offset = 0; } - return INPUT_DATA; + return copied; } void inputStreamConsBackup( StreamImpl *ss ) diff --git a/colm/input.c b/colm/input.c index 5512450f..eba93864 100644 --- a/colm/input.c +++ b/colm/input.c @@ -135,36 +135,6 @@ static void sourceStreamPrepend( StreamImpl *ss, RunBuf *runBuf ) } } -void initStreamFuncs() -{ - memset( &streamFuncs, 0, sizeof(struct StreamFuncs) ); - streamFuncs.getData = &_getData; - streamFuncs.getParseBlock = &_getParseBlock; - streamFuncs.consumeData = &_consumeData; - streamFuncs.undoConsumeData = &_undoConsumeData; - streamFuncs.consumeTree = &_consumeTree; - streamFuncs.undoConsumeTree = &_undoConsumeTree; - streamFuncs.consumeLangEl = &_consumeLangEl; - streamFuncs.undoConsumeLangEl = &_undoConsumeLangEl; - - streamFuncs.setEof = &_setEof; - streamFuncs.unsetEof = &_unsetEof; - - streamFuncs.prependData = &_prependData; - streamFuncs.prependTree = &_prependTree; - streamFuncs.prependStream = &_prependStream; - streamFuncs.undoPrependData = &_undoPrependData; - 
streamFuncs.undoPrependTree = &_undoPrependTree; - - streamFuncs.appendData = &_appendData; - streamFuncs.appendTree = &_appendTree; - streamFuncs.appendStream = &_appendStream; - streamFuncs.undoAppendData = &_undoAppendData; - streamFuncs.undoAppendTree = &_undoAppendTree; - streamFuncs.undoAppendStream = &_undoAppendStream; -} - - void initInputFuncs() { initStreamFuncs(); @@ -238,10 +208,9 @@ int fdGetParseBlock( FsmRun *fsmRun, StreamImpl *ss, return ret; } -int fdGetData( FsmRun *fsmRun, StreamImpl *ss, int skip, char *dest, int length, int *copied ) +int fdGetData( FsmRun *fsmRun, StreamImpl *ss, int skip, char *dest, int length ) { - int ret = 0; - *copied = 0; + int copied = 0; /* Move over skip bytes. */ RunBuf *buf = ss->queue; @@ -252,15 +221,13 @@ int fdGetData( FsmRun *fsmRun, StreamImpl *ss, int skip, char *dest, int length, sourceStreamAppend( ss, runBuf ); int received = ss->funcs->getDataSource( ss, runBuf->data, FSM_BUFSIZE ); if ( received == 0 ) { - ret = INPUT_EOD; break; } runBuf->length = received; int slen = received < length ? received : length; memcpy( dest, runBuf->data, slen ); - *copied = slen; - ret = INPUT_DATA; + copied = slen; break; } @@ -285,8 +252,7 @@ int fdGetData( FsmRun *fsmRun, StreamImpl *ss, int skip, char *dest, int length, int slen = avail < length ? avail : length; memcpy( dest, src, slen ) ; - *copied += slen; - ret = INPUT_DATA; + copied += slen; break; } } @@ -294,7 +260,7 @@ int fdGetData( FsmRun *fsmRun, StreamImpl *ss, int skip, char *dest, int length, buf = buf->next; } - return ret; + return copied; } int fdConsumeData( StreamImpl *ss, int length ) @@ -608,49 +574,37 @@ int _getParseBlock( FsmRun *fsmRun, StreamImpl *is, return ret; } -int _getData( FsmRun *fsmRun, StreamImpl *is, int skip, char *dest, int length, int *copied ) +int _getData( FsmRun *fsmRun, StreamImpl *is, int skip, char *dest, int length ) { - int ret = 0; - *copied = 0; + int copied = 0; /* Move over skip bytes. 
*/ RunBuf *buf = is->queue; while ( true ) { if ( buf == 0 ) { /* Got through the in-mem buffers without copying anything. */ - ret = is->eof ? INPUT_EOF : INPUT_EOD; break; } if ( buf->type == RunBufSourceType ) { Stream *stream = (Stream*)buf->tree; - int type = stream->in->funcs->getData( fsmRun, stream->in, skip, dest, length, copied ); + copied += stream->in->funcs->getData( fsmRun, stream->in, skip, dest, length ); -// if ( type == INPUT_EOD && !stream->in->eosSent ) { -// stream->in->eosSent = 1; -// ret = INPUT_EOS; -// continue; -// } - - if ( type == INPUT_EOD || type == INPUT_EOF ) { + if ( copied == 0 ) { debug( REALM_INPUT, "skipping over input\n" ); buf = buf->next; continue; } - ret = type; + //ret = type; break; } - if ( buf->type == RunBufTokenType ) { - ret = INPUT_TREE; + if ( buf->type == RunBufTokenType ) break; - } - if ( buf->type == RunBufIgnoreType ) { - ret = INPUT_IGNORE; + if ( buf->type == RunBufIgnoreType ) break; - } int avail = buf->length - buf->offset; @@ -673,8 +627,7 @@ int _getData( FsmRun *fsmRun, StreamImpl *is, int skip, char *dest, int length, int slen = avail <= length ? 
avail : length; memcpy( dest, src, slen ) ; - *copied += slen; - ret = INPUT_DATA; + copied += slen; break; } } @@ -682,30 +635,7 @@ int _getData( FsmRun *fsmRun, StreamImpl *is, int skip, char *dest, int length, buf = buf->next; } -#if DEBUG - switch ( ret ) { - case INPUT_DATA: - debug( REALM_INPUT, "get data: DATA copied: %d: %.*s\n", *copied, (int)*copied, dest ); - break; - case INPUT_EOD: - debug( REALM_INPUT, "get data: EOD\n" ); - break; - case INPUT_EOF: - debug( REALM_INPUT, "get data: EOF\n" ); - break; - case INPUT_TREE: - debug( REALM_INPUT, "get data: TREE\n" ); - break; - case INPUT_IGNORE: - debug( REALM_INPUT, "get data: IGNORE\n" ); - break; - case INPUT_LANG_EL: - debug( REALM_INPUT, "get data: LANG_EL\n" ); - break; - } -#endif - - return ret; + return copied; } int _consumeData( StreamImpl *is, int length ) @@ -1033,3 +963,34 @@ Tree *_undoAppendTree( StreamImpl *is ) free( runBuf ); return tree; } + +void initStreamFuncs() +{ + memset( &streamFuncs, 0, sizeof(struct StreamFuncs) ); + streamFuncs.getData = &_getData; + streamFuncs.getParseBlock = &_getParseBlock; + streamFuncs.consumeData = &_consumeData; + streamFuncs.undoConsumeData = &_undoConsumeData; + streamFuncs.consumeTree = &_consumeTree; + streamFuncs.undoConsumeTree = &_undoConsumeTree; + streamFuncs.consumeLangEl = &_consumeLangEl; + streamFuncs.undoConsumeLangEl = &_undoConsumeLangEl; + + streamFuncs.setEof = &_setEof; + streamFuncs.unsetEof = &_unsetEof; + + streamFuncs.prependData = &_prependData; + streamFuncs.prependTree = &_prependTree; + streamFuncs.prependStream = &_prependStream; + streamFuncs.undoPrependData = &_undoPrependData; + streamFuncs.undoPrependTree = &_undoPrependTree; + + streamFuncs.appendData = &_appendData; + streamFuncs.appendTree = &_appendTree; + streamFuncs.appendStream = &_appendStream; + streamFuncs.undoAppendData = &_undoAppendData; + streamFuncs.undoAppendTree = &_undoAppendTree; + streamFuncs.undoAppendStream = &_undoAppendStream; +} + + diff --git 
a/colm/input.h b/colm/input.h index e37a9383..036f0cbd 100644 --- a/colm/input.h +++ b/colm/input.h @@ -92,7 +92,7 @@ struct StreamFuncs int skip, char **pdp, int *copied ); int (*getData)( struct _FsmRun *fsmRun, StreamImpl *ss, - int offset, char *dest, int length, int *copied ); + int offset, char *dest, int length ); int (*consumeData)( StreamImpl *ss, int length ); int (*undoConsumeData)( struct _FsmRun *fsmRun, StreamImpl *ss, const char *data, int length ); @@ -170,8 +170,6 @@ void initConsFuncs(); /* The input stream interface. */ -int _getData( struct _FsmRun *fsmRun, StreamImpl *in, int offset, - char *dest, int length, int *copied ); int _getParseBlock( struct _FsmRun *fsmRun, StreamImpl *in, int skip, char **pdp, int *copied ); int _consumeData( StreamImpl *in, int length ); diff --git a/colm/pdarun.c b/colm/pdarun.c index 85c9148a..d5fab334 100644 --- a/colm/pdarun.c +++ b/colm/pdarun.c @@ -135,8 +135,7 @@ Head *streamPull( Program *prg, FsmRun *fsmRun, StreamImpl *is, long length ) runBuf->next = fsmRun->consumeBuf; fsmRun->consumeBuf = runBuf; - int lenCopied = 0; - is->funcs->getData( fsmRun, is, 0, runBuf->data, length, &lenCopied ); + is->funcs->getData( fsmRun, is, 0, runBuf->data, length ); is->funcs->consumeData( is, length ); fsmRun->p = fsmRun->pe = 0; @@ -764,8 +763,7 @@ Head *peekMatch( Program *prg, FsmRun *fsmRun, StreamImpl *is ) runBuf->next = fsmRun->consumeBuf; fsmRun->consumeBuf = runBuf; - int lenCopied = 0; - is->funcs->getData( fsmRun, is, 0, runBuf->data, length, &lenCopied ); + is->funcs->getData( fsmRun, is, 0, runBuf->data, length ); fsmRun->p = fsmRun->pe = 0; fsmRun->toklen = 0; @@ -794,14 +792,9 @@ Head *extractMatch( Program *prg, FsmRun *fsmRun, StreamImpl *is ) runBuf->next = fsmRun->consumeBuf; fsmRun->consumeBuf = runBuf; - int lenCopied = 0; - int total = 0; - is->funcs->getData( fsmRun, is, 0, runBuf->data, length, &lenCopied ); - total += lenCopied; - while ( total < length ) { - is->funcs->getData( fsmRun, is, 
total, runBuf->data+total, length-total, &lenCopied ); - total += lenCopied; - } + int total = is->funcs->getData( fsmRun, is, 0, runBuf->data, length ); + while ( total < length ) + total += is->funcs->getData( fsmRun, is, total, runBuf->data+total, length-total ); is->funcs->consumeData( is, length ); -- cgit v1.2.1 From 6cee3af74110755e5a4a71f56dc1fdd6980a5443 Mon Sep 17 00:00:00 2001 From: Adrian Thurston Date: Wed, 6 Feb 2013 20:36:05 -0500 Subject: made the input stream functions static --- colm/input.c | 42 +++++++++++++++++++++--------------------- colm/input.h | 30 ------------------------------ 2 files changed, 21 insertions(+), 51 deletions(-) diff --git a/colm/input.c b/colm/input.c index eba93864..9084b21e 100644 --- a/colm/input.c +++ b/colm/input.c @@ -457,13 +457,13 @@ static int isSourceStream( StreamImpl *is ) return false; } -void _setEof( StreamImpl *is ) +static void _setEof( StreamImpl *is ) { debug( REALM_INPUT, "setting EOF in input stream\n" ); is->eof = true; } -void _unsetEof( StreamImpl *is ) +static void _unsetEof( StreamImpl *is ) { if ( isSourceStream( is ) ) { Stream *stream = (Stream*)is->queue->tree; @@ -474,7 +474,7 @@ void _unsetEof( StreamImpl *is ) } } -int _getParseBlock( FsmRun *fsmRun, StreamImpl *is, +static int _getParseBlock( FsmRun *fsmRun, StreamImpl *is, int skip, char **pdp, int *copied ) { int ret = 0; @@ -574,7 +574,7 @@ int _getParseBlock( FsmRun *fsmRun, StreamImpl *is, return ret; } -int _getData( FsmRun *fsmRun, StreamImpl *is, int skip, char *dest, int length ) +static int _getData( FsmRun *fsmRun, StreamImpl *is, int skip, char *dest, int length ) { int copied = 0; @@ -638,7 +638,7 @@ int _getData( FsmRun *fsmRun, StreamImpl *is, int skip, char *dest, int length ) return copied; } -int _consumeData( StreamImpl *is, int length ) +static int _consumeData( StreamImpl *is, int length ) { debug( REALM_INPUT, "consuming %d bytes\n", length ); @@ -687,7 +687,7 @@ int _consumeData( StreamImpl *is, int length ) return 
consumed; } -int _undoConsumeData( FsmRun *fsmRun, StreamImpl *is, const char *data, int length ) +static int _undoConsumeData( FsmRun *fsmRun, StreamImpl *is, const char *data, int length ) { debug( REALM_INPUT, "undoing consume of %ld bytes\n", length ); @@ -711,7 +711,7 @@ int _undoConsumeData( FsmRun *fsmRun, StreamImpl *is, const char *data, int leng } } -Tree *_consumeTree( StreamImpl *is ) +static Tree *_consumeTree( StreamImpl *is ) { while ( is->queue != 0 && is->queue->type == RunBufDataType && is->queue->offset == is->queue->length ) { RunBuf *runBuf = inputStreamPopHead( is ); @@ -730,7 +730,7 @@ Tree *_consumeTree( StreamImpl *is ) return 0; } -void _undoConsumeTree( StreamImpl *is, Tree *tree, int ignore ) +static void _undoConsumeTree( StreamImpl *is, Tree *tree, int ignore ) { /* Create a new buffer for the data. This is the easy implementation. * Something better is needed here. It puts a max on the amount of @@ -741,7 +741,7 @@ void _undoConsumeTree( StreamImpl *is, Tree *tree, int ignore ) inputStreamPrepend( is, newBuf ); } -struct LangEl *_consumeLangEl( StreamImpl *is, long *bindId, char **data, long *length ) +static struct LangEl *_consumeLangEl( StreamImpl *is, long *bindId, char **data, long *length ) { if ( isSourceStream( is ) ) { Stream *stream = (Stream*)is->queue->tree; @@ -752,7 +752,7 @@ struct LangEl *_consumeLangEl( StreamImpl *is, long *bindId, char **data, long * } } -void _undoConsumeLangEl( StreamImpl *is ) +static void _undoConsumeLangEl( StreamImpl *is ) { if ( isSourceStream( is ) ) { Stream *stream = (Stream*)is->queue->tree; @@ -763,7 +763,7 @@ void _undoConsumeLangEl( StreamImpl *is ) } } -void _prependData( StreamImpl *is, const char *data, long length ) +static void _prependData( StreamImpl *is, const char *data, long length ) { if ( isSourceStream( is ) && ((Stream*)is->queue->tree)->in->funcs == &streamFuncs ) { Stream *stream = (Stream*)is->queue->tree; @@ -784,7 +784,7 @@ void _prependData( StreamImpl *is, const 
char *data, long length ) } } -void _prependTree( StreamImpl *is, Tree *tree, int ignore ) +static void _prependTree( StreamImpl *is, Tree *tree, int ignore ) { /* Create a new buffer for the data. This is the easy implementation. * Something better is needed here. It puts a max on the amount of @@ -795,7 +795,7 @@ void _prependTree( StreamImpl *is, Tree *tree, int ignore ) inputStreamPrepend( is, newBuf ); } -void _prependStream( StreamImpl *in, struct ColmTree *tree ) +static void _prependStream( StreamImpl *in, struct ColmTree *tree ) { /* Create a new buffer for the data. This is the easy implementation. * Something better is needed here. It puts a max on the amount of @@ -806,7 +806,7 @@ void _prependStream( StreamImpl *in, struct ColmTree *tree ) inputStreamPrepend( in, newBuf ); } -int _undoPrependData( StreamImpl *is, int length ) +static int _undoPrependData( StreamImpl *is, int length ) { debug( REALM_INPUT, "consuming %d bytes\n", length ); @@ -852,7 +852,7 @@ int _undoPrependData( StreamImpl *is, int length ) return consumed; } -Tree *_undoPrependTree( StreamImpl *is ) +static Tree *_undoPrependTree( StreamImpl *is ) { while ( is->queue != 0 && is->queue->type == RunBufDataType && is->queue->offset == is->queue->length ) { RunBuf *runBuf = inputStreamPopHead( is ); @@ -871,7 +871,7 @@ Tree *_undoPrependTree( StreamImpl *is ) return 0; } -void _appendData( StreamImpl *is, const char *data, long len ) +static void _appendData( StreamImpl *is, const char *data, long len ) { while ( len > 0 ) { RunBuf *ad = newRunBuf(); @@ -889,7 +889,7 @@ void _appendData( StreamImpl *is, const char *data, long len ) } } -Tree *_undoAppendData( StreamImpl *is, int length ) +static Tree *_undoAppendData( StreamImpl *is, int length ) { int consumed = 0; @@ -926,7 +926,7 @@ Tree *_undoAppendData( StreamImpl *is, int length ) return 0; } -void _appendTree( StreamImpl *is, Tree *tree ) +static void _appendTree( StreamImpl *is, Tree *tree ) { RunBuf *ad = newRunBuf(); @@ -937,7 
+937,7 @@ void _appendTree( StreamImpl *is, Tree *tree ) ad->length = 0; } -void _appendStream( StreamImpl *in, struct ColmTree *tree ) +static void _appendStream( StreamImpl *in, struct ColmTree *tree ) { RunBuf *ad = newRunBuf(); @@ -948,7 +948,7 @@ void _appendStream( StreamImpl *in, struct ColmTree *tree ) ad->length = 0; } -Tree *_undoAppendStream( StreamImpl *is ) +static Tree *_undoAppendTree( StreamImpl *is ) { RunBuf *runBuf = inputStreamPopTail( is ); Tree *tree = runBuf->tree; @@ -956,7 +956,7 @@ Tree *_undoAppendStream( StreamImpl *is ) return tree; } -Tree *_undoAppendTree( StreamImpl *is ) +static Tree *_undoAppendStream( StreamImpl *is ) { RunBuf *runBuf = inputStreamPopTail( is ); Tree *tree = runBuf->tree; diff --git a/colm/input.h b/colm/input.h index 036f0cbd..41701021 100644 --- a/colm/input.h +++ b/colm/input.h @@ -168,36 +168,6 @@ void initStaticFuncs(); void initPatFuncs(); void initConsFuncs(); -/* The input stream interface. */ - -int _getParseBlock( struct _FsmRun *fsmRun, StreamImpl *in, - int skip, char **pdp, int *copied ); -int _consumeData( StreamImpl *in, int length ); -int _undoConsumeData( struct _FsmRun *fsmRun, StreamImpl *is, const char *data, int length ); - -struct ColmTree *_consumeTree( StreamImpl *in ); -void _undoConsumeTree( StreamImpl *in, struct ColmTree *tree, int ignore ); - -struct LangEl *_consumeLangEl( StreamImpl *in, long *bindId, char **data, long *length ); -void _undoConsumeLangEl( StreamImpl *in ); - -void _setEof( StreamImpl *is ); -void _unsetEof( StreamImpl *is ); - -void _prependData( StreamImpl *in, const char *data, long len ); -void _prependTree( StreamImpl *is, struct ColmTree *tree, int ignore ); -void _prependStream( StreamImpl *in, struct ColmTree *tree ); -int _undoPrependData( StreamImpl *is, int length ); -struct ColmTree *_undoPrependTree( StreamImpl *is ); -struct ColmTree *_undoPrependStream( StreamImpl *in ); - -void _appendData( StreamImpl *in, const char *data, long len ); -void 
_appendTree( StreamImpl *in, struct ColmTree *tree ); -void _appendStream( StreamImpl *in, struct ColmTree *tree ); -struct ColmTree *_undoAppendData( StreamImpl *in, int length ); -struct ColmTree *_undoAppendTree( StreamImpl *in ); -struct ColmTree *_undoAppendStream( StreamImpl *in ); - #ifdef __cplusplus } #endif -- cgit v1.2.1 From c7552a1f5bcc620f5a5c5ef51ba091529775adf6 Mon Sep 17 00:00:00 2001 From: Adrian Thurston Date: Wed, 6 Feb 2013 21:37:01 -0500 Subject: getData continues until length data is copied The getData function now continues to fetch data until length data is copied or there is no more data. Don't need to call it in a loop, or pass in a skip argument. --- colm/ctinput.cc | 52 +++++++++++------------------------- colm/input.c | 82 ++++++++++++++++++++++----------------------------------- colm/pdarun.c | 4 +-- 3 files changed, 48 insertions(+), 90 deletions(-) diff --git a/colm/ctinput.cc b/colm/ctinput.cc index 90e794bd..624f88b0 100644 --- a/colm/ctinput.cc +++ b/colm/ctinput.cc @@ -117,10 +117,10 @@ int inputStreamPatternGetData( FsmRun *fsmRun, StreamImpl *ss, int skip, char *d while ( true ) { if ( buf == 0 ) - return 0; + break; if ( buf->type == PatternItem::FactorType ) - return 0; + break; assert ( buf->type == PatternItem::InputText ); int avail = buf->data.length() - offset; @@ -130,24 +130,14 @@ int inputStreamPatternGetData( FsmRun *fsmRun, StreamImpl *ss, int skip, char *d char *src = &buf->data[offset]; int slen = avail <= length ? avail : length; - /* Need to skip? */ - if ( skip > 0 && slen <= skip ) { - /* Skipping the the whole source. */ - skip -= slen; - } - else { - /* Either skip is zero, or less than slen. Skip goes to zero. - * Some data left over, copy it. 
*/ - src += skip; - slen -= skip; - skip = 0; - - memcpy( dest, src, slen ) ; - copied += slen; - break; - } + memcpy( dest+copied, src, slen ) ; + copied += slen; + length -= slen; } + if ( length == 0 ) + break; + buf = buf->next; offset = 0; } @@ -341,10 +331,10 @@ int inputStreamConsGetData( FsmRun *fsmRun, StreamImpl *ss, int skip, char *dest while ( true ) { if ( buf == 0 ) - return 0; + break; if ( buf->type == ConsItem::ExprType || buf->type == ConsItem::FactorType ) - return 0; + break; assert ( buf->type == ConsItem::InputText ); int avail = buf->data.length() - offset; @@ -354,24 +344,14 @@ int inputStreamConsGetData( FsmRun *fsmRun, StreamImpl *ss, int skip, char *dest char *src = &buf->data[offset]; int slen = avail <= length ? avail : length; - /* Need to skip? */ - if ( skip > 0 && slen <= skip ) { - /* Skipping the the whole source. */ - skip -= slen; - } - else { - /* Either skip is zero, or less than slen. Skip goes to zero. - * Some data left over, copy it. */ - src += skip; - slen -= skip; - skip = 0; - - memcpy( dest, src, slen ) ; - copied += slen; - break; - } + memcpy( dest+copied, src, slen ) ; + copied += slen; + length -= slen; } + if ( length == 0 ) + break; + buf = buf->next; offset = 0; } diff --git a/colm/input.c b/colm/input.c index 9084b21e..3ce576b6 100644 --- a/colm/input.c +++ b/colm/input.c @@ -220,15 +220,11 @@ int fdGetData( FsmRun *fsmRun, StreamImpl *ss, int skip, char *dest, int length RunBuf *runBuf = newRunBuf(); sourceStreamAppend( ss, runBuf ); int received = ss->funcs->getDataSource( ss, runBuf->data, FSM_BUFSIZE ); - if ( received == 0 ) { - break; - } runBuf->length = received; + if ( received == 0 ) + break; - int slen = received < length ? 
received : length; - memcpy( dest, runBuf->data, slen ); - copied = slen; - break; + buf = runBuf; } int avail = buf->length - buf->offset; @@ -238,23 +234,15 @@ int fdGetData( FsmRun *fsmRun, StreamImpl *ss, int skip, char *dest, int length /* The source data from the current buffer. */ char *src = &buf->data[buf->offset]; - /* Need to skip? */ - if ( skip > 0 && skip >= avail ) { - /* Skipping the the whole source. */ - skip -= avail; - } - else { - /* Either skip is zero, or less than slen. Skip goes to zero. - * Some data left over, copy it. */ - src += skip; - avail -= skip; - skip = 0; + int slen = avail < length ? avail : length; + memcpy( dest+copied, src, slen ) ; + copied += slen; + length -= slen; + } - int slen = avail < length ? avail : length; - memcpy( dest, src, slen ) ; - copied += slen; - break; - } + if ( length == 0 ) { + debug( REALM_INPUT, "exiting get data\n", length ); + break; } buf = buf->next; @@ -588,50 +576,42 @@ static int _getData( FsmRun *fsmRun, StreamImpl *is, int skip, char *dest, int l if ( buf->type == RunBufSourceType ) { Stream *stream = (Stream*)buf->tree; - copied += stream->in->funcs->getData( fsmRun, stream->in, skip, dest, length ); + int glen = stream->in->funcs->getData( fsmRun, stream->in, 0, dest+copied, length ); - if ( copied == 0 ) { + if ( glen == 0 ) { debug( REALM_INPUT, "skipping over input\n" ); buf = buf->next; continue; } - //ret = type; - break; + copied += glen; + length -= glen; } - - if ( buf->type == RunBufTokenType ) + else if ( buf->type == RunBufTokenType ) break; - - if ( buf->type == RunBufIgnoreType ) + else if ( buf->type == RunBufIgnoreType ) break; + else { + int avail = buf->length - buf->offset; - int avail = buf->length - buf->offset; - - /* Anything available in the current buffer. */ - if ( avail > 0 ) { - /* The source data from the current buffer. */ - char *src = &buf->data[buf->offset]; - - /* Need to skip? */ - if ( skip > 0 && skip >= avail ) { - /* Skipping the the whole source. 
*/ - skip -= avail; - } - else { - /* Either skip is zero, or less than slen. Skip goes to zero. - * Some data left over, copy it. */ - src += skip; - avail -= skip; - skip = 0; + /* Anything available in the current buffer. */ + if ( avail > 0 ) { + /* The source data from the current buffer. */ + char *src = &buf->data[buf->offset]; int slen = avail <= length ? avail : length; - memcpy( dest, src, slen ) ; + memcpy( dest+copied, src, slen ) ; + copied += slen; - break; + length -= slen; } } + if ( length == 0 ) { + debug( REALM_INPUT, "exiting get data\n", length ); + break; + } + buf = buf->next; } diff --git a/colm/pdarun.c b/colm/pdarun.c index d5fab334..39ee60c3 100644 --- a/colm/pdarun.c +++ b/colm/pdarun.c @@ -792,9 +792,7 @@ Head *extractMatch( Program *prg, FsmRun *fsmRun, StreamImpl *is ) runBuf->next = fsmRun->consumeBuf; fsmRun->consumeBuf = runBuf; - int total = is->funcs->getData( fsmRun, is, 0, runBuf->data, length ); - while ( total < length ) - total += is->funcs->getData( fsmRun, is, total, runBuf->data+total, length-total ); + is->funcs->getData( fsmRun, is, 0, runBuf->data, length ); is->funcs->consumeData( is, length ); -- cgit v1.2.1 From 820ca9af9ae1fd05e7cf4b8de03da9199529c7c3 Mon Sep 17 00:00:00 2001 From: Adrian Thurston Date: Wed, 6 Feb 2013 21:48:12 -0500 Subject: took skip out of the getData interface, not needed --- colm/ctinput.cc | 4 ++-- colm/input.c | 6 +++--- colm/input.h | 2 +- colm/pdarun.c | 6 +++--- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/colm/ctinput.cc b/colm/ctinput.cc index 624f88b0..a4ff653b 100644 --- a/colm/ctinput.cc +++ b/colm/ctinput.cc @@ -108,7 +108,7 @@ int inputStreamPatternGetParseBlock( FsmRun *fsmRun, StreamImpl *ss, return INPUT_DATA; } -int inputStreamPatternGetData( FsmRun *fsmRun, StreamImpl *ss, int skip, char *dest, int length ) +int inputStreamPatternGetData( FsmRun *fsmRun, StreamImpl *ss, char *dest, int length ) { int copied = 0; @@ -322,7 +322,7 @@ int 
inputStreamConsGetParseBlock( FsmRun *fsmRun, StreamImpl *ss, return INPUT_DATA; } -int inputStreamConsGetData( FsmRun *fsmRun, StreamImpl *ss, int skip, char *dest, int length ) +int inputStreamConsGetData( FsmRun *fsmRun, StreamImpl *ss, char *dest, int length ) { int copied = 0; diff --git a/colm/input.c b/colm/input.c index 3ce576b6..46b72fc6 100644 --- a/colm/input.c +++ b/colm/input.c @@ -208,7 +208,7 @@ int fdGetParseBlock( FsmRun *fsmRun, StreamImpl *ss, return ret; } -int fdGetData( FsmRun *fsmRun, StreamImpl *ss, int skip, char *dest, int length ) +int fdGetData( FsmRun *fsmRun, StreamImpl *ss, char *dest, int length ) { int copied = 0; @@ -562,7 +562,7 @@ static int _getParseBlock( FsmRun *fsmRun, StreamImpl *is, return ret; } -static int _getData( FsmRun *fsmRun, StreamImpl *is, int skip, char *dest, int length ) +static int _getData( FsmRun *fsmRun, StreamImpl *is, char *dest, int length ) { int copied = 0; @@ -576,7 +576,7 @@ static int _getData( FsmRun *fsmRun, StreamImpl *is, int skip, char *dest, int l if ( buf->type == RunBufSourceType ) { Stream *stream = (Stream*)buf->tree; - int glen = stream->in->funcs->getData( fsmRun, stream->in, 0, dest+copied, length ); + int glen = stream->in->funcs->getData( fsmRun, stream->in, dest+copied, length ); if ( glen == 0 ) { debug( REALM_INPUT, "skipping over input\n" ); diff --git a/colm/input.h b/colm/input.h index 41701021..00ce509d 100644 --- a/colm/input.h +++ b/colm/input.h @@ -92,7 +92,7 @@ struct StreamFuncs int skip, char **pdp, int *copied ); int (*getData)( struct _FsmRun *fsmRun, StreamImpl *ss, - int offset, char *dest, int length ); + char *dest, int length ); int (*consumeData)( StreamImpl *ss, int length ); int (*undoConsumeData)( struct _FsmRun *fsmRun, StreamImpl *ss, const char *data, int length ); diff --git a/colm/pdarun.c b/colm/pdarun.c index 39ee60c3..af1967c6 100644 --- a/colm/pdarun.c +++ b/colm/pdarun.c @@ -135,7 +135,7 @@ Head *streamPull( Program *prg, FsmRun *fsmRun, StreamImpl 
*is, long length ) runBuf->next = fsmRun->consumeBuf; fsmRun->consumeBuf = runBuf; - is->funcs->getData( fsmRun, is, 0, runBuf->data, length ); + is->funcs->getData( fsmRun, is, runBuf->data, length ); is->funcs->consumeData( is, length ); fsmRun->p = fsmRun->pe = 0; @@ -763,7 +763,7 @@ Head *peekMatch( Program *prg, FsmRun *fsmRun, StreamImpl *is ) runBuf->next = fsmRun->consumeBuf; fsmRun->consumeBuf = runBuf; - is->funcs->getData( fsmRun, is, 0, runBuf->data, length ); + is->funcs->getData( fsmRun, is, runBuf->data, length ); fsmRun->p = fsmRun->pe = 0; fsmRun->toklen = 0; @@ -792,7 +792,7 @@ Head *extractMatch( Program *prg, FsmRun *fsmRun, StreamImpl *is ) runBuf->next = fsmRun->consumeBuf; fsmRun->consumeBuf = runBuf; - is->funcs->getData( fsmRun, is, 0, runBuf->data, length ); + is->funcs->getData( fsmRun, is, runBuf->data, length ); is->funcs->consumeData( is, length ); -- cgit v1.2.1 From fb5ccc9bb7b44e322aaf581bd7445de03bfd84db Mon Sep 17 00:00:00 2001 From: Adrian Thurston Date: Thu, 7 Feb 2013 21:58:32 +0000 Subject: eliminated duplicate definition of StreamImpl typedef --- colm/input.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/colm/input.h b/colm/input.h index 036f0cbd..b72c2406 100644 --- a/colm/input.h +++ b/colm/input.h @@ -156,8 +156,6 @@ struct _StreamImpl struct ConsItem *consItem; }; -typedef struct _StreamImpl StreamImpl; - StreamImpl *newSourceStreamPat( struct Pattern *pattern ); StreamImpl *newSourceStreamCons( struct Constructor *constructor ); StreamImpl *newSourceStreamFile( FILE *file ); -- cgit v1.2.1 From 7ac8061c708f32140be747dc5a6f312f3ebbfb29 Mon Sep 17 00:00:00 2001 From: Adrian Thurston Date: Thu, 7 Feb 2013 20:48:58 -0500 Subject: the eof marker is now a single bit rather than a pointer --- colm/fsmcodegen.cc | 2 +- colm/fsmcodegen.h | 2 +- colm/fsmexec.cc | 2 +- colm/pdarun.c | 21 +++++++++------------ colm/pdarun.h | 10 +++++++--- 5 files changed, 19 insertions(+), 18 deletions(-) diff --git a/colm/fsmcodegen.cc 
b/colm/fsmcodegen.cc index 708922de..e82198c4 100644 --- a/colm/fsmcodegen.cc +++ b/colm/fsmcodegen.cc @@ -731,7 +731,7 @@ std::ostream &FsmCodeGen::EXIT_STATES() for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { out << " case " << st->id << ": out" << st->id << ": "; if ( st->eofTrans != 0 ) { - out << "if ( " << PE() << " == " << PEOF() << " ) {"; + out << "if ( " << DATA_EOF() << " ) {"; TRANS_GOTO( st->eofTrans, 0 ); out << "\n"; out << "}"; diff --git a/colm/fsmcodegen.h b/colm/fsmcodegen.h index d1f23166..5a24a0d2 100644 --- a/colm/fsmcodegen.h +++ b/colm/fsmcodegen.h @@ -99,7 +99,7 @@ protected: string P() { return ACCESS() + "p"; } string PE() { return ACCESS() + "pe"; } - string PEOF() { return ACCESS() + "peof"; } + string DATA_EOF() { return ACCESS() + "eof"; } string CS(); string TOP() { return ACCESS() + "top"; } diff --git a/colm/fsmexec.cc b/colm/fsmexec.cc index 94d2e18e..d3b06555 100644 --- a/colm/fsmexec.cc +++ b/colm/fsmexec.cc @@ -197,7 +197,7 @@ _again: if ( ++fsmRun->p != fsmRun->pe ) goto _loop_head; out: - if ( fsmRun->p == fsmRun->peof ) { + if ( fsmRun->eof ) { fsmRun->returnResult = false; fsmRun->skipToklen = false; _acts = fsmRun->tables->actions + fsmRun->tables->eofActions[fsmRun->cs]; diff --git a/colm/pdarun.c b/colm/pdarun.c index af1967c6..1b30af27 100644 --- a/colm/pdarun.c +++ b/colm/pdarun.c @@ -65,7 +65,7 @@ void initFsmRun( FsmRun *fsmRun, Program *prg ) fsmRun->p = fsmRun->pe = 0; fsmRun->toklen = 0; - fsmRun->peof = (char*)-1; + fsmRun->eof = 0; fsmRun->preRegion = -1; } @@ -140,7 +140,6 @@ Head *streamPull( Program *prg, FsmRun *fsmRun, StreamImpl *is, long length ) fsmRun->p = fsmRun->pe = 0; fsmRun->toklen = 0; - //fsmRun->peof = (char*)-1; Head *tokdata = stringAllocPointer( prg, runBuf->data, length ); updatePosition( is, runBuf->data, length ); @@ -269,7 +268,7 @@ void resetToken( FsmRun *fsmRun ) if ( fsmRun->tokstart != 0 ) { fsmRun->p = fsmRun->pe = 0; fsmRun->toklen = 0; - fsmRun->peof = 
(char*)-1; + fsmRun->eof = 0; } } @@ -767,7 +766,6 @@ Head *peekMatch( Program *prg, FsmRun *fsmRun, StreamImpl *is ) fsmRun->p = fsmRun->pe = 0; fsmRun->toklen = 0; - //fsmRun->peof = (char*)-1; Head *head = stringAllocPointer( prg, runBuf->data, length ); @@ -799,7 +797,6 @@ Head *extractMatch( Program *prg, FsmRun *fsmRun, StreamImpl *is ) fsmRun->p = fsmRun->pe = 0; fsmRun->toklen = 0; fsmRun->tokstart = 0; - //fsmRun->peof = (char*)-1; Head *head = stringAllocPointer( prg, runBuf->data, length ); @@ -934,7 +931,7 @@ void newToken( Program *prg, PdaRun *pdaRun, FsmRun *fsmRun ) { fsmRun->p = fsmRun->pe = 0; fsmRun->toklen = 0; - fsmRun->peof = (char*)-1; + fsmRun->eof = 0; /* Init the scanner vars. */ fsmRun->act = 0; @@ -1047,7 +1044,7 @@ long scanToken( Program *prg, PdaRun *pdaRun, FsmRun *fsmRun, StreamImpl *is ) fsmRun->p = fsmRun->pe = 0; //fsmRun->have = 0; if ( fsmRun->tokstart != 0 ) - fsmRun->peof = fsmRun->pe; + fsmRun->eof = 1; debug( REALM_SCAN, "EOS *******************\n" ); //else { // return SCAN_EOS; @@ -1058,7 +1055,7 @@ long scanToken( Program *prg, PdaRun *pdaRun, FsmRun *fsmRun, StreamImpl *is ) fsmRun->p = fsmRun->pe = 0; //fsmRun->have = 0; if ( fsmRun->tokstart != 0 ) - fsmRun->peof = fsmRun->pe; + fsmRun->eof = 1; else return SCAN_EOF; break; @@ -1069,20 +1066,20 @@ long scanToken( Program *prg, PdaRun *pdaRun, FsmRun *fsmRun, StreamImpl *is ) case INPUT_LANG_EL: if ( fsmRun->tokstart != 0 ) - fsmRun->peof = fsmRun->pe; + fsmRun->eof = 1; else return SCAN_LANG_EL; break; case INPUT_TREE: if ( fsmRun->tokstart != 0 ) - fsmRun->peof = fsmRun->pe; + fsmRun->eof = 1; else return SCAN_TREE; break; case INPUT_IGNORE: if ( fsmRun->tokstart != 0 ) - fsmRun->peof = fsmRun->pe; + fsmRun->eof = 1; else return SCAN_IGNORE; break; @@ -1228,7 +1225,7 @@ case PcrPreEof: fsmRun->p = fsmRun->pe = 0; fsmRun->toklen = 0; - fsmRun->peof = (char*)-1; + fsmRun->eof = 0; pdaRun->fi = &prg->rtd->frameInfo[prg->rtd->lelInfo[pdaRun->tokenId].frameId]; 
pdaRun->frameId = prg->rtd->lelInfo[pdaRun->tokenId].frameId; diff --git a/colm/pdarun.h b/colm/pdarun.h index 350c10fb..1da0ef42 100644 --- a/colm/pdarun.h +++ b/colm/pdarun.h @@ -82,9 +82,13 @@ typedef struct _FsmRun char *tokstart; long tokend; long toklen; - char *p, *pe, *peof; - int returnResult; - int skipToklen; + char *p, *pe; + + /* Bits. */ + char eof; + char returnResult; + char skipToklen; + char *mark[MARK_SLOTS]; long matchedToken; } FsmRun; -- cgit v1.2.1 From 3f588ae9af03b1aac58e6269dad8a3bc50a86b5f Mon Sep 17 00:00:00 2001 From: Adrian Thurston Date: Sat, 9 Feb 2013 09:00:14 -0500 Subject: don't allocate new runbufs for every match pulled from a stream --- colm/pdarun.c | 144 ++++++++++++++++++++++++++++++++-------------------------- 1 file changed, 79 insertions(+), 65 deletions(-) diff --git a/colm/pdarun.c b/colm/pdarun.c index 1b30af27..73cc5b5c 100644 --- a/colm/pdarun.c +++ b/colm/pdarun.c @@ -126,23 +126,94 @@ void decrementSteps( PdaRun *pdaRun ) debug( REALM_PARSE, "steps down to %ld\n", pdaRun->steps ); } +Head *extractMatch( Program *prg, FsmRun *fsmRun, StreamImpl *is ) +{ + long length = fsmRun->toklen; + + debug( REALM_PARSE, "extracting token of length: %ld\n", length ); + + RunBuf *runBuf = fsmRun->consumeBuf; + if ( runBuf == 0 || length > ( FSM_BUFSIZE - runBuf->length ) ) { + runBuf = newRunBuf(); + runBuf->next = fsmRun->consumeBuf; + fsmRun->consumeBuf = runBuf; + } + + char *dest = runBuf->data + runBuf->length; + + is->funcs->getData( fsmRun, is, dest, length ); + is->funcs->consumeData( is, length ); + + runBuf->length += length; + + fsmRun->p = fsmRun->pe = 0; + fsmRun->toklen = 0; + fsmRun->tokstart = 0; + + Head *head = stringAllocPointer( prg, dest, length ); + + head->location = locationAllocate( prg ); + head->location->line = is->line; + head->location->column = is->column; + head->location->byte = is->byte; + + debug( REALM_PARSE, "location byte: %d\n", is->byte ); + + return head; +} + +Head *peekMatch( Program 
*prg, FsmRun *fsmRun, StreamImpl *is ) +{ + long length = fsmRun->toklen; + + RunBuf *runBuf = fsmRun->consumeBuf; + if ( runBuf == 0 || length > ( FSM_BUFSIZE - runBuf->length ) ) { + runBuf = newRunBuf(); + runBuf->next = fsmRun->consumeBuf; + fsmRun->consumeBuf = runBuf; + } + + char *dest = runBuf->data + runBuf->length; + + is->funcs->getData( fsmRun, is, dest, length ); + + fsmRun->p = fsmRun->pe = 0; + fsmRun->toklen = 0; + + Head *head = stringAllocPointer( prg, dest, length ); + + head->location = locationAllocate( prg ); + head->location->line = is->line; + head->location->column = is->column; + head->location->byte = is->byte; + + debug( REALM_PARSE, "location byte: %d\n", is->byte ); + + return head; +} + + Head *streamPull( Program *prg, FsmRun *fsmRun, StreamImpl *is, long length ) { -// /* We should not be in the midst of getting a token. */ -// assert( fsmRun->tokstart == 0 ); + RunBuf *runBuf = fsmRun->consumeBuf; + if ( length > ( FSM_BUFSIZE - runBuf->length ) ) { + runBuf = newRunBuf(); + runBuf->next = fsmRun->consumeBuf; + fsmRun->consumeBuf = runBuf; + } - RunBuf *runBuf = newRunBuf(); - runBuf->next = fsmRun->consumeBuf; - fsmRun->consumeBuf = runBuf; + char *dest = runBuf->data + runBuf->length; - is->funcs->getData( fsmRun, is, runBuf->data, length ); + is->funcs->getData( fsmRun, is, dest, length ); is->funcs->consumeData( is, length ); + runBuf->length += length; + fsmRun->p = fsmRun->pe = 0; fsmRun->toklen = 0; - Head *tokdata = stringAllocPointer( prg, runBuf->data, length ); - updatePosition( is, runBuf->data, length ); + Head *tokdata = stringAllocPointer( prg, dest, length ); + updatePosition( is, dest, length ); return tokdata; } @@ -753,63 +824,6 @@ void sendIgnore( Program *prg, Tree **sp, StreamImpl *is, FsmRun *fsmRun, PdaRun } -/* Doesn't consume. 
*/ -Head *peekMatch( Program *prg, FsmRun *fsmRun, StreamImpl *is ) -{ - long length = fsmRun->toklen; - - RunBuf *runBuf = newRunBuf(); - runBuf->next = fsmRun->consumeBuf; - fsmRun->consumeBuf = runBuf; - - is->funcs->getData( fsmRun, is, runBuf->data, length ); - - fsmRun->p = fsmRun->pe = 0; - fsmRun->toklen = 0; - - Head *head = stringAllocPointer( prg, runBuf->data, length ); - - head->location = locationAllocate( prg ); - head->location->line = is->line; - head->location->column = is->column; - head->location->byte = is->byte; - - debug( REALM_PARSE, "location byte: %d\n", is->byte ); - - return head; -} - -/* Consumes. */ -Head *extractMatch( Program *prg, FsmRun *fsmRun, StreamImpl *is ) -{ - long length = fsmRun->toklen; - - debug( REALM_PARSE, "extracting token of length: %ld\n", length ); - - RunBuf *runBuf = newRunBuf(); - runBuf->next = fsmRun->consumeBuf; - fsmRun->consumeBuf = runBuf; - - is->funcs->getData( fsmRun, is, runBuf->data, length ); - - is->funcs->consumeData( is, length ); - - fsmRun->p = fsmRun->pe = 0; - fsmRun->toklen = 0; - fsmRun->tokstart = 0; - - Head *head = stringAllocPointer( prg, runBuf->data, length ); - - head->location = locationAllocate( prg ); - head->location->line = is->line; - head->location->column = is->column; - head->location->byte = is->byte; - - debug( REALM_PARSE, "location byte: %d\n", is->byte ); - - return head; -} - static void sendToken( Program *prg, Tree **sp, StreamImpl *is, FsmRun *fsmRun, PdaRun *pdaRun, long id ) { int emptyIgnore = pdaRun->accumIgnore == 0; -- cgit v1.2.1 From 090d408a204a730a180be13635de0e0c05f2b08e Mon Sep 17 00:00:00 2001 From: Adrian Thurston Date: Sat, 9 Feb 2013 09:47:44 -0500 Subject: put the data fetch at the top of the scanToken loop Now that we are copying into FSM on consume, it makes sense to get buffer blocks at the top of the scan token loop. 
--- colm/pdarun.c | 74 +++++++++++++++++++++++++++-------------------------------- 1 file changed, 34 insertions(+), 40 deletions(-) diff --git a/colm/pdarun.c b/colm/pdarun.c index 73cc5b5c..ec868c7a 100644 --- a/colm/pdarun.c +++ b/colm/pdarun.c @@ -192,7 +192,6 @@ Head *peekMatch( Program *prg, FsmRun *fsmRun, StreamImpl *is ) return head; } - Head *streamPull( Program *prg, FsmRun *fsmRun, StreamImpl *is, long length ) { RunBuf *runBuf = fsmRun->consumeBuf; @@ -1010,40 +1009,6 @@ long scanToken( Program *prg, PdaRun *pdaRun, FsmRun *fsmRun, StreamImpl *is ) return SCAN_UNDO; while ( true ) { - fsmExecute( fsmRun, is ); - - /* First check if scanning stopped because we have a token. */ - if ( fsmRun->matchedToken > 0 ) { - /* If the token has a marker indicating the end (due to trailing - * context) then adjust data now. */ - LangElInfo *lelInfo = prg->rtd->lelInfo; - if ( lelInfo[fsmRun->matchedToken].markId >= 0 ) - fsmRun->p = fsmRun->mark[lelInfo[fsmRun->matchedToken].markId]; - - return fsmRun->matchedToken; - } - - /* Check for error. */ - if ( fsmRun->cs == fsmRun->tables->errorState ) { - /* If a token was started, but not finished (tokstart != 0) then - * restore data to the beginning of that token. */ - if ( fsmRun->tokstart != 0 ) - fsmRun->p = fsmRun->tokstart; - - /* Check for a default token in the region. If one is there - * then send it and continue with the processing loop. */ - if ( prg->rtd->regionInfo[fsmRun->region].defaultToken >= 0 ) { - fsmRun->toklen = 0; - return prg->rtd->regionInfo[fsmRun->region].defaultToken; - } - - return SCAN_ERROR; - } - - /* Got here because the state machine didn't match a token or encounter - * an error. Must be because we got to the end of the buffer data. 
*/ - assert( fsmRun->p == fsmRun->pe ); - char *pd = 0; int len = 0; int type = is->funcs->getParseBlock( fsmRun, is, fsmRun->toklen, &pd, &len ); @@ -1056,18 +1021,13 @@ long scanToken( Program *prg, PdaRun *pdaRun, FsmRun *fsmRun, StreamImpl *is ) case INPUT_EOS: fsmRun->p = fsmRun->pe = 0; - //fsmRun->have = 0; if ( fsmRun->tokstart != 0 ) fsmRun->eof = 1; debug( REALM_SCAN, "EOS *******************\n" ); - //else { - // return SCAN_EOS; - //} break; case INPUT_EOF: fsmRun->p = fsmRun->pe = 0; - //fsmRun->have = 0; if ( fsmRun->tokstart != 0 ) fsmRun->eof = 1; else @@ -1098,6 +1058,40 @@ long scanToken( Program *prg, PdaRun *pdaRun, FsmRun *fsmRun, StreamImpl *is ) return SCAN_IGNORE; break; } + + fsmExecute( fsmRun, is ); + + /* First check if scanning stopped because we have a token. */ + if ( fsmRun->matchedToken > 0 ) { + /* If the token has a marker indicating the end (due to trailing + * context) then adjust data now. */ + LangElInfo *lelInfo = prg->rtd->lelInfo; + if ( lelInfo[fsmRun->matchedToken].markId >= 0 ) + fsmRun->p = fsmRun->mark[lelInfo[fsmRun->matchedToken].markId]; + + return fsmRun->matchedToken; + } + + /* Check for error. */ + if ( fsmRun->cs == fsmRun->tables->errorState ) { + /* If a token was started, but not finished (tokstart != 0) then + * restore data to the beginning of that token. */ + if ( fsmRun->tokstart != 0 ) + fsmRun->p = fsmRun->tokstart; + + /* Check for a default token in the region. If one is there + * then send it and continue with the processing loop. */ + if ( prg->rtd->regionInfo[fsmRun->region].defaultToken >= 0 ) { + fsmRun->toklen = 0; + return prg->rtd->regionInfo[fsmRun->region].defaultToken; + } + + return SCAN_ERROR; + } + + /* Got here because the state machine didn't match a token or encounter + * an error. Must be because we got to the end of the buffer data. */ + assert( fsmRun->p == fsmRun->pe ); } /* Should not be reached. 
*/ -- cgit v1.2.1 From 4d792dcb30ec53bd4b8ad98b3909d355dd3bf021 Mon Sep 17 00:00:00 2001 From: Adrian Thurston Date: Sat, 9 Feb 2013 12:18:03 -0500 Subject: removed the fsmrun.h header, which was empty --- colm/Makefile.am | 4 ++-- colm/bytecode.c | 1 - colm/codegen.cc | 1 - colm/compiler.cc | 1 - colm/ctinput.cc | 1 - colm/declare.cc | 1 - colm/exports.cc | 1 - colm/fsmcodegen.cc | 2 -- colm/fsmcodegen.h | 1 - colm/fsmexec.cc | 1 - colm/fsmrun.h | 36 ------------------------------------ colm/input.c | 1 - colm/lmparse.kl | 1 - colm/parsetree.cc | 1 - colm/parsetree.h | 1 - colm/pdabuild.cc | 1 - colm/pdarun.c | 1 - colm/pdarun.h | 1 - colm/program.c | 1 - colm/redbuild.h | 1 - colm/redfsm.cc | 1 - colm/resolve.cc | 1 - colm/synthesis.cc | 1 - 23 files changed, 2 insertions(+), 60 deletions(-) delete mode 100644 colm/fsmrun.h diff --git a/colm/Makefile.am b/colm/Makefile.am index 5d7f1101..226bafbf 100644 --- a/colm/Makefile.am +++ b/colm/Makefile.am @@ -28,7 +28,7 @@ RUNTIME_SRC = \ RUNTIME_HDR = \ bytecode.h config.h defs.h debug.h pool.h input.h \ - fsmrun.h pdarun.h map.h tree.h program.h colm.h + pdarun.h map.h tree.h program.h colm.h lib_LIBRARIES = libcolmp.a libcolmd.a @@ -49,7 +49,7 @@ colm_LDADD = libcolmp.a colm_SOURCES = \ buffer.h bytecode.h colm.h debug.h dotgen.h fsmcodegen.h fsmgraph.h \ - fsmrun.h input.h keyops.h lmparse.h lmscan.h map.h parsedata.h \ + input.h keyops.h lmparse.h lmscan.h map.h parsedata.h \ parsetree.h pcheck.h pdacodegen.h pdagraph.h pdarun.h pool.h redbuild.h \ redfsm.h rtvector.h tree.h version.h global.h colm.h \ \ diff --git a/colm/bytecode.c b/colm/bytecode.c index 94e7d9b6..e8a89c7d 100644 --- a/colm/bytecode.c +++ b/colm/bytecode.c @@ -20,7 +20,6 @@ */ #include -#include #include #include #include diff --git a/colm/codegen.cc b/colm/codegen.cc index f55257e7..653d8d51 100644 --- a/colm/codegen.cc +++ b/colm/codegen.cc @@ -23,7 +23,6 @@ #include "fsmcodegen.h" #include "redfsm.h" #include "bstmap.h" -#include "fsmrun.h" 
#include "debug.h" #include #include diff --git a/colm/compiler.cc b/colm/compiler.cc index 8043b45f..687e2791 100644 --- a/colm/compiler.cc +++ b/colm/compiler.cc @@ -34,7 +34,6 @@ #include "redbuild.h" #include "pdacodegen.h" #include "fsmcodegen.h" -#include "fsmrun.h" #include "pdarun.h" #include "colm.h" #include "pool.h" diff --git a/colm/ctinput.cc b/colm/ctinput.cc index a4ff653b..c2d74fc1 100644 --- a/colm/ctinput.cc +++ b/colm/ctinput.cc @@ -22,7 +22,6 @@ #include "parsedata.h" #include "parsetree.h" #include "input.h" -#include "fsmrun.h" #include "debug.h" #include "pool.h" diff --git a/colm/declare.cc b/colm/declare.cc index 6a285277..c2cdec51 100644 --- a/colm/declare.cc +++ b/colm/declare.cc @@ -21,7 +21,6 @@ #include "bytecode.h" #include "parsedata.h" -#include "fsmrun.h" #include #include diff --git a/colm/exports.cc b/colm/exports.cc index df94abdf..3556e249 100644 --- a/colm/exports.cc +++ b/colm/exports.cc @@ -23,7 +23,6 @@ #include "fsmcodegen.h" #include "redfsm.h" #include "bstmap.h" -#include "fsmrun.h" #include "debug.h" #include #include diff --git a/colm/fsmcodegen.cc b/colm/fsmcodegen.cc index e82198c4..2c9c3387 100644 --- a/colm/fsmcodegen.cc +++ b/colm/fsmcodegen.cc @@ -23,7 +23,6 @@ #include "fsmcodegen.h" #include "redfsm.h" #include "bstmap.h" -#include "fsmrun.h" #include #include #include @@ -886,7 +885,6 @@ void FsmCodeGen::writeIncludes() { out << "#include \n" - "#include \n" "#include \n" "#include \n" "#include \n" diff --git a/colm/fsmcodegen.h b/colm/fsmcodegen.h index 5a24a0d2..39ae1876 100644 --- a/colm/fsmcodegen.h +++ b/colm/fsmcodegen.h @@ -28,7 +28,6 @@ #include "keyops.h" #include "parsedata.h" #include "redfsm.h" -#include "fsmrun.h" using std::string; using std::ostream; diff --git a/colm/fsmexec.cc b/colm/fsmexec.cc index d3b06555..905f1d80 100644 --- a/colm/fsmexec.cc +++ b/colm/fsmexec.cc @@ -24,7 +24,6 @@ #include "config.h" #include "defs.h" -#include "fsmrun.h" #include "redfsm.h" #include "parsedata.h" 
#include "parsetree.h" diff --git a/colm/fsmrun.h b/colm/fsmrun.h deleted file mode 100644 index 821b3ccf..00000000 --- a/colm/fsmrun.h +++ /dev/null @@ -1,36 +0,0 @@ -/* - * Copyright 2007-2012 Adrian Thurston - */ - -/* This file is part of Colm. - * - * Colm is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * Colm is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Colm; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#ifndef _FSMRUN2_H -#define _FSMRUN2_H - -#include - -#ifdef __cplusplus -extern "C" { -#endif - - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/colm/input.c b/colm/input.c index 46b72fc6..532170ee 100644 --- a/colm/input.c +++ b/colm/input.c @@ -20,7 +20,6 @@ */ #include -#include #include #include diff --git a/colm/lmparse.kl b/colm/lmparse.kl index 5f3adb38..bbfd9b6f 100644 --- a/colm/lmparse.kl +++ b/colm/lmparse.kl @@ -26,7 +26,6 @@ #include "lmparse.h" #include "global.h" #include "input.h" -#include "fsmrun.h" using std::cout; using std::cerr; diff --git a/colm/parsetree.cc b/colm/parsetree.cc index 02d8f68f..4c82410e 100644 --- a/colm/parsetree.cc +++ b/colm/parsetree.cc @@ -22,7 +22,6 @@ #include "lmparse.h" #include "parsetree.h" #include "input.h" -#include "fsmrun.h" #include #include diff --git a/colm/parsetree.h b/colm/parsetree.h index b9841c7e..9e65bf6d 100644 --- a/colm/parsetree.h +++ b/colm/parsetree.h @@ -35,7 +35,6 @@ #include "astring.h" #include "bytecode.h" #include "avlbasic.h" -#include 
"fsmrun.h" /* Operators that are represented with single symbol characters. */ #define OP_DoubleEql 'e' diff --git a/colm/pdabuild.cc b/colm/pdabuild.cc index 1dbd649e..6eb929f7 100644 --- a/colm/pdabuild.cc +++ b/colm/pdabuild.cc @@ -32,7 +32,6 @@ #include "redfsm.h" #include "fsmcodegen.h" #include "redbuild.h" -#include "fsmrun.h" /* Dumping the fsm. */ #include "mergesort.h" diff --git a/colm/pdarun.c b/colm/pdarun.c index ec868c7a..778f6339 100644 --- a/colm/pdarun.c +++ b/colm/pdarun.c @@ -22,7 +22,6 @@ #include "config.h" #include "debug.h" #include "pdarun.h" -#include "fsmrun.h" #include "bytecode.h" #include "tree.h" #include "pool.h" diff --git a/colm/pdarun.h b/colm/pdarun.h index 1da0ef42..d7e83772 100644 --- a/colm/pdarun.h +++ b/colm/pdarun.h @@ -23,7 +23,6 @@ #define __COLM_PDARUN_H #include -#include #include #include diff --git a/colm/program.c b/colm/program.c index 412473e5..b604fabe 100644 --- a/colm/program.c +++ b/colm/program.c @@ -20,7 +20,6 @@ */ #include -#include #include #include #include diff --git a/colm/redbuild.h b/colm/redbuild.h index dbbb3e19..5ae75406 100644 --- a/colm/redbuild.h +++ b/colm/redbuild.h @@ -26,7 +26,6 @@ #include "avltree.h" #include "fsmgraph.h" #include "parsedata.h" -#include "fsmrun.h" /* Forwards. 
*/ struct FsmTrans; diff --git a/colm/redfsm.cc b/colm/redfsm.cc index d8e4a983..5ec075ce 100644 --- a/colm/redfsm.cc +++ b/colm/redfsm.cc @@ -26,7 +26,6 @@ #include "mergesort.h" #include "fsmgraph.h" #include "parsetree.h" -#include "fsmrun.h" using std::ostringstream; diff --git a/colm/resolve.cc b/colm/resolve.cc index bf639738..6fc4b53e 100644 --- a/colm/resolve.cc +++ b/colm/resolve.cc @@ -21,7 +21,6 @@ #include "bytecode.h" #include "parsedata.h" -#include "fsmrun.h" #include #include diff --git a/colm/synthesis.cc b/colm/synthesis.cc index 2c7e7e8b..bc40af95 100644 --- a/colm/synthesis.cc +++ b/colm/synthesis.cc @@ -21,7 +21,6 @@ #include "bytecode.h" #include "parsedata.h" -#include "fsmrun.h" #include "pdarun.h" #include "input.h" #include -- cgit v1.2.1 From 290fcd2059200a17b27e98a904cd412fe76beb13 Mon Sep 17 00:00:00 2001 From: Adrian Thurston Date: Sat, 9 Feb 2013 12:34:23 -0500 Subject: merged initFsmRun into initPdaRun --- colm/compiler.cc | 3 +-- colm/pdarun.c | 8 +++++--- colm/pdarun.h | 6 ++---- colm/tree.c | 5 ++--- 4 files changed, 10 insertions(+), 12 deletions(-) diff --git a/colm/compiler.cc b/colm/compiler.cc index 687e2791..45eb35cb 100644 --- a/colm/compiler.cc +++ b/colm/compiler.cc @@ -1243,8 +1243,7 @@ PdaRun *Compiler::parsePattern( Program *prg, Tree **sp, const InputLoc &loc, PdaRun *pdaRun = new PdaRun; initStreamImpl( in ); - initPdaRun( pdaRun, prg, pdaTables, fsmRun, parserId, 0, false, 0 ); - initFsmRun( fsmRun, prg ); + initPdaRun( prg, pdaRun, fsmRun, pdaTables, parserId, 0, false, 0 ); Stream *res = streamAllocate( prg ); res->id = LEL_ID_STREAM; diff --git a/colm/pdarun.c b/colm/pdarun.c index 778f6339..5bf9216e 100644 --- a/colm/pdarun.c +++ b/colm/pdarun.c @@ -56,7 +56,7 @@ i = (Tree*)w; \ } while(0) -void initFsmRun( FsmRun *fsmRun, Program *prg ) +static void initFsmRun( Program *prg, FsmRun *fsmRun ) { fsmRun->tables = prg->rtd->fsmTables; @@ -1429,8 +1429,8 @@ int isParserStopFinished( PdaRun *pdaRun ) return done; } 
-void initPdaRun( PdaRun *pdaRun, Program *prg, PdaTables *tables, - FsmRun *fsmRun, int parserId, long stopTarget, int revertOn, Tree *context ) +void initPdaRun( Program *prg, PdaRun *pdaRun, FsmRun *fsmRun, PdaTables *tables, + int parserId, long stopTarget, int revertOn, Tree *context ) { memset( pdaRun, 0, sizeof(PdaRun) ); pdaRun->tables = tables; @@ -1478,6 +1478,8 @@ void initPdaRun( PdaRun *pdaRun, Program *prg, PdaTables *tables, pdaRun->reject = false; pdaRun->rcBlockCount = 0; + + initFsmRun( prg, fsmRun ); } long stackTopTarget( Program *prg, PdaRun *pdaRun ) diff --git a/colm/pdarun.h b/colm/pdarun.h index d7e83772..c43997f7 100644 --- a/colm/pdarun.h +++ b/colm/pdarun.h @@ -92,14 +92,12 @@ typedef struct _FsmRun long matchedToken; } FsmRun; -void initFsmRun( FsmRun *fsmRun, struct ColmProgram *prg ); void clearFsmRun( struct ColmProgram *prg, FsmRun *fsmRun ); void updatePosition( StreamImpl *inputStream, const char *data, long length ); void undoPosition( StreamImpl *inputStream, const char *data, long length ); void sendBackRunBufHead( FsmRun *fsmRun, StreamImpl *inputStream ); void undoStreamPull( StreamImpl *inputStream, const char *data, long length ); - #if SIZEOF_LONG != 4 && SIZEOF_LONG != 8 #error "SIZEOF_LONG contained an unexpected value" #endif @@ -400,8 +398,8 @@ void decrementSteps( PdaRun *pdaRun ); int makeReverseCode( PdaRun *pdaRun ); void transferReverseCode( PdaRun *pdaRun, ParseTree *tree ); -void initPdaRun( PdaRun *pdaRun, struct ColmProgram *prg, PdaTables *tables, - FsmRun *fsmRun, int parserId, long stopTarget, int revertOn, Tree *context ); +void initPdaRun( struct ColmProgram *prg, PdaRun *pdaRun, FsmRun *fsmRun, PdaTables *tables, + int parserId, long stopTarget, int revertOn, Tree *context ); void clearPdaRun( struct ColmProgram *prg, Tree **root, PdaRun *pdaRun ); void initStreamImpl( StreamImpl *inputStream ); diff --git a/colm/tree.c b/colm/tree.c index edd0dc79..b14273f5 100644 --- a/colm/tree.c +++ b/colm/tree.c @@ 
-990,9 +990,8 @@ Tree *createGeneric( Program *prg, long genericId ) parser->pdaRun = malloc( sizeof(PdaRun) ); /* Start off the parsing process. */ - initPdaRun( parser->pdaRun, prg, prg->rtd->pdaTables, - parser->fsmRun, genericInfo->parserId, false, false, 0 ); - initFsmRun( parser->fsmRun, prg ); + initPdaRun( prg, parser->pdaRun, parser->fsmRun, prg->rtd->pdaTables, + genericInfo->parserId, false, false, 0 ); newToken( prg, parser->pdaRun, parser->fsmRun ); newGeneric = (Tree*) parser; -- cgit v1.2.1 From 3853e924647f680a8ec7d70367562cf11a29189d Mon Sep 17 00:00:00 2001 From: Adrian Thurston Date: Sat, 9 Feb 2013 12:40:27 -0500 Subject: merged call to newToken into initPdaRun, made it static --- colm/compiler.cc | 1 - colm/pdarun.c | 3 ++- colm/pdarun.h | 1 - colm/tree.c | 1 - 4 files changed, 2 insertions(+), 4 deletions(-) diff --git a/colm/compiler.cc b/colm/compiler.cc index 45eb35cb..f87f39ad 100644 --- a/colm/compiler.cc +++ b/colm/compiler.cc @@ -1251,7 +1251,6 @@ PdaRun *Compiler::parsePattern( Program *prg, Tree **sp, const InputLoc &loc, in->funcs->appendStream( in, (Tree*)res ); in->funcs->setEof( in ); - newToken( prg, pdaRun, fsmRun ); long pcr = parseLoop( prg, sp, pdaRun, fsmRun, in, PcrStart ); assert( pcr == PcrDone ); if ( pdaRun->parseError ) { diff --git a/colm/pdarun.c b/colm/pdarun.c index 5bf9216e..360d426f 100644 --- a/colm/pdarun.c +++ b/colm/pdarun.c @@ -939,7 +939,7 @@ static void sendEof( Program *prg, Tree **sp, StreamImpl *is, FsmRun *fsmRun, Pd pdaRun->parseInput = parseTree; } -void newToken( Program *prg, PdaRun *pdaRun, FsmRun *fsmRun ) +static void newToken( Program *prg, PdaRun *pdaRun, FsmRun *fsmRun ) { fsmRun->p = fsmRun->pe = 0; fsmRun->toklen = 0; @@ -1480,6 +1480,7 @@ void initPdaRun( Program *prg, PdaRun *pdaRun, FsmRun *fsmRun, PdaTables *tables pdaRun->rcBlockCount = 0; initFsmRun( prg, fsmRun ); + newToken( prg, pdaRun, fsmRun ); } long stackTopTarget( Program *prg, PdaRun *pdaRun ) diff --git a/colm/pdarun.h 
b/colm/pdarun.h index c43997f7..6b9b065c 100644 --- a/colm/pdarun.h +++ b/colm/pdarun.h @@ -453,7 +453,6 @@ long sendBackQueuedIgnore( struct ColmProgram *prg, Tree **sp, StreamImpl *input void clearIgnoreList( struct ColmProgram *prg, Tree **sp, Kid *kid ); Head *extractMatch( struct ColmProgram *prg, FsmRun *fsmRun, StreamImpl *inputStream ); Head *extractMatch( struct ColmProgram *prg, FsmRun *fsmRun, StreamImpl *inputStream ); -void newToken( struct ColmProgram *prg, PdaRun *pdaRun, FsmRun *fsmRun ); void fsmExecute( FsmRun *fsmRun, StreamImpl *inputStream ); void sendNamedLangEl( struct ColmProgram *prg, Tree **sp, PdaRun *pdaRun, FsmRun *fsmRun, StreamImpl *inputStream ); long parseLoop( struct ColmProgram *prg, Tree **sp, PdaRun *pdaRun, diff --git a/colm/tree.c b/colm/tree.c index b14273f5..86474779 100644 --- a/colm/tree.c +++ b/colm/tree.c @@ -992,7 +992,6 @@ Tree *createGeneric( Program *prg, long genericId ) /* Start off the parsing process. */ initPdaRun( prg, parser->pdaRun, parser->fsmRun, prg->rtd->pdaTables, genericInfo->parserId, false, false, 0 ); - newToken( prg, parser->pdaRun, parser->fsmRun ); newGeneric = (Tree*) parser; break; -- cgit v1.2.1 From 0e22038b0cd1230e88888547fe862e26b6ce7945 Mon Sep 17 00:00:00 2001 From: Adrian Thurston Date: Sat, 9 Feb 2013 13:40:03 -0500 Subject: store FsmRun inside PdaRun, remove from Parser --- colm/bytecode.c | 14 +++++++------- colm/compiler.cc | 6 +++--- colm/pdarun.c | 2 ++ colm/pdarun.h | 2 ++ colm/tree.c | 8 ++++---- colm/tree.h | 1 - 6 files changed, 18 insertions(+), 15 deletions(-) diff --git a/colm/bytecode.c b/colm/bytecode.c index e8a89c7d..1b105a76 100644 --- a/colm/bytecode.c +++ b/colm/bytecode.c @@ -192,7 +192,7 @@ case PcrStart: if ( ! 
parser->pdaRun->parseError ) { parser->pdaRun->stopTarget = stopId; - long pcr = parseLoop( prg, sp, parser->pdaRun, parser->fsmRun, parser->input->in, entry ); + long pcr = parseLoop( prg, sp, parser->pdaRun, parser->pdaRun->fsmRun, parser->input->in, entry ); while ( pcr != PcrDone ) { @@ -202,7 +202,7 @@ case PcrGeneration: case PcrPreEof: case PcrReverse: - pcr = parseLoop( prg, sp, parser->pdaRun, parser->fsmRun, parser->input->in, entry ); + pcr = parseLoop( prg, sp, parser->pdaRun, parser->pdaRun->fsmRun, parser->input->in, entry ); } } @@ -222,7 +222,7 @@ case PcrStart: parser->input->in->funcs->setEof( parser->input->in ); if ( ! parser->pdaRun->parseError ) { - long pcr = parseLoop( prg, sp, parser->pdaRun, parser->fsmRun, parser->input->in, entry ); + long pcr = parseLoop( prg, sp, parser->pdaRun, parser->pdaRun->fsmRun, parser->input->in, entry ); while ( pcr != PcrDone ) { @@ -232,7 +232,7 @@ case PcrGeneration: case PcrPreEof: case PcrReverse: - pcr = parseLoop( prg, sp, parser->pdaRun, parser->fsmRun, parser->input->in, entry ); + pcr = parseLoop( prg, sp, parser->pdaRun, parser->pdaRun->fsmRun, parser->input->in, entry ); } } } @@ -259,7 +259,7 @@ break; } long undoParseFrag( Program *prg, Tree **sp, Parser *parser, long steps, long entry ) { StreamImpl *is = parser->input->in; - FsmRun *fsmRun = parser->fsmRun; + FsmRun *fsmRun = parser->pdaRun->fsmRun; PdaRun *pdaRun = parser->pdaRun; debug( REALM_PARSE, "undo parse frag, target steps: %ld, pdarun steps: %ld\n", steps, pdaRun->steps ); @@ -1153,7 +1153,7 @@ again: /* If there are captures (this is a translate block) then copy them into * the local frame now. 
*/ LangElInfo *lelInfo = prg->rtd->lelInfo; - char **mark = exec->parser->fsmRun->mark; + char **mark = exec->parser->pdaRun->fsmRun->mark; int i; for ( i = 0; i < lelInfo[exec->parser->pdaRun->tokenId].numCaptureAttr; i++ ) { @@ -2457,7 +2457,7 @@ again: Stream *accumStream = (Stream*)vm_pop(); Tree *len = vm_pop(); - Tree *string = streamPullBc( prg, exec->parser->fsmRun, accumStream->in, len ); + Tree *string = streamPullBc( prg, exec->parser->pdaRun->fsmRun, accumStream->in, len ); treeUpref( string ); vm_push( string ); diff --git a/colm/compiler.cc b/colm/compiler.cc index f87f39ad..23262385 100644 --- a/colm/compiler.cc +++ b/colm/compiler.cc @@ -1239,11 +1239,11 @@ PdaRun *Compiler::parsePattern( Program *prg, Tree **sp, const InputLoc &loc, int parserId, StreamImpl *sourceStream ) { StreamImpl *in = new StreamImpl; - FsmRun *fsmRun = new FsmRun; PdaRun *pdaRun = new PdaRun; + pdaRun->fsmRun = new FsmRun; initStreamImpl( in ); - initPdaRun( prg, pdaRun, fsmRun, pdaTables, parserId, 0, false, 0 ); + initPdaRun( prg, pdaRun, pdaRun->fsmRun, pdaTables, parserId, 0, false, 0 ); Stream *res = streamAllocate( prg ); res->id = LEL_ID_STREAM; @@ -1251,7 +1251,7 @@ PdaRun *Compiler::parsePattern( Program *prg, Tree **sp, const InputLoc &loc, in->funcs->appendStream( in, (Tree*)res ); in->funcs->setEof( in ); - long pcr = parseLoop( prg, sp, pdaRun, fsmRun, in, PcrStart ); + long pcr = parseLoop( prg, sp, pdaRun, pdaRun->fsmRun, in, PcrStart ); assert( pcr == PcrDone ); if ( pdaRun->parseError ) { cout << "PARSE ERROR " << loc.line << ":" << loc.col; diff --git a/colm/pdarun.c b/colm/pdarun.c index 360d426f..0e48efc0 100644 --- a/colm/pdarun.c +++ b/colm/pdarun.c @@ -1479,6 +1479,8 @@ void initPdaRun( Program *prg, PdaRun *pdaRun, FsmRun *fsmRun, PdaTables *tables pdaRun->rcBlockCount = 0; + pdaRun->fsmRun = fsmRun; + initFsmRun( prg, fsmRun ); newToken( prg, pdaRun, fsmRun ); } diff --git a/colm/pdarun.h b/colm/pdarun.h index 6b9b065c..818a753c 100644 --- 
a/colm/pdarun.h +++ b/colm/pdarun.h @@ -346,6 +346,8 @@ typedef struct _PdaRun int rcBlockCount; Tree *parseErrorText; + + FsmRun *fsmRun; } PdaRun; void rtCodeVectReplace( RtCodeVect *vect, long pos, const Code *val, long len ); diff --git a/colm/tree.c b/colm/tree.c index 86474779..767dc35d 100644 --- a/colm/tree.c +++ b/colm/tree.c @@ -986,11 +986,11 @@ Tree *createGeneric( Program *prg, long genericId ) Parser *parser = (Parser*)mapElAllocate( prg ); parser->id = genericInfo->langElId; parser->genericInfo = genericInfo; - parser->fsmRun = malloc( sizeof(FsmRun) ); parser->pdaRun = malloc( sizeof(PdaRun) ); + parser->pdaRun->fsmRun = malloc( sizeof(FsmRun) ); /* Start off the parsing process. */ - initPdaRun( prg, parser->pdaRun, parser->fsmRun, prg->rtd->pdaTables, + initPdaRun( prg, parser->pdaRun, parser->pdaRun->fsmRun, prg->rtd->pdaTables, genericInfo->parserId, false, false, 0 ); newGeneric = (Tree*) parser; @@ -1043,10 +1043,10 @@ free_tree: } else if ( generic->type == GEN_PARSER ) { Parser *parser = (Parser*)tree; - clearFsmRun( prg, parser->fsmRun ); + clearFsmRun( prg, parser->pdaRun->fsmRun ); clearPdaRun( prg, sp, parser->pdaRun ); + free( parser->pdaRun->fsmRun ); free( parser->pdaRun ); - free( parser->fsmRun ); treeDownref( prg, sp, (Tree*)parser->input ); mapElFree( prg, (MapEl*)parser ); } diff --git a/colm/tree.h b/colm/tree.h index 8b6d509d..529c0185 100644 --- a/colm/tree.h +++ b/colm/tree.h @@ -203,7 +203,6 @@ typedef struct _Parser GenericInfo *genericInfo; struct _PdaRun *pdaRun; - struct _FsmRun *fsmRun; struct _Stream *input; Tree *result; } Parser; -- cgit v1.2.1 From 968a0f7831ed89030f8eabf842c7bc6270f7d81f Mon Sep 17 00:00:00 2001 From: Adrian Thurston Date: Sat, 9 Feb 2013 13:44:24 -0500 Subject: removed the FsmRun arg from the parseLoop call --- colm/bytecode.c | 12 ++++++------ colm/compiler.cc | 2 +- colm/pdarun.c | 3 ++- colm/pdarun.h | 2 +- 4 files changed, 10 insertions(+), 9 deletions(-) diff --git a/colm/bytecode.c 
b/colm/bytecode.c index 1b105a76..ff407332 100644 --- a/colm/bytecode.c +++ b/colm/bytecode.c @@ -192,7 +192,7 @@ case PcrStart: if ( ! parser->pdaRun->parseError ) { parser->pdaRun->stopTarget = stopId; - long pcr = parseLoop( prg, sp, parser->pdaRun, parser->pdaRun->fsmRun, parser->input->in, entry ); + long pcr = parseLoop( prg, sp, parser->pdaRun, parser->input->in, entry ); while ( pcr != PcrDone ) { @@ -202,7 +202,7 @@ case PcrGeneration: case PcrPreEof: case PcrReverse: - pcr = parseLoop( prg, sp, parser->pdaRun, parser->pdaRun->fsmRun, parser->input->in, entry ); + pcr = parseLoop( prg, sp, parser->pdaRun, parser->input->in, entry ); } } @@ -222,7 +222,7 @@ case PcrStart: parser->input->in->funcs->setEof( parser->input->in ); if ( ! parser->pdaRun->parseError ) { - long pcr = parseLoop( prg, sp, parser->pdaRun, parser->pdaRun->fsmRun, parser->input->in, entry ); + long pcr = parseLoop( prg, sp, parser->pdaRun, parser->input->in, entry ); while ( pcr != PcrDone ) { @@ -232,7 +232,7 @@ case PcrGeneration: case PcrPreEof: case PcrReverse: - pcr = parseLoop( prg, sp, parser->pdaRun, parser->pdaRun->fsmRun, parser->input->in, entry ); + pcr = parseLoop( prg, sp, parser->pdaRun, parser->input->in, entry ); } } } @@ -277,7 +277,7 @@ case PcrStart: pdaRun->triggerUndo = 1; /* The parse loop will recognise the situation. */ - long pcr = parseLoop( prg, sp, pdaRun, fsmRun, is, entry ); + long pcr = parseLoop( prg, sp, pdaRun, is, entry ); while ( pcr != PcrDone ) { return pcr; @@ -286,7 +286,7 @@ case PcrGeneration: case PcrPreEof: case PcrReverse: - pcr = parseLoop( prg, sp, pdaRun, fsmRun, is, entry ); + pcr = parseLoop( prg, sp, pdaRun, is, entry ); } /* Reset environment. 
*/ diff --git a/colm/compiler.cc b/colm/compiler.cc index 23262385..25d95397 100644 --- a/colm/compiler.cc +++ b/colm/compiler.cc @@ -1251,7 +1251,7 @@ PdaRun *Compiler::parsePattern( Program *prg, Tree **sp, const InputLoc &loc, in->funcs->appendStream( in, (Tree*)res ); in->funcs->setEof( in ); - long pcr = parseLoop( prg, sp, pdaRun, pdaRun->fsmRun, in, PcrStart ); + long pcr = parseLoop( prg, sp, pdaRun, in, PcrStart ); assert( pcr == PcrDone ); if ( pdaRun->parseError ) { cout << "PARSE ERROR " << loc.line << ":" << loc.col; diff --git a/colm/pdarun.c b/colm/pdarun.c index 0e48efc0..cff8854b 100644 --- a/colm/pdarun.c +++ b/colm/pdarun.c @@ -1108,8 +1108,9 @@ long scanToken( Program *prg, PdaRun *pdaRun, FsmRun *fsmRun, StreamImpl *is ) */ long parseLoop( Program *prg, Tree **sp, PdaRun *pdaRun, - FsmRun *fsmRun, StreamImpl *is, long entry ) + StreamImpl *is, long entry ) { + FsmRun *fsmRun = pdaRun->fsmRun; LangElInfo *lelInfo = prg->rtd->lelInfo; switch ( entry ) { diff --git a/colm/pdarun.h b/colm/pdarun.h index 818a753c..17ea1ab8 100644 --- a/colm/pdarun.h +++ b/colm/pdarun.h @@ -458,7 +458,7 @@ Head *extractMatch( struct ColmProgram *prg, FsmRun *fsmRun, StreamImpl *inputSt void fsmExecute( FsmRun *fsmRun, StreamImpl *inputStream ); void sendNamedLangEl( struct ColmProgram *prg, Tree **sp, PdaRun *pdaRun, FsmRun *fsmRun, StreamImpl *inputStream ); long parseLoop( struct ColmProgram *prg, Tree **sp, PdaRun *pdaRun, - FsmRun *fsmRun, StreamImpl *inputStream, long entry ); + StreamImpl *inputStream, long entry ); void initBindings( PdaRun *pdaRun ); Tree *getParsedRoot( PdaRun *pdaRun, int stop ); void undoParseStream( struct ColmProgram *prg, Tree **sp, StreamImpl *inputStream, FsmRun *fsmRun, -- cgit v1.2.1 From fa4a8cddabbb7f3ef30b91b038cf97c3ab905026 Mon Sep 17 00:00:00 2001 From: Adrian Thurston Date: Sat, 9 Feb 2013 13:48:15 -0500 Subject: check return value of write call, suppresses warning --- colm/tree.c | 5 ++++- 1 file changed, 4 insertions(+), 1 
deletion(-) diff --git a/colm/tree.c b/colm/tree.c index 767dc35d..341aad8d 100644 --- a/colm/tree.c +++ b/colm/tree.c @@ -2052,7 +2052,10 @@ void appendFile( struct ColmPrintArgs *args, const char *data, int length ) void appendFd( struct ColmPrintArgs *args, const char *data, int length ) { - write( (long)args->arg, data, length ); + int res = write( (long)args->arg, data, length ); + if ( res != 0 ) { + message( "write error\n" ); + } } Tree *treeTrim( struct ColmProgram *prg, Tree **sp, Tree *tree ) -- cgit v1.2.1 From 4db469d5320b8a7370d01113fb91c122ffd144cb Mon Sep 17 00:00:00 2001 From: Adrian Thurston Date: Sat, 9 Feb 2013 20:32:37 -0500 Subject: converted resetToken to take PdaRun --- colm/bytecode.c | 3 +-- colm/pdarun.c | 4 +++- colm/pdarun.h | 2 +- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/colm/bytecode.c b/colm/bytecode.c index ff407332..afa67060 100644 --- a/colm/bytecode.c +++ b/colm/bytecode.c @@ -259,12 +259,11 @@ break; } long undoParseFrag( Program *prg, Tree **sp, Parser *parser, long steps, long entry ) { StreamImpl *is = parser->input->in; - FsmRun *fsmRun = parser->pdaRun->fsmRun; PdaRun *pdaRun = parser->pdaRun; debug( REALM_PARSE, "undo parse frag, target steps: %ld, pdarun steps: %ld\n", steps, pdaRun->steps ); - resetToken( fsmRun ); + resetToken( pdaRun ); switch ( entry ) { case PcrStart: diff --git a/colm/pdarun.c b/colm/pdarun.c index cff8854b..dfd67074 100644 --- a/colm/pdarun.c +++ b/colm/pdarun.c @@ -330,8 +330,10 @@ void clearBuffered( FsmRun *fsmRun ) } } -void resetToken( FsmRun *fsmRun ) +void resetToken( PdaRun *pdaRun ) { + FsmRun *fsmRun = pdaRun->fsmRun; + /* If there is a token started, but never finished for a lack of data, we * must first backup over it.
*/ if ( fsmRun->tokstart != 0 ) { diff --git a/colm/pdarun.h b/colm/pdarun.h index 17ea1ab8..828d2db6 100644 --- a/colm/pdarun.h +++ b/colm/pdarun.h @@ -465,7 +465,7 @@ void undoParseStream( struct ColmProgram *prg, Tree **sp, StreamImpl *inputStrea PdaRun *pdaRun, long steps ); void clearBuffered( FsmRun *fsmRun ); -void resetToken( FsmRun *fsmRun ); +void resetToken( PdaRun *pdaRun ); #ifdef __cplusplus } -- cgit v1.2.1 From 3b1f71094782b21c0c8da0cd12c1fe97adcecd30 Mon Sep 17 00:00:00 2001 From: Adrian Thurston Date: Sat, 9 Feb 2013 20:50:06 -0500 Subject: removal of fsmRun arg from a number of stream interface functions --- colm/bytecode.c | 16 ++++++++-------- colm/pdarun.c | 10 +++++----- colm/pdarun.h | 10 +++++----- 3 files changed, 18 insertions(+), 18 deletions(-) diff --git a/colm/bytecode.c b/colm/bytecode.c index afa67060..d167013c 100644 --- a/colm/bytecode.c +++ b/colm/bytecode.c @@ -314,7 +314,7 @@ void undoPull( Program *prg, StreamImpl *in, Tree *str ) undoStreamPull( in, data, length ); } -long streamPush( Program *prg, Tree **sp, FsmRun *fsmRun, StreamImpl *in, Tree *tree, int ignore ) +static long streamPush( Program *prg, Tree **sp, StreamImpl *in, Tree *tree, int ignore ) { if ( tree->id == LEL_ID_STR ) { /* This should become a compile error. 
If it's text, it's up to the @@ -326,7 +326,7 @@ long streamPush( Program *prg, Tree **sp, FsmRun *fsmRun, StreamImpl *in, Tree * initStrCollect( &collect ); printTreeCollect( prg, sp, &collect, tree, true ); - streamPushText( fsmRun, in, collect.data, collect.length ); + streamPushText( in, collect.data, collect.length ); long length = collect.length; strCollectDestroy( &collect ); @@ -334,12 +334,12 @@ long streamPush( Program *prg, Tree **sp, FsmRun *fsmRun, StreamImpl *in, Tree * } else if ( tree->id == LEL_ID_STREAM ) { treeUpref( tree ); - streamPushStream( fsmRun, in, tree ); + streamPushStream( in, tree ); return -1; } else { treeUpref( tree ); - streamPushTree( fsmRun, in, tree, ignore ); + streamPushTree( in, tree, ignore ); return -1; } } @@ -2155,7 +2155,7 @@ again: debug( REALM_BYTECODE, "IN_INPUT_APPEND_BKT\n" ); - undoStreamAppend( prg, sp, 0, ((Stream*)accumStream)->in, input, len ); + undoStreamAppend( prg, sp, ((Stream*)accumStream)->in, input, len ); treeDownref( prg, sp, accumStream ); treeDownref( prg, sp, input ); break; @@ -2488,7 +2488,7 @@ again: Stream *input = (Stream*)vm_pop(); Tree *tree = vm_pop(); - long len = streamPush( prg, sp, 0, input->in, tree, false ); + long len = streamPush( prg, sp, input->in, tree, false ); vm_push( 0 ); /* Single unit. */ @@ -2505,7 +2505,7 @@ again: Stream *input = (Stream*)vm_pop(); Tree *tree = vm_pop(); - long len = streamPush( prg, sp, 0, input->in, tree, true ); + long len = streamPush( prg, sp, input->in, tree, true ); vm_push( 0 ); /* Single unit. 
*/ @@ -2525,7 +2525,7 @@ again: debug( REALM_BYTECODE, "IN_INPUT_PUSH_BKT\n" ); - undoStreamPush( prg, sp, 0, input->in, len ); + undoStreamPush( prg, sp, input->in, len ); treeDownref( prg, sp, (Tree*)input ); break; } diff --git a/colm/pdarun.c b/colm/pdarun.c index dfd67074..e789c8e3 100644 --- a/colm/pdarun.c +++ b/colm/pdarun.c @@ -223,22 +223,22 @@ void undoStreamPull( StreamImpl *is, const char *data, long length ) is->funcs->prependData( is, data, length ); } -void streamPushText( FsmRun *fsmRun, StreamImpl *is, const char *data, long length ) +void streamPushText( StreamImpl *is, const char *data, long length ) { is->funcs->prependData( is, data, length ); } -void streamPushTree( FsmRun *fsmRun, StreamImpl *is, Tree *tree, int ignore ) +void streamPushTree( StreamImpl *is, Tree *tree, int ignore ) { is->funcs->prependTree( is, tree, ignore ); } -void streamPushStream( FsmRun *fsmRun, StreamImpl *is, Tree *tree ) +void streamPushStream( StreamImpl *is, Tree *tree ) { is->funcs->prependStream( is, tree ); } -void undoStreamPush( Program *prg, Tree **sp, FsmRun *fsmRun, StreamImpl *is, long length ) +void undoStreamPush( Program *prg, Tree **sp, StreamImpl *is, long length ) { if ( length < 0 ) { Tree *tree = is->funcs->undoPrependTree( is ); @@ -249,7 +249,7 @@ void undoStreamPush( Program *prg, Tree **sp, FsmRun *fsmRun, StreamImpl *is, lo } } -void undoStreamAppend( Program *prg, Tree **sp, FsmRun *fsmRun, StreamImpl *is, Tree *input, long length ) +void undoStreamAppend( Program *prg, Tree **sp, StreamImpl *is, Tree *input, long length ) { if ( input->id == LEL_ID_STR ) is->funcs->undoAppendData( is, length ); diff --git a/colm/pdarun.h b/colm/pdarun.h index 828d2db6..17fa3aa7 100644 --- a/colm/pdarun.h +++ b/colm/pdarun.h @@ -436,11 +436,11 @@ long undoParse( Tree **sp, PdaRun *pdaRun, FsmRun *fsmRun, StreamImpl *inputStre Head *streamPull( struct ColmProgram *prg, FsmRun *fsmRun, StreamImpl *inputStream, long length ); Head *stringAllocPointer( struct 
ColmProgram *prg, const char *data, long length ); -void streamPushText( FsmRun *fsmRun, StreamImpl *inputStream, const char *data, long length ); -void streamPushTree( FsmRun *fsmRun, StreamImpl *inputStream, Tree *tree, int ignore ); -void streamPushStream( FsmRun *fsmRun, StreamImpl *inputStream, Tree *tree ); -void undoStreamPush( struct ColmProgram *prg, Tree **sp, FsmRun *fsmRun, StreamImpl *inputStream, long length ); -void undoStreamAppend( struct ColmProgram *prg, Tree **sp, FsmRun *fsmRun, StreamImpl *inputStream, struct ColmTree *tree, long length ); +void streamPushText( StreamImpl *inputStream, const char *data, long length ); +void streamPushTree( StreamImpl *inputStream, Tree *tree, int ignore ); +void streamPushStream( StreamImpl *inputStream, Tree *tree ); +void undoStreamPush( struct ColmProgram *prg, Tree **sp, StreamImpl *inputStream, long length ); +void undoStreamAppend( struct ColmProgram *prg, Tree **sp, StreamImpl *inputStream, struct ColmTree *tree, long length ); Kid *makeTokenWithData( struct ColmProgram *prg, PdaRun *pdaRun, FsmRun *fsmRun, StreamImpl *inputStream, int id, Head *tokdata ); -- cgit v1.2.1 From f5fc6b33201cf16c47f713f5b5572e787e556e94 Mon Sep 17 00:00:00 2001 From: Adrian Thurston Date: Sat, 9 Feb 2013 20:58:45 -0500 Subject: removed FsmRun arg from getData and getParseBlock input interfaces --- colm/ctinput.cc | 9 ++++----- colm/input.c | 14 ++++++-------- colm/input.h | 6 ++---- colm/pdarun.c | 8 ++++---- 4 files changed, 16 insertions(+), 21 deletions(-) diff --git a/colm/ctinput.cc b/colm/ctinput.cc index c2d74fc1..1bf6097f 100644 --- a/colm/ctinput.cc +++ b/colm/ctinput.cc @@ -59,8 +59,7 @@ LangEl *inputStreamPatternGetLangEl( StreamImpl *ss, long *bindId, char **data, return klangEl; } -int inputStreamPatternGetParseBlock( FsmRun *fsmRun, StreamImpl *ss, - int skip, char **pdp, int *copied ) +int inputStreamPatternGetParseBlock( StreamImpl *ss, int skip, char **pdp, int *copied ) { *copied = 0; @@ -107,7 +106,7 @@ 
int inputStreamPatternGetParseBlock( FsmRun *fsmRun, StreamImpl *ss, return INPUT_DATA; } -int inputStreamPatternGetData( FsmRun *fsmRun, StreamImpl *ss, char *dest, int length ) +int inputStreamPatternGetData( StreamImpl *ss, char *dest, int length ) { int copied = 0; @@ -273,7 +272,7 @@ LangEl *inputStreamConsGetLangEl( StreamImpl *ss, long *bindId, char **data, lon return klangEl; } -int inputStreamConsGetParseBlock( FsmRun *fsmRun, StreamImpl *ss, +int inputStreamConsGetParseBlock( StreamImpl *ss, int skip, char **pdp, int *copied ) { *copied = 0; @@ -321,7 +320,7 @@ int inputStreamConsGetParseBlock( FsmRun *fsmRun, StreamImpl *ss, return INPUT_DATA; } -int inputStreamConsGetData( FsmRun *fsmRun, StreamImpl *ss, char *dest, int length ) +int inputStreamConsGetData( StreamImpl *ss, char *dest, int length ) { int copied = 0; diff --git a/colm/input.c b/colm/input.c index 532170ee..959940c6 100644 --- a/colm/input.c +++ b/colm/input.c @@ -147,8 +147,7 @@ void initInputFuncs() * Base run-time input streams. 
*/ -int fdGetParseBlock( FsmRun *fsmRun, StreamImpl *ss, - int skip, char **pdp, int *copied ) +int fdGetParseBlock( StreamImpl *ss, int skip, char **pdp, int *copied ) { int ret = 0; *copied = 0; @@ -207,7 +206,7 @@ int fdGetParseBlock( FsmRun *fsmRun, StreamImpl *ss, return ret; } -int fdGetData( FsmRun *fsmRun, StreamImpl *ss, char *dest, int length ) +int fdGetData( StreamImpl *ss, char *dest, int length ) { int copied = 0; @@ -461,8 +460,7 @@ static void _unsetEof( StreamImpl *is ) } } -static int _getParseBlock( FsmRun *fsmRun, StreamImpl *is, - int skip, char **pdp, int *copied ) +static int _getParseBlock( StreamImpl *is, int skip, char **pdp, int *copied ) { int ret = 0; *copied = 0; @@ -478,7 +476,7 @@ static int _getParseBlock( FsmRun *fsmRun, StreamImpl *is, if ( buf->type == RunBufSourceType ) { Stream *stream = (Stream*)buf->tree; - int type = stream->in->funcs->getParseBlock( fsmRun, stream->in, skip, pdp, copied ); + int type = stream->in->funcs->getParseBlock( stream->in, skip, pdp, copied ); // if ( type == INPUT_EOD && !stream->in->eosSent ) { // stream->in->eosSent = 1; @@ -561,7 +559,7 @@ static int _getParseBlock( FsmRun *fsmRun, StreamImpl *is, return ret; } -static int _getData( FsmRun *fsmRun, StreamImpl *is, char *dest, int length ) +static int _getData( StreamImpl *is, char *dest, int length ) { int copied = 0; @@ -575,7 +573,7 @@ static int _getData( FsmRun *fsmRun, StreamImpl *is, char *dest, int length ) if ( buf->type == RunBufSourceType ) { Stream *stream = (Stream*)buf->tree; - int glen = stream->in->funcs->getData( fsmRun, stream->in, dest+copied, length ); + int glen = stream->in->funcs->getData( stream->in, dest+copied, length ); if ( glen == 0 ) { debug( REALM_INPUT, "skipping over input\n" ); diff --git a/colm/input.h b/colm/input.h index af76f31c..3b426a50 100644 --- a/colm/input.h +++ b/colm/input.h @@ -88,11 +88,9 @@ typedef struct _StreamImpl StreamImpl; struct StreamFuncs { - int (*getParseBlock)( struct _FsmRun *fsmRun, 
StreamImpl *ss, - int skip, char **pdp, int *copied ); + int (*getParseBlock)( StreamImpl *ss, int skip, char **pdp, int *copied ); - int (*getData)( struct _FsmRun *fsmRun, StreamImpl *ss, - char *dest, int length ); + int (*getData)( StreamImpl *ss, char *dest, int length ); int (*consumeData)( StreamImpl *ss, int length ); int (*undoConsumeData)( struct _FsmRun *fsmRun, StreamImpl *ss, const char *data, int length ); diff --git a/colm/pdarun.c b/colm/pdarun.c index e789c8e3..8362e14d 100644 --- a/colm/pdarun.c +++ b/colm/pdarun.c @@ -140,7 +140,7 @@ Head *extractMatch( Program *prg, FsmRun *fsmRun, StreamImpl *is ) char *dest = runBuf->data + runBuf->length; - is->funcs->getData( fsmRun, is, dest, length ); + is->funcs->getData( is, dest, length ); is->funcs->consumeData( is, length ); runBuf->length += length; @@ -174,7 +174,7 @@ Head *peekMatch( Program *prg, FsmRun *fsmRun, StreamImpl *is ) char *dest = runBuf->data + runBuf->length; - is->funcs->getData( fsmRun, is, dest, length ); + is->funcs->getData( is, dest, length ); fsmRun->p = fsmRun->pe = 0; fsmRun->toklen = 0; @@ -202,7 +202,7 @@ Head *streamPull( Program *prg, FsmRun *fsmRun, StreamImpl *is, long length ) char *dest = runBuf->data + runBuf->length; - is->funcs->getData( fsmRun, is, dest, length ); + is->funcs->getData( is, dest, length ); is->funcs->consumeData( is, length ); runBuf->length += length; @@ -1012,7 +1012,7 @@ long scanToken( Program *prg, PdaRun *pdaRun, FsmRun *fsmRun, StreamImpl *is ) while ( true ) { char *pd = 0; int len = 0; - int type = is->funcs->getParseBlock( fsmRun, is, fsmRun->toklen, &pd, &len ); + int type = is->funcs->getParseBlock( is, fsmRun->toklen, &pd, &len ); switch ( type ) { case INPUT_DATA: -- cgit v1.2.1 From 6e9be06ee366728bb85fe3bc2b977597b655d09c Mon Sep 17 00:00:00 2001 From: Adrian Thurston Date: Sat, 9 Feb 2013 21:34:41 -0500 Subject: completely removed FsmRun from InputStream interface --- colm/ctinput.cc | 4 ++-- colm/input.c | 10 +++-------
colm/input.h | 2 +- colm/pdarun.c | 2 +- 4 files changed, 7 insertions(+), 11 deletions(-) diff --git a/colm/ctinput.cc b/colm/ctinput.cc index 1bf6097f..285e1faf 100644 --- a/colm/ctinput.cc +++ b/colm/ctinput.cc @@ -212,7 +212,7 @@ int inputStreamPatternConsumeData( StreamImpl *ss, int length ) return consumed; } -int inputStreamPatternUndoConsumeData( FsmRun *fsmRun, StreamImpl *ss, const char *data, int length ) +int inputStreamPatternUndoConsumeData( StreamImpl *ss, const char *data, int length ) { ss->offset -= length; return length; @@ -428,7 +428,7 @@ int inputStreamConsConsumeData( StreamImpl *ss, int length ) return consumed; } -int inputStreamConsUndoConsumeData( FsmRun *fsmRun, StreamImpl *ss, const char *data, int length ) +int inputStreamConsUndoConsumeData( StreamImpl *ss, const char *data, int length ) { ss->offset -= length; return length; diff --git a/colm/input.c b/colm/input.c index 959940c6..b73eacbd 100644 --- a/colm/input.c +++ b/colm/input.c @@ -289,7 +289,7 @@ int fdConsumeData( StreamImpl *ss, int length ) return consumed; } -int fdUndoConsumeData( FsmRun *fsmRun, StreamImpl *ss, const char *data, int length ) +int fdUndoConsumeData( StreamImpl *ss, const char *data, int length ) { debug( REALM_INPUT, "undoing consume of %ld bytes\n", length ); @@ -664,15 +664,13 @@ static int _consumeData( StreamImpl *is, int length ) return consumed; } -static int _undoConsumeData( FsmRun *fsmRun, StreamImpl *is, const char *data, int length ) +static int _undoConsumeData( StreamImpl *is, const char *data, int length ) { debug( REALM_INPUT, "undoing consume of %ld bytes\n", length ); if ( isSourceStream( is ) ) { Stream *stream = (Stream*)is->queue->tree; - int len = stream->in->funcs->undoConsumeData( fsmRun, stream->in, data, length ); - - clearBuffered( fsmRun ); + int len = stream->in->funcs->undoConsumeData( stream->in, data, length ); return len; } @@ -682,8 +680,6 @@ static int _undoConsumeData( FsmRun *fsmRun, StreamImpl *is, const char *data, i 
memcpy( newBuf->data, data, length ); inputStreamPrepend( is, newBuf ); - clearBuffered( fsmRun ); - return length; } } diff --git a/colm/input.h b/colm/input.h index 3b426a50..14cdd465 100644 --- a/colm/input.h +++ b/colm/input.h @@ -93,7 +93,7 @@ struct StreamFuncs int (*getData)( StreamImpl *ss, char *dest, int length ); int (*consumeData)( StreamImpl *ss, int length ); - int (*undoConsumeData)( struct _FsmRun *fsmRun, StreamImpl *ss, const char *data, int length ); + int (*undoConsumeData)( StreamImpl *ss, const char *data, int length ); struct ColmTree *(*consumeTree)( StreamImpl *ss ); void (*undoConsumeTree)( StreamImpl *ss, struct ColmTree *tree, int ignore ); diff --git a/colm/pdarun.c b/colm/pdarun.c index 8362e14d..d116a70d 100644 --- a/colm/pdarun.c +++ b/colm/pdarun.c @@ -274,7 +274,7 @@ static void sendBackText( FsmRun *fsmRun, StreamImpl *is, const char *data, long debug( REALM_PARSE, "sending back text: %.*s\n", (int)length, data ); - is->funcs->undoConsumeData( fsmRun, is, data, length ); + is->funcs->undoConsumeData( is, data, length ); undoPosition( is, data, length ); } -- cgit v1.2.1 From d0274f50a36f9e0c28bdbd6a230a775c661b2ecb Mon Sep 17 00:00:00 2001 From: Adrian Thurston Date: Sun, 10 Feb 2013 09:25:02 -0500 Subject: write the skip-token label only if it is used --- colm/bytecode.h | 1 - colm/fsmcodegen.cc | 19 +++++++++++++++---- colm/fsmcodegen.h | 2 ++ 3 files changed, 17 insertions(+), 5 deletions(-) diff --git a/colm/bytecode.h b/colm/bytecode.h index 26482f2d..cb777481 100644 --- a/colm/bytecode.h +++ b/colm/bytecode.h @@ -495,7 +495,6 @@ void allocGlobal( struct ColmProgram *prg ); Tree **executeCode( struct ColmProgram *prg, Execution *exec, Tree **sp, Code *instr ); void rcodeDownref( struct ColmProgram *prg, Tree **sp, Code *instr ); Code *popReverseCode( RtCodeVect *allRev ); -void sendBackBuffered( FsmRun *fsmRun, StreamImpl *inputStream ); #ifdef __cplusplus } diff --git a/colm/fsmcodegen.cc b/colm/fsmcodegen.cc index 
2c9c3387..ce357e13 100644 --- a/colm/fsmcodegen.cc +++ b/colm/fsmcodegen.cc @@ -47,7 +47,8 @@ FsmCodeGen::FsmCodeGen( const char *sourceFileName, const char *fsmName, ostream codeGenErrCount(0), dataPrefix(true), writeFirstFinal(true), - writeErr(true) + writeErr(true), + skipTokenLabelNeeded(false) { } @@ -194,6 +195,8 @@ void FsmCodeGen::LM_SWITCH( ostream &ret, InlineItem *item, " }\n" "\t" " goto skip_toklen;\n"; + + skipTokenLabelNeeded = true; } void FsmCodeGen::LM_ON_LAST( ostream &ret, InlineItem *item ) @@ -220,6 +223,8 @@ void FsmCodeGen::LM_ON_LAG_BEHIND( ostream &ret, InlineItem *item ) ret << " " << TOKLEN() << " = " << TOKEND() << ";\n"; EMIT_TOKEN( ret, item->longestMatchPart->tdLangEl ); ret << " goto skip_toklen;\n"; + + skipTokenLabelNeeded = true; } @@ -874,9 +879,15 @@ void FsmCodeGen::writeExec() out << "out:\n" " if ( " << P() << " != 0 )\n" - " " << TOKLEN() << " += " << P() << " - " << BLOCK_START() << ";\n" - "skip_toklen:\n" - " {}\n" + " " << TOKLEN() << " += " << P() << " - " << BLOCK_START() << ";\n"; + + if ( skipTokenLabelNeeded ) { + out << + "skip_toklen:\n" + " {}\n"; + } + + out << "}\n" "\n"; } diff --git a/colm/fsmcodegen.h b/colm/fsmcodegen.h index 39ae1876..29fcb98d 100644 --- a/colm/fsmcodegen.h +++ b/colm/fsmcodegen.h @@ -75,6 +75,7 @@ public: RedFsm *redFsm, FsmTables *fsmTables ); protected: + string FSM_NAME(); string START_STATE_ID(); ostream &ACTIONS_ARRAY(); @@ -165,6 +166,7 @@ public: bool dataPrefix; bool writeFirstFinal; bool writeErr; + bool skipTokenLabelNeeded; std::ostream &TO_STATE_ACTION_SWITCH(); std::ostream &FROM_STATE_ACTION_SWITCH(); -- cgit v1.2.1 From c6b71985f817f6b5e4c7d1ec6f19383404507396 Mon Sep 17 00:00:00 2001 From: Adrian Thurston Date: Sun, 10 Feb 2013 09:47:44 -0500 Subject: added the WC and WV context for stdin/stdout/stderr get --- colm/synthesis.cc | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/colm/synthesis.cc b/colm/synthesis.cc index bc40af95..ad065343 100644 ---
a/colm/synthesis.cc +++ b/colm/synthesis.cc @@ -3247,7 +3247,9 @@ void Compiler::addStdin() el->beenInitialized = true; el->isConst = true; el->useOffset = false; - el->inGetR = IN_GET_STDIN; + el->inGetR = IN_GET_STDIN; + el->inGetWC = IN_GET_STDIN; + el->inGetWV = IN_GET_STDIN; globalObjectDef->insertField( el->name, el ); } @@ -3263,6 +3265,8 @@ void Compiler::addStdout() el->isConst = true; el->useOffset = false; el->inGetR = IN_GET_STDOUT; + el->inGetWC = IN_GET_STDOUT; + el->inGetWV = IN_GET_STDOUT; globalObjectDef->insertField( el->name, el ); } @@ -3278,6 +3282,8 @@ void Compiler::addStderr() el->isConst = true; el->useOffset = false; el->inGetR = IN_GET_STDERR; + el->inGetWC = IN_GET_STDERR; + el->inGetWV = IN_GET_STDERR; globalObjectDef->insertField( el->name, el ); } -- cgit v1.2.1 From 2ccf3932d99d1a82f12e8cccbd2c1e7f54e63bb4 Mon Sep 17 00:00:00 2001 From: Adrian Thurston Date: Sun, 10 Feb 2013 11:11:34 -0500 Subject: support stream pull outside of a parser --- colm/bytecode.c | 22 +++++++++++++++++++--- colm/bytecode.h | 2 ++ colm/pdarun.c | 45 +++++++++++++++++++++++++++++---------------- colm/pdarun.h | 2 +- colm/string.c | 2 -- colm/synthesis.cc | 2 +- test/pull1.exp | 1 + test/pull1.in | 1 + test/pull1.lm | 2 ++ 9 files changed, 56 insertions(+), 23 deletions(-) create mode 100644 test/pull1.exp create mode 100644 test/pull1.in create mode 100644 test/pull1.lm diff --git a/colm/bytecode.c b/colm/bytecode.c index d167013c..4d93c0f7 100644 --- a/colm/bytecode.c +++ b/colm/bytecode.c @@ -300,10 +300,10 @@ break; } return PcrDone; } -Tree *streamPullBc( Program *prg, FsmRun *fsmRun, StreamImpl *in, Tree *length ) +Tree *streamPullBc( Program *prg, PdaRun *pdaRun, StreamImpl *in, Tree *length ) { long len = ((Int*)length)->value; - Head *tokdata = streamPull( prg, fsmRun, in, len ); + Head *tokdata = streamPull( prg, pdaRun, in, len ); return constructString( prg, tokdata ); } @@ -2456,7 +2456,8 @@ again: Stream *accumStream = (Stream*)vm_pop(); Tree 
*len = vm_pop(); - Tree *string = streamPullBc( prg, exec->parser->pdaRun->fsmRun, accumStream->in, len ); + PdaRun *pdaRun = exec->parser != 0 ? exec->parser->pdaRun : 0; + Tree *string = streamPullBc( prg, pdaRun, accumStream->in, len ); treeUpref( string ); vm_push( string ); @@ -2470,6 +2471,21 @@ again: treeDownref( prg, sp, len ); break; } + + case IN_INPUT_PULL_WC: { + debug( REALM_BYTECODE, "IN_INPUT_PULL_WC\n" ); + + Stream *accumStream = (Stream*)vm_pop(); + Tree *len = vm_pop(); + PdaRun *pdaRun = exec->parser != 0 ? exec->parser->pdaRun : 0; + Tree *string = streamPullBc( prg, pdaRun, accumStream->in, len ); + treeUpref( string ); + vm_push( string ); + + treeDownref( prg, sp, (Tree*)accumStream ); + treeDownref( prg, sp, len ); + break; + } case IN_INPUT_PULL_BKT: { Tree *string; read_tree( string ); diff --git a/colm/bytecode.h b/colm/bytecode.h index cb777481..c84ccb6b 100644 --- a/colm/bytecode.h +++ b/colm/bytecode.h @@ -223,6 +223,7 @@ typedef unsigned char uchar; #define IN_CONSTRUCT_TERM 0x9d #define IN_INPUT_PULL_WV 0x9e +#define IN_INPUT_PULL_WC 0xe1 #define IN_INPUT_PULL_BKT 0x9f #define IN_PARSE_SAVE_STEPS 0xa0 @@ -458,6 +459,7 @@ typedef struct _Execution long stringLength( Head *str ); const char *stringData( Head *str ); Head *stringAllocFull( struct ColmProgram *prg, const char *data, long length ); +Head *initStrSpace( long length ); Head *stringCopy( struct ColmProgram *prg, Head *head ); void stringFree( struct ColmProgram *prg, Head *head ); void stringShorten( Head *tokdata, long newlen ); diff --git a/colm/pdarun.c b/colm/pdarun.c index d116a70d..6ac5b256 100644 --- a/colm/pdarun.c +++ b/colm/pdarun.c @@ -191,29 +191,42 @@ Head *peekMatch( Program *prg, FsmRun *fsmRun, StreamImpl *is ) return head; } -Head *streamPull( Program *prg, FsmRun *fsmRun, StreamImpl *is, long length ) +Head *streamPull( Program *prg, PdaRun *pdaRun, StreamImpl *is, long length ) { - RunBuf *runBuf = fsmRun->consumeBuf; - if ( length > ( FSM_BUFSIZE - 
runBuf->length ) ) { - runBuf = newRunBuf(); - runBuf->next = fsmRun->consumeBuf; - fsmRun->consumeBuf = runBuf; - } + if ( pdaRun != 0 ) { + FsmRun *fsmRun = pdaRun->fsmRun; + RunBuf *runBuf = fsmRun->consumeBuf; + if ( length > ( FSM_BUFSIZE - runBuf->length ) ) { + runBuf = newRunBuf(); + runBuf->next = fsmRun->consumeBuf; + fsmRun->consumeBuf = runBuf; + } - char *dest = runBuf->data + runBuf->length; + char *dest = runBuf->data + runBuf->length; - is->funcs->getData( is, dest, length ); - is->funcs->consumeData( is, length ); + is->funcs->getData( is, dest, length ); + is->funcs->consumeData( is, length ); - runBuf->length += length; + runBuf->length += length; - fsmRun->p = fsmRun->pe = 0; - fsmRun->toklen = 0; + fsmRun->p = fsmRun->pe = 0; + fsmRun->toklen = 0; - Head *tokdata = stringAllocPointer( prg, dest, length ); - updatePosition( is, dest, length ); + Head *tokdata = stringAllocPointer( prg, dest, length ); + updatePosition( is, dest, length ); - return tokdata; + return tokdata; + } + else { + Head *head = initStrSpace( length ); + char *dest = (char*)head->data; + + is->funcs->getData( is, dest, length ); + is->funcs->consumeData( is, length ); + + updatePosition( is, dest, length ); + return head; + } } void undoStreamPull( StreamImpl *is, const char *data, long length ) diff --git a/colm/pdarun.h b/colm/pdarun.h index 17fa3aa7..00f07885 100644 --- a/colm/pdarun.h +++ b/colm/pdarun.h @@ -433,7 +433,7 @@ long parseToken( struct ColmProgram *prg, Tree **sp, PdaRun *pdaRun, long undoParse( Tree **sp, PdaRun *pdaRun, FsmRun *fsmRun, StreamImpl *inputStream, Tree *tree ); -Head *streamPull( struct ColmProgram *prg, FsmRun *fsmRun, StreamImpl *inputStream, long length ); +Head *streamPull( struct ColmProgram *prg, PdaRun *pdaRun, StreamImpl *is, long length ); Head *stringAllocPointer( struct ColmProgram *prg, const char *data, long length ); void streamPushText( StreamImpl *inputStream, const char *data, long length ); diff --git a/colm/string.c 
b/colm/string.c index d670b68c..21c3aac4 100644 --- a/colm/string.c +++ b/colm/string.c @@ -89,8 +89,6 @@ Head *initStrSpace( long length ) { /* Find the length and allocate the space for the shared string. */ Head *head = (Head*) malloc( sizeof(Head) + length ); - //if ( head == 0 ) - // throw std::bad_alloc(); /* Init the header. */ head->data = (char*)(head+1); diff --git a/colm/synthesis.cc b/colm/synthesis.cc index ad065343..f164e1ed 100644 --- a/colm/synthesis.cc +++ b/colm/synthesis.cc @@ -2531,7 +2531,7 @@ void Compiler::initStreamObject( ) streamLangEl->objectDef = streamObj; initFunction( uniqueTypeStr, streamObj, "pull", - IN_INPUT_PULL_WV, IN_INPUT_PULL_WV, uniqueTypeInt, false ); + IN_INPUT_PULL_WV, IN_INPUT_PULL_WC, uniqueTypeInt, false ); initFunction( uniqueTypeStr, streamObj, "push", IN_INPUT_PUSH_WV, IN_INPUT_PUSH_WV, uniqueTypeAny, false ); initFunction( uniqueTypeStr, streamObj, "push_ignore", diff --git a/test/pull1.exp b/test/pull1.exp new file mode 100644 index 00000000..aa3a0fe0 --- /dev/null +++ b/test/pull1.exp @@ -0,0 +1 @@ +this is in diff --git a/test/pull1.in b/test/pull1.in new file mode 100644 index 00000000..f4d2e4a0 --- /dev/null +++ b/test/pull1.in @@ -0,0 +1 @@ +this is input for a non-parse pull diff --git a/test/pull1.lm b/test/pull1.lm new file mode 100644 index 00000000..bc559671 --- /dev/null +++ b/test/pull1.lm @@ -0,0 +1,2 @@ +String: str = stdin.pull( 10 ) +print( String '\n' ) -- cgit v1.2.1 From 61cdd8c309e7610927d4ad6f26b7585cf5631e4f Mon Sep 17 00:00:00 2001 From: Adrian Thurston Date: Sun, 10 Feb 2013 11:22:30 -0500 Subject: additional test for stream pull out of the parsing code --- test/pull2.exp | 1 + test/pull2.in | 1 + test/pull2.lm | 3 +++ 3 files changed, 5 insertions(+) create mode 100644 test/pull2.exp create mode 100644 test/pull2.in create mode 100644 test/pull2.lm diff --git a/test/pull2.exp b/test/pull2.exp new file mode 100644 index 00000000..aa3a0fe0 --- /dev/null +++ b/test/pull2.exp @@ -0,0 +1 @@ 
+this is in diff --git a/test/pull2.in b/test/pull2.in new file mode 100644 index 00000000..f4d2e4a0 --- /dev/null +++ b/test/pull2.in @@ -0,0 +1 @@ +this is input for a non-parse pull diff --git a/test/pull2.lm b/test/pull2.lm new file mode 100644 index 00000000..1a18b829 --- /dev/null +++ b/test/pull2.lm @@ -0,0 +1,3 @@ +Stream: stream = open( 'pull2.in' 'r' ) +String: str = Stream.pull( 10 ) +print( String '\n' ) -- cgit v1.2.1