diff options
-rw-r--r-- | colm/ctinput.cc | 14 | ||||
-rw-r--r-- | colm/input.c | 32 | ||||
-rw-r--r-- | colm/input.h | 11 | ||||
-rw-r--r-- | colm/pdarun.c | 111 | ||||
-rw-r--r-- | colm/pdarun.h | 2 |
5 files changed, 56 insertions, 114 deletions
diff --git a/colm/ctinput.cc b/colm/ctinput.cc index 9c2f848e..a445a8fc 100644 --- a/colm/ctinput.cc +++ b/colm/ctinput.cc @@ -60,8 +60,8 @@ LangEl *inputStreamPatternGetLangEl( StreamImpl *ss, long *bindId, char **data, return klangEl; } -int inputStreamPatternGetParseBlock( FsmRun *fsmRun, StreamImpl *ss, int skip, - char *dest, int length, char **pdp, int *copied ) +int inputStreamPatternGetParseBlock( FsmRun *fsmRun, StreamImpl *ss, + int skip, char **pdp, int *copied ) { *copied = 0; @@ -81,7 +81,7 @@ int inputStreamPatternGetParseBlock( FsmRun *fsmRun, StreamImpl *ss, int skip, if ( avail > 0 ) { /* The source data from the current buffer. */ char *src = &buf->data[offset]; - int slen = avail <= length ? avail : length; + int slen = avail; /* Need to skip? */ if ( skip > 0 && slen <= skip ) { @@ -95,7 +95,6 @@ int inputStreamPatternGetParseBlock( FsmRun *fsmRun, StreamImpl *ss, int skip, slen -= skip; skip = 0; - memcpy( dest, src, slen ) ; *pdp = src; *copied += slen; break; @@ -285,8 +284,8 @@ LangEl *inputStreamConsGetLangEl( StreamImpl *ss, long *bindId, char **data, lon return klangEl; } -int inputStreamConsGetParseBlock( FsmRun *fsmRun, StreamImpl *ss, int skip, - char *dest, int length, char **pdp, int *copied ) +int inputStreamConsGetParseBlock( FsmRun *fsmRun, StreamImpl *ss, + int skip, char **pdp, int *copied ) { *copied = 0; @@ -306,7 +305,7 @@ int inputStreamConsGetParseBlock( FsmRun *fsmRun, StreamImpl *ss, int skip, if ( avail > 0 ) { /* The source data from the current buffer. */ char *src = &buf->data[offset]; - int slen = avail <= length ? avail : length; + int slen = avail; /* Need to skip? */ if ( skip > 0 && slen <= skip ) { @@ -320,7 +319,6 @@ int inputStreamConsGetParseBlock( FsmRun *fsmRun, StreamImpl *ss, int skip, slen -= skip; skip = 0; - memcpy( dest, src, slen ) ; *pdp = src; *copied += slen; break; diff --git a/colm/input.c b/colm/input.c index 5adfb35e..5512450f 100644 --- a/colm/input.c +++ b/colm/input.c @@ -178,8 +178,8 @@ void initInputFuncs() * Base run-time input streams. */ -int fdGetParseBlock( FsmRun *fsmRun, StreamImpl *ss, int skip, - char *dest, int length, char **pdp, int *copied ) +int fdGetParseBlock( FsmRun *fsmRun, StreamImpl *ss, + int skip, char **pdp, int *copied ) { int ret = 0; *copied = 0; @@ -198,8 +198,7 @@ int fdGetParseBlock( FsmRun *fsmRun, StreamImpl *ss, int skip, } runBuf->length = received; - int slen = received < length ? received : length; - memcpy( dest, runBuf->data, slen ); + int slen = received; *pdp = runBuf->data; *copied = slen; ret = INPUT_DATA; @@ -225,8 +224,7 @@ int fdGetParseBlock( FsmRun *fsmRun, StreamImpl *ss, int skip, avail -= skip; skip = 0; - int slen = avail < length ? avail : length; - memcpy( dest, src, slen ) ; + int slen = avail; *pdp = src; *copied += slen; ret = INPUT_DATA; @@ -510,8 +508,8 @@ void _unsetEof( StreamImpl *is ) } } -int _getParseBlock( FsmRun *fsmRun, StreamImpl *is, int skip, - char *dest, int length, char **pdp, int *copied ) +int _getParseBlock( FsmRun *fsmRun, StreamImpl *is, + int skip, char **pdp, int *copied ) { int ret = 0; *copied = 0; @@ -527,7 +525,7 @@ int _getParseBlock( FsmRun *fsmRun, StreamImpl *is, int skip, if ( buf->type == RunBufSourceType ) { Stream *stream = (Stream*)buf->tree; - int type = stream->in->funcs->getParseBlock( fsmRun, stream->in, skip, dest, length, pdp, copied ); + int type = stream->in->funcs->getParseBlock( fsmRun, stream->in, skip, pdp, copied ); // if ( type == INPUT_EOD && !stream->in->eosSent ) { // stream->in->eosSent = 1; @@ -574,10 +572,8 @@ int _getParseBlock( FsmRun *fsmRun, StreamImpl *is, int skip, avail -= skip; skip = 0; - int slen = avail <= length ? avail : length; - memcpy( dest, src, slen ) ; *pdp = src; - *copied += slen; + *copied += avail; ret = INPUT_DATA; break; } @@ -589,22 +585,22 @@ int _getParseBlock( FsmRun *fsmRun, StreamImpl *is, int skip, #if DEBUG switch ( ret ) { case INPUT_DATA: - debug( REALM_INPUT, "get data: DATA copied: %d: %.*s\n", *copied, (int)*copied, dest ); + debug( REALM_INPUT, "get parse block: DATA: %d\n", *copied ); break; case INPUT_EOD: - debug( REALM_INPUT, "get data: EOD\n" ); + debug( REALM_INPUT, "get parse block: EOD\n" ); break; case INPUT_EOF: - debug( REALM_INPUT, "get data: EOF\n" ); + debug( REALM_INPUT, "get parse block: EOF\n" ); break; case INPUT_TREE: - debug( REALM_INPUT, "get data: TREE\n" ); + debug( REALM_INPUT, "get parse block: TREE\n" ); break; case INPUT_IGNORE: - debug( REALM_INPUT, "get data: IGNORE\n" ); + debug( REALM_INPUT, "get parse block: IGNORE\n" ); break; case INPUT_LANG_EL: - debug( REALM_INPUT, "get data: LANG_EL\n" ); + debug( REALM_INPUT, "get parse block: LANG_EL\n" ); break; } #endif diff --git a/colm/input.h b/colm/input.h index e2b2fce1..e37a9383 100644 --- a/colm/input.h +++ b/colm/input.h @@ -88,10 +88,11 @@ typedef struct _StreamImpl StreamImpl; struct StreamFuncs { - int (*getParseBlock)( struct _FsmRun *fsmRun, StreamImpl *ss, int offset, - char *dest, int length, char **pdp, int *copied ); + int (*getParseBlock)( struct _FsmRun *fsmRun, StreamImpl *ss, + int skip, char **pdp, int *copied ); - int (*getData)( struct _FsmRun *fsmRun, StreamImpl *ss, int offset, char *dest, int length, int *copied ); + int (*getData)( struct _FsmRun *fsmRun, StreamImpl *ss, + int offset, char *dest, int length, int *copied ); int (*consumeData)( StreamImpl *ss, int length ); int (*undoConsumeData)( struct _FsmRun *fsmRun, StreamImpl *ss, const char *data, int length ); @@ -171,8 +172,8 @@ void initConsFuncs(); int _getData( struct _FsmRun *fsmRun, StreamImpl *in, int offset, char *dest, int length, int *copied ); -int _getParseBlock( struct _FsmRun *fsmRun, StreamImpl *in, int offset, - char *dest, int length, char **pdp, int *copied ); +int _getParseBlock( struct _FsmRun *fsmRun, StreamImpl *in, + int skip, char **pdp, int *copied ); int _consumeData( StreamImpl *in, int length ); int _undoConsumeData( struct _FsmRun *fsmRun, StreamImpl *is, const char *data, int length ); diff --git a/colm/pdarun.c b/colm/pdarun.c index ace7b019..b226f004 100644 --- a/colm/pdarun.c +++ b/colm/pdarun.c @@ -61,13 +61,11 @@ void initFsmRun( FsmRun *fsmRun, Program *prg ) { fsmRun->tables = prg->rtd->fsmTables; - fsmRun->scanBuf = newRunBuf(); - fsmRun->scanBuf->next = 0; - fsmRun->consumeBuf = 0; - fsmRun->p = fsmRun->pe = fsmRun->scanBuf->data; - fsmRun->peof = 0; + fsmRun->p = fsmRun->pe = 0; + fsmRun->have = 0; + fsmRun->peof = (char*)-1; fsmRun->preRegion = -1; } @@ -141,8 +139,9 @@ Head *streamPull( Program *prg, FsmRun *fsmRun, StreamImpl *is, long length ) is->funcs->getData( fsmRun, is, 0, runBuf->data, length, &lenCopied ); is->funcs->consumeData( is, length ); - fsmRun->p = fsmRun->pe = fsmRun->scanBuf->data; - //fsmRun->peof = 0; + fsmRun->p = fsmRun->pe = 0; + fsmRun->have = 0; + //fsmRun->peof = (char*)-1; Head *tokdata = stringAllocPointer( prg, runBuf->data, length ); updatePosition( is, runBuf->data, length ); @@ -767,8 +766,9 @@ Head *peekMatch( Program *prg, FsmRun *fsmRun, StreamImpl *is ) int lenCopied = 0; is->funcs->getData( fsmRun, is, 0, runBuf->data, length, &lenCopied ); - fsmRun->p = fsmRun->pe = fsmRun->scanBuf->data; - //fsmRun->peof = 0; + fsmRun->p = fsmRun->pe = 0; + fsmRun->have = 0; + //fsmRun->peof = (char*)-1; Head *head = stringAllocPointer( prg, runBuf->data, length ); @@ -802,8 +802,9 @@ Head *extractMatch( Program *prg, FsmRun *fsmRun, StreamImpl *is ) is->funcs->consumeData( is, length ); - fsmRun->p = fsmRun->pe = fsmRun->scanBuf->data; - //fsmRun->peof = 0; + fsmRun->p = fsmRun->pe = 0; + fsmRun->have = 0; + //fsmRun->peof = (char*)-1; Head *head = stringAllocPointer( prg, runBuf->data, length ); @@ -936,6 +937,10 @@ static void sendEof( Program *prg, Tree **sp, StreamImpl *is, FsmRun *fsmRun, Pd void newToken( Program *prg, PdaRun *pdaRun, FsmRun *fsmRun ) { + fsmRun->p = fsmRun->pe = 0; + fsmRun->have = 0; + fsmRun->peof = (char*)-1; + /* Init the scanner vars. */ fsmRun->act = 0; fsmRun->tokstart = 0; @@ -999,7 +1004,10 @@ long scanToken( Program *prg, PdaRun *pdaRun, FsmRun *fsmRun, StreamImpl *is ) return SCAN_UNDO; while ( true ) { + char *start = fsmRun->p; fsmExecute( fsmRun, is ); + if ( fsmRun->p != 0 ) + fsmRun->have += fsmRun->p - start; /* First check if scanning stopped because we have a token. */ if ( fsmRun->matchedToken > 0 ) { @@ -1029,93 +1037,32 @@ long scanToken( Program *prg, PdaRun *pdaRun, FsmRun *fsmRun, StreamImpl *is ) return SCAN_ERROR; } - /* Got here because the state machine didn't match a token or - * encounter an error. Must be because we got to the end of the buffer - * data. */ + /* Got here because the state machine didn't match a token or encounter + * an error. Must be because we got to the end of the buffer data. */ assert( fsmRun->p == fsmRun->pe ); - /* There may be space left in the current buffer. If not then we need - * to make some. */ - long space = fsmRun->scanBuf->data + FSM_BUFSIZE - fsmRun->pe; - if ( space == 0 ) { - /* Create a new run buf. */ - RunBuf *newBuf = newRunBuf(); - - /* If partway through a token then preserve the prefix. */ - long have = 0; - - if ( fsmRun->tokstart == 0 ) { - /* No prefix. We filled the previous buffer. */ - fsmRun->scanBuf->length = FSM_BUFSIZE; - } - else { - int i; - - debug( REALM_SCAN, "copying data over to new buffer\n" ); - assert( fsmRun->scanBuf->offset == 0 ); - - if ( fsmRun->tokstart == fsmRun->scanBuf->data ) { - /* A token is started and it is already at the beginning - * of the current buffer. This means buffer is full and it - * must be grown. Probably need to do this sooner. */ - fatal( "OUT OF BUFFER SPACE\n" ); - } - - /* There is data that needs to be shifted over. */ - have = fsmRun->pe - fsmRun->tokstart; - memcpy( newBuf->data, fsmRun->tokstart, have ); - - /* Compute the length of the previous buffer. */ - fsmRun->scanBuf->length = FSM_BUFSIZE - have; - - /* Compute tokstart and tokend. */ - long dist = fsmRun->tokstart - newBuf->data; - - fsmRun->tokend -= dist; - fsmRun->tokstart = newBuf->data; - - /* Shift any markers. */ - for ( i = 0; i < MARK_SLOTS; i++ ) { - if ( fsmRun->mark[i] != 0 ) - fsmRun->mark[i] -= dist; - } - } - - fsmRun->p = fsmRun->pe = newBuf->data + have; - fsmRun->peof = 0; - - newBuf->next = fsmRun->scanBuf; - fsmRun->scanBuf = newBuf; - } - - /* We don't have any data. What is next in the input inputStream? */ - space = fsmRun->scanBuf->data + FSM_BUFSIZE - fsmRun->pe; - assert( space > 0 ); - - /* Get more data. */ - int have = fsmRun->tokstart != 0 ? fsmRun->p - fsmRun->tokstart : 0; - - debug( REALM_SCAN, "fetching data: have: %d space: %d\n", have, space ); - char *pd = 0; int len = 0; - int type = is->funcs->getParseBlock( fsmRun, is, have, fsmRun->p, space, &pd, &len ); + int type = is->funcs->getParseBlock( fsmRun, is, fsmRun->have, &pd, &len ); switch ( type ) { case INPUT_DATA: - fsmRun->pe = fsmRun->p + len; + fsmRun->p = pd; + fsmRun->pe = pd + len; break; case INPUT_EOS: + //fsmRun->p = fsmRun->pe = 0; if ( fsmRun->tokstart != 0 ) fsmRun->peof = fsmRun->pe; debug( REALM_SCAN, "EOS *******************\n" ); -// else { -// return SCAN_EOS; -// } + //else { + // return SCAN_EOS; + //} break; case INPUT_EOF: + //fsmRun->p = fsmRun->pe = 0; if ( fsmRun->tokstart != 0 ) fsmRun->peof = fsmRun->pe; else diff --git a/colm/pdarun.h b/colm/pdarun.h index 81d376c4..ac08889e 100644 --- a/colm/pdarun.h +++ b/colm/pdarun.h @@ -73,7 +73,6 @@ typedef struct _FsmRun { FsmTables *tables; - RunBuf *scanBuf; RunBuf *consumeBuf; /* FsmRun State. */ @@ -81,6 +80,7 @@ typedef struct _FsmRun long cs, ncs, act; char *tokstart, *tokend; char *p, *pe, *peof; + int have; int returnResult; char *mark[MARK_SLOTS]; long matchedToken; |