diff options
-rw-r--r-- | colm/ctinput.cc | 96 | ||||
-rw-r--r-- | colm/input.c | 162 | ||||
-rw-r--r-- | colm/input.h | 4 | ||||
-rw-r--r-- | colm/pdarun.c | 2 |
4 files changed, 262 insertions, 2 deletions
diff --git a/colm/ctinput.cc b/colm/ctinput.cc index a10d03f5..10f89da9 100644 --- a/colm/ctinput.cc +++ b/colm/ctinput.cc @@ -60,6 +60,53 @@ LangEl *inputStreamPatternGetLangEl( StreamImpl *ss, long *bindId, char **data, return klangEl; } +int inputStreamPatternGetParseBlock( FsmRun *fsmRun, StreamImpl *ss, int skip, char *dest, int length, int *copied ) +{ + *copied = 0; + + PatternItem *buf = ss->patItem; + int offset = ss->offset; + + while ( true ) { + if ( buf == 0 ) + return INPUT_EOD; + + if ( buf->type == PatternItem::FactorType ) + return INPUT_LANG_EL; + + assert ( buf->type == PatternItem::InputText ); + int avail = buf->data.length() - offset; + + if ( avail > 0 ) { + /* The source data from the current buffer. */ + char *src = &buf->data[offset]; + int slen = avail <= length ? avail : length; + + /* Need to skip? */ + if ( skip > 0 && slen <= skip ) { + /* Skipping the the whole source. */ + skip -= slen; + } + else { + /* Either skip is zero, or less than slen. Skip goes to zero. + * Some data left over, copy it. */ + src += skip; + slen -= skip; + skip = 0; + + memcpy( dest, src, slen ) ; + *copied += slen; + break; + } + } + + buf = buf->next; + offset = 0; + } + + return INPUT_DATA; +} + int inputStreamPatternGetData( FsmRun *fsmRun, StreamImpl *ss, int skip, char *dest, int length, int *copied ) { *copied = 0; @@ -187,6 +234,7 @@ extern "C" void initPatFuncs() memset( &patternFuncs, 0, sizeof(StreamFuncs) ); patternFuncs.getData = &inputStreamPatternGetData; + patternFuncs.getParseBlock = &inputStreamPatternGetParseBlock; patternFuncs.consumeData = &inputStreamPatternConsumeData; patternFuncs.undoConsumeData = &inputStreamPatternUndoConsumeData; @@ -235,6 +283,53 @@ LangEl *inputStreamConsGetLangEl( StreamImpl *ss, long *bindId, char **data, lon return klangEl; } +int inputStreamConsGetParseBlock( FsmRun *fsmRun, StreamImpl *ss, int skip, char *dest, int length, int *copied ) +{ + *copied = 0; + + ConsItem *buf = ss->consItem; + int offset = ss->offset; + + while ( true ) { + if ( buf == 0 ) + return INPUT_EOD; + + if ( buf->type == ConsItem::ExprType || buf->type == ConsItem::FactorType ) + return INPUT_LANG_EL; + + assert ( buf->type == ConsItem::InputText ); + int avail = buf->data.length() - offset; + + if ( avail > 0 ) { + /* The source data from the current buffer. */ + char *src = &buf->data[offset]; + int slen = avail <= length ? avail : length; + + /* Need to skip? */ + if ( skip > 0 && slen <= skip ) { + /* Skipping the the whole source. */ + skip -= slen; + } + else { + /* Either skip is zero, or less than slen. Skip goes to zero. + * Some data left over, copy it. */ + src += skip; + slen -= skip; + skip = 0; + + memcpy( dest, src, slen ) ; + *copied += slen; + break; + } + } + + buf = buf->next; + offset = 0; + } + + return INPUT_DATA; +} + int inputStreamConsGetData( FsmRun *fsmRun, StreamImpl *ss, int skip, char *dest, int length, int *copied ) { *copied = 0; @@ -364,6 +459,7 @@ extern "C" void initConsFuncs() memset( &replFuncs, 0, sizeof(StreamFuncs) ); replFuncs.getData = &inputStreamConsGetData; + replFuncs.getParseBlock = &inputStreamConsGetParseBlock; replFuncs.consumeData = &inputStreamConsConsumeData; replFuncs.undoConsumeData = &inputStreamConsUndoConsumeData; diff --git a/colm/input.c b/colm/input.c index 14c1cb1b..ad52f56b 100644 --- a/colm/input.c +++ b/colm/input.c @@ -139,6 +139,7 @@ void initStreamFuncs() { memset( &streamFuncs, 0, sizeof(struct StreamFuncs) ); streamFuncs.getData = &_getData; + streamFuncs.getParseBlock = &_getParseBlock; streamFuncs.consumeData = &_consumeData; streamFuncs.undoConsumeData = &_undoConsumeData; streamFuncs.consumeTree = &_consumeTree; @@ -177,6 +178,65 @@ void initInputFuncs() * Base run-time input streams. */ +int fdGetParseBlock( FsmRun *fsmRun, StreamImpl *ss, int skip, char *dest, int length, int *copied ) +{ + int ret = 0; + *copied = 0; + + /* Move over skip bytes. */ + RunBuf *buf = ss->queue; + while ( true ) { + if ( buf == 0 ) { + /* Got through the in-mem buffers without copying anything. */ + RunBuf *runBuf = newRunBuf(); + sourceStreamAppend( ss, runBuf ); + int received = ss->funcs->getDataSource( ss, runBuf->data, FSM_BUFSIZE ); + if ( received == 0 ) { + ret = INPUT_EOD; + break; + } + runBuf->length = received; + + int slen = received < length ? received : length; + memcpy( dest, runBuf->data, slen ); + *copied = slen; + ret = INPUT_DATA; + break; + } + + int avail = buf->length - buf->offset; + + /* Anything available in the current buffer. */ + if ( avail > 0 ) { + /* The source data from the current buffer. */ + char *src = &buf->data[buf->offset]; + + /* Need to skip? */ + if ( skip > 0 && skip >= avail ) { + /* Skipping the the whole source. */ + skip -= avail; + } + else { + /* Either skip is zero, or less than slen. Skip goes to zero. + * Some data left over, copy it. */ + src += skip; + avail -= skip; + skip = 0; + + int slen = avail < length ? avail : length; + memcpy( dest, src, slen ) ; + *copied += slen; + ret = INPUT_DATA; + break; + } + } + + buf = buf->next; + } + + return ret; +} + int fdGetData( FsmRun *fsmRun, StreamImpl *ss, int skip, char *dest, int length, int *copied ) { int ret = 0; @@ -303,6 +363,7 @@ void initFileFuncs() { memset( &fileFuncs, 0, sizeof(struct StreamFuncs) ); fileFuncs.getData = &fdGetData; + fileFuncs.getParseBlock = &fdGetParseBlock; fileFuncs.consumeData = &fdConsumeData; fileFuncs.undoConsumeData = &fdUndoConsumeData; fileFuncs.getDataSource = &fileGetDataSource; @@ -328,6 +389,7 @@ void initFdFuncs() { memset( &fdFuncs, 0, sizeof(struct StreamFuncs) ); fdFuncs.getData = &fdGetData; + fdFuncs.getParseBlock = &fdGetParseBlock; fdFuncs.consumeData = &fdConsumeData; fdFuncs.undoConsumeData = &fdUndoConsumeData; fdFuncs.getDataSource = &fdGetDataSource; @@ -445,6 +507,106 @@ void _unsetEof( StreamImpl *is ) } } +int _getParseBlock( FsmRun *fsmRun, StreamImpl *is, int skip, char *dest, int length, int *copied ) +{ + int ret = 0; + *copied = 0; + + /* Move over skip bytes. */ + RunBuf *buf = is->queue; + while ( true ) { + if ( buf == 0 ) { + /* Got through the in-mem buffers without copying anything. */ + ret = is->eof ? INPUT_EOF : INPUT_EOD; + break; + } + + if ( buf->type == RunBufSourceType ) { + Stream *stream = (Stream*)buf->tree; + int type = stream->in->funcs->getData( fsmRun, stream->in, skip, dest, length, copied ); + +// if ( type == INPUT_EOD && !stream->in->eosSent ) { +// stream->in->eosSent = 1; +// ret = INPUT_EOS; +// continue; +// } + + if ( type == INPUT_EOD || type == INPUT_EOF ) { + debug( REALM_INPUT, "skipping over input\n" ); + buf = buf->next; + continue; + } + + ret = type; + break; + } + + if ( buf->type == RunBufTokenType ) { + ret = INPUT_TREE; + break; + } + + if ( buf->type == RunBufIgnoreType ) { + ret = INPUT_IGNORE; + break; + } + + int avail = buf->length - buf->offset; + + /* Anything available in the current buffer. */ + if ( avail > 0 ) { + /* The source data from the current buffer. */ + char *src = &buf->data[buf->offset]; + + /* Need to skip? */ + if ( skip > 0 && skip >= avail ) { + /* Skipping the the whole source. */ + skip -= avail; + } + else { + /* Either skip is zero, or less than slen. Skip goes to zero. + * Some data left over, copy it. */ + src += skip; + avail -= skip; + skip = 0; + + int slen = avail <= length ? avail : length; + memcpy( dest, src, slen ) ; + *copied += slen; + ret = INPUT_DATA; + break; + } + } + + buf = buf->next; + } + +#if DEBUG + switch ( ret ) { + case INPUT_DATA: + debug( REALM_INPUT, "get data: DATA copied: %d: %.*s\n", *copied, (int)*copied, dest ); + break; + case INPUT_EOD: + debug( REALM_INPUT, "get data: EOD\n" ); + break; + case INPUT_EOF: + debug( REALM_INPUT, "get data: EOF\n" ); + break; + case INPUT_TREE: + debug( REALM_INPUT, "get data: TREE\n" ); + break; + case INPUT_IGNORE: + debug( REALM_INPUT, "get data: IGNORE\n" ); + break; + case INPUT_LANG_EL: + debug( REALM_INPUT, "get data: LANG_EL\n" ); + break; + } +#endif + + return ret; +} + int _getData( FsmRun *fsmRun, StreamImpl *is, int skip, char *dest, int length, int *copied ) { int ret = 0; diff --git a/colm/input.h b/colm/input.h index 13cd48c0..59473932 100644 --- a/colm/input.h +++ b/colm/input.h @@ -88,7 +88,8 @@ typedef struct _StreamImpl StreamImpl; struct StreamFuncs { - /* Data. */ + int (*getParseBlock)( struct _FsmRun *fsmRun, StreamImpl *ss, int offset, char *dest, int length, int *copied ); + int (*getData)( struct _FsmRun *fsmRun, StreamImpl *ss, int offset, char *dest, int length, int *copied ); int (*consumeData)( StreamImpl *ss, int length ); @@ -168,6 +169,7 @@ void initConsFuncs(); /* The input stream interface. */ int _getData( struct _FsmRun *fsmRun, StreamImpl *in, int offset, char *dest, int length, int *copied ); +int _getParseBlock( struct _FsmRun *fsmRun, StreamImpl *in, int offset, char *dest, int length, int *copied ); int _consumeData( StreamImpl *in, int length ); int _undoConsumeData( struct _FsmRun *fsmRun, StreamImpl *is, const char *data, int length ); diff --git a/colm/pdarun.c b/colm/pdarun.c index e655d9e4..5ff7921e 100644 --- a/colm/pdarun.c +++ b/colm/pdarun.c @@ -1096,7 +1096,7 @@ long scanToken( Program *prg, PdaRun *pdaRun, FsmRun *fsmRun, StreamImpl *is ) int have = fsmRun->tokstart != 0 ? fsmRun->p - fsmRun->tokstart : 0; int len = 0; debug( REALM_SCAN, "fetching data: have: %d space: %d\n", have, space ); - int type = is->funcs->getData( fsmRun, is, have, fsmRun->p, space, &len ); + int type = is->funcs->getParseBlock( fsmRun, is, have, fsmRun->p, space, &len ); switch ( type ) { case INPUT_DATA: |