summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Adrian Thurston <thurston@complang.org> 2013-02-02 10:15:56 -0500
committer: Adrian Thurston <thurston@complang.org> 2013-02-02 10:15:56 -0500
commit: 2e74066ca8fb99c6fcccf210f8b9c2400a62e821 (patch)
tree: 63dbe35d902e5537221f81d0d81f9394c178f189
parent: 0815d6da3c82dcb4a29da971ed49f323e8f6ac0c (diff)
download: colm-2e74066ca8fb99c6fcccf210f8b9c2400a62e821.tar.gz
Forked the getData input function into getParseBlock.
Now that it is forked, getParseBlock can be reworked to return a pointer to a parse block instead of copying data into the caller's buffer.
-rw-r--r-- colm/ctinput.cc 96
-rw-r--r-- colm/input.c 162
-rw-r--r-- colm/input.h 4
-rw-r--r-- colm/pdarun.c 2
4 files changed, 262 insertions, 2 deletions
diff --git a/colm/ctinput.cc b/colm/ctinput.cc
index a10d03f5..10f89da9 100644
--- a/colm/ctinput.cc
+++ b/colm/ctinput.cc
@@ -60,6 +60,53 @@ LangEl *inputStreamPatternGetLangEl( StreamImpl *ss, long *bindId, char **data,
return klangEl;
}
+/* Fetch the next run of text from a pattern's item list.
+ *
+ * Starts at ss->patItem / ss->offset, passes over `skip` bytes of text, then
+ * copies at most `length` bytes into `dest` and stores the count in *copied.
+ * Only local cursors advance; nothing in `ss` is written.
+ *
+ * Returns INPUT_DATA when bytes were copied, INPUT_LANG_EL when a factor item
+ * is reached first, and INPUT_EOD when the item list runs out. fsmRun is not
+ * used. */
+int inputStreamPatternGetParseBlock( FsmRun *fsmRun, StreamImpl *ss, int skip, char *dest, int length, int *copied )
+{
+	*copied = 0;
+
+	PatternItem *buf = ss->patItem;
+	int offset = ss->offset;
+
+	while ( true ) {
+		if ( buf == 0 )
+			return INPUT_EOD;
+
+		if ( buf->type == PatternItem::FactorType )
+			return INPUT_LANG_EL;
+
+		assert ( buf->type == PatternItem::InputText );
+		int avail = buf->data.length() - offset;
+
+		if ( avail > 0 ) {
+			/* The source data from the current buffer. */
+			char *src = &buf->data[offset];
+
+			/* Need to skip? Measure the skip against everything left in this
+			 * item. Measuring it against the amount already clipped to
+			 * length would abandon the tail of an item that is longer than
+			 * length and lose those bytes. */
+			if ( skip > 0 && skip >= avail ) {
+				/* Skipping the whole item. */
+				skip -= avail;
+			}
+			else {
+				/* Either skip is zero, or less than avail. Skip goes to zero.
+				 * Some data left over, copy as much as fits in dest. */
+				src += skip;
+				avail -= skip;
+				skip = 0;
+
+				int slen = avail <= length ? avail : length;
+				memcpy( dest, src, slen );
+				*copied += slen;
+				break;
+			}
+		}
+
+		buf = buf->next;
+		offset = 0;
+	}
+
+	return INPUT_DATA;
+}
+
int inputStreamPatternGetData( FsmRun *fsmRun, StreamImpl *ss, int skip, char *dest, int length, int *copied )
{
*copied = 0;
@@ -187,6 +234,7 @@ extern "C" void initPatFuncs()
memset( &patternFuncs, 0, sizeof(StreamFuncs) );
patternFuncs.getData = &inputStreamPatternGetData;
+ patternFuncs.getParseBlock = &inputStreamPatternGetParseBlock;
patternFuncs.consumeData = &inputStreamPatternConsumeData;
patternFuncs.undoConsumeData = &inputStreamPatternUndoConsumeData;
@@ -235,6 +283,53 @@ LangEl *inputStreamConsGetLangEl( StreamImpl *ss, long *bindId, char **data, lon
return klangEl;
}
+/* Fetch the next run of text from a constructor's item list.
+ *
+ * Starts at ss->consItem / ss->offset, passes over `skip` bytes of text, then
+ * copies at most `length` bytes into `dest` and stores the count in *copied.
+ * Only local cursors advance; nothing in `ss` is written.
+ *
+ * Returns INPUT_DATA when bytes were copied, INPUT_LANG_EL when an expression
+ * or factor item is reached first, and INPUT_EOD when the item list runs out.
+ * fsmRun is not used. */
+int inputStreamConsGetParseBlock( FsmRun *fsmRun, StreamImpl *ss, int skip, char *dest, int length, int *copied )
+{
+	*copied = 0;
+
+	ConsItem *buf = ss->consItem;
+	int offset = ss->offset;
+
+	while ( true ) {
+		if ( buf == 0 )
+			return INPUT_EOD;
+
+		if ( buf->type == ConsItem::ExprType || buf->type == ConsItem::FactorType )
+			return INPUT_LANG_EL;
+
+		assert ( buf->type == ConsItem::InputText );
+		int avail = buf->data.length() - offset;
+
+		if ( avail > 0 ) {
+			/* The source data from the current buffer. */
+			char *src = &buf->data[offset];
+
+			/* Need to skip? Measure the skip against everything left in this
+			 * item. Measuring it against the amount already clipped to
+			 * length would abandon the tail of an item that is longer than
+			 * length and lose those bytes. */
+			if ( skip > 0 && skip >= avail ) {
+				/* Skipping the whole item. */
+				skip -= avail;
+			}
+			else {
+				/* Either skip is zero, or less than avail. Skip goes to zero.
+				 * Some data left over, copy as much as fits in dest. */
+				src += skip;
+				avail -= skip;
+				skip = 0;
+
+				int slen = avail <= length ? avail : length;
+				memcpy( dest, src, slen );
+				*copied += slen;
+				break;
+			}
+		}
+
+		buf = buf->next;
+		offset = 0;
+	}
+
+	return INPUT_DATA;
+}
+
int inputStreamConsGetData( FsmRun *fsmRun, StreamImpl *ss, int skip, char *dest, int length, int *copied )
{
*copied = 0;
@@ -364,6 +459,7 @@ extern "C" void initConsFuncs()
memset( &replFuncs, 0, sizeof(StreamFuncs) );
replFuncs.getData = &inputStreamConsGetData;
+ replFuncs.getParseBlock = &inputStreamConsGetParseBlock;
replFuncs.consumeData = &inputStreamConsConsumeData;
replFuncs.undoConsumeData = &inputStreamConsUndoConsumeData;
diff --git a/colm/input.c b/colm/input.c
index 14c1cb1b..ad52f56b 100644
--- a/colm/input.c
+++ b/colm/input.c
@@ -139,6 +139,7 @@ void initStreamFuncs()
{
memset( &streamFuncs, 0, sizeof(struct StreamFuncs) );
streamFuncs.getData = &_getData;
+ streamFuncs.getParseBlock = &_getParseBlock;
streamFuncs.consumeData = &_consumeData;
streamFuncs.undoConsumeData = &_undoConsumeData;
streamFuncs.consumeTree = &_consumeTree;
@@ -177,6 +178,65 @@ void initInputFuncs()
* Base run-time input streams.
*/
+/* Fetch the next block of input from a stream that pulls from a data source.
+ *
+ * Walks ss->queue, passing over `skip` bytes of buffered data, then copies at
+ * most `length` bytes into `dest` and stores the count in *copied. When the
+ * queue is exhausted, a new run buffer is appended to the stream and filled
+ * through ss->funcs->getDataSource.
+ *
+ * Returns INPUT_DATA when bytes were copied and INPUT_EOD when the data
+ * source supplied nothing. fsmRun is not used. */
+int fdGetParseBlock( FsmRun *fsmRun, StreamImpl *ss, int skip, char *dest, int length, int *copied )
+{
+	int ret = 0;
+	*copied = 0;
+
+	/* Move over skip bytes. */
+	RunBuf *buf = ss->queue;
+	while ( true ) {
+		if ( buf == 0 ) {
+			/* Got through the in-mem buffers without copying anything. */
+			RunBuf *runBuf = newRunBuf();
+			sourceStreamAppend( ss, runBuf );
+			int received = ss->funcs->getDataSource( ss, runBuf->data, FSM_BUFSIZE );
+
+			/* Treat a negative count like "no data". Used as a length it
+			 * would reach memcpy as an enormous size.
+			 * NOTE(review): getDataSource's error convention is not visible
+			 * here -- confirm that reporting EOD is the desired handling. */
+			if ( received <= 0 ) {
+				ret = INPUT_EOD;
+				break;
+			}
+			runBuf->length = received;
+
+			/* NOTE(review): any skip still outstanding is not applied to the
+			 * freshly read data -- presumably skip is always exhausted by the
+			 * in-memory buffers; verify against the caller. */
+			int slen = received < length ? received : length;
+			memcpy( dest, runBuf->data, slen );
+			*copied = slen;
+			ret = INPUT_DATA;
+			break;
+		}
+
+		int avail = buf->length - buf->offset;
+
+		/* Anything available in the current buffer. */
+		if ( avail > 0 ) {
+			/* The source data from the current buffer. */
+			char *src = &buf->data[buf->offset];
+
+			/* Need to skip? */
+			if ( skip > 0 && skip >= avail ) {
+				/* Skipping the whole buffer. */
+				skip -= avail;
+			}
+			else {
+				/* Either skip is zero, or less than avail. Skip goes to zero.
+				 * Some data left over, copy as much as fits in dest. */
+				src += skip;
+				avail -= skip;
+				skip = 0;
+
+				int slen = avail < length ? avail : length;
+				memcpy( dest, src, slen );
+				*copied += slen;
+				ret = INPUT_DATA;
+				break;
+			}
+		}
+
+		buf = buf->next;
+	}
+
+	return ret;
+}
+
int fdGetData( FsmRun *fsmRun, StreamImpl *ss, int skip, char *dest, int length, int *copied )
{
int ret = 0;
@@ -303,6 +363,7 @@ void initFileFuncs()
{
memset( &fileFuncs, 0, sizeof(struct StreamFuncs) );
fileFuncs.getData = &fdGetData;
+ fileFuncs.getParseBlock = &fdGetParseBlock;
fileFuncs.consumeData = &fdConsumeData;
fileFuncs.undoConsumeData = &fdUndoConsumeData;
fileFuncs.getDataSource = &fileGetDataSource;
@@ -328,6 +389,7 @@ void initFdFuncs()
{
memset( &fdFuncs, 0, sizeof(struct StreamFuncs) );
fdFuncs.getData = &fdGetData;
+ fdFuncs.getParseBlock = &fdGetParseBlock;
fdFuncs.consumeData = &fdConsumeData;
fdFuncs.undoConsumeData = &fdUndoConsumeData;
fdFuncs.getDataSource = &fdGetDataSource;
@@ -445,6 +507,106 @@ void _unsetEof( StreamImpl *is )
}
}
+int _getParseBlock( FsmRun *fsmRun, StreamImpl *is, int skip, char *dest, int length, int *copied )
+{ /* Scan is->queue for the next item the caller has to handle.
+ *
+ * Plain data buffers: `skip` bytes are passed over, then up to `length`
+ * bytes are copied into dest, the count is stored in *copied and
+ * INPUT_DATA is returned.
+ * Source buffers: the request is forwarded to the nested stream and its
+ * result code is returned, unless it reports EOD/EOF, in which case the
+ * scan moves on to the next queue entry.
+ * Token / ignore buffers: INPUT_TREE / INPUT_IGNORE is returned.
+ * Queue exhausted: INPUT_EOF when is->eof is set, otherwise INPUT_EOD.
+ */
+ int ret = 0;
+ *copied = 0;
+
+ /* Move over skip bytes. */
+ RunBuf *buf = is->queue;
+ while ( true ) {
+ if ( buf == 0 ) {
+ /* Got through the in-mem buffers without copying anything. */
+ ret = is->eof ? INPUT_EOF : INPUT_EOD;
+ break;
+ }
+
+ if ( buf->type == RunBufSourceType ) {
+ Stream *stream = (Stream*)buf->tree; /* for a source buffer the tree field holds the nested Stream */
+ int type = stream->in->funcs->getData( fsmRun, stream->in, skip, dest, length, copied ); /* NOTE(review): forwards to the nested stream's getData, not getParseBlock -- confirm this is intended */
+
+// if ( type == INPUT_EOD && !stream->in->eosSent ) {
+// stream->in->eosSent = 1;
+// ret = INPUT_EOS;
+// continue;
+// }
+
+ if ( type == INPUT_EOD || type == INPUT_EOF ) { /* nested stream has nothing to give: try the next queue entry */
+ debug( REALM_INPUT, "skipping over input\n" );
+ buf = buf->next;
+ continue;
+ }
+
+ ret = type; /* any other result from the nested stream is passed straight back */
+ break;
+ }
+
+ if ( buf->type == RunBufTokenType ) {
+ ret = INPUT_TREE;
+ break;
+ }
+
+ if ( buf->type == RunBufIgnoreType ) {
+ ret = INPUT_IGNORE;
+ break;
+ }
+
+ int avail = buf->length - buf->offset; /* unread bytes in this buffer */
+
+ /* Anything available in the current buffer. */
+ if ( avail > 0 ) {
+ /* The source data from the current buffer. */
+ char *src = &buf->data[buf->offset];
+
+ /* Need to skip? */
+ if ( skip > 0 && skip >= avail ) {
+ /* Skipping the whole buffer. */
+ skip -= avail;
+ }
+ else {
+ /* Either skip is zero, or less than avail. Skip goes to zero.
+ * Some data left over, copy as much of it as fits in dest. */
+ src += skip;
+ avail -= skip;
+ skip = 0;
+
+ int slen = avail <= length ? avail : length;
+ memcpy( dest, src, slen ) ;
+ *copied += slen;
+ ret = INPUT_DATA;
+ break;
+ }
+ }
+
+ buf = buf->next;
+ }
+
+#if DEBUG
+ switch ( ret ) { /* Log the outcome; NOTE(review): the messages still say "get data" although this is getParseBlock */
+ case INPUT_DATA:
+ debug( REALM_INPUT, "get data: DATA copied: %d: %.*s\n", *copied, (int)*copied, dest );
+ break;
+ case INPUT_EOD:
+ debug( REALM_INPUT, "get data: EOD\n" );
+ break;
+ case INPUT_EOF:
+ debug( REALM_INPUT, "get data: EOF\n" );
+ break;
+ case INPUT_TREE:
+ debug( REALM_INPUT, "get data: TREE\n" );
+ break;
+ case INPUT_IGNORE:
+ debug( REALM_INPUT, "get data: IGNORE\n" );
+ break;
+ case INPUT_LANG_EL:
+ debug( REALM_INPUT, "get data: LANG_EL\n" );
+ break;
+ }
+#endif
+
+ return ret;
+}
+
int _getData( FsmRun *fsmRun, StreamImpl *is, int skip, char *dest, int length, int *copied )
{
int ret = 0;
diff --git a/colm/input.h b/colm/input.h
index 13cd48c0..59473932 100644
--- a/colm/input.h
+++ b/colm/input.h
@@ -88,7 +88,8 @@ typedef struct _StreamImpl StreamImpl;
struct StreamFuncs
{
- /* Data. */
+ int (*getParseBlock)( struct _FsmRun *fsmRun, StreamImpl *ss, int offset, char *dest, int length, int *copied );
+
int (*getData)( struct _FsmRun *fsmRun, StreamImpl *ss, int offset, char *dest, int length, int *copied );
int (*consumeData)( StreamImpl *ss, int length );
@@ -168,6 +169,7 @@ void initConsFuncs();
/* The input stream interface. */
int _getData( struct _FsmRun *fsmRun, StreamImpl *in, int offset, char *dest, int length, int *copied );
+int _getParseBlock( struct _FsmRun *fsmRun, StreamImpl *in, int offset, char *dest, int length, int *copied );
int _consumeData( StreamImpl *in, int length );
int _undoConsumeData( struct _FsmRun *fsmRun, StreamImpl *is, const char *data, int length );
diff --git a/colm/pdarun.c b/colm/pdarun.c
index e655d9e4..5ff7921e 100644
--- a/colm/pdarun.c
+++ b/colm/pdarun.c
@@ -1096,7 +1096,7 @@ long scanToken( Program *prg, PdaRun *pdaRun, FsmRun *fsmRun, StreamImpl *is )
int have = fsmRun->tokstart != 0 ? fsmRun->p - fsmRun->tokstart : 0;
int len = 0;
debug( REALM_SCAN, "fetching data: have: %d space: %d\n", have, space );
- int type = is->funcs->getData( fsmRun, is, have, fsmRun->p, space, &len );
+ int type = is->funcs->getParseBlock( fsmRun, is, have, fsmRun->p, space, &len );
switch ( type ) {
case INPUT_DATA: