summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Adrian Thurston <thurston@complang.org> 2013-02-03 11:35:55 -0500
committer: Adrian Thurston <thurston@complang.org> 2013-02-03 11:35:55 -0500
commit: d7ff62a2f5cb920dbbf1a6f7516be9cd417b892e (patch)
tree: ec80eb86cc7c4a4c292c7bd66e8e574796f5ff65
parent: 116b212223225cc519862330d42b2426737fb4f4 (diff)
download: colm-d7ff62a2f5cb920dbbf1a6f7516be9cd417b892e.tar.gz
refraining from copying in getParseBlock, basics working
-rw-r--r-- colm/ctinput.cc 14
-rw-r--r-- colm/input.c 32
-rw-r--r-- colm/input.h 11
-rw-r--r-- colm/pdarun.c 111
-rw-r--r-- colm/pdarun.h 2
5 files changed, 56 insertions, 114 deletions
diff --git a/colm/ctinput.cc b/colm/ctinput.cc
index 9c2f848e..a445a8fc 100644
--- a/colm/ctinput.cc
+++ b/colm/ctinput.cc
@@ -60,8 +60,8 @@ LangEl *inputStreamPatternGetLangEl( StreamImpl *ss, long *bindId, char **data,
return klangEl;
}
-int inputStreamPatternGetParseBlock( FsmRun *fsmRun, StreamImpl *ss, int skip,
- char *dest, int length, char **pdp, int *copied )
+int inputStreamPatternGetParseBlock( FsmRun *fsmRun, StreamImpl *ss,
+ int skip, char **pdp, int *copied )
{
*copied = 0;
@@ -81,7 +81,7 @@ int inputStreamPatternGetParseBlock( FsmRun *fsmRun, StreamImpl *ss, int skip,
if ( avail > 0 ) {
/* The source data from the current buffer. */
char *src = &buf->data[offset];
- int slen = avail <= length ? avail : length;
+ int slen = avail;
/* Need to skip? */
if ( skip > 0 && slen <= skip ) {
@@ -95,7 +95,6 @@ int inputStreamPatternGetParseBlock( FsmRun *fsmRun, StreamImpl *ss, int skip,
slen -= skip;
skip = 0;
- memcpy( dest, src, slen ) ;
*pdp = src;
*copied += slen;
break;
@@ -285,8 +284,8 @@ LangEl *inputStreamConsGetLangEl( StreamImpl *ss, long *bindId, char **data, lon
return klangEl;
}
-int inputStreamConsGetParseBlock( FsmRun *fsmRun, StreamImpl *ss, int skip,
- char *dest, int length, char **pdp, int *copied )
+int inputStreamConsGetParseBlock( FsmRun *fsmRun, StreamImpl *ss,
+ int skip, char **pdp, int *copied )
{
*copied = 0;
@@ -306,7 +305,7 @@ int inputStreamConsGetParseBlock( FsmRun *fsmRun, StreamImpl *ss, int skip,
if ( avail > 0 ) {
/* The source data from the current buffer. */
char *src = &buf->data[offset];
- int slen = avail <= length ? avail : length;
+ int slen = avail;
/* Need to skip? */
if ( skip > 0 && slen <= skip ) {
@@ -320,7 +319,6 @@ int inputStreamConsGetParseBlock( FsmRun *fsmRun, StreamImpl *ss, int skip,
slen -= skip;
skip = 0;
- memcpy( dest, src, slen ) ;
*pdp = src;
*copied += slen;
break;
diff --git a/colm/input.c b/colm/input.c
index 5adfb35e..5512450f 100644
--- a/colm/input.c
+++ b/colm/input.c
@@ -178,8 +178,8 @@ void initInputFuncs()
* Base run-time input streams.
*/
-int fdGetParseBlock( FsmRun *fsmRun, StreamImpl *ss, int skip,
- char *dest, int length, char **pdp, int *copied )
+int fdGetParseBlock( FsmRun *fsmRun, StreamImpl *ss,
+ int skip, char **pdp, int *copied )
{
int ret = 0;
*copied = 0;
@@ -198,8 +198,7 @@ int fdGetParseBlock( FsmRun *fsmRun, StreamImpl *ss, int skip,
}
runBuf->length = received;
- int slen = received < length ? received : length;
- memcpy( dest, runBuf->data, slen );
+ int slen = received;
*pdp = runBuf->data;
*copied = slen;
ret = INPUT_DATA;
@@ -225,8 +224,7 @@ int fdGetParseBlock( FsmRun *fsmRun, StreamImpl *ss, int skip,
avail -= skip;
skip = 0;
- int slen = avail < length ? avail : length;
- memcpy( dest, src, slen ) ;
+ int slen = avail;
*pdp = src;
*copied += slen;
ret = INPUT_DATA;
@@ -510,8 +508,8 @@ void _unsetEof( StreamImpl *is )
}
}
-int _getParseBlock( FsmRun *fsmRun, StreamImpl *is, int skip,
- char *dest, int length, char **pdp, int *copied )
+int _getParseBlock( FsmRun *fsmRun, StreamImpl *is,
+ int skip, char **pdp, int *copied )
{
int ret = 0;
*copied = 0;
@@ -527,7 +525,7 @@ int _getParseBlock( FsmRun *fsmRun, StreamImpl *is, int skip,
if ( buf->type == RunBufSourceType ) {
Stream *stream = (Stream*)buf->tree;
- int type = stream->in->funcs->getParseBlock( fsmRun, stream->in, skip, dest, length, pdp, copied );
+ int type = stream->in->funcs->getParseBlock( fsmRun, stream->in, skip, pdp, copied );
// if ( type == INPUT_EOD && !stream->in->eosSent ) {
// stream->in->eosSent = 1;
@@ -574,10 +572,8 @@ int _getParseBlock( FsmRun *fsmRun, StreamImpl *is, int skip,
avail -= skip;
skip = 0;
- int slen = avail <= length ? avail : length;
- memcpy( dest, src, slen ) ;
*pdp = src;
- *copied += slen;
+ *copied += avail;
ret = INPUT_DATA;
break;
}
@@ -589,22 +585,22 @@ int _getParseBlock( FsmRun *fsmRun, StreamImpl *is, int skip,
#if DEBUG
switch ( ret ) {
case INPUT_DATA:
- debug( REALM_INPUT, "get data: DATA copied: %d: %.*s\n", *copied, (int)*copied, dest );
+ debug( REALM_INPUT, "get parse block: DATA: %d\n", *copied );
break;
case INPUT_EOD:
- debug( REALM_INPUT, "get data: EOD\n" );
+ debug( REALM_INPUT, "get parse block: EOD\n" );
break;
case INPUT_EOF:
- debug( REALM_INPUT, "get data: EOF\n" );
+ debug( REALM_INPUT, "get parse block: EOF\n" );
break;
case INPUT_TREE:
- debug( REALM_INPUT, "get data: TREE\n" );
+ debug( REALM_INPUT, "get parse block: TREE\n" );
break;
case INPUT_IGNORE:
- debug( REALM_INPUT, "get data: IGNORE\n" );
+ debug( REALM_INPUT, "get parse block: IGNORE\n" );
break;
case INPUT_LANG_EL:
- debug( REALM_INPUT, "get data: LANG_EL\n" );
+ debug( REALM_INPUT, "get parse block: LANG_EL\n" );
break;
}
#endif
diff --git a/colm/input.h b/colm/input.h
index e2b2fce1..e37a9383 100644
--- a/colm/input.h
+++ b/colm/input.h
@@ -88,10 +88,11 @@ typedef struct _StreamImpl StreamImpl;
struct StreamFuncs
{
- int (*getParseBlock)( struct _FsmRun *fsmRun, StreamImpl *ss, int offset,
- char *dest, int length, char **pdp, int *copied );
+ int (*getParseBlock)( struct _FsmRun *fsmRun, StreamImpl *ss,
+ int skip, char **pdp, int *copied );
- int (*getData)( struct _FsmRun *fsmRun, StreamImpl *ss, int offset, char *dest, int length, int *copied );
+ int (*getData)( struct _FsmRun *fsmRun, StreamImpl *ss,
+ int offset, char *dest, int length, int *copied );
int (*consumeData)( StreamImpl *ss, int length );
int (*undoConsumeData)( struct _FsmRun *fsmRun, StreamImpl *ss, const char *data, int length );
@@ -171,8 +172,8 @@ void initConsFuncs();
int _getData( struct _FsmRun *fsmRun, StreamImpl *in, int offset,
char *dest, int length, int *copied );
-int _getParseBlock( struct _FsmRun *fsmRun, StreamImpl *in, int offset,
- char *dest, int length, char **pdp, int *copied );
+int _getParseBlock( struct _FsmRun *fsmRun, StreamImpl *in,
+ int skip, char **pdp, int *copied );
int _consumeData( StreamImpl *in, int length );
int _undoConsumeData( struct _FsmRun *fsmRun, StreamImpl *is, const char *data, int length );
diff --git a/colm/pdarun.c b/colm/pdarun.c
index ace7b019..b226f004 100644
--- a/colm/pdarun.c
+++ b/colm/pdarun.c
@@ -61,13 +61,11 @@ void initFsmRun( FsmRun *fsmRun, Program *prg )
{
fsmRun->tables = prg->rtd->fsmTables;
- fsmRun->scanBuf = newRunBuf();
- fsmRun->scanBuf->next = 0;
-
fsmRun->consumeBuf = 0;
- fsmRun->p = fsmRun->pe = fsmRun->scanBuf->data;
- fsmRun->peof = 0;
+ fsmRun->p = fsmRun->pe = 0;
+ fsmRun->have = 0;
+ fsmRun->peof = (char*)-1;
fsmRun->preRegion = -1;
}
@@ -141,8 +139,9 @@ Head *streamPull( Program *prg, FsmRun *fsmRun, StreamImpl *is, long length )
is->funcs->getData( fsmRun, is, 0, runBuf->data, length, &lenCopied );
is->funcs->consumeData( is, length );
- fsmRun->p = fsmRun->pe = fsmRun->scanBuf->data;
- //fsmRun->peof = 0;
+ fsmRun->p = fsmRun->pe = 0;
+ fsmRun->have = 0;
+ //fsmRun->peof = (char*)-1;
Head *tokdata = stringAllocPointer( prg, runBuf->data, length );
updatePosition( is, runBuf->data, length );
@@ -767,8 +766,9 @@ Head *peekMatch( Program *prg, FsmRun *fsmRun, StreamImpl *is )
int lenCopied = 0;
is->funcs->getData( fsmRun, is, 0, runBuf->data, length, &lenCopied );
- fsmRun->p = fsmRun->pe = fsmRun->scanBuf->data;
- //fsmRun->peof = 0;
+ fsmRun->p = fsmRun->pe = 0;
+ fsmRun->have = 0;
+ //fsmRun->peof = (char*)-1;
Head *head = stringAllocPointer( prg, runBuf->data, length );
@@ -802,8 +802,9 @@ Head *extractMatch( Program *prg, FsmRun *fsmRun, StreamImpl *is )
is->funcs->consumeData( is, length );
- fsmRun->p = fsmRun->pe = fsmRun->scanBuf->data;
- //fsmRun->peof = 0;
+ fsmRun->p = fsmRun->pe = 0;
+ fsmRun->have = 0;
+ //fsmRun->peof = (char*)-1;
Head *head = stringAllocPointer( prg, runBuf->data, length );
@@ -936,6 +937,10 @@ static void sendEof( Program *prg, Tree **sp, StreamImpl *is, FsmRun *fsmRun, Pd
void newToken( Program *prg, PdaRun *pdaRun, FsmRun *fsmRun )
{
+ fsmRun->p = fsmRun->pe = 0;
+ fsmRun->have = 0;
+ fsmRun->peof = (char*)-1;
+
/* Init the scanner vars. */
fsmRun->act = 0;
fsmRun->tokstart = 0;
@@ -999,7 +1004,10 @@ long scanToken( Program *prg, PdaRun *pdaRun, FsmRun *fsmRun, StreamImpl *is )
return SCAN_UNDO;
while ( true ) {
+ char *start = fsmRun->p;
fsmExecute( fsmRun, is );
+ if ( fsmRun->p != 0 )
+ fsmRun->have += fsmRun->p - start;
/* First check if scanning stopped because we have a token. */
if ( fsmRun->matchedToken > 0 ) {
@@ -1029,93 +1037,32 @@ long scanToken( Program *prg, PdaRun *pdaRun, FsmRun *fsmRun, StreamImpl *is )
return SCAN_ERROR;
}
- /* Got here because the state machine didn't match a token or
- * encounter an error. Must be because we got to the end of the buffer
- * data. */
+ /* Got here because the state machine didn't match a token or encounter
+ * an error. Must be because we got to the end of the buffer data. */
assert( fsmRun->p == fsmRun->pe );
- /* There may be space left in the current buffer. If not then we need
- * to make some. */
- long space = fsmRun->scanBuf->data + FSM_BUFSIZE - fsmRun->pe;
- if ( space == 0 ) {
- /* Create a new run buf. */
- RunBuf *newBuf = newRunBuf();
-
- /* If partway through a token then preserve the prefix. */
- long have = 0;
-
- if ( fsmRun->tokstart == 0 ) {
- /* No prefix. We filled the previous buffer. */
- fsmRun->scanBuf->length = FSM_BUFSIZE;
- }
- else {
- int i;
-
- debug( REALM_SCAN, "copying data over to new buffer\n" );
- assert( fsmRun->scanBuf->offset == 0 );
-
- if ( fsmRun->tokstart == fsmRun->scanBuf->data ) {
- /* A token is started and it is already at the beginning
- * of the current buffer. This means buffer is full and it
- * must be grown. Probably need to do this sooner. */
- fatal( "OUT OF BUFFER SPACE\n" );
- }
-
- /* There is data that needs to be shifted over. */
- have = fsmRun->pe - fsmRun->tokstart;
- memcpy( newBuf->data, fsmRun->tokstart, have );
-
- /* Compute the length of the previous buffer. */
- fsmRun->scanBuf->length = FSM_BUFSIZE - have;
-
- /* Compute tokstart and tokend. */
- long dist = fsmRun->tokstart - newBuf->data;
-
- fsmRun->tokend -= dist;
- fsmRun->tokstart = newBuf->data;
-
- /* Shift any markers. */
- for ( i = 0; i < MARK_SLOTS; i++ ) {
- if ( fsmRun->mark[i] != 0 )
- fsmRun->mark[i] -= dist;
- }
- }
-
- fsmRun->p = fsmRun->pe = newBuf->data + have;
- fsmRun->peof = 0;
-
- newBuf->next = fsmRun->scanBuf;
- fsmRun->scanBuf = newBuf;
- }
-
- /* We don't have any data. What is next in the input inputStream? */
- space = fsmRun->scanBuf->data + FSM_BUFSIZE - fsmRun->pe;
- assert( space > 0 );
-
- /* Get more data. */
- int have = fsmRun->tokstart != 0 ? fsmRun->p - fsmRun->tokstart : 0;
-
- debug( REALM_SCAN, "fetching data: have: %d space: %d\n", have, space );
-
char *pd = 0;
int len = 0;
- int type = is->funcs->getParseBlock( fsmRun, is, have, fsmRun->p, space, &pd, &len );
+ int type = is->funcs->getParseBlock( fsmRun, is, fsmRun->have, &pd, &len );
switch ( type ) {
case INPUT_DATA:
- fsmRun->pe = fsmRun->p + len;
+ fsmRun->p = pd;
+ fsmRun->pe = pd + len;
break;
case INPUT_EOS:
+ //fsmRun->p = fsmRun->pe = 0;
if ( fsmRun->tokstart != 0 )
fsmRun->peof = fsmRun->pe;
debug( REALM_SCAN, "EOS *******************\n" );
-// else {
-// return SCAN_EOS;
-// }
+ //else {
+ // return SCAN_EOS;
+ //}
break;
case INPUT_EOF:
+ //fsmRun->p = fsmRun->pe = 0;
if ( fsmRun->tokstart != 0 )
fsmRun->peof = fsmRun->pe;
else
diff --git a/colm/pdarun.h b/colm/pdarun.h
index 81d376c4..ac08889e 100644
--- a/colm/pdarun.h
+++ b/colm/pdarun.h
@@ -73,7 +73,6 @@ typedef struct _FsmRun
{
FsmTables *tables;
- RunBuf *scanBuf;
RunBuf *consumeBuf;
/* FsmRun State. */
@@ -81,6 +80,7 @@ typedef struct _FsmRun
long cs, ncs, act;
char *tokstart, *tokend;
char *p, *pe, *peof;
+ int have;
int returnResult;
char *mark[MARK_SLOTS];
long matchedToken;