summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Adrian Thurston <thurston@complang.org> 2013-02-10 12:27:08 -0500
committer Adrian Thurston <thurston@complang.org> 2013-02-10 12:27:08 -0500
commit d559e6a6af04863a5623f0f1647e43cdf200d058 (patch)
tree 57fa5af448b5a960163e01360e7fe880a24c8ede
parent a5e884858837dbe06f72ef3bf0a3e3bfa3ea917f (diff)
parent 61cdd8c309e7610927d4ad6f26b7585cf5631e4f (diff)
download colm-d559e6a6af04863a5623f0f1647e43cdf200d058.tar.gz
Merge remote-tracking branch 'origin/copy-on-consume'
-rw-r--r-- colm/Makefile.am 4
-rw-r--r-- colm/bytecode.c 56
-rw-r--r-- colm/bytecode.h 3
-rw-r--r-- colm/codegen.cc 1
-rw-r--r-- colm/compiler.cc 9
-rw-r--r-- colm/ctinput.cc 98
-rw-r--r-- colm/declare.cc 1
-rw-r--r-- colm/exports.cc 1
-rw-r--r-- colm/fsmcodegen.cc 42
-rw-r--r-- colm/fsmcodegen.h 7
-rw-r--r-- colm/fsmexec.cc 37
-rw-r--r-- colm/fsmrun.h 36
-rw-r--r-- colm/input.c 289
-rw-r--r-- colm/input.h 47
-rw-r--r-- colm/lmparse.kl 1
-rw-r--r-- colm/parsetree.cc 1
-rw-r--r-- colm/parsetree.h 1
-rw-r--r-- colm/pdabuild.cc 1
-rw-r--r-- colm/pdarun.c 397
-rw-r--r-- colm/pdarun.h 45
-rw-r--r-- colm/program.c 1
-rw-r--r-- colm/redbuild.h 1
-rw-r--r-- colm/redfsm.cc 1
-rw-r--r-- colm/resolve.cc 1
-rw-r--r-- colm/string.c 2
-rw-r--r-- colm/synthesis.cc 11
-rw-r--r-- colm/tree.c 17
-rw-r--r-- colm/tree.h 1
-rw-r--r-- test/include1.exp 19
-rw-r--r-- test/include1.in 14
-rw-r--r-- test/include1.lm 98
-rw-r--r-- test/include1a.in 2
-rw-r--r-- test/include1b.in 2
-rw-r--r-- test/include1c.in 2
-rw-r--r-- test/pull1.exp 1
-rw-r--r-- test/pull1.in 1
-rw-r--r-- test/pull1.lm 2
-rw-r--r-- test/pull2.exp 1
-rw-r--r-- test/pull2.in 1
-rw-r--r-- test/pull2.lm 3
40 files changed, 755 insertions, 503 deletions
diff --git a/colm/Makefile.am b/colm/Makefile.am
index 5d7f1101..226bafbf 100644
--- a/colm/Makefile.am
+++ b/colm/Makefile.am
@@ -28,7 +28,7 @@ RUNTIME_SRC = \
RUNTIME_HDR = \
bytecode.h config.h defs.h debug.h pool.h input.h \
- fsmrun.h pdarun.h map.h tree.h program.h colm.h
+ pdarun.h map.h tree.h program.h colm.h
lib_LIBRARIES = libcolmp.a libcolmd.a
@@ -49,7 +49,7 @@ colm_LDADD = libcolmp.a
colm_SOURCES = \
buffer.h bytecode.h colm.h debug.h dotgen.h fsmcodegen.h fsmgraph.h \
- fsmrun.h input.h keyops.h lmparse.h lmscan.h map.h parsedata.h \
+ input.h keyops.h lmparse.h lmscan.h map.h parsedata.h \
parsetree.h pcheck.h pdacodegen.h pdagraph.h pdarun.h pool.h redbuild.h \
redfsm.h rtvector.h tree.h version.h global.h colm.h \
\
diff --git a/colm/bytecode.c b/colm/bytecode.c
index 94e7d9b6..4d93c0f7 100644
--- a/colm/bytecode.c
+++ b/colm/bytecode.c
@@ -20,7 +20,6 @@
*/
#include <colm/pdarun.h>
-#include <colm/fsmrun.h>
#include <colm/tree.h>
#include <colm/bytecode.h>
#include <colm/pool.h>
@@ -193,7 +192,7 @@ case PcrStart:
if ( ! parser->pdaRun->parseError ) {
parser->pdaRun->stopTarget = stopId;
- long pcr = parseLoop( prg, sp, parser->pdaRun, parser->fsmRun, parser->input->in, entry );
+ long pcr = parseLoop( prg, sp, parser->pdaRun, parser->input->in, entry );
while ( pcr != PcrDone ) {
@@ -203,7 +202,7 @@ case PcrGeneration:
case PcrPreEof:
case PcrReverse:
- pcr = parseLoop( prg, sp, parser->pdaRun, parser->fsmRun, parser->input->in, entry );
+ pcr = parseLoop( prg, sp, parser->pdaRun, parser->input->in, entry );
}
}
@@ -223,7 +222,7 @@ case PcrStart:
parser->input->in->funcs->setEof( parser->input->in );
if ( ! parser->pdaRun->parseError ) {
- long pcr = parseLoop( prg, sp, parser->pdaRun, parser->fsmRun, parser->input->in, entry );
+ long pcr = parseLoop( prg, sp, parser->pdaRun, parser->input->in, entry );
while ( pcr != PcrDone ) {
@@ -233,7 +232,7 @@ case PcrGeneration:
case PcrPreEof:
case PcrReverse:
- pcr = parseLoop( prg, sp, parser->pdaRun, parser->fsmRun, parser->input->in, entry );
+ pcr = parseLoop( prg, sp, parser->pdaRun, parser->input->in, entry );
}
}
}
@@ -260,12 +259,11 @@ break; }
long undoParseFrag( Program *prg, Tree **sp, Parser *parser, long steps, long entry )
{
StreamImpl *is = parser->input->in;
- FsmRun *fsmRun = parser->fsmRun;
PdaRun *pdaRun = parser->pdaRun;
debug( REALM_PARSE, "undo parse frag, target steps: %ld, pdarun steps: %ld\n", steps, pdaRun->steps );
- resetToken( fsmRun );
+ resetToken( pdaRun );
switch ( entry ) {
case PcrStart:
@@ -278,7 +276,7 @@ case PcrStart:
pdaRun->triggerUndo = 1;
/* The parse loop will recognise the situation. */
- long pcr = parseLoop( prg, sp, pdaRun, fsmRun, is, entry );
+ long pcr = parseLoop( prg, sp, pdaRun, is, entry );
while ( pcr != PcrDone ) {
return pcr;
@@ -287,7 +285,7 @@ case PcrGeneration:
case PcrPreEof:
case PcrReverse:
- pcr = parseLoop( prg, sp, pdaRun, fsmRun, is, entry );
+ pcr = parseLoop( prg, sp, pdaRun, is, entry );
}
/* Reset environment. */
@@ -302,10 +300,10 @@ break; }
return PcrDone;
}
-Tree *streamPullBc( Program *prg, FsmRun *fsmRun, StreamImpl *in, Tree *length )
+Tree *streamPullBc( Program *prg, PdaRun *pdaRun, StreamImpl *in, Tree *length )
{
long len = ((Int*)length)->value;
- Head *tokdata = streamPull( prg, fsmRun, in, len );
+ Head *tokdata = streamPull( prg, pdaRun, in, len );
return constructString( prg, tokdata );
}
@@ -316,7 +314,7 @@ void undoPull( Program *prg, StreamImpl *in, Tree *str )
undoStreamPull( in, data, length );
}
-long streamPush( Program *prg, Tree **sp, FsmRun *fsmRun, StreamImpl *in, Tree *tree, int ignore )
+static long streamPush( Program *prg, Tree **sp, StreamImpl *in, Tree *tree, int ignore )
{
if ( tree->id == LEL_ID_STR ) {
/* This should become a compile error. If it's text, it's up to the
@@ -328,7 +326,7 @@ long streamPush( Program *prg, Tree **sp, FsmRun *fsmRun, StreamImpl *in, Tree *
initStrCollect( &collect );
printTreeCollect( prg, sp, &collect, tree, true );
- streamPushText( fsmRun, in, collect.data, collect.length );
+ streamPushText( in, collect.data, collect.length );
long length = collect.length;
strCollectDestroy( &collect );
@@ -336,12 +334,12 @@ long streamPush( Program *prg, Tree **sp, FsmRun *fsmRun, StreamImpl *in, Tree *
}
else if ( tree->id == LEL_ID_STREAM ) {
treeUpref( tree );
- streamPushStream( fsmRun, in, tree );
+ streamPushStream( in, tree );
return -1;
}
else {
treeUpref( tree );
- streamPushTree( fsmRun, in, tree, ignore );
+ streamPushTree( in, tree, ignore );
return -1;
}
}
@@ -1154,7 +1152,7 @@ again:
/* If there are captures (this is a translate block) then copy them into
* the local frame now. */
LangElInfo *lelInfo = prg->rtd->lelInfo;
- char **mark = exec->parser->fsmRun->mark;
+ char **mark = exec->parser->pdaRun->fsmRun->mark;
int i;
for ( i = 0; i < lelInfo[exec->parser->pdaRun->tokenId].numCaptureAttr; i++ ) {
@@ -2157,7 +2155,7 @@ again:
debug( REALM_BYTECODE, "IN_INPUT_APPEND_BKT\n" );
- undoStreamAppend( prg, sp, 0, ((Stream*)accumStream)->in, input, len );
+ undoStreamAppend( prg, sp, ((Stream*)accumStream)->in, input, len );
treeDownref( prg, sp, accumStream );
treeDownref( prg, sp, input );
break;
@@ -2458,7 +2456,8 @@ again:
Stream *accumStream = (Stream*)vm_pop();
Tree *len = vm_pop();
- Tree *string = streamPullBc( prg, exec->parser->fsmRun, accumStream->in, len );
+ PdaRun *pdaRun = exec->parser != 0 ? exec->parser->pdaRun : 0;
+ Tree *string = streamPullBc( prg, pdaRun, accumStream->in, len );
treeUpref( string );
vm_push( string );
@@ -2472,6 +2471,21 @@ again:
treeDownref( prg, sp, len );
break;
}
+
+ case IN_INPUT_PULL_WC: {
+ debug( REALM_BYTECODE, "IN_INPUT_PULL_WC\n" );
+
+ Stream *accumStream = (Stream*)vm_pop();
+ Tree *len = vm_pop();
+ PdaRun *pdaRun = exec->parser != 0 ? exec->parser->pdaRun : 0;
+ Tree *string = streamPullBc( prg, pdaRun, accumStream->in, len );
+ treeUpref( string );
+ vm_push( string );
+
+ treeDownref( prg, sp, (Tree*)accumStream );
+ treeDownref( prg, sp, len );
+ break;
+ }
case IN_INPUT_PULL_BKT: {
Tree *string;
read_tree( string );
@@ -2490,7 +2504,7 @@ again:
Stream *input = (Stream*)vm_pop();
Tree *tree = vm_pop();
- long len = streamPush( prg, sp, 0, input->in, tree, false );
+ long len = streamPush( prg, sp, input->in, tree, false );
vm_push( 0 );
/* Single unit. */
@@ -2507,7 +2521,7 @@ again:
Stream *input = (Stream*)vm_pop();
Tree *tree = vm_pop();
- long len = streamPush( prg, sp, 0, input->in, tree, true );
+ long len = streamPush( prg, sp, input->in, tree, true );
vm_push( 0 );
/* Single unit. */
@@ -2527,7 +2541,7 @@ again:
debug( REALM_BYTECODE, "IN_INPUT_PUSH_BKT\n" );
- undoStreamPush( prg, sp, 0, input->in, len );
+ undoStreamPush( prg, sp, input->in, len );
treeDownref( prg, sp, (Tree*)input );
break;
}
diff --git a/colm/bytecode.h b/colm/bytecode.h
index 26482f2d..c84ccb6b 100644
--- a/colm/bytecode.h
+++ b/colm/bytecode.h
@@ -223,6 +223,7 @@ typedef unsigned char uchar;
#define IN_CONSTRUCT_TERM 0x9d
#define IN_INPUT_PULL_WV 0x9e
+#define IN_INPUT_PULL_WC 0xe1
#define IN_INPUT_PULL_BKT 0x9f
#define IN_PARSE_SAVE_STEPS 0xa0
@@ -458,6 +459,7 @@ typedef struct _Execution
long stringLength( Head *str );
const char *stringData( Head *str );
Head *stringAllocFull( struct ColmProgram *prg, const char *data, long length );
+Head *initStrSpace( long length );
Head *stringCopy( struct ColmProgram *prg, Head *head );
void stringFree( struct ColmProgram *prg, Head *head );
void stringShorten( Head *tokdata, long newlen );
@@ -495,7 +497,6 @@ void allocGlobal( struct ColmProgram *prg );
Tree **executeCode( struct ColmProgram *prg, Execution *exec, Tree **sp, Code *instr );
void rcodeDownref( struct ColmProgram *prg, Tree **sp, Code *instr );
Code *popReverseCode( RtCodeVect *allRev );
-void sendBackBuffered( FsmRun *fsmRun, StreamImpl *inputStream );
#ifdef __cplusplus
}
diff --git a/colm/codegen.cc b/colm/codegen.cc
index f55257e7..653d8d51 100644
--- a/colm/codegen.cc
+++ b/colm/codegen.cc
@@ -23,7 +23,6 @@
#include "fsmcodegen.h"
#include "redfsm.h"
#include "bstmap.h"
-#include "fsmrun.h"
#include "debug.h"
#include <sstream>
#include <string>
diff --git a/colm/compiler.cc b/colm/compiler.cc
index 8043b45f..25d95397 100644
--- a/colm/compiler.cc
+++ b/colm/compiler.cc
@@ -34,7 +34,6 @@
#include "redbuild.h"
#include "pdacodegen.h"
#include "fsmcodegen.h"
-#include "fsmrun.h"
#include "pdarun.h"
#include "colm.h"
#include "pool.h"
@@ -1240,12 +1239,11 @@ PdaRun *Compiler::parsePattern( Program *prg, Tree **sp, const InputLoc &loc,
int parserId, StreamImpl *sourceStream )
{
StreamImpl *in = new StreamImpl;
- FsmRun *fsmRun = new FsmRun;
PdaRun *pdaRun = new PdaRun;
+ pdaRun->fsmRun = new FsmRun;
initStreamImpl( in );
- initPdaRun( pdaRun, prg, pdaTables, fsmRun, parserId, 0, false, 0 );
- initFsmRun( fsmRun, prg );
+ initPdaRun( prg, pdaRun, pdaRun->fsmRun, pdaTables, parserId, 0, false, 0 );
Stream *res = streamAllocate( prg );
res->id = LEL_ID_STREAM;
@@ -1253,8 +1251,7 @@ PdaRun *Compiler::parsePattern( Program *prg, Tree **sp, const InputLoc &loc,
in->funcs->appendStream( in, (Tree*)res );
in->funcs->setEof( in );
- newToken( prg, pdaRun, fsmRun );
- long pcr = parseLoop( prg, sp, pdaRun, fsmRun, in, PcrStart );
+ long pcr = parseLoop( prg, sp, pdaRun, in, PcrStart );
assert( pcr == PcrDone );
if ( pdaRun->parseError ) {
cout << "PARSE ERROR " << loc.line << ":" << loc.col;
diff --git a/colm/ctinput.cc b/colm/ctinput.cc
index bde9f424..285e1faf 100644
--- a/colm/ctinput.cc
+++ b/colm/ctinput.cc
@@ -22,7 +22,6 @@
#include "parsedata.h"
#include "parsetree.h"
#include "input.h"
-#include "fsmrun.h"
#include "debug.h"
#include "pool.h"
@@ -60,15 +59,13 @@ LangEl *inputStreamPatternGetLangEl( StreamImpl *ss, long *bindId, char **data,
return klangEl;
}
-int inputStreamPatternGetData( FsmRun *fsmRun, StreamImpl *ss, int skip, char *dest, int length, int *copied )
+int inputStreamPatternGetParseBlock( StreamImpl *ss, int skip, char **pdp, int *copied )
{
*copied = 0;
PatternItem *buf = ss->patItem;
int offset = ss->offset;
- attachStream( fsmRun, ss );
-
while ( true ) {
if ( buf == 0 )
return INPUT_EOD;
@@ -82,7 +79,7 @@ int inputStreamPatternGetData( FsmRun *fsmRun, StreamImpl *ss, int skip, char *d
if ( avail > 0 ) {
/* The source data from the current buffer. */
char *src = &buf->data[offset];
- int slen = avail <= length ? avail : length;
+ int slen = avail;
/* Need to skip? */
if ( skip > 0 && slen <= skip ) {
@@ -96,7 +93,7 @@ int inputStreamPatternGetData( FsmRun *fsmRun, StreamImpl *ss, int skip, char *d
slen -= skip;
skip = 0;
- memcpy( dest, src, slen ) ;
+ *pdp = src;
*copied += slen;
break;
}
@@ -109,6 +106,43 @@ int inputStreamPatternGetData( FsmRun *fsmRun, StreamImpl *ss, int skip, char *d
return INPUT_DATA;
}
+int inputStreamPatternGetData( StreamImpl *ss, char *dest, int length )
+{
+ int copied = 0;
+
+ PatternItem *buf = ss->patItem;
+ int offset = ss->offset;
+
+ while ( true ) {
+ if ( buf == 0 )
+ break;
+
+ if ( buf->type == PatternItem::FactorType )
+ break;
+
+ assert ( buf->type == PatternItem::InputText );
+ int avail = buf->data.length() - offset;
+
+ if ( avail > 0 ) {
+ /* The source data from the current buffer. */
+ char *src = &buf->data[offset];
+ int slen = avail <= length ? avail : length;
+
+ memcpy( dest+copied, src, slen ) ;
+ copied += slen;
+ length -= slen;
+ }
+
+ if ( length == 0 )
+ break;
+
+ buf = buf->next;
+ offset = 0;
+ }
+
+ return copied;
+}
+
void inputStreamPatternBackup( StreamImpl *ss )
{
if ( ss->patItem == 0 )
@@ -178,7 +212,7 @@ int inputStreamPatternConsumeData( StreamImpl *ss, int length )
return consumed;
}
-int inputStreamPatternUndoConsumeData( FsmRun *fsmRun, StreamImpl *ss, const char *data, int length )
+int inputStreamPatternUndoConsumeData( StreamImpl *ss, const char *data, int length )
{
ss->offset -= length;
return length;
@@ -189,6 +223,7 @@ extern "C" void initPatFuncs()
memset( &patternFuncs, 0, sizeof(StreamFuncs) );
patternFuncs.getData = &inputStreamPatternGetData;
+ patternFuncs.getParseBlock = &inputStreamPatternGetParseBlock;
patternFuncs.consumeData = &inputStreamPatternConsumeData;
patternFuncs.undoConsumeData = &inputStreamPatternUndoConsumeData;
@@ -237,15 +272,14 @@ LangEl *inputStreamConsGetLangEl( StreamImpl *ss, long *bindId, char **data, lon
return klangEl;
}
-int inputStreamConsGetData( FsmRun *fsmRun, StreamImpl *ss, int skip, char *dest, int length, int *copied )
+int inputStreamConsGetParseBlock( StreamImpl *ss,
+ int skip, char **pdp, int *copied )
{
*copied = 0;
ConsItem *buf = ss->consItem;
int offset = ss->offset;
- attachStream( fsmRun, ss );
-
while ( true ) {
if ( buf == 0 )
return INPUT_EOD;
@@ -259,7 +293,7 @@ int inputStreamConsGetData( FsmRun *fsmRun, StreamImpl *ss, int skip, char *dest
if ( avail > 0 ) {
/* The source data from the current buffer. */
char *src = &buf->data[offset];
- int slen = avail <= length ? avail : length;
+ int slen = avail;
/* Need to skip? */
if ( skip > 0 && slen <= skip ) {
@@ -273,7 +307,7 @@ int inputStreamConsGetData( FsmRun *fsmRun, StreamImpl *ss, int skip, char *dest
slen -= skip;
skip = 0;
- memcpy( dest, src, slen ) ;
+ *pdp = src;
*copied += slen;
break;
}
@@ -286,6 +320,43 @@ int inputStreamConsGetData( FsmRun *fsmRun, StreamImpl *ss, int skip, char *dest
return INPUT_DATA;
}
+int inputStreamConsGetData( StreamImpl *ss, char *dest, int length )
+{
+ int copied = 0;
+
+ ConsItem *buf = ss->consItem;
+ int offset = ss->offset;
+
+ while ( true ) {
+ if ( buf == 0 )
+ break;
+
+ if ( buf->type == ConsItem::ExprType || buf->type == ConsItem::FactorType )
+ break;
+
+ assert ( buf->type == ConsItem::InputText );
+ int avail = buf->data.length() - offset;
+
+ if ( avail > 0 ) {
+ /* The source data from the current buffer. */
+ char *src = &buf->data[offset];
+ int slen = avail <= length ? avail : length;
+
+ memcpy( dest+copied, src, slen ) ;
+ copied += slen;
+ length -= slen;
+ }
+
+ if ( length == 0 )
+ break;
+
+ buf = buf->next;
+ offset = 0;
+ }
+
+ return copied;
+}
+
void inputStreamConsBackup( StreamImpl *ss )
{
if ( ss->consItem == 0 )
@@ -357,7 +428,7 @@ int inputStreamConsConsumeData( StreamImpl *ss, int length )
return consumed;
}
-int inputStreamConsUndoConsumeData( FsmRun *fsmRun, StreamImpl *ss, const char *data, int length )
+int inputStreamConsUndoConsumeData( StreamImpl *ss, const char *data, int length )
{
ss->offset -= length;
return length;
@@ -368,6 +439,7 @@ extern "C" void initConsFuncs()
memset( &replFuncs, 0, sizeof(StreamFuncs) );
replFuncs.getData = &inputStreamConsGetData;
+ replFuncs.getParseBlock = &inputStreamConsGetParseBlock;
replFuncs.consumeData = &inputStreamConsConsumeData;
replFuncs.undoConsumeData = &inputStreamConsUndoConsumeData;
diff --git a/colm/declare.cc b/colm/declare.cc
index 6a285277..c2cdec51 100644
--- a/colm/declare.cc
+++ b/colm/declare.cc
@@ -21,7 +21,6 @@
#include "bytecode.h"
#include "parsedata.h"
-#include "fsmrun.h"
#include <iostream>
#include <assert.h>
diff --git a/colm/exports.cc b/colm/exports.cc
index df94abdf..3556e249 100644
--- a/colm/exports.cc
+++ b/colm/exports.cc
@@ -23,7 +23,6 @@
#include "fsmcodegen.h"
#include "redfsm.h"
#include "bstmap.h"
-#include "fsmrun.h"
#include "debug.h"
#include <sstream>
#include <string>
diff --git a/colm/fsmcodegen.cc b/colm/fsmcodegen.cc
index 212de648..ce357e13 100644
--- a/colm/fsmcodegen.cc
+++ b/colm/fsmcodegen.cc
@@ -23,7 +23,6 @@
#include "fsmcodegen.h"
#include "redfsm.h"
#include "bstmap.h"
-#include "fsmrun.h"
#include <sstream>
#include <string>
#include <assert.h>
@@ -48,7 +47,8 @@ FsmCodeGen::FsmCodeGen( const char *sourceFileName, const char *fsmName, ostream
codeGenErrCount(0),
dataPrefix(true),
writeFirstFinal(true),
- writeErr(true)
+ writeErr(true),
+ skipTokenLabelNeeded(false)
{
}
@@ -146,7 +146,7 @@ void FsmCodeGen::SET_ACT( ostream &ret, InlineItem *item )
void FsmCodeGen::SET_TOKEND( ostream &ret, InlineItem *item )
{
/* The tokend action sets tokend. */
- ret << TOKEND() << " = " << P() << "+1;";
+ ret << "{ " << TOKEND() << " = " << TOKLEN() << " + ( " << P() << " - " << BLOCK_START() << " ) + 1; }";
}
void FsmCodeGen::INIT_TOKSTART( ostream &ret, InlineItem *item )
{
@@ -172,14 +172,14 @@ void FsmCodeGen::LM_SWITCH( ostream &ret, InlineItem *item,
int targState, int inFinish )
{
ret <<
- " " << P() << " = " << TOKEND() << ";\n"
+ " " << TOKLEN() << " = " << TOKEND() << ";\n"
" switch( " << ACT() << " ) {\n";
/* If the switch handles error then we also forced the error state. It
* will exist. */
if ( item->tokenRegion->lmSwitchHandlesError ) {
- ret << " case 0: " << P() << " = " << TOKSTART() <<
- "; goto st" << redFsm->errState->id << ";\n";
+ ret << " case 0: " //<< P() << " = " << TOKSTART() << ";" <<
+ "goto st" << redFsm->errState->id << ";\n";
}
for ( TokenDefListReg::Iter lmi = item->tokenRegion->tokenDefList; lmi.lte(); lmi++ ) {
@@ -194,7 +194,9 @@ void FsmCodeGen::LM_SWITCH( ostream &ret, InlineItem *item,
ret <<
" }\n"
"\t"
- " return;\n";
+ " goto skip_toklen;\n";
+
+ skipTokenLabelNeeded = true;
}
void FsmCodeGen::LM_ON_LAST( ostream &ret, InlineItem *item )
@@ -203,7 +205,7 @@ void FsmCodeGen::LM_ON_LAST( ostream &ret, InlineItem *item )
ret << " " << P() << " += 1;\n";
EMIT_TOKEN( ret, item->longestMatchPart->tdLangEl );
- ret << " return;\n";
+ ret << " goto out;\n";
}
void FsmCodeGen::LM_ON_NEXT( ostream &ret, InlineItem *item )
@@ -211,16 +213,18 @@ void FsmCodeGen::LM_ON_NEXT( ostream &ret, InlineItem *item )
assert( item->longestMatchPart->tdLangEl != 0 );
EMIT_TOKEN( ret, item->longestMatchPart->tdLangEl );
- ret << " return;\n";
+ ret << " goto out;\n";
}
void FsmCodeGen::LM_ON_LAG_BEHIND( ostream &ret, InlineItem *item )
{
assert( item->longestMatchPart->tdLangEl != 0 );
- ret << " " << P() << " = " << TOKEND() << ";\n";
+ ret << " " << TOKLEN() << " = " << TOKEND() << ";\n";
EMIT_TOKEN( ret, item->longestMatchPart->tdLangEl );
- ret << " return;\n";
+ ret << " goto skip_toklen;\n";
+
+ skipTokenLabelNeeded = true;
}
@@ -731,7 +735,7 @@ std::ostream &FsmCodeGen::EXIT_STATES()
for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) {
out << " case " << st->id << ": out" << st->id << ": ";
if ( st->eofTrans != 0 ) {
- out << "if ( " << PE() << " == " << PEOF() << " ) {";
+ out << "if ( " << DATA_EOF() << " ) {";
TRANS_GOTO( st->eofTrans, 0 );
out << "\n";
out << "}";
@@ -848,6 +852,7 @@ void FsmCodeGen::writeExec()
out <<
"void fsmExecute( FsmRun *fsmRun, StreamImpl *inputStream )\n"
"{\n"
+ " " << BLOCK_START() << " = fsmRun->p;\n"
"/*_resume:*/\n";
if ( redFsm->errState != 0 ) {
@@ -872,7 +877,17 @@ void FsmCodeGen::writeExec()
" }\n";
out <<
- " out: {}\n"
+ "out:\n"
+ " if ( " << P() << " != 0 )\n"
+ " " << TOKLEN() << " += " << P() << " - " << BLOCK_START() << ";\n";
+
+ if ( skipTokenLabelNeeded ) {
+ out <<
+ "skip_toklen:\n"
+ " {}\n";
+ }
+
+ out <<
"}\n"
"\n";
}
@@ -881,7 +896,6 @@ void FsmCodeGen::writeIncludes()
{
out <<
"#include <colm/pdarun.h>\n"
- "#include <colm/fsmrun.h>\n"
"#include <colm/debug.h>\n"
"#include <colm/bytecode.h>\n"
"#include <stdio.h>\n"
diff --git a/colm/fsmcodegen.h b/colm/fsmcodegen.h
index 1b004f5e..29fcb98d 100644
--- a/colm/fsmcodegen.h
+++ b/colm/fsmcodegen.h
@@ -28,7 +28,6 @@
#include "keyops.h"
#include "parsedata.h"
#include "redfsm.h"
-#include "fsmrun.h"
using std::string;
using std::ostream;
@@ -76,6 +75,7 @@ public:
RedFsm *redFsm, FsmTables *fsmTables );
protected:
+
string FSM_NAME();
string START_STATE_ID();
ostream &ACTIONS_ARRAY();
@@ -99,12 +99,14 @@ protected:
string P() { return ACCESS() + "p"; }
string PE() { return ACCESS() + "pe"; }
- string PEOF() { return ACCESS() + "peof"; }
+ string DATA_EOF() { return ACCESS() + "eof"; }
string CS();
string TOP() { return ACCESS() + "top"; }
string TOKSTART() { return ACCESS() + "tokstart"; }
string TOKEND() { return ACCESS() + "tokend"; }
+ string BLOCK_START() { return ACCESS() + "start"; }
+ string TOKLEN() { return ACCESS() + "toklen"; }
string ACT() { return ACCESS() + "act"; }
string MATCHED_TOKEN() { return ACCESS() + "matchedToken"; }
@@ -164,6 +166,7 @@ public:
bool dataPrefix;
bool writeFirstFinal;
bool writeErr;
+ bool skipTokenLabelNeeded;
std::ostream &TO_STATE_ACTION_SWITCH();
std::ostream &FROM_STATE_ACTION_SWITCH();
diff --git a/colm/fsmexec.cc b/colm/fsmexec.cc
index 9b945374..905f1d80 100644
--- a/colm/fsmexec.cc
+++ b/colm/fsmexec.cc
@@ -24,7 +24,6 @@
#include "config.h"
#include "defs.h"
-#include "fsmrun.h"
#include "redfsm.h"
#include "parsedata.h"
#include "parsetree.h"
@@ -42,7 +41,7 @@ void execAction( FsmRun *fsmRun, GenAction *genAction )
fsmRun->act = item->longestMatchPart->longestMatchId;
break;
case InlineItem::LmSetTokEnd:
- fsmRun->tokend = fsmRun->p + 1;
+ fsmRun->tokend = fsmRun->toklen + ( fsmRun->p - fsmRun->start ) + 1;
break;
case InlineItem::LmInitTokStart:
assert(false);
@@ -56,9 +55,8 @@ void execAction( FsmRun *fsmRun, GenAction *genAction )
case InlineItem::LmSwitch:
/* If the switch handles error then we also forced the error state. It
* will exist. */
- fsmRun->p = fsmRun->tokend;
+ fsmRun->toklen = fsmRun->tokend;
if ( item->tokenRegion->lmSwitchHandlesError && fsmRun->act == 0 ) {
- fsmRun->p = fsmRun->tokstart;
fsmRun->cs = fsmRun->tables->errorState;
}
else {
@@ -70,6 +68,7 @@ void execAction( FsmRun *fsmRun, GenAction *genAction )
}
}
fsmRun->returnResult = true;
+ fsmRun->skipToklen = true;
break;
case InlineItem::LmOnLast:
fsmRun->p += 1;
@@ -81,9 +80,10 @@ void execAction( FsmRun *fsmRun, GenAction *genAction )
fsmRun->returnResult = true;
break;
case InlineItem::LmOnLagBehind:
- fsmRun->p = fsmRun->tokend;
+ fsmRun->toklen = fsmRun->tokend;
fsmRun->matchedToken = item->longestMatchPart->tdLangEl->id;
fsmRun->returnResult = true;
+ fsmRun->skipToklen = true;
break;
}
}
@@ -99,6 +99,8 @@ void fsmExecute( FsmRun *fsmRun, StreamImpl *inputStream )
const long *_acts;
unsigned int _nacts;
const char *_keys;
+
+ fsmRun->start = fsmRun->p;
/* Init the token match to nothing (the sentinal). */
fsmRun->matchedToken = 0;
@@ -171,12 +173,16 @@ _match:
goto _again;
fsmRun->returnResult = false;
+ fsmRun->skipToklen = false;
_acts = fsmRun->tables->actions + fsmRun->tables->transActionsWI[_trans];
_nacts = (unsigned int) *_acts++;
while ( _nacts-- > 0 )
execAction( fsmRun, fsmRun->tables->actionSwitch[*_acts++] );
- if ( fsmRun->returnResult )
- return;
+ if ( fsmRun->returnResult ) {
+ if ( fsmRun->skipToklen )
+ goto skip_toklen;
+ goto final;
+ }
_again:
_acts = fsmRun->tables->actions + fsmRun->tables->toStateActions[fsmRun->cs];
@@ -190,8 +196,9 @@ _again:
if ( ++fsmRun->p != fsmRun->pe )
goto _loop_head;
out:
- if ( fsmRun->p == fsmRun->peof ) {
+ if ( fsmRun->eof ) {
fsmRun->returnResult = false;
+ fsmRun->skipToklen = false;
_acts = fsmRun->tables->actions + fsmRun->tables->eofActions[fsmRun->cs];
_nacts = (unsigned int) *_acts++;
@@ -200,9 +207,17 @@ out:
while ( _nacts-- > 0 )
execAction( fsmRun, fsmRun->tables->actionSwitch[*_acts++] );
- if ( fsmRun->returnResult )
- return;
+ if ( fsmRun->returnResult ) {
+ if ( fsmRun->skipToklen )
+ goto skip_toklen;
+ goto final;
+ }
}
-}
+final:
+ if ( fsmRun->p != 0 )
+ fsmRun->toklen += fsmRun->p - fsmRun->start;
+skip_toklen:
+ {}
+}
diff --git a/colm/fsmrun.h b/colm/fsmrun.h
deleted file mode 100644
index 821b3ccf..00000000
--- a/colm/fsmrun.h
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- * Copyright 2007-2012 Adrian Thurston <thurston@complang.org>
- */
-
-/* This file is part of Colm.
- *
- * Colm is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * Colm is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Colm; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#ifndef _FSMRUN2_H
-#define _FSMRUN2_H
-
-#include <colm/input.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif
diff --git a/colm/input.c b/colm/input.c
index ba144d94..b73eacbd 100644
--- a/colm/input.c
+++ b/colm/input.c
@@ -20,7 +20,6 @@
*/
#include <colm/input.h>
-#include <colm/fsmrun.h>
#include <colm/pdarun.h>
#include <colm/debug.h>
@@ -135,35 +134,6 @@ static void sourceStreamPrepend( StreamImpl *ss, RunBuf *runBuf )
}
}
-void initStreamFuncs()
-{
- memset( &streamFuncs, 0, sizeof(struct StreamFuncs) );
- streamFuncs.getData = &_getData;
- streamFuncs.consumeData = &_consumeData;
- streamFuncs.undoConsumeData = &_undoConsumeData;
- streamFuncs.consumeTree = &_consumeTree;
- streamFuncs.undoConsumeTree = &_undoConsumeTree;
- streamFuncs.consumeLangEl = &_consumeLangEl;
- streamFuncs.undoConsumeLangEl = &_undoConsumeLangEl;
-
- streamFuncs.setEof = &_setEof;
- streamFuncs.unsetEof = &_unsetEof;
-
- streamFuncs.prependData = &_prependData;
- streamFuncs.prependTree = &_prependTree;
- streamFuncs.prependStream = &_prependStream;
- streamFuncs.undoPrependData = &_undoPrependData;
- streamFuncs.undoPrependTree = &_undoPrependTree;
-
- streamFuncs.appendData = &_appendData;
- streamFuncs.appendTree = &_appendTree;
- streamFuncs.appendStream = &_appendStream;
- streamFuncs.undoAppendData = &_undoAppendData;
- streamFuncs.undoAppendTree = &_undoAppendTree;
- streamFuncs.undoAppendStream = &_undoAppendStream;
-}
-
-
void initInputFuncs()
{
initStreamFuncs();
@@ -177,7 +147,7 @@ void initInputFuncs()
* Base run-time input streams.
*/
-int fdGetData( FsmRun *fsmRun, StreamImpl *ss, int skip, char *dest, int length, int *copied )
+int fdGetParseBlock( StreamImpl *ss, int skip, char **pdp, int *copied )
{
int ret = 0;
*copied = 0;
@@ -196,8 +166,8 @@ int fdGetData( FsmRun *fsmRun, StreamImpl *ss, int skip, char *dest, int length,
}
runBuf->length = received;
- int slen = received < length ? received : length;
- memcpy( dest, runBuf->data, slen );
+ int slen = received;
+ *pdp = runBuf->data;
*copied = slen;
ret = INPUT_DATA;
break;
@@ -222,8 +192,8 @@ int fdGetData( FsmRun *fsmRun, StreamImpl *ss, int skip, char *dest, int length,
avail -= skip;
skip = 0;
- int slen = avail < length ? avail : length;
- memcpy( dest, src, slen ) ;
+ int slen = avail;
+ *pdp = src;
*copied += slen;
ret = INPUT_DATA;
break;
@@ -233,11 +203,52 @@ int fdGetData( FsmRun *fsmRun, StreamImpl *ss, int skip, char *dest, int length,
buf = buf->next;
}
- attachStream( fsmRun, ss );
-
return ret;
}
+int fdGetData( StreamImpl *ss, char *dest, int length )
+{
+ int copied = 0;
+
+ /* Move over skip bytes. */
+ RunBuf *buf = ss->queue;
+ while ( true ) {
+ if ( buf == 0 ) {
+ /* Got through the in-mem buffers without copying anything. */
+ RunBuf *runBuf = newRunBuf();
+ sourceStreamAppend( ss, runBuf );
+ int received = ss->funcs->getDataSource( ss, runBuf->data, FSM_BUFSIZE );
+ runBuf->length = received;
+ if ( received == 0 )
+ break;
+
+ buf = runBuf;
+ }
+
+ int avail = buf->length - buf->offset;
+
+ /* Anything available in the current buffer. */
+ if ( avail > 0 ) {
+ /* The source data from the current buffer. */
+ char *src = &buf->data[buf->offset];
+
+ int slen = avail < length ? avail : length;
+ memcpy( dest+copied, src, slen ) ;
+ copied += slen;
+ length -= slen;
+ }
+
+ if ( length == 0 ) {
+ debug( REALM_INPUT, "exiting get data\n", length );
+ break;
+ }
+
+ buf = buf->next;
+ }
+
+ return copied;
+}
+
int fdConsumeData( StreamImpl *ss, int length )
{
debug( REALM_INPUT, "source consuming %ld bytes\n", length );
@@ -278,7 +289,7 @@ int fdConsumeData( StreamImpl *ss, int length )
return consumed;
}
-int fdUndoConsumeData( FsmRun *fsmRun, StreamImpl *ss, const char *data, int length )
+int fdUndoConsumeData( StreamImpl *ss, const char *data, int length )
{
debug( REALM_INPUT, "undoing consume of %ld bytes\n", length );
@@ -305,6 +316,7 @@ void initFileFuncs()
{
memset( &fileFuncs, 0, sizeof(struct StreamFuncs) );
fileFuncs.getData = &fdGetData;
+ fileFuncs.getParseBlock = &fdGetParseBlock;
fileFuncs.consumeData = &fdConsumeData;
fileFuncs.undoConsumeData = &fdUndoConsumeData;
fileFuncs.getDataSource = &fileGetDataSource;
@@ -330,6 +342,7 @@ void initFdFuncs()
{
memset( &fdFuncs, 0, sizeof(struct StreamFuncs) );
fdFuncs.getData = &fdGetData;
+ fdFuncs.getParseBlock = &fdGetParseBlock;
fdFuncs.consumeData = &fdConsumeData;
fdFuncs.undoConsumeData = &fdUndoConsumeData;
fdFuncs.getDataSource = &fdGetDataSource;
@@ -430,13 +443,13 @@ static int isSourceStream( StreamImpl *is )
return false;
}
-void _setEof( StreamImpl *is )
+static void _setEof( StreamImpl *is )
{
debug( REALM_INPUT, "setting EOF in input stream\n" );
is->eof = true;
}
-void _unsetEof( StreamImpl *is )
+static void _unsetEof( StreamImpl *is )
{
if ( isSourceStream( is ) ) {
Stream *stream = (Stream*)is->queue->tree;
@@ -447,13 +460,11 @@ void _unsetEof( StreamImpl *is )
}
}
-int _getData( FsmRun *fsmRun, StreamImpl *is, int skip, char *dest, int length, int *copied )
+static int _getParseBlock( StreamImpl *is, int skip, char **pdp, int *copied )
{
int ret = 0;
*copied = 0;
- attachStream( fsmRun, is );
-
/* Move over skip bytes. */
RunBuf *buf = is->queue;
while ( true ) {
@@ -465,11 +476,18 @@ int _getData( FsmRun *fsmRun, StreamImpl *is, int skip, char *dest, int length,
if ( buf->type == RunBufSourceType ) {
Stream *stream = (Stream*)buf->tree;
- int type = stream->in->funcs->getData( fsmRun, stream->in, skip, dest, length, copied );
-
- if ( type == INPUT_EOD && is->eof ) {
- ret = INPUT_EOF;
- break;
+ int type = stream->in->funcs->getParseBlock( stream->in, skip, pdp, copied );
+
+// if ( type == INPUT_EOD && !stream->in->eosSent ) {
+// stream->in->eosSent = 1;
+// ret = INPUT_EOS;
+// continue;
+// }
+
+ if ( type == INPUT_EOD || type == INPUT_EOF ) {
+ debug( REALM_INPUT, "skipping over input\n" );
+ buf = buf->next;
+ continue;
}
ret = type;
@@ -505,9 +523,8 @@ int _getData( FsmRun *fsmRun, StreamImpl *is, int skip, char *dest, int length,
avail -= skip;
skip = 0;
- int slen = avail <= length ? avail : length;
- memcpy( dest, src, slen ) ;
- *copied += slen;
+ *pdp = src;
+ *copied += avail;
ret = INPUT_DATA;
break;
}
@@ -519,22 +536,22 @@ int _getData( FsmRun *fsmRun, StreamImpl *is, int skip, char *dest, int length,
#if DEBUG
switch ( ret ) {
case INPUT_DATA:
- debug( REALM_INPUT, "get data: DATA copied: %d: %.*s\n", *copied, (int)*copied, dest );
+ debug( REALM_INPUT, "get parse block: DATA: %d\n", *copied );
break;
case INPUT_EOD:
- debug( REALM_INPUT, "get data: EOD\n" );
+ debug( REALM_INPUT, "get parse block: EOD\n" );
break;
case INPUT_EOF:
- debug( REALM_INPUT, "get data: EOF\n" );
+ debug( REALM_INPUT, "get parse block: EOF\n" );
break;
case INPUT_TREE:
- debug( REALM_INPUT, "get data: TREE\n" );
+ debug( REALM_INPUT, "get parse block: TREE\n" );
break;
case INPUT_IGNORE:
- debug( REALM_INPUT, "get data: IGNORE\n" );
+ debug( REALM_INPUT, "get parse block: IGNORE\n" );
break;
case INPUT_LANG_EL:
- debug( REALM_INPUT, "get data: LANG_EL\n" );
+ debug( REALM_INPUT, "get parse block: LANG_EL\n" );
break;
}
#endif
@@ -542,7 +559,63 @@ int _getData( FsmRun *fsmRun, StreamImpl *is, int skip, char *dest, int length,
return ret;
}
-int _consumeData( StreamImpl *is, int length )
+static int _getData( StreamImpl *is, char *dest, int length )
+{
+ int copied = 0;
+ /* Copy up to length bytes from the queued buffers into dest, stopping early at a
+ * token/ignore buffer or the end of the queue. No offsets are advanced in this function. */
+ RunBuf *buf = is->queue;
+ while ( true ) {
+ if ( buf == 0 ) {
+ /* Ran off the end of the queue; return whatever was copied so far. */
+ break;
+ }
+
+ if ( buf->type == RunBufSourceType ) {
+ Stream *stream = (Stream*)buf->tree;
+ int glen = stream->in->funcs->getData( stream->in, dest+copied, length );
+
+ if ( glen == 0 ) {
+ debug( REALM_INPUT, "skipping over input\n" );
+ buf = buf->next;
+ continue;
+ }
+
+ copied += glen;
+ length -= glen;
+ }
+ else if ( buf->type == RunBufTokenType )
+ break;
+ else if ( buf->type == RunBufIgnoreType )
+ break;
+ else {
+ int avail = buf->length - buf->offset;
+
+ /* Anything available in the current buffer. */
+ if ( avail > 0 ) {
+ /* The source data from the current buffer. */
+ char *src = &buf->data[buf->offset];
+
+ int slen = avail <= length ? avail : length;
+ memcpy( dest+copied, src, slen ) ;
+
+ copied += slen;
+ length -= slen;
+ }
+ }
+
+ if ( length == 0 ) {
+ debug( REALM_INPUT, "exiting get data\n" );
+ break;
+ }
+
+ buf = buf->next;
+ }
+
+ return copied;
+}
+
+static int _consumeData( StreamImpl *is, int length )
{
debug( REALM_INPUT, "consuming %d bytes\n", length );
@@ -558,6 +631,7 @@ int _consumeData( StreamImpl *is, int length )
if ( buf->type == RunBufSourceType ) {
Stream *stream = (Stream*)buf->tree;
int slen = stream->in->funcs->consumeData( stream->in, length );
+ debug( REALM_INPUT, " got %d bytes from source\n", slen );
consumed += slen;
length -= slen;
@@ -578,8 +652,10 @@ int _consumeData( StreamImpl *is, int length )
}
}
- if ( length == 0 )
+ if ( length == 0 ) {
+ debug( REALM_INPUT, "exiting consume\n", length );
break;
+ }
RunBuf *runBuf = inputStreamPopHead( is );
free( runBuf );
@@ -588,16 +664,13 @@ int _consumeData( StreamImpl *is, int length )
return consumed;
}
-int _undoConsumeData( FsmRun *fsmRun, StreamImpl *is, const char *data, int length )
+static int _undoConsumeData( StreamImpl *is, const char *data, int length )
{
debug( REALM_INPUT, "undoing consume of %ld bytes\n", length );
if ( isSourceStream( is ) ) {
Stream *stream = (Stream*)is->queue->tree;
- int len = stream->in->funcs->undoConsumeData( fsmRun, stream->in, data, length );
-
- if ( stream->in->attached != 0 )
- detachStream( stream->in->attached, stream->in );
+ int len = stream->in->funcs->undoConsumeData( stream->in, data, length );
return len;
}
@@ -607,14 +680,11 @@ int _undoConsumeData( FsmRun *fsmRun, StreamImpl *is, const char *data, int leng
memcpy( newBuf->data, data, length );
inputStreamPrepend( is, newBuf );
- if ( is->attached != 0 )
- detachStream( is->attached, is );
-
return length;
}
}
-Tree *_consumeTree( StreamImpl *is )
+static Tree *_consumeTree( StreamImpl *is )
{
while ( is->queue != 0 && is->queue->type == RunBufDataType && is->queue->offset == is->queue->length ) {
RunBuf *runBuf = inputStreamPopHead( is );
@@ -633,11 +703,8 @@ Tree *_consumeTree( StreamImpl *is )
return 0;
}
-void _undoConsumeTree( StreamImpl *is, Tree *tree, int ignore )
+static void _undoConsumeTree( StreamImpl *is, Tree *tree, int ignore )
{
- if ( is->attached != 0 )
- detachStream( is->attached, is );
-
/* Create a new buffer for the data. This is the easy implementation.
* Something better is needed here. It puts a max on the amount of
* data that can be pushed back to the inputStream. */
@@ -647,7 +714,7 @@ void _undoConsumeTree( StreamImpl *is, Tree *tree, int ignore )
inputStreamPrepend( is, newBuf );
}
-struct LangEl *_consumeLangEl( StreamImpl *is, long *bindId, char **data, long *length )
+static struct LangEl *_consumeLangEl( StreamImpl *is, long *bindId, char **data, long *length )
{
if ( isSourceStream( is ) ) {
Stream *stream = (Stream*)is->queue->tree;
@@ -658,7 +725,7 @@ struct LangEl *_consumeLangEl( StreamImpl *is, long *bindId, char **data, long *
}
}
-void _undoConsumeLangEl( StreamImpl *is )
+static void _undoConsumeLangEl( StreamImpl *is )
{
if ( isSourceStream( is ) ) {
Stream *stream = (Stream*)is->queue->tree;
@@ -669,11 +736,8 @@ void _undoConsumeLangEl( StreamImpl *is )
}
}
-void _prependData( StreamImpl *is, const char *data, long length )
+static void _prependData( StreamImpl *is, const char *data, long length )
{
- if ( is->attached != 0 )
- detachStream( is->attached, is );
-
if ( isSourceStream( is ) && ((Stream*)is->queue->tree)->in->funcs == &streamFuncs ) {
Stream *stream = (Stream*)is->queue->tree;
@@ -693,11 +757,8 @@ void _prependData( StreamImpl *is, const char *data, long length )
}
}
-void _prependTree( StreamImpl *is, Tree *tree, int ignore )
+static void _prependTree( StreamImpl *is, Tree *tree, int ignore )
{
- if ( is->attached != 0 )
- detachStream( is->attached, is );
-
/* Create a new buffer for the data. This is the easy implementation.
* Something better is needed here. It puts a max on the amount of
* data that can be pushed back to the inputStream. */
@@ -707,7 +768,7 @@ void _prependTree( StreamImpl *is, Tree *tree, int ignore )
inputStreamPrepend( is, newBuf );
}
-void _prependStream( StreamImpl *in, struct ColmTree *tree )
+static void _prependStream( StreamImpl *in, struct ColmTree *tree )
{
/* Create a new buffer for the data. This is the easy implementation.
* Something better is needed here. It puts a max on the amount of
@@ -718,11 +779,8 @@ void _prependStream( StreamImpl *in, struct ColmTree *tree )
inputStreamPrepend( in, newBuf );
}
-int _undoPrependData( StreamImpl *is, int length )
+static int _undoPrependData( StreamImpl *is, int length )
{
- if ( is->attached != 0 )
- detachStream( is->attached, is );
-
debug( REALM_INPUT, "consuming %d bytes\n", length );
int consumed = 0;
@@ -767,11 +825,8 @@ int _undoPrependData( StreamImpl *is, int length )
return consumed;
}
-Tree *_undoPrependTree( StreamImpl *is )
+static Tree *_undoPrependTree( StreamImpl *is )
{
- if ( is->attached != 0 )
- detachStream( is->attached, is );
-
while ( is->queue != 0 && is->queue->type == RunBufDataType && is->queue->offset == is->queue->length ) {
RunBuf *runBuf = inputStreamPopHead( is );
free( runBuf );
@@ -789,7 +844,7 @@ Tree *_undoPrependTree( StreamImpl *is )
return 0;
}
-void _appendData( StreamImpl *is, const char *data, long len )
+static void _appendData( StreamImpl *is, const char *data, long len )
{
while ( len > 0 ) {
RunBuf *ad = newRunBuf();
@@ -807,11 +862,8 @@ void _appendData( StreamImpl *is, const char *data, long len )
}
}
-Tree *_undoAppendData( StreamImpl *is, int length )
+static Tree *_undoAppendData( StreamImpl *is, int length )
{
- if ( is->attached != 0 )
- detachStream( is->attached, is );
-
int consumed = 0;
/* Move over skip bytes. */
@@ -847,7 +899,7 @@ Tree *_undoAppendData( StreamImpl *is, int length )
return 0;
}
-void _appendTree( StreamImpl *is, Tree *tree )
+static void _appendTree( StreamImpl *is, Tree *tree )
{
RunBuf *ad = newRunBuf();
@@ -858,7 +910,7 @@ void _appendTree( StreamImpl *is, Tree *tree )
ad->length = 0;
}
-void _appendStream( StreamImpl *in, struct ColmTree *tree )
+static void _appendStream( StreamImpl *in, struct ColmTree *tree )
{
RunBuf *ad = newRunBuf();
@@ -869,24 +921,49 @@ void _appendStream( StreamImpl *in, struct ColmTree *tree )
ad->length = 0;
}
-Tree *_undoAppendStream( StreamImpl *is )
+static Tree *_undoAppendTree( StreamImpl *is )
{
- if ( is->attached != 0 )
- detachStream( is->attached, is );
-
RunBuf *runBuf = inputStreamPopTail( is );
Tree *tree = runBuf->tree;
free( runBuf );
return tree;
}
-Tree *_undoAppendTree( StreamImpl *is )
+static Tree *_undoAppendStream( StreamImpl *is )
{
- if ( is->attached != 0 )
- detachStream( is->attached, is );
-
RunBuf *runBuf = inputStreamPopTail( is );
Tree *tree = runBuf->tree;
free( runBuf );
return tree;
}
+
+void initStreamFuncs() /* Fill the streamFuncs table with this file's static implementations. */
+{
+ memset( &streamFuncs, 0, sizeof(struct StreamFuncs) ); /* Zero first, so any slot not assigned below stays null. */
+ streamFuncs.getData = &_getData;
+ streamFuncs.getParseBlock = &_getParseBlock;
+ streamFuncs.consumeData = &_consumeData;
+ streamFuncs.undoConsumeData = &_undoConsumeData;
+ streamFuncs.consumeTree = &_consumeTree;
+ streamFuncs.undoConsumeTree = &_undoConsumeTree;
+ streamFuncs.consumeLangEl = &_consumeLangEl;
+ streamFuncs.undoConsumeLangEl = &_undoConsumeLangEl;
+
+ streamFuncs.setEof = &_setEof;
+ streamFuncs.unsetEof = &_unsetEof;
+
+ streamFuncs.prependData = &_prependData;
+ streamFuncs.prependTree = &_prependTree;
+ streamFuncs.prependStream = &_prependStream;
+ streamFuncs.undoPrependData = &_undoPrependData;
+ streamFuncs.undoPrependTree = &_undoPrependTree;
+
+ streamFuncs.appendData = &_appendData;
+ streamFuncs.appendTree = &_appendTree;
+ streamFuncs.appendStream = &_appendStream;
+ streamFuncs.undoAppendData = &_undoAppendData;
+ streamFuncs.undoAppendTree = &_undoAppendTree;
+ streamFuncs.undoAppendStream = &_undoAppendStream;
+}
+
+
diff --git a/colm/input.h b/colm/input.h
index 3bfc43c5..14cdd465 100644
--- a/colm/input.h
+++ b/colm/input.h
@@ -35,9 +35,10 @@ extern "C" {
/* This is for data sources to return, not for the wrapper. */
#define INPUT_EOD 2
#define INPUT_EOF 3
-#define INPUT_LANG_EL 4
-#define INPUT_TREE 5
-#define INPUT_IGNORE 6
+#define INPUT_EOS 4
+#define INPUT_LANG_EL 5
+#define INPUT_TREE 6
+#define INPUT_IGNORE 7
/*
* pdaRun <- fsmRun <- stream
@@ -87,11 +88,12 @@ typedef struct _StreamImpl StreamImpl;
struct StreamFuncs
{
- /* Data. */
- int (*getData)( struct _FsmRun *fsmRun, StreamImpl *ss, int offset, char *dest, int length, int *copied );
+ int (*getParseBlock)( StreamImpl *ss, int skip, char **pdp, int *copied );
+
+ int (*getData)( StreamImpl *ss, char *dest, int length );
int (*consumeData)( StreamImpl *ss, int length );
- int (*undoConsumeData)( struct _FsmRun *fsmRun, StreamImpl *ss, const char *data, int length );
+ int (*undoConsumeData)( StreamImpl *ss, const char *data, int length );
struct ColmTree *(*consumeTree)( StreamImpl *ss );
void (*undoConsumeTree)( StreamImpl *ss, struct ColmTree *tree, int ignore );
@@ -127,10 +129,10 @@ struct StreamFuncs
struct _StreamImpl
{
struct StreamFuncs *funcs;
- struct _FsmRun *attached;
char eofSent;
char eof;
+ char eosSent;
RunBuf *queue;
RunBuf *queueTail;
@@ -152,8 +154,6 @@ struct _StreamImpl
struct ConsItem *consItem;
};
-typedef struct _StreamImpl StreamImpl;
-
StreamImpl *newSourceStreamPat( struct Pattern *pattern );
StreamImpl *newSourceStreamCons( struct Constructor *constructor );
StreamImpl *newSourceStreamFile( FILE *file );
@@ -164,35 +164,6 @@ void initStaticFuncs();
void initPatFuncs();
void initConsFuncs();
-/* The input stream interface. */
-
-int _getData( struct _FsmRun *fsmRun, StreamImpl *in, int offset, char *dest, int length, int *copied );
-int _consumeData( StreamImpl *in, int length );
-int _undoConsumeData( struct _FsmRun *fsmRun, StreamImpl *is, const char *data, int length );
-
-struct ColmTree *_consumeTree( StreamImpl *in );
-void _undoConsumeTree( StreamImpl *in, struct ColmTree *tree, int ignore );
-
-struct LangEl *_consumeLangEl( StreamImpl *in, long *bindId, char **data, long *length );
-void _undoConsumeLangEl( StreamImpl *in );
-
-void _setEof( StreamImpl *is );
-void _unsetEof( StreamImpl *is );
-
-void _prependData( StreamImpl *in, const char *data, long len );
-void _prependTree( StreamImpl *is, struct ColmTree *tree, int ignore );
-void _prependStream( StreamImpl *in, struct ColmTree *tree );
-int _undoPrependData( StreamImpl *is, int length );
-struct ColmTree *_undoPrependTree( StreamImpl *is );
-struct ColmTree *_undoPrependStream( StreamImpl *in );
-
-void _appendData( StreamImpl *in, const char *data, long len );
-void _appendTree( StreamImpl *in, struct ColmTree *tree );
-void _appendStream( StreamImpl *in, struct ColmTree *tree );
-struct ColmTree *_undoAppendData( StreamImpl *in, int length );
-struct ColmTree *_undoAppendTree( StreamImpl *in );
-struct ColmTree *_undoAppendStream( StreamImpl *in );
-
#ifdef __cplusplus
}
#endif
diff --git a/colm/lmparse.kl b/colm/lmparse.kl
index 5f3adb38..bbfd9b6f 100644
--- a/colm/lmparse.kl
+++ b/colm/lmparse.kl
@@ -26,7 +26,6 @@
#include "lmparse.h"
#include "global.h"
#include "input.h"
-#include "fsmrun.h"
using std::cout;
using std::cerr;
diff --git a/colm/parsetree.cc b/colm/parsetree.cc
index 02d8f68f..4c82410e 100644
--- a/colm/parsetree.cc
+++ b/colm/parsetree.cc
@@ -22,7 +22,6 @@
#include "lmparse.h"
#include "parsetree.h"
#include "input.h"
-#include "fsmrun.h"
#include <iostream>
#include <iomanip>
diff --git a/colm/parsetree.h b/colm/parsetree.h
index b9841c7e..9e65bf6d 100644
--- a/colm/parsetree.h
+++ b/colm/parsetree.h
@@ -35,7 +35,6 @@
#include "astring.h"
#include "bytecode.h"
#include "avlbasic.h"
-#include "fsmrun.h"
/* Operators that are represented with single symbol characters. */
#define OP_DoubleEql 'e'
diff --git a/colm/pdabuild.cc b/colm/pdabuild.cc
index 1dbd649e..6eb929f7 100644
--- a/colm/pdabuild.cc
+++ b/colm/pdabuild.cc
@@ -32,7 +32,6 @@
#include "redfsm.h"
#include "fsmcodegen.h"
#include "redbuild.h"
-#include "fsmrun.h"
/* Dumping the fsm. */
#include "mergesort.h"
diff --git a/colm/pdarun.c b/colm/pdarun.c
index e4cb9e1e..3ed94374 100644
--- a/colm/pdarun.c
+++ b/colm/pdarun.c
@@ -22,7 +22,6 @@
#include "config.h"
#include "debug.h"
#include "pdarun.h"
-#include "fsmrun.h"
#include "bytecode.h"
#include "tree.h"
#include "pool.h"
@@ -57,27 +56,24 @@
i = (Tree*)w; \
} while(0)
-void initFsmRun( FsmRun *fsmRun, Program *prg )
+static void initFsmRun( Program *prg, FsmRun *fsmRun )
{
fsmRun->tables = prg->rtd->fsmTables;
- fsmRun->runBuf = 0;
- /* Run buffers need to stick around because
- * token strings point into them. */
- fsmRun->runBuf = newRunBuf();
- fsmRun->runBuf->next = 0;
+ fsmRun->consumeBuf = 0;
- fsmRun->p = fsmRun->pe = fsmRun->runBuf->data;
- fsmRun->peof = 0;
+ fsmRun->p = fsmRun->pe = 0;
+ fsmRun->toklen = 0;
+ fsmRun->eof = 0;
fsmRun->preRegion = -1;
}
void clearFsmRun( Program *prg, FsmRun *fsmRun )
{
- if ( fsmRun->runBuf != 0 ) {
+ if ( fsmRun->consumeBuf != 0 ) {
/* Transfer the run buf list to the program */
- RunBuf *head = fsmRun->runBuf;
+ RunBuf *head = fsmRun->consumeBuf;
RunBuf *tail = head;
while ( tail->next != 0 )
tail = tail->next;
@@ -129,26 +125,108 @@ void decrementSteps( PdaRun *pdaRun )
debug( REALM_PARSE, "steps down to %ld\n", pdaRun->steps );
}
-/* Load up a token, starting from tokstart if it is set. If not set then
- * start it at data. */
-Head *streamPull( Program *prg, FsmRun *fsmRun, StreamImpl *is, long length )
+Head *extractMatch( Program *prg, FsmRun *fsmRun, StreamImpl *is ) /* Copies the matched toklen bytes out of the stream and consumes them. */
{
- /* We should not be in the midst of getting a token. */
- assert( fsmRun->tokstart == 0 );
+ long length = fsmRun->toklen;
+
+ debug( REALM_PARSE, "extracting token of length: %ld\n", length );
+
+ RunBuf *runBuf = fsmRun->consumeBuf;
+ if ( runBuf == 0 || length > ( FSM_BUFSIZE - runBuf->length ) ) { /* No buffer yet, or the match won't fit in the space left: push a fresh one. */
+ runBuf = newRunBuf();
+ runBuf->next = fsmRun->consumeBuf;
+ fsmRun->consumeBuf = runBuf;
+ }
- RunBuf *runBuf = newRunBuf();
- runBuf->next = fsmRun->runBuf;
- fsmRun->runBuf = runBuf;
+ char *dest = runBuf->data + runBuf->length;
- int len = 0;
- is->funcs->getData( fsmRun, is, 0, runBuf->data, length, &len );
+ is->funcs->getData( is, dest, length );
is->funcs->consumeData( is, length );
- fsmRun->p = fsmRun->pe = runBuf->data + length;
- Head *tokdata = stringAllocPointer( prg, runBuf->data, length );
- updatePosition( is, runBuf->data, length );
+ runBuf->length += length; /* Claim the bytes so the next copy lands after them. */
+
+ fsmRun->p = fsmRun->pe = 0;
+ fsmRun->toklen = 0;
+ fsmRun->tokstart = 0;
+
+ Head *head = stringAllocPointer( prg, dest, length ); /* NOTE(review): presumably head points at dest rather than copying - confirm. */
+
+ head->location = locationAllocate( prg );
+ head->location->line = is->line;
+ head->location->column = is->column;
+ head->location->byte = is->byte;
+
+ debug( REALM_PARSE, "location byte: %d\n", is->byte );
+
+ return head;
+}
+
+Head *peekMatch( Program *prg, FsmRun *fsmRun, StreamImpl *is ) /* Copies the matched toklen bytes out of the stream; doesn't consume. */
+{
+ long length = fsmRun->toklen;
+
+ RunBuf *runBuf = fsmRun->consumeBuf;
+ if ( runBuf == 0 || length > ( FSM_BUFSIZE - runBuf->length ) ) { /* No buffer yet, or the match won't fit in the space left: push a fresh one. */
+ runBuf = newRunBuf();
+ runBuf->next = fsmRun->consumeBuf;
+ fsmRun->consumeBuf = runBuf;
+ }
+
+ char *dest = runBuf->data + runBuf->length; /* runBuf->length is not advanced below, so a later copy starts at this same offset. */
+
+ is->funcs->getData( is, dest, length ); /* Copy only: consumeData is not called. */
+
+ fsmRun->p = fsmRun->pe = 0;
+ fsmRun->toklen = 0;
+
+ Head *head = stringAllocPointer( prg, dest, length );
+
+ head->location = locationAllocate( prg );
+ head->location->line = is->line;
+ head->location->column = is->column;
+ head->location->byte = is->byte;
+
+ debug( REALM_PARSE, "location byte: %d\n", is->byte );
+
+ return head;
+}
+
+Head *streamPull( Program *prg, PdaRun *pdaRun, StreamImpl *is, long length ) /* Pulls and consumes length bytes from the stream, returning them as a string head. */
+{
+ if ( pdaRun != 0 ) { /* With a parser: the bytes are copied into its FsmRun's consume buffers. */
+ FsmRun *fsmRun = pdaRun->fsmRun;
+ RunBuf *runBuf = fsmRun->consumeBuf;
+ if ( runBuf == 0 || length > ( FSM_BUFSIZE - runBuf->length ) ) { /* consumeBuf starts out null (initFsmRun), so guard before dereferencing. */
+ runBuf = newRunBuf(); /* NOTE(review): a length above FSM_BUFSIZE presumably won't fit even a fresh buffer - confirm callers bound it. */
+ runBuf->next = fsmRun->consumeBuf;
+ fsmRun->consumeBuf = runBuf;
+ }
+
+ char *dest = runBuf->data + runBuf->length;
+
+ is->funcs->getData( is, dest, length );
+ is->funcs->consumeData( is, length );
+
+ runBuf->length += length; /* Claim the bytes so the next copy lands after them. */
+
+ fsmRun->p = fsmRun->pe = 0;
+ fsmRun->toklen = 0;
+
+ Head *tokdata = stringAllocPointer( prg, dest, length );
+ updatePosition( is, dest, length );
+
+ return tokdata;
+ }
+ else { /* No parser: the bytes are copied into space obtained from initStrSpace. */
+ Head *head = initStrSpace( length );
+ char *dest = (char*)head->data;
+
+ is->funcs->getData( is, dest, length );
+ is->funcs->consumeData( is, length );
- return tokdata;
+ updatePosition( is, dest, length );
+ return head;
+ }
+}
void undoStreamPull( StreamImpl *is, const char *data, long length )
@@ -158,22 +236,22 @@ void undoStreamPull( StreamImpl *is, const char *data, long length )
is->funcs->prependData( is, data, length );
}
-void streamPushText( FsmRun *fsmRun, StreamImpl *is, const char *data, long length )
+void streamPushText( StreamImpl *is, const char *data, long length )
{
is->funcs->prependData( is, data, length );
}
-void streamPushTree( FsmRun *fsmRun, StreamImpl *is, Tree *tree, int ignore )
+void streamPushTree( StreamImpl *is, Tree *tree, int ignore )
{
is->funcs->prependTree( is, tree, ignore );
}
-void streamPushStream( FsmRun *fsmRun, StreamImpl *is, Tree *tree )
+void streamPushStream( StreamImpl *is, Tree *tree )
{
is->funcs->prependStream( is, tree );
}
-void undoStreamPush( Program *prg, Tree **sp, FsmRun *fsmRun, StreamImpl *is, long length )
+void undoStreamPush( Program *prg, Tree **sp, StreamImpl *is, long length )
{
if ( length < 0 ) {
Tree *tree = is->funcs->undoPrependTree( is );
@@ -184,7 +262,7 @@ void undoStreamPush( Program *prg, Tree **sp, FsmRun *fsmRun, StreamImpl *is, lo
}
}
-void undoStreamAppend( Program *prg, Tree **sp, FsmRun *fsmRun, StreamImpl *is, Tree *input, long length )
+void undoStreamAppend( Program *prg, Tree **sp, StreamImpl *is, Tree *input, long length )
{
if ( input->id == LEL_ID_STR )
is->funcs->undoAppendData( is, length );
@@ -209,7 +287,7 @@ static void sendBackText( FsmRun *fsmRun, StreamImpl *is, const char *data, long
debug( REALM_PARSE, "sending back text: %.*s\n",
(int)length, data );
- is->funcs->undoConsumeData( fsmRun, is, data, length );
+ is->funcs->undoConsumeData( is, data, length );
undoPosition( is, data, length );
}
@@ -250,54 +328,31 @@ static void sendBackIgnore( Program *prg, Tree **sp, PdaRun *pdaRun, FsmRun *fsm
debug( REALM_PARSE, "trigger parse stop, steps = target = %d\n", pdaRun->targetSteps );
pdaRun->stop = true;
}
-
-}
-
-void attachStream( FsmRun *fsmRun, StreamImpl *is )
-{
- if ( is->attached != 0 && is->attached != fsmRun )
- detachStream( is->attached, is );
-
- if ( is->attached != fsmRun ) {
- debug( REALM_INPUT, "attaching FsmRun to stream: %p %p\n", fsmRun, is );
- is->attached = fsmRun;
- }
-}
-
-void detachStream( FsmRun *fsmRun, StreamImpl *is )
-{
- debug( REALM_INPUT, "detaching FsmRun from stream: %p %p\n", fsmRun, is );
-
- is->attached = 0;
- clearBuffered( fsmRun );
-}
-
-void detachSource( FsmRun *fsmRun, StreamImpl *is )
-{
- debug( REALM_INPUT, "detaching fsm run from source stream: %p %p\n", fsmRun, is );
-
- is->attached = 0;
- clearBuffered( fsmRun );
}
void clearBuffered( FsmRun *fsmRun )
{
if ( fsmRun->tokstart != 0 ) {
- fsmRun->p = fsmRun->pe = fsmRun->tokstart;
- fsmRun->tokstart = 0;
+ //fsmRun->p = fsmRun->pe = fsmRun->tokstart;
+ //fsmRun->tokstart = 0;
+
+ fsmRun->pe = fsmRun->p;
}
else {
fsmRun->pe = fsmRun->p;
}
}
-void resetToken( FsmRun *fsmRun )
+void resetToken( PdaRun *pdaRun )
{
+ FsmRun *fsmRun = pdaRun->fsmRun;
+
/* If there is a token started, but never finished for a lack of data, we
* must first backup over it. */
if ( fsmRun->tokstart != 0 ) {
- fsmRun->p = fsmRun->tokstart;
- fsmRun->tokstart = 0;
+ fsmRun->p = fsmRun->pe = 0;
+ fsmRun->toklen = 0;
+ fsmRun->eof = 0;
}
}
@@ -768,7 +823,7 @@ void sendIgnore( Program *prg, Tree **sp, StreamImpl *is, FsmRun *fsmRun, PdaRun
/* Make the ignore string. */
Head *ignoreStr = extractMatch( prg, fsmRun, is );
- updatePosition( is, fsmRun->tokstart, ignoreStr->length );
+ updatePosition( is, ignoreStr->data, ignoreStr->length );
debug( REALM_PARSE, "ignoring: %.*s\n", ignoreStr->length, ignoreStr->data );
@@ -782,38 +837,6 @@ void sendIgnore( Program *prg, Tree **sp, StreamImpl *is, FsmRun *fsmRun, PdaRun
}
-/* Doesn't consume. */
-Head *peekMatch( Program *prg, FsmRun *fsmRun, StreamImpl *is )
-{
- long length = fsmRun->p - fsmRun->tokstart;
- Head *head = stringAllocPointer( prg, fsmRun->tokstart, length );
- head->location = locationAllocate( prg );
- head->location->line = is->line;
- head->location->column = is->column;
- head->location->byte = is->byte;
-
- debug( REALM_PARSE, "location byte: %d\n", is->byte );
-
- return head;
-}
-
-/* Consumes. */
-Head *extractMatch( Program *prg, FsmRun *fsmRun, StreamImpl *is )
-{
- long length = fsmRun->p - fsmRun->tokstart;
- Head *head = stringAllocPointer( prg, fsmRun->tokstart, length );
- head->location = locationAllocate( prg );
- head->location->line = is->line;
- head->location->column = is->column;
- head->location->byte = is->byte;
-
- debug( REALM_PARSE, "location byte: %d\n", is->byte );
-
- is->funcs->consumeData( is, length );
-
- return head;
-}
-
static void sendToken( Program *prg, Tree **sp, StreamImpl *is, FsmRun *fsmRun, PdaRun *pdaRun, long id )
{
int emptyIgnore = pdaRun->accumIgnore == 0;
@@ -825,7 +848,7 @@ static void sendToken( Program *prg, Tree **sp, StreamImpl *is, FsmRun *fsmRun,
prg->rtd->lelInfo[id].name,
stringLength(tokdata), stringData(tokdata) );
- updatePosition( is, fsmRun->tokstart, tokdata->length );
+ updatePosition( is, stringData(tokdata), stringLength(tokdata) );
Kid *input = makeTokenWithData( prg, pdaRun, fsmRun, is, id, tokdata );
@@ -882,7 +905,7 @@ static void sendCi( Program *prg, Tree **sp, StreamImpl *is, FsmRun *fsmRun, Pda
prg->rtd->lelInfo[id].name,
stringLength(tokdata), stringData(tokdata) );
- updatePosition( is, fsmRun->tokstart, tokdata->length );
+ updatePosition( is, stringData(tokdata), stringLength(tokdata) );
Kid *input = makeTokenWithData( prg, pdaRun, fsmRun, is, id, tokdata );
@@ -931,8 +954,12 @@ static void sendEof( Program *prg, Tree **sp, StreamImpl *is, FsmRun *fsmRun, Pd
pdaRun->parseInput = parseTree;
}
-void newToken( Program *prg, PdaRun *pdaRun, FsmRun *fsmRun )
+static void newToken( Program *prg, PdaRun *pdaRun, FsmRun *fsmRun )
{
+ fsmRun->p = fsmRun->pe = 0;
+ fsmRun->toklen = 0;
+ fsmRun->eof = 0;
+
/* Init the scanner vars. */
fsmRun->act = 0;
fsmRun->tokstart = 0;
@@ -996,140 +1023,89 @@ long scanToken( Program *prg, PdaRun *pdaRun, FsmRun *fsmRun, StreamImpl *is )
return SCAN_UNDO;
while ( true ) {
- fsmExecute( fsmRun, is );
-
- /* First check if scanning stopped because we have a token. */
- if ( fsmRun->matchedToken > 0 ) {
- /* If the token has a marker indicating the end (due to trailing
- * context) then adjust data now. */
- LangElInfo *lelInfo = prg->rtd->lelInfo;
- if ( lelInfo[fsmRun->matchedToken].markId >= 0 )
- fsmRun->p = fsmRun->mark[lelInfo[fsmRun->matchedToken].markId];
-
- return fsmRun->matchedToken;
- }
-
- /* Check for error. */
- if ( fsmRun->cs == fsmRun->tables->errorState ) {
- /* If a token was started, but not finished (tokstart != 0) then
- * restore data to the beginning of that token. */
- if ( fsmRun->tokstart != 0 )
- fsmRun->p = fsmRun->tokstart;
-
- /* Check for a default token in the region. If one is there
- * then send it and continue with the processing loop. */
- if ( prg->rtd->regionInfo[fsmRun->region].defaultToken >= 0 ) {
- fsmRun->tokstart = fsmRun->tokend = fsmRun->p;
- return prg->rtd->regionInfo[fsmRun->region].defaultToken;
- }
-
- return SCAN_ERROR;
- }
-
- /* Got here because the state machine didn't match a token or
- * encounter an error. Must be because we got to the end of the buffer
- * data. */
- assert( fsmRun->p == fsmRun->pe );
-
- /* There may be space left in the current buffer. If not then we need
- * to make some. */
- long space = fsmRun->runBuf->data + FSM_BUFSIZE - fsmRun->pe;
- if ( space == 0 ) {
- /* Create a new run buf. */
- RunBuf *newBuf = newRunBuf();
-
- /* If partway through a token then preserve the prefix. */
- long have = 0;
-
- if ( fsmRun->tokstart == 0 ) {
- /* No prefix. We filled the previous buffer. */
- fsmRun->runBuf->length = FSM_BUFSIZE;
- }
- else {
- int i;
-
- debug( REALM_SCAN, "copying data over to new buffer\n" );
- assert( fsmRun->runBuf->offset == 0 );
-
- if ( fsmRun->tokstart == fsmRun->runBuf->data ) {
- /* A token is started and it is already at the beginning
- * of the current buffer. This means buffer is full and it
- * must be grown. Probably need to do this sooner. */
- fatal( "OUT OF BUFFER SPACE\n" );
- }
-
- /* There is data that needs to be shifted over. */
- have = fsmRun->pe - fsmRun->tokstart;
- memcpy( newBuf->data, fsmRun->tokstart, have );
-
- /* Compute the length of the previous buffer. */
- fsmRun->runBuf->length = FSM_BUFSIZE - have;
-
- /* Compute tokstart and tokend. */
- long dist = fsmRun->tokstart - newBuf->data;
-
- fsmRun->tokend -= dist;
- fsmRun->tokstart = newBuf->data;
-
- /* Shift any markers. */
- for ( i = 0; i < MARK_SLOTS; i++ ) {
- if ( fsmRun->mark[i] != 0 )
- fsmRun->mark[i] -= dist;
- }
- }
-
- fsmRun->p = fsmRun->pe = newBuf->data + have;
- fsmRun->peof = 0;
-
- newBuf->next = fsmRun->runBuf;
- fsmRun->runBuf = newBuf;
- }
-
- /* We don't have any data. What is next in the input inputStream? */
- space = fsmRun->runBuf->data + FSM_BUFSIZE - fsmRun->pe;
- assert( space > 0 );
-
- /* Get more data. */
- int have = fsmRun->tokstart != 0 ? fsmRun->p - fsmRun->tokstart : 0;
+ char *pd = 0;
int len = 0;
- debug( REALM_SCAN, "fetching data: have: %d space: %d\n", have, space );
- int type = is->funcs->getData( fsmRun, is, have, fsmRun->p, space, &len );
+ int type = is->funcs->getParseBlock( is, fsmRun->toklen, &pd, &len );
switch ( type ) {
case INPUT_DATA:
- fsmRun->pe = fsmRun->p + len;
+ fsmRun->p = pd;
+ fsmRun->pe = pd + len;
+ break;
+
+ case INPUT_EOS:
+ fsmRun->p = fsmRun->pe = 0;
+ if ( fsmRun->tokstart != 0 )
+ fsmRun->eof = 1;
+ debug( REALM_SCAN, "EOS *******************\n" );
break;
case INPUT_EOF:
+ fsmRun->p = fsmRun->pe = 0;
if ( fsmRun->tokstart != 0 )
- fsmRun->peof = fsmRun->pe;
+ fsmRun->eof = 1;
else
return SCAN_EOF;
break;
case INPUT_EOD:
+ fsmRun->p = fsmRun->pe = 0;
return SCAN_TRY_AGAIN_LATER;
case INPUT_LANG_EL:
if ( fsmRun->tokstart != 0 )
- fsmRun->peof = fsmRun->pe;
+ fsmRun->eof = 1;
else
return SCAN_LANG_EL;
break;
case INPUT_TREE:
if ( fsmRun->tokstart != 0 )
- fsmRun->peof = fsmRun->pe;
+ fsmRun->eof = 1;
else
return SCAN_TREE;
break;
case INPUT_IGNORE:
if ( fsmRun->tokstart != 0 )
- fsmRun->peof = fsmRun->pe;
+ fsmRun->eof = 1;
else
return SCAN_IGNORE;
break;
}
+
+ fsmExecute( fsmRun, is );
+
+ /* First check if scanning stopped because we have a token. */
+ if ( fsmRun->matchedToken > 0 ) {
+ /* If the token has a marker indicating the end (due to trailing
+ * context) then adjust data now. */
+ LangElInfo *lelInfo = prg->rtd->lelInfo;
+ if ( lelInfo[fsmRun->matchedToken].markId >= 0 )
+ fsmRun->p = fsmRun->mark[lelInfo[fsmRun->matchedToken].markId];
+
+ return fsmRun->matchedToken;
+ }
+
+ /* Check for error. */
+ if ( fsmRun->cs == fsmRun->tables->errorState ) {
+ /* If a token was started, but not finished (tokstart != 0) then
+ * restore data to the beginning of that token. */
+ if ( fsmRun->tokstart != 0 )
+ fsmRun->p = fsmRun->tokstart;
+
+ /* Check for a default token in the region. If one is there
+ * then send it and continue with the processing loop. */
+ if ( prg->rtd->regionInfo[fsmRun->region].defaultToken >= 0 ) {
+ fsmRun->toklen = 0;
+ return prg->rtd->regionInfo[fsmRun->region].defaultToken;
+ }
+
+ return SCAN_ERROR;
+ }
+
+ /* Got here because the state machine didn't match a token or encounter
+ * an error. Must be because we got to the end of the buffer data. */
+ assert( fsmRun->p == fsmRun->pe );
}
/* Should not be reached. */
@@ -1147,8 +1123,9 @@ long scanToken( Program *prg, PdaRun *pdaRun, FsmRun *fsmRun, StreamImpl *is )
*/
long parseLoop( Program *prg, Tree **sp, PdaRun *pdaRun,
- FsmRun *fsmRun, StreamImpl *is, long entry )
+ StreamImpl *is, long entry )
{
+ FsmRun *fsmRun = pdaRun->fsmRun;
LangElInfo *lelInfo = prg->rtd->lelInfo;
switch ( entry ) {
@@ -1269,8 +1246,9 @@ case PcrPreEof:
/* Note that we don't update the position now. It is done when the token
* data is pulled from the inputStream. */
- fsmRun->p = fsmRun->tokstart;
- fsmRun->tokstart = 0;
+ fsmRun->p = fsmRun->pe = 0;
+ fsmRun->toklen = 0;
+ fsmRun->eof = 0;
pdaRun->fi = &prg->rtd->frameInfo[prg->rtd->lelInfo[pdaRun->tokenId].frameId];
pdaRun->frameId = prg->rtd->lelInfo[pdaRun->tokenId].frameId;
@@ -1484,8 +1462,8 @@ int isParserStopFinished( PdaRun *pdaRun )
return done;
}
-void initPdaRun( PdaRun *pdaRun, Program *prg, PdaTables *tables,
- FsmRun *fsmRun, int parserId, long stopTarget, int revertOn, Tree *context )
+void initPdaRun( Program *prg, PdaRun *pdaRun, FsmRun *fsmRun, PdaTables *tables,
+ int parserId, long stopTarget, int revertOn, Tree *context )
{
memset( pdaRun, 0, sizeof(PdaRun) );
pdaRun->tables = tables;
@@ -1533,6 +1511,11 @@ void initPdaRun( PdaRun *pdaRun, Program *prg, PdaTables *tables,
pdaRun->reject = false;
pdaRun->rcBlockCount = 0;
+
+ pdaRun->fsmRun = fsmRun;
+
+ initFsmRun( prg, fsmRun );
+ newToken( prg, pdaRun, fsmRun );
}
long stackTopTarget( Program *prg, PdaRun *pdaRun )
diff --git a/colm/pdarun.h b/colm/pdarun.h
index 4b37c5cd..00f07885 100644
--- a/colm/pdarun.h
+++ b/colm/pdarun.h
@@ -23,7 +23,6 @@
#define __COLM_PDARUN_H
#include <colm/input.h>
-#include <colm/fsmrun.h>
#include <colm/defs.h>
#include <colm/tree.h>
@@ -73,26 +72,32 @@ typedef struct _FsmRun
{
FsmTables *tables;
- RunBuf *runBuf;
+ RunBuf *consumeBuf;
/* FsmRun State. */
long region, preRegion;
long cs, ncs, act;
- char *tokstart, *tokend;
- char *p, *pe, *peof;
- int returnResult;
+ char *start;
+ char *tokstart;
+ long tokend;
+ long toklen;
+ char *p, *pe;
+
+ /* Bits. */
+ char eof;
+ char returnResult;
+ char skipToklen;
+
char *mark[MARK_SLOTS];
long matchedToken;
} FsmRun;
-void initFsmRun( FsmRun *fsmRun, struct ColmProgram *prg );
void clearFsmRun( struct ColmProgram *prg, FsmRun *fsmRun );
void updatePosition( StreamImpl *inputStream, const char *data, long length );
void undoPosition( StreamImpl *inputStream, const char *data, long length );
void sendBackRunBufHead( FsmRun *fsmRun, StreamImpl *inputStream );
void undoStreamPull( StreamImpl *inputStream, const char *data, long length );
-
#if SIZEOF_LONG != 4 && SIZEOF_LONG != 8
#error "SIZEOF_LONG contained an unexpected value"
#endif
@@ -341,6 +346,8 @@ typedef struct _PdaRun
int rcBlockCount;
Tree *parseErrorText;
+
+ FsmRun *fsmRun;
} PdaRun;
void rtCodeVectReplace( RtCodeVect *vect, long pos, const Code *val, long len );
@@ -393,8 +400,8 @@ void decrementSteps( PdaRun *pdaRun );
int makeReverseCode( PdaRun *pdaRun );
void transferReverseCode( PdaRun *pdaRun, ParseTree *tree );
-void initPdaRun( PdaRun *pdaRun, struct ColmProgram *prg, PdaTables *tables,
- FsmRun *fsmRun, int parserId, long stopTarget, int revertOn, Tree *context );
+void initPdaRun( struct ColmProgram *prg, PdaRun *pdaRun, FsmRun *fsmRun, PdaTables *tables,
+ int parserId, long stopTarget, int revertOn, Tree *context );
void clearPdaRun( struct ColmProgram *prg, Tree **root, PdaRun *pdaRun );
void initStreamImpl( StreamImpl *inputStream );
@@ -426,14 +433,14 @@ long parseToken( struct ColmProgram *prg, Tree **sp, PdaRun *pdaRun,
long undoParse( Tree **sp, PdaRun *pdaRun, FsmRun *fsmRun, StreamImpl *inputStream, Tree *tree );
-Head *streamPull( struct ColmProgram *prg, FsmRun *fsmRun, StreamImpl *inputStream, long length );
+Head *streamPull( struct ColmProgram *prg, PdaRun *pdaRun, StreamImpl *is, long length );
Head *stringAllocPointer( struct ColmProgram *prg, const char *data, long length );
-void streamPushText( FsmRun *fsmRun, StreamImpl *inputStream, const char *data, long length );
-void streamPushTree( FsmRun *fsmRun, StreamImpl *inputStream, Tree *tree, int ignore );
-void streamPushStream( FsmRun *fsmRun, StreamImpl *inputStream, Tree *tree );
-void undoStreamPush( struct ColmProgram *prg, Tree **sp, FsmRun *fsmRun, StreamImpl *inputStream, long length );
-void undoStreamAppend( struct ColmProgram *prg, Tree **sp, FsmRun *fsmRun, StreamImpl *inputStream, struct ColmTree *tree, long length );
+void streamPushText( StreamImpl *inputStream, const char *data, long length );
+void streamPushTree( StreamImpl *inputStream, Tree *tree, int ignore );
+void streamPushStream( StreamImpl *inputStream, Tree *tree );
+void undoStreamPush( struct ColmProgram *prg, Tree **sp, StreamImpl *inputStream, long length );
+void undoStreamAppend( struct ColmProgram *prg, Tree **sp, StreamImpl *inputStream, struct ColmTree *tree, long length );
Kid *makeTokenWithData( struct ColmProgram *prg, PdaRun *pdaRun, FsmRun *fsmRun,
StreamImpl *inputStream, int id, Head *tokdata );
@@ -448,21 +455,17 @@ long sendBackQueuedIgnore( struct ColmProgram *prg, Tree **sp, StreamImpl *input
void clearIgnoreList( struct ColmProgram *prg, Tree **sp, Kid *kid );
Head *extractMatch( struct ColmProgram *prg, FsmRun *fsmRun, StreamImpl *inputStream );
Head *extractMatch( struct ColmProgram *prg, FsmRun *fsmRun, StreamImpl *inputStream );
-void newToken( struct ColmProgram *prg, PdaRun *pdaRun, FsmRun *fsmRun );
void fsmExecute( FsmRun *fsmRun, StreamImpl *inputStream );
void sendNamedLangEl( struct ColmProgram *prg, Tree **sp, PdaRun *pdaRun, FsmRun *fsmRun, StreamImpl *inputStream );
long parseLoop( struct ColmProgram *prg, Tree **sp, PdaRun *pdaRun,
- FsmRun *fsmRun, StreamImpl *inputStream, long entry );
+ StreamImpl *inputStream, long entry );
void initBindings( PdaRun *pdaRun );
Tree *getParsedRoot( PdaRun *pdaRun, int stop );
void undoParseStream( struct ColmProgram *prg, Tree **sp, StreamImpl *inputStream, FsmRun *fsmRun,
PdaRun *pdaRun, long steps );
void clearBuffered( FsmRun *fsmRun );
-void resetToken( FsmRun *fsmRun );
-
-void detachStream( FsmRun *fsmRun, StreamImpl *is );
-void attachStream( FsmRun *fsmRun, StreamImpl *is );
+void resetToken( PdaRun *pdaRun );
#ifdef __cplusplus
}
diff --git a/colm/program.c b/colm/program.c
index 412473e5..b604fabe 100644
--- a/colm/program.c
+++ b/colm/program.c
@@ -20,7 +20,6 @@
*/
#include <colm/pdarun.h>
-#include <colm/fsmrun.h>
#include <colm/tree.h>
#include <colm/bytecode.h>
#include <colm/pool.h>
diff --git a/colm/redbuild.h b/colm/redbuild.h
index dbbb3e19..5ae75406 100644
--- a/colm/redbuild.h
+++ b/colm/redbuild.h
@@ -26,7 +26,6 @@
#include "avltree.h"
#include "fsmgraph.h"
#include "parsedata.h"
-#include "fsmrun.h"
/* Forwards. */
struct FsmTrans;
diff --git a/colm/redfsm.cc b/colm/redfsm.cc
index d8e4a983..5ec075ce 100644
--- a/colm/redfsm.cc
+++ b/colm/redfsm.cc
@@ -26,7 +26,6 @@
#include "mergesort.h"
#include "fsmgraph.h"
#include "parsetree.h"
-#include "fsmrun.h"
using std::ostringstream;
diff --git a/colm/resolve.cc b/colm/resolve.cc
index bf639738..6fc4b53e 100644
--- a/colm/resolve.cc
+++ b/colm/resolve.cc
@@ -21,7 +21,6 @@
#include "bytecode.h"
#include "parsedata.h"
-#include "fsmrun.h"
#include <iostream>
#include <assert.h>
diff --git a/colm/string.c b/colm/string.c
index d670b68c..21c3aac4 100644
--- a/colm/string.c
+++ b/colm/string.c
@@ -89,8 +89,6 @@ Head *initStrSpace( long length )
{
/* Find the length and allocate the space for the shared string. */
Head *head = (Head*) malloc( sizeof(Head) + length );
- //if ( head == 0 )
- // throw std::bad_alloc();
/* Init the header. */
head->data = (char*)(head+1);
diff --git a/colm/synthesis.cc b/colm/synthesis.cc
index 2c7e7e8b..f164e1ed 100644
--- a/colm/synthesis.cc
+++ b/colm/synthesis.cc
@@ -21,7 +21,6 @@
#include "bytecode.h"
#include "parsedata.h"
-#include "fsmrun.h"
#include "pdarun.h"
#include "input.h"
#include <iostream>
@@ -2532,7 +2531,7 @@ void Compiler::initStreamObject( )
streamLangEl->objectDef = streamObj;
initFunction( uniqueTypeStr, streamObj, "pull",
- IN_INPUT_PULL_WV, IN_INPUT_PULL_WV, uniqueTypeInt, false );
+ IN_INPUT_PULL_WV, IN_INPUT_PULL_WC, uniqueTypeInt, false );
initFunction( uniqueTypeStr, streamObj, "push",
IN_INPUT_PUSH_WV, IN_INPUT_PUSH_WV, uniqueTypeAny, false );
initFunction( uniqueTypeStr, streamObj, "push_ignore",
@@ -3248,7 +3247,9 @@ void Compiler::addStdin()
el->beenInitialized = true;
el->isConst = true;
el->useOffset = false;
- el->inGetR = IN_GET_STDIN;
+ el->inGetR = IN_GET_STDIN;
+ el->inGetWC = IN_GET_STDIN;
+ el->inGetWV = IN_GET_STDIN;
globalObjectDef->insertField( el->name, el );
}
@@ -3264,6 +3265,8 @@ void Compiler::addStdout()
el->isConst = true;
el->useOffset = false;
el->inGetR = IN_GET_STDOUT;
+ el->inGetWC = IN_GET_STDOUT;
+ el->inGetWV = IN_GET_STDOUT;
globalObjectDef->insertField( el->name, el );
}
@@ -3279,6 +3282,8 @@ void Compiler::addStderr()
el->isConst = true;
el->useOffset = false;
el->inGetR = IN_GET_STDERR;
+ el->inGetWC = IN_GET_STDERR;
+ el->inGetWV = IN_GET_STDERR;
globalObjectDef->insertField( el->name, el );
}
diff --git a/colm/tree.c b/colm/tree.c
index edd0dc79..341aad8d 100644
--- a/colm/tree.c
+++ b/colm/tree.c
@@ -986,14 +986,12 @@ Tree *createGeneric( Program *prg, long genericId )
Parser *parser = (Parser*)mapElAllocate( prg );
parser->id = genericInfo->langElId;
parser->genericInfo = genericInfo;
- parser->fsmRun = malloc( sizeof(FsmRun) );
parser->pdaRun = malloc( sizeof(PdaRun) );
+ parser->pdaRun->fsmRun = malloc( sizeof(FsmRun) );
/* Start off the parsing process. */
- initPdaRun( parser->pdaRun, prg, prg->rtd->pdaTables,
- parser->fsmRun, genericInfo->parserId, false, false, 0 );
- initFsmRun( parser->fsmRun, prg );
- newToken( prg, parser->pdaRun, parser->fsmRun );
+ initPdaRun( prg, parser->pdaRun, parser->pdaRun->fsmRun, prg->rtd->pdaTables,
+ genericInfo->parserId, false, false, 0 );
newGeneric = (Tree*) parser;
break;
@@ -1045,10 +1043,10 @@ free_tree:
}
else if ( generic->type == GEN_PARSER ) {
Parser *parser = (Parser*)tree;
- clearFsmRun( prg, parser->fsmRun );
+ clearFsmRun( prg, parser->pdaRun->fsmRun );
clearPdaRun( prg, sp, parser->pdaRun );
+ free( parser->pdaRun->fsmRun );
free( parser->pdaRun );
- free( parser->fsmRun );
treeDownref( prg, sp, (Tree*)parser->input );
mapElFree( prg, (MapEl*)parser );
}
@@ -2054,7 +2052,10 @@ void appendFile( struct ColmPrintArgs *args, const char *data, int length )
void appendFd( struct ColmPrintArgs *args, const char *data, int length )
{
- write( (long)args->arg, data, length );
+ int res = write( (long)args->arg, data, length ); /* bytes written, or -1 on failure */
+ if ( res < 0 ) {
+ message( "write error\n" );
+ }
}
Tree *treeTrim( struct ColmProgram *prg, Tree **sp, Tree *tree )
diff --git a/colm/tree.h b/colm/tree.h
index 8b6d509d..529c0185 100644
--- a/colm/tree.h
+++ b/colm/tree.h
@@ -203,7 +203,6 @@ typedef struct _Parser
GenericInfo *genericInfo;
struct _PdaRun *pdaRun;
- struct _FsmRun *fsmRun;
struct _Stream *input;
Tree *result;
} Parser;
diff --git a/test/include1.exp b/test/include1.exp
new file mode 100644
index 00000000..502194e3
--- /dev/null
+++ b/test/include1.exp
@@ -0,0 +1,19 @@
+opening include1a.in
+opening include1b.in
+opening include1c.in
+hello;
+
+a;
+b;
+
+there;
+
+c;
+d;
+
+dude;
+
+e;
+f;
+
+and dudettes;
diff --git a/test/include1.in b/test/include1.in
new file mode 100644
index 00000000..9c7aa806
--- /dev/null
+++ b/test/include1.in
@@ -0,0 +1,14 @@
+
+hello;
+
+#include "include1a.in"
+
+there;
+
+#include "include1b.in"
+
+dude;
+
+#include "include1c.in"
+
+and dudettes;
diff --git a/test/include1.lm b/test/include1.lm
new file mode 100644
index 00000000..03a3b046
--- /dev/null
+++ b/test/include1.lm
@@ -0,0 +1,98 @@
+namespace string
+ lex
+ literal '"'
+ token data /[^"\\]+/
+ token escape /'\\' any/
+ end
+
+ def string_data
+ [data]
+ | [escape]
+
+ def string
+ ['"' string_data* '"']
+
+ str unquote( S: string )
+ {
+ match S ['"' DL: string_data* '"']
+ for E: escape in DL
+ E.data = 'x'
+ return $DL
+ }
+
+end string
+
+namespace hash
+
+ lex
+ literal 'define', 'include'
+ literal '#', '\n' ni
+
+ token id /[a-zA-Z_][a-zA-Z_0-9]*/
+ token number /[0-9]+/
+
+ ignore /[ \t]/
+ end
+
+ def hash
+ ['#' 'define' Id: id number '\n']
+ | ['#' 'include' Inc: string::string '\n']
+
+end hash
+
+token rest_of_line /[^\n]* '\n'/
+
+namespace lang
+
+ lex
+ ignore /space/
+ literal '*', '(', ')', ';'
+ token id /[a-zA-Z_][a-zA-Z_0-9]*/
+ token number /[0-9]+/
+
+ token hash /'#'/ {
+ parse_stop H: hash::hash[ input ]
+ if ( H.tree ) {
+ if ( H.tree.Inc ) {
+ FN: str = unquote( H.tree.Inc )
+ print( 'opening ' FN '\n' )
+ IS: stream = open( FN 'r' )
+ if ( ! IS ) {
+ print( 'ERROR: failed to open ' FN '\n' )
+ exit(1)
+ }
+ input.push( IS )
+ }
+ }
+ else {
+ parse_stop L: rest_of_line[ input ]
+ if ! L.tree {
+ print( "ERROR: stuck: " L.error )
+ exit(1)
+ }
+ print( "ERROR: failed to parse # directive: " L.tree )
+ }
+ }
+ end
+
+ def item
+ [id]
+ | ['(' item* ')']
+
+ def statement
+ [item* ';']
+
+ def start
+ [statement*]
+
+end lang
+
+parse Input: lang::start[ stdin ]
+
+if ! Input.tree
+ print( Input.error '\n' )
+else {
+ #print( Input.tree '\n' )
+ S: lang::start = Input.tree
+ print( Input.tree '\n' )
+}
diff --git a/test/include1a.in b/test/include1a.in
new file mode 100644
index 00000000..26da0afa
--- /dev/null
+++ b/test/include1a.in
@@ -0,0 +1,2 @@
+a;
+b;
diff --git a/test/include1b.in b/test/include1b.in
new file mode 100644
index 00000000..6c574323
--- /dev/null
+++ b/test/include1b.in
@@ -0,0 +1,2 @@
+c;
+d;
diff --git a/test/include1c.in b/test/include1c.in
new file mode 100644
index 00000000..5373832d
--- /dev/null
+++ b/test/include1c.in
@@ -0,0 +1,2 @@
+e;
+f;
diff --git a/test/pull1.exp b/test/pull1.exp
new file mode 100644
index 00000000..aa3a0fe0
--- /dev/null
+++ b/test/pull1.exp
@@ -0,0 +1 @@
+this is in
diff --git a/test/pull1.in b/test/pull1.in
new file mode 100644
index 00000000..f4d2e4a0
--- /dev/null
+++ b/test/pull1.in
@@ -0,0 +1 @@
+this is input for a non-parse pull
diff --git a/test/pull1.lm b/test/pull1.lm
new file mode 100644
index 00000000..bc559671
--- /dev/null
+++ b/test/pull1.lm
@@ -0,0 +1,2 @@
+String: str = stdin.pull( 10 )
+print( String '\n' )
diff --git a/test/pull2.exp b/test/pull2.exp
new file mode 100644
index 00000000..aa3a0fe0
--- /dev/null
+++ b/test/pull2.exp
@@ -0,0 +1 @@
+this is in
diff --git a/test/pull2.in b/test/pull2.in
new file mode 100644
index 00000000..f4d2e4a0
--- /dev/null
+++ b/test/pull2.in
@@ -0,0 +1 @@
+this is input for a non-parse pull
diff --git a/test/pull2.lm b/test/pull2.lm
new file mode 100644
index 00000000..1a18b829
--- /dev/null
+++ b/test/pull2.lm
@@ -0,0 +1,3 @@
+Stream: stream = open( 'pull2.in' 'r' )
+String: str = Stream.pull( 10 )
+print( String '\n' )