diff options
40 files changed, 755 insertions, 503 deletions
diff --git a/colm/Makefile.am b/colm/Makefile.am index 5d7f1101..226bafbf 100644 --- a/colm/Makefile.am +++ b/colm/Makefile.am @@ -28,7 +28,7 @@ RUNTIME_SRC = \ RUNTIME_HDR = \ bytecode.h config.h defs.h debug.h pool.h input.h \ - fsmrun.h pdarun.h map.h tree.h program.h colm.h + pdarun.h map.h tree.h program.h colm.h lib_LIBRARIES = libcolmp.a libcolmd.a @@ -49,7 +49,7 @@ colm_LDADD = libcolmp.a colm_SOURCES = \ buffer.h bytecode.h colm.h debug.h dotgen.h fsmcodegen.h fsmgraph.h \ - fsmrun.h input.h keyops.h lmparse.h lmscan.h map.h parsedata.h \ + input.h keyops.h lmparse.h lmscan.h map.h parsedata.h \ parsetree.h pcheck.h pdacodegen.h pdagraph.h pdarun.h pool.h redbuild.h \ redfsm.h rtvector.h tree.h version.h global.h colm.h \ \ diff --git a/colm/bytecode.c b/colm/bytecode.c index 94e7d9b6..4d93c0f7 100644 --- a/colm/bytecode.c +++ b/colm/bytecode.c @@ -20,7 +20,6 @@ */ #include <colm/pdarun.h> -#include <colm/fsmrun.h> #include <colm/tree.h> #include <colm/bytecode.h> #include <colm/pool.h> @@ -193,7 +192,7 @@ case PcrStart: if ( ! parser->pdaRun->parseError ) { parser->pdaRun->stopTarget = stopId; - long pcr = parseLoop( prg, sp, parser->pdaRun, parser->fsmRun, parser->input->in, entry ); + long pcr = parseLoop( prg, sp, parser->pdaRun, parser->input->in, entry ); while ( pcr != PcrDone ) { @@ -203,7 +202,7 @@ case PcrGeneration: case PcrPreEof: case PcrReverse: - pcr = parseLoop( prg, sp, parser->pdaRun, parser->fsmRun, parser->input->in, entry ); + pcr = parseLoop( prg, sp, parser->pdaRun, parser->input->in, entry ); } } @@ -223,7 +222,7 @@ case PcrStart: parser->input->in->funcs->setEof( parser->input->in ); if ( ! parser->pdaRun->parseError ) { - long pcr = parseLoop( prg, sp, parser->pdaRun, parser->fsmRun, parser->input->in, entry ); + long pcr = parseLoop( prg, sp, parser->pdaRun, parser->input->in, entry ); while ( pcr != PcrDone ) { @@ -233,7 +232,7 @@ case PcrGeneration: case PcrPreEof: case PcrReverse: - pcr = parseLoop( prg, sp, parser->pdaRun, parser->fsmRun, parser->input->in, entry ); + pcr = parseLoop( prg, sp, parser->pdaRun, parser->input->in, entry ); } } } @@ -260,12 +259,11 @@ break; } long undoParseFrag( Program *prg, Tree **sp, Parser *parser, long steps, long entry ) { StreamImpl *is = parser->input->in; - FsmRun *fsmRun = parser->fsmRun; PdaRun *pdaRun = parser->pdaRun; debug( REALM_PARSE, "undo parse frag, target steps: %ld, pdarun steps: %ld\n", steps, pdaRun->steps ); - resetToken( fsmRun ); + resetToken( pdaRun ); switch ( entry ) { case PcrStart: @@ -278,7 +276,7 @@ case PcrStart: pdaRun->triggerUndo = 1; /* The parse loop will recognise the situation. */ - long pcr = parseLoop( prg, sp, pdaRun, fsmRun, is, entry ); + long pcr = parseLoop( prg, sp, pdaRun, is, entry ); while ( pcr != PcrDone ) { return pcr; @@ -287,7 +285,7 @@ case PcrGeneration: case PcrPreEof: case PcrReverse: - pcr = parseLoop( prg, sp, pdaRun, fsmRun, is, entry ); + pcr = parseLoop( prg, sp, pdaRun, is, entry ); } /* Reset environment. */ @@ -302,10 +300,10 @@ break; } return PcrDone; } -Tree *streamPullBc( Program *prg, FsmRun *fsmRun, StreamImpl *in, Tree *length ) +Tree *streamPullBc( Program *prg, PdaRun *pdaRun, StreamImpl *in, Tree *length ) { long len = ((Int*)length)->value; - Head *tokdata = streamPull( prg, fsmRun, in, len ); + Head *tokdata = streamPull( prg, pdaRun, in, len ); return constructString( prg, tokdata ); } @@ -316,7 +314,7 @@ void undoPull( Program *prg, StreamImpl *in, Tree *str ) undoStreamPull( in, data, length ); } -long streamPush( Program *prg, Tree **sp, FsmRun *fsmRun, StreamImpl *in, Tree *tree, int ignore ) +static long streamPush( Program *prg, Tree **sp, StreamImpl *in, Tree *tree, int ignore ) { if ( tree->id == LEL_ID_STR ) { /* This should become a compile error. If it's text, it's up to the @@ -328,7 +326,7 @@ long streamPush( Program *prg, Tree **sp, FsmRun *fsmRun, StreamImpl *in, Tree * initStrCollect( &collect ); printTreeCollect( prg, sp, &collect, tree, true ); - streamPushText( fsmRun, in, collect.data, collect.length ); + streamPushText( in, collect.data, collect.length ); long length = collect.length; strCollectDestroy( &collect ); @@ -336,12 +334,12 @@ long streamPush( Program *prg, Tree **sp, FsmRun *fsmRun, StreamImpl *in, Tree * } else if ( tree->id == LEL_ID_STREAM ) { treeUpref( tree ); - streamPushStream( fsmRun, in, tree ); + streamPushStream( in, tree ); return -1; } else { treeUpref( tree ); - streamPushTree( fsmRun, in, tree, ignore ); + streamPushTree( in, tree, ignore ); return -1; } } @@ -1154,7 +1152,7 @@ again: /* If there are captures (this is a translate block) then copy them into * the local frame now. */ LangElInfo *lelInfo = prg->rtd->lelInfo; - char **mark = exec->parser->fsmRun->mark; + char **mark = exec->parser->pdaRun->fsmRun->mark; int i; for ( i = 0; i < lelInfo[exec->parser->pdaRun->tokenId].numCaptureAttr; i++ ) { @@ -2157,7 +2155,7 @@ again: debug( REALM_BYTECODE, "IN_INPUT_APPEND_BKT\n" ); - undoStreamAppend( prg, sp, 0, ((Stream*)accumStream)->in, input, len ); + undoStreamAppend( prg, sp, ((Stream*)accumStream)->in, input, len ); treeDownref( prg, sp, accumStream ); treeDownref( prg, sp, input ); break; @@ -2458,7 +2456,8 @@ again: Stream *accumStream = (Stream*)vm_pop(); Tree *len = vm_pop(); - Tree *string = streamPullBc( prg, exec->parser->fsmRun, accumStream->in, len ); + PdaRun *pdaRun = exec->parser != 0 ? exec->parser->pdaRun : 0; + Tree *string = streamPullBc( prg, pdaRun, accumStream->in, len ); treeUpref( string ); vm_push( string ); @@ -2472,6 +2471,21 @@ again: treeDownref( prg, sp, len ); break; } + + case IN_INPUT_PULL_WC: { + debug( REALM_BYTECODE, "IN_INPUT_PULL_WC\n" ); + + Stream *accumStream = (Stream*)vm_pop(); + Tree *len = vm_pop(); + PdaRun *pdaRun = exec->parser != 0 ? exec->parser->pdaRun : 0; + Tree *string = streamPullBc( prg, pdaRun, accumStream->in, len ); + treeUpref( string ); + vm_push( string ); + + treeDownref( prg, sp, (Tree*)accumStream ); + treeDownref( prg, sp, len ); + break; + } case IN_INPUT_PULL_BKT: { Tree *string; read_tree( string ); @@ -2490,7 +2504,7 @@ again: Stream *input = (Stream*)vm_pop(); Tree *tree = vm_pop(); - long len = streamPush( prg, sp, 0, input->in, tree, false ); + long len = streamPush( prg, sp, input->in, tree, false ); vm_push( 0 ); /* Single unit. */ @@ -2507,7 +2521,7 @@ again: Stream *input = (Stream*)vm_pop(); Tree *tree = vm_pop(); - long len = streamPush( prg, sp, 0, input->in, tree, true ); + long len = streamPush( prg, sp, input->in, tree, true ); vm_push( 0 ); /* Single unit. */ @@ -2527,7 +2541,7 @@ again: debug( REALM_BYTECODE, "IN_INPUT_PUSH_BKT\n" ); - undoStreamPush( prg, sp, 0, input->in, len ); + undoStreamPush( prg, sp, input->in, len ); treeDownref( prg, sp, (Tree*)input ); break; } diff --git a/colm/bytecode.h b/colm/bytecode.h index 26482f2d..c84ccb6b 100644 --- a/colm/bytecode.h +++ b/colm/bytecode.h @@ -223,6 +223,7 @@ typedef unsigned char uchar; #define IN_CONSTRUCT_TERM 0x9d #define IN_INPUT_PULL_WV 0x9e +#define IN_INPUT_PULL_WC 0xe1 #define IN_INPUT_PULL_BKT 0x9f #define IN_PARSE_SAVE_STEPS 0xa0 @@ -458,6 +459,7 @@ typedef struct _Execution long stringLength( Head *str ); const char *stringData( Head *str ); Head *stringAllocFull( struct ColmProgram *prg, const char *data, long length ); +Head *initStrSpace( long length ); Head *stringCopy( struct ColmProgram *prg, Head *head ); void stringFree( struct ColmProgram *prg, Head *head ); void stringShorten( Head *tokdata, long newlen ); @@ -495,7 +497,6 @@ void allocGlobal( struct ColmProgram *prg ); Tree **executeCode( struct ColmProgram *prg, Execution *exec, Tree **sp, Code *instr ); void rcodeDownref( struct ColmProgram *prg, Tree **sp, Code *instr ); Code *popReverseCode( RtCodeVect *allRev ); -void sendBackBuffered( FsmRun *fsmRun, StreamImpl *inputStream ); #ifdef __cplusplus } diff --git a/colm/codegen.cc b/colm/codegen.cc index f55257e7..653d8d51 100644 --- a/colm/codegen.cc +++ b/colm/codegen.cc @@ -23,7 +23,6 @@ #include "fsmcodegen.h" #include "redfsm.h" #include "bstmap.h" -#include "fsmrun.h" #include "debug.h" #include <sstream> #include <string> diff --git a/colm/compiler.cc b/colm/compiler.cc index 8043b45f..25d95397 100644 --- a/colm/compiler.cc +++ b/colm/compiler.cc @@ -34,7 +34,6 @@ #include "redbuild.h" #include "pdacodegen.h" #include "fsmcodegen.h" -#include "fsmrun.h" #include "pdarun.h" #include "colm.h" #include "pool.h" @@ -1240,12 +1239,11 @@ PdaRun *Compiler::parsePattern( Program *prg, Tree **sp, const InputLoc &loc, int parserId, StreamImpl *sourceStream ) { StreamImpl *in = new StreamImpl; - FsmRun *fsmRun = new FsmRun; PdaRun *pdaRun = new PdaRun; + pdaRun->fsmRun = new FsmRun; initStreamImpl( in ); - initPdaRun( pdaRun, prg, pdaTables, fsmRun, parserId, 0, false, 0 ); - initFsmRun( fsmRun, prg ); + initPdaRun( prg, pdaRun, pdaRun->fsmRun, pdaTables, parserId, 0, false, 0 ); Stream *res = streamAllocate( prg ); res->id = LEL_ID_STREAM; @@ -1253,8 +1251,7 @@ PdaRun *Compiler::parsePattern( Program *prg, Tree **sp, const InputLoc &loc, in->funcs->appendStream( in, (Tree*)res ); in->funcs->setEof( in ); - newToken( prg, pdaRun, fsmRun ); - long pcr = parseLoop( prg, sp, pdaRun, fsmRun, in, PcrStart ); + long pcr = parseLoop( prg, sp, pdaRun, in, PcrStart ); assert( pcr == PcrDone ); if ( pdaRun->parseError ) { cout << "PARSE ERROR " << loc.line << ":" << loc.col; diff --git a/colm/ctinput.cc b/colm/ctinput.cc index bde9f424..285e1faf 100644 --- a/colm/ctinput.cc +++ b/colm/ctinput.cc @@ -22,7 +22,6 @@ #include "parsedata.h" #include "parsetree.h" #include "input.h" -#include "fsmrun.h" #include "debug.h" #include "pool.h" @@ -60,15 +59,13 @@ LangEl *inputStreamPatternGetLangEl( StreamImpl *ss, long *bindId, char **data, return klangEl; } -int inputStreamPatternGetData( FsmRun *fsmRun, StreamImpl *ss, int skip, char *dest, int length, int *copied ) +int inputStreamPatternGetParseBlock( StreamImpl *ss, int skip, char **pdp, int *copied ) { *copied = 0; PatternItem *buf = ss->patItem; int offset = ss->offset; - attachStream( fsmRun, ss ); - while ( true ) { if ( buf == 0 ) return INPUT_EOD; @@ -82,7 +79,7 @@ int inputStreamPatternGetData( FsmRun *fsmRun, StreamImpl *ss, int skip, char *d if ( avail > 0 ) { /* The source data from the current buffer. */ char *src = &buf->data[offset]; - int slen = avail <= length ? avail : length; + int slen = avail; /* Need to skip? */ if ( skip > 0 && slen <= skip ) { @@ -96,7 +93,7 @@ int inputStreamPatternGetData( FsmRun *fsmRun, StreamImpl *ss, int skip, char *d slen -= skip; skip = 0; - memcpy( dest, src, slen ) ; + *pdp = src; *copied += slen; break; } @@ -109,6 +106,43 @@ int inputStreamPatternGetData( FsmRun *fsmRun, StreamImpl *ss, int skip, char *d return INPUT_DATA; } +int inputStreamPatternGetData( StreamImpl *ss, char *dest, int length ) +{ + int copied = 0; + + PatternItem *buf = ss->patItem; + int offset = ss->offset; + + while ( true ) { + if ( buf == 0 ) + break; + + if ( buf->type == PatternItem::FactorType ) + break; + + assert ( buf->type == PatternItem::InputText ); + int avail = buf->data.length() - offset; + + if ( avail > 0 ) { + /* The source data from the current buffer. */ + char *src = &buf->data[offset]; + int slen = avail <= length ? avail : length; + + memcpy( dest+copied, src, slen ) ; + copied += slen; + length -= slen; + } + + if ( length == 0 ) + break; + + buf = buf->next; + offset = 0; + } + + return copied; +} + void inputStreamPatternBackup( StreamImpl *ss ) { if ( ss->patItem == 0 ) @@ -178,7 +212,7 @@ int inputStreamPatternConsumeData( StreamImpl *ss, int length ) return consumed; } -int inputStreamPatternUndoConsumeData( FsmRun *fsmRun, StreamImpl *ss, const char *data, int length ) +int inputStreamPatternUndoConsumeData( StreamImpl *ss, const char *data, int length ) { ss->offset -= length; return length; @@ -189,6 +223,7 @@ extern "C" void initPatFuncs() memset( &patternFuncs, 0, sizeof(StreamFuncs) ); patternFuncs.getData = &inputStreamPatternGetData; + patternFuncs.getParseBlock = &inputStreamPatternGetParseBlock; patternFuncs.consumeData = &inputStreamPatternConsumeData; patternFuncs.undoConsumeData = &inputStreamPatternUndoConsumeData; @@ -237,15 +272,14 @@ LangEl *inputStreamConsGetLangEl( StreamImpl *ss, long *bindId, char **data, lon return klangEl; } -int inputStreamConsGetData( FsmRun *fsmRun, StreamImpl *ss, int skip, char *dest, int length, int *copied ) +int inputStreamConsGetParseBlock( StreamImpl *ss, + int skip, char **pdp, int *copied ) { *copied = 0; ConsItem *buf = ss->consItem; int offset = ss->offset; - attachStream( fsmRun, ss ); - while ( true ) { if ( buf == 0 ) return INPUT_EOD; @@ -259,7 +293,7 @@ int inputStreamConsGetData( FsmRun *fsmRun, StreamImpl *ss, int skip, char *dest if ( avail > 0 ) { /* The source data from the current buffer. */ char *src = &buf->data[offset]; - int slen = avail <= length ? avail : length; + int slen = avail; /* Need to skip? */ if ( skip > 0 && slen <= skip ) { @@ -273,7 +307,7 @@ int inputStreamConsGetData( FsmRun *fsmRun, StreamImpl *ss, int skip, char *dest slen -= skip; skip = 0; - memcpy( dest, src, slen ) ; + *pdp = src; *copied += slen; break; } @@ -286,6 +320,43 @@ int inputStreamConsGetData( FsmRun *fsmRun, StreamImpl *ss, int skip, char *dest return INPUT_DATA; } +int inputStreamConsGetData( StreamImpl *ss, char *dest, int length ) +{ + int copied = 0; + + ConsItem *buf = ss->consItem; + int offset = ss->offset; + + while ( true ) { + if ( buf == 0 ) + break; + + if ( buf->type == ConsItem::ExprType || buf->type == ConsItem::FactorType ) + break; + + assert ( buf->type == ConsItem::InputText ); + int avail = buf->data.length() - offset; + + if ( avail > 0 ) { + /* The source data from the current buffer. */ + char *src = &buf->data[offset]; + int slen = avail <= length ? avail : length; + + memcpy( dest+copied, src, slen ) ; + copied += slen; + length -= slen; + } + + if ( length == 0 ) + break; + + buf = buf->next; + offset = 0; + } + + return copied; +} + void inputStreamConsBackup( StreamImpl *ss ) { if ( ss->consItem == 0 ) @@ -357,7 +428,7 @@ int inputStreamConsConsumeData( StreamImpl *ss, int length ) return consumed; } -int inputStreamConsUndoConsumeData( FsmRun *fsmRun, StreamImpl *ss, const char *data, int length ) +int inputStreamConsUndoConsumeData( StreamImpl *ss, const char *data, int length ) { ss->offset -= length; return length; @@ -368,6 +439,7 @@ extern "C" void initConsFuncs() memset( &replFuncs, 0, sizeof(StreamFuncs) ); replFuncs.getData = &inputStreamConsGetData; + replFuncs.getParseBlock = &inputStreamConsGetParseBlock; replFuncs.consumeData = &inputStreamConsConsumeData; replFuncs.undoConsumeData = &inputStreamConsUndoConsumeData; diff --git a/colm/declare.cc b/colm/declare.cc index 6a285277..c2cdec51 100644 --- a/colm/declare.cc +++ b/colm/declare.cc @@ -21,7 +21,6 @@ #include "bytecode.h" #include "parsedata.h" -#include "fsmrun.h" #include <iostream> #include <assert.h> diff --git a/colm/exports.cc b/colm/exports.cc index df94abdf..3556e249 100644 --- a/colm/exports.cc +++ b/colm/exports.cc @@ -23,7 +23,6 @@ #include "fsmcodegen.h" #include "redfsm.h" #include "bstmap.h" -#include "fsmrun.h" #include "debug.h" #include <sstream> #include <string> diff --git a/colm/fsmcodegen.cc b/colm/fsmcodegen.cc index 212de648..ce357e13 100644 --- a/colm/fsmcodegen.cc +++ b/colm/fsmcodegen.cc @@ -23,7 +23,6 @@ #include "fsmcodegen.h" #include "redfsm.h" #include "bstmap.h" -#include "fsmrun.h" #include <sstream> #include <string> #include <assert.h> @@ -48,7 +47,8 @@ FsmCodeGen::FsmCodeGen( const char *sourceFileName, const char *fsmName, ostream codeGenErrCount(0), dataPrefix(true), writeFirstFinal(true), - writeErr(true) + writeErr(true), + skipTokenLabelNeeded(false) { } @@ -146,7 +146,7 @@ void FsmCodeGen::SET_ACT( ostream &ret, InlineItem *item ) void FsmCodeGen::SET_TOKEND( ostream &ret, InlineItem *item ) { /* The tokend action sets tokend. */ - ret << TOKEND() << " = " << P() << "+1;"; + ret << "{ " << TOKEND() << " = " << TOKLEN() << " + ( " << P() << " - " << BLOCK_START() << " ) + 1; }"; } void FsmCodeGen::INIT_TOKSTART( ostream &ret, InlineItem *item ) { @@ -172,14 +172,14 @@ void FsmCodeGen::LM_SWITCH( ostream &ret, InlineItem *item, int targState, int inFinish ) { ret << - " " << P() << " = " << TOKEND() << ";\n" + " " << TOKLEN() << " = " << TOKEND() << ";\n" " switch( " << ACT() << " ) {\n"; /* If the switch handles error then we also forced the error state. It * will exist. */ if ( item->tokenRegion->lmSwitchHandlesError ) { - ret << " case 0: " << P() << " = " << TOKSTART() << - "; goto st" << redFsm->errState->id << ";\n"; + ret << " case 0: " //<< P() << " = " << TOKSTART() << ";" << + "goto st" << redFsm->errState->id << ";\n"; } for ( TokenDefListReg::Iter lmi = item->tokenRegion->tokenDefList; lmi.lte(); lmi++ ) { @@ -194,7 +194,9 @@ void FsmCodeGen::LM_SWITCH( ostream &ret, InlineItem *item, ret << " }\n" "\t" - " return;\n"; + " goto skip_toklen;\n"; + + skipTokenLabelNeeded = true; } void FsmCodeGen::LM_ON_LAST( ostream &ret, InlineItem *item ) @@ -203,7 +205,7 @@ void FsmCodeGen::LM_ON_LAST( ostream &ret, InlineItem *item ) ret << " " << P() << " += 1;\n"; EMIT_TOKEN( ret, item->longestMatchPart->tdLangEl ); - ret << " return;\n"; + ret << " goto out;\n"; } void FsmCodeGen::LM_ON_NEXT( ostream &ret, InlineItem *item ) @@ -211,16 +213,18 @@ void FsmCodeGen::LM_ON_NEXT( ostream &ret, InlineItem *item ) assert( item->longestMatchPart->tdLangEl != 0 ); EMIT_TOKEN( ret, item->longestMatchPart->tdLangEl ); - ret << " return;\n"; + ret << " goto out;\n"; } void FsmCodeGen::LM_ON_LAG_BEHIND( ostream &ret, InlineItem *item ) { assert( item->longestMatchPart->tdLangEl != 0 ); - ret << " " << P() << " = " << TOKEND() << ";\n"; + ret << " " << TOKLEN() << " = " << TOKEND() << ";\n"; EMIT_TOKEN( ret, item->longestMatchPart->tdLangEl ); - ret << " return;\n"; + ret << " goto skip_toklen;\n"; + + skipTokenLabelNeeded = true; } @@ -731,7 +735,7 @@ std::ostream &FsmCodeGen::EXIT_STATES() for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { out << " case " << st->id << ": out" << st->id << ": "; if ( st->eofTrans != 0 ) { - out << "if ( " << PE() << " == " << PEOF() << " ) {"; + out << "if ( " << DATA_EOF() << " ) {"; TRANS_GOTO( st->eofTrans, 0 ); out << "\n"; out << "}"; @@ -848,6 +852,7 @@ void FsmCodeGen::writeExec() out << "void fsmExecute( FsmRun *fsmRun, StreamImpl *inputStream )\n" "{\n" + " " << BLOCK_START() << " = fsmRun->p;\n" "/*_resume:*/\n"; if ( redFsm->errState != 0 ) { @@ -872,7 +877,17 @@ void FsmCodeGen::writeExec() " }\n"; out << - " out: {}\n" + "out:\n" + " if ( " << P() << " != 0 )\n" + " " << TOKLEN() << " += " << P() << " - " << BLOCK_START() << ";\n"; + + if ( skipTokenLabelNeeded ) { + out << + "skip_toklen:\n" + " {}\n"; + } + + out << "}\n" "\n"; } @@ -881,7 +896,6 @@ void FsmCodeGen::writeIncludes() { out << "#include <colm/pdarun.h>\n" - "#include <colm/fsmrun.h>\n" "#include <colm/debug.h>\n" "#include <colm/bytecode.h>\n" "#include <stdio.h>\n" diff --git a/colm/fsmcodegen.h b/colm/fsmcodegen.h index 1b004f5e..29fcb98d 100644 --- a/colm/fsmcodegen.h +++ b/colm/fsmcodegen.h @@ -28,7 +28,6 @@ #include "keyops.h" #include "parsedata.h" #include "redfsm.h" -#include "fsmrun.h" using std::string; using std::ostream; @@ -76,6 +75,7 @@ public: RedFsm *redFsm, FsmTables *fsmTables ); protected: + string FSM_NAME(); string START_STATE_ID(); ostream &ACTIONS_ARRAY(); @@ -99,12 +99,14 @@ protected: string P() { return ACCESS() + "p"; } string PE() { return ACCESS() + "pe"; } - string PEOF() { return ACCESS() + "peof"; } + string DATA_EOF() { return ACCESS() + "eof"; } string CS(); string TOP() { return ACCESS() + "top"; } string TOKSTART() { return ACCESS() + "tokstart"; } string TOKEND() { return ACCESS() + "tokend"; } + string BLOCK_START() { return ACCESS() + "start"; } + string TOKLEN() { return ACCESS() + "toklen"; } string ACT() { return ACCESS() + "act"; } string MATCHED_TOKEN() { return ACCESS() + "matchedToken"; } @@ -164,6 +166,7 @@ public: bool dataPrefix; bool writeFirstFinal; bool writeErr; + bool skipTokenLabelNeeded; std::ostream &TO_STATE_ACTION_SWITCH(); std::ostream &FROM_STATE_ACTION_SWITCH(); diff --git a/colm/fsmexec.cc b/colm/fsmexec.cc index 9b945374..905f1d80 100644 --- a/colm/fsmexec.cc +++ b/colm/fsmexec.cc @@ -24,7 +24,6 @@ #include "config.h" #include "defs.h" -#include "fsmrun.h" #include "redfsm.h" #include "parsedata.h" #include "parsetree.h" @@ -42,7 +41,7 @@ void execAction( FsmRun *fsmRun, GenAction *genAction ) fsmRun->act = item->longestMatchPart->longestMatchId; break; case InlineItem::LmSetTokEnd: - fsmRun->tokend = fsmRun->p + 1; + fsmRun->tokend = fsmRun->toklen + ( fsmRun->p - fsmRun->start ) + 1; break; case InlineItem::LmInitTokStart: assert(false); @@ -56,9 +55,8 @@ void execAction( FsmRun *fsmRun, GenAction *genAction ) case InlineItem::LmSwitch: /* If the switch handles error then we also forced the error state. It * will exist. */ - fsmRun->p = fsmRun->tokend; + fsmRun->toklen = fsmRun->tokend; if ( item->tokenRegion->lmSwitchHandlesError && fsmRun->act == 0 ) { - fsmRun->p = fsmRun->tokstart; fsmRun->cs = fsmRun->tables->errorState; } else { @@ -70,6 +68,7 @@ void execAction( FsmRun *fsmRun, GenAction *genAction ) } } fsmRun->returnResult = true; + fsmRun->skipToklen = true; break; case InlineItem::LmOnLast: fsmRun->p += 1; @@ -81,9 +80,10 @@ void execAction( FsmRun *fsmRun, GenAction *genAction ) fsmRun->returnResult = true; break; case InlineItem::LmOnLagBehind: - fsmRun->p = fsmRun->tokend; + fsmRun->toklen = fsmRun->tokend; fsmRun->matchedToken = item->longestMatchPart->tdLangEl->id; fsmRun->returnResult = true; + fsmRun->skipToklen = true; break; } } @@ -99,6 +99,8 @@ void fsmExecute( FsmRun *fsmRun, StreamImpl *inputStream ) const long *_acts; unsigned int _nacts; const char *_keys; + + fsmRun->start = fsmRun->p; /* Init the token match to nothing (the sentinal). */ fsmRun->matchedToken = 0; @@ -171,12 +173,16 @@ _match: goto _again; fsmRun->returnResult = false; + fsmRun->skipToklen = false; _acts = fsmRun->tables->actions + fsmRun->tables->transActionsWI[_trans]; _nacts = (unsigned int) *_acts++; while ( _nacts-- > 0 ) execAction( fsmRun, fsmRun->tables->actionSwitch[*_acts++] ); - if ( fsmRun->returnResult ) - return; + if ( fsmRun->returnResult ) { + if ( fsmRun->skipToklen ) + goto skip_toklen; + goto final; + } _again: _acts = fsmRun->tables->actions + fsmRun->tables->toStateActions[fsmRun->cs]; @@ -190,8 +196,9 @@ _again: if ( ++fsmRun->p != fsmRun->pe ) goto _loop_head; out: - if ( fsmRun->p == fsmRun->peof ) { + if ( fsmRun->eof ) { fsmRun->returnResult = false; + fsmRun->skipToklen = false; _acts = fsmRun->tables->actions + fsmRun->tables->eofActions[fsmRun->cs]; _nacts = (unsigned int) *_acts++; @@ -200,9 +207,17 @@ out: while ( _nacts-- > 0 ) execAction( fsmRun, fsmRun->tables->actionSwitch[*_acts++] ); - if ( fsmRun->returnResult ) - return; + if ( fsmRun->returnResult ) { + if ( fsmRun->skipToklen ) + goto skip_toklen; + goto final; + } } -} +final: + if ( fsmRun->p != 0 ) + fsmRun->toklen += fsmRun->p - fsmRun->start; +skip_toklen: + {} +} diff --git a/colm/fsmrun.h b/colm/fsmrun.h deleted file mode 100644 index 821b3ccf..00000000 --- a/colm/fsmrun.h +++ /dev/null @@ -1,36 +0,0 @@ -/* - * Copyright 2007-2012 Adrian Thurston <thurston@complang.org> - */ - -/* This file is part of Colm. - * - * Colm is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * Colm is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Colm; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#ifndef _FSMRUN2_H -#define _FSMRUN2_H - -#include <colm/input.h> - -#ifdef __cplusplus -extern "C" { -#endif - - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/colm/input.c b/colm/input.c index ba144d94..b73eacbd 100644 --- a/colm/input.c +++ b/colm/input.c @@ -20,7 +20,6 @@ */ #include <colm/input.h> -#include <colm/fsmrun.h> #include <colm/pdarun.h> #include <colm/debug.h> @@ -135,35 +134,6 @@ static void sourceStreamPrepend( StreamImpl *ss, RunBuf *runBuf ) } } -void initStreamFuncs() -{ - memset( &streamFuncs, 0, sizeof(struct StreamFuncs) ); - streamFuncs.getData = &_getData; - streamFuncs.consumeData = &_consumeData; - streamFuncs.undoConsumeData = &_undoConsumeData; - streamFuncs.consumeTree = &_consumeTree; - streamFuncs.undoConsumeTree = &_undoConsumeTree; - streamFuncs.consumeLangEl = &_consumeLangEl; - streamFuncs.undoConsumeLangEl = &_undoConsumeLangEl; - - streamFuncs.setEof = &_setEof; - streamFuncs.unsetEof = &_unsetEof; - - streamFuncs.prependData = &_prependData; - streamFuncs.prependTree = &_prependTree; - streamFuncs.prependStream = &_prependStream; - streamFuncs.undoPrependData = &_undoPrependData; - streamFuncs.undoPrependTree = &_undoPrependTree; - - streamFuncs.appendData = &_appendData; - streamFuncs.appendTree = &_appendTree; - streamFuncs.appendStream = &_appendStream; - streamFuncs.undoAppendData = &_undoAppendData; - streamFuncs.undoAppendTree = &_undoAppendTree; - streamFuncs.undoAppendStream = &_undoAppendStream; -} - - void initInputFuncs() { initStreamFuncs(); @@ -177,7 +147,7 @@ void initInputFuncs() * Base run-time input streams. */ -int fdGetData( FsmRun *fsmRun, StreamImpl *ss, int skip, char *dest, int length, int *copied ) +int fdGetParseBlock( StreamImpl *ss, int skip, char **pdp, int *copied ) { int ret = 0; *copied = 0; @@ -196,8 +166,8 @@ int fdGetData( FsmRun *fsmRun, StreamImpl *ss, int skip, char *dest, int length, } runBuf->length = received; - int slen = received < length ? received : length; - memcpy( dest, runBuf->data, slen ); + int slen = received; + *pdp = runBuf->data; *copied = slen; ret = INPUT_DATA; break; @@ -222,8 +192,8 @@ int fdGetData( FsmRun *fsmRun, StreamImpl *ss, int skip, char *dest, int length, avail -= skip; skip = 0; - int slen = avail < length ? avail : length; - memcpy( dest, src, slen ) ; + int slen = avail; + *pdp = src; *copied += slen; ret = INPUT_DATA; break; @@ -233,11 +203,52 @@ int fdGetData( FsmRun *fsmRun, StreamImpl *ss, int skip, char *dest, int length, buf = buf->next; } - attachStream( fsmRun, ss ); - return ret; } +int fdGetData( StreamImpl *ss, char *dest, int length ) +{ + int copied = 0; + + /* Move over skip bytes. */ + RunBuf *buf = ss->queue; + while ( true ) { + if ( buf == 0 ) { + /* Got through the in-mem buffers without copying anything. */ + RunBuf *runBuf = newRunBuf(); + sourceStreamAppend( ss, runBuf ); + int received = ss->funcs->getDataSource( ss, runBuf->data, FSM_BUFSIZE ); + runBuf->length = received; + if ( received == 0 ) + break; + + buf = runBuf; + } + + int avail = buf->length - buf->offset; + + /* Anything available in the current buffer. */ + if ( avail > 0 ) { + /* The source data from the current buffer. */ + char *src = &buf->data[buf->offset]; + + int slen = avail < length ? avail : length; + memcpy( dest+copied, src, slen ) ; + copied += slen; + length -= slen; + } + + if ( length == 0 ) { + debug( REALM_INPUT, "exiting get data\n", length ); + break; + } + + buf = buf->next; + } + + return copied; +} + int fdConsumeData( StreamImpl *ss, int length ) { debug( REALM_INPUT, "source consuming %ld bytes\n", length ); @@ -278,7 +289,7 @@ int fdConsumeData( StreamImpl *ss, int length ) return consumed; } -int fdUndoConsumeData( FsmRun *fsmRun, StreamImpl *ss, const char *data, int length ) +int fdUndoConsumeData( StreamImpl *ss, const char *data, int length ) { debug( REALM_INPUT, "undoing consume of %ld bytes\n", length ); @@ -305,6 +316,7 @@ void initFileFuncs() { memset( &fileFuncs, 0, sizeof(struct StreamFuncs) ); fileFuncs.getData = &fdGetData; + fileFuncs.getParseBlock = &fdGetParseBlock; fileFuncs.consumeData = &fdConsumeData; fileFuncs.undoConsumeData = &fdUndoConsumeData; fileFuncs.getDataSource = &fileGetDataSource; @@ -330,6 +342,7 @@ void initFdFuncs() { memset( &fdFuncs, 0, sizeof(struct StreamFuncs) ); fdFuncs.getData = &fdGetData; + fdFuncs.getParseBlock = &fdGetParseBlock; fdFuncs.consumeData = &fdConsumeData; fdFuncs.undoConsumeData = &fdUndoConsumeData; fdFuncs.getDataSource = &fdGetDataSource; @@ -430,13 +443,13 @@ static int isSourceStream( StreamImpl *is ) return false; } -void _setEof( StreamImpl *is ) +static void _setEof( StreamImpl *is ) { debug( REALM_INPUT, "setting EOF in input stream\n" ); is->eof = true; } -void _unsetEof( StreamImpl *is ) +static void _unsetEof( StreamImpl *is ) { if ( isSourceStream( is ) ) { Stream *stream = (Stream*)is->queue->tree; @@ -447,13 +460,11 @@ void _unsetEof( StreamImpl *is ) } } -int _getData( FsmRun *fsmRun, StreamImpl *is, int skip, char *dest, int length, int *copied ) +static int _getParseBlock( StreamImpl *is, int skip, char **pdp, int *copied ) { int ret = 0; *copied = 0; - attachStream( fsmRun, is ); - /* Move over skip bytes. */ RunBuf *buf = is->queue; while ( true ) { @@ -465,11 +476,18 @@ int _getData( FsmRun *fsmRun, StreamImpl *is, int skip, char *dest, int length, if ( buf->type == RunBufSourceType ) { Stream *stream = (Stream*)buf->tree; - int type = stream->in->funcs->getData( fsmRun, stream->in, skip, dest, length, copied ); - - if ( type == INPUT_EOD && is->eof ) { - ret = INPUT_EOF; - break; + int type = stream->in->funcs->getParseBlock( stream->in, skip, pdp, copied ); + +// if ( type == INPUT_EOD && !stream->in->eosSent ) { +// stream->in->eosSent = 1; +// ret = INPUT_EOS; +// continue; +// } + + if ( type == INPUT_EOD || type == INPUT_EOF ) { + debug( REALM_INPUT, "skipping over input\n" ); + buf = buf->next; + continue; } ret = type; @@ -505,9 +523,8 @@ int _getData( FsmRun *fsmRun, StreamImpl *is, int skip, char *dest, int length, avail -= skip; skip = 0; - int slen = avail <= length ? avail : length; - memcpy( dest, src, slen ) ; - *copied += slen; + *pdp = src; + *copied += avail; ret = INPUT_DATA; break; } @@ -519,22 +536,22 @@ int _getData( FsmRun *fsmRun, StreamImpl *is, int skip, char *dest, int length, #if DEBUG switch ( ret ) { case INPUT_DATA: - debug( REALM_INPUT, "get data: DATA copied: %d: %.*s\n", *copied, (int)*copied, dest ); + debug( REALM_INPUT, "get parse block: DATA: %d\n", *copied ); break; case INPUT_EOD: - debug( REALM_INPUT, "get data: EOD\n" ); + debug( REALM_INPUT, "get parse block: EOD\n" ); break; case INPUT_EOF: - debug( REALM_INPUT, "get data: EOF\n" ); + debug( REALM_INPUT, "get parse block: EOF\n" ); break; case INPUT_TREE: - debug( REALM_INPUT, "get data: TREE\n" ); + debug( REALM_INPUT, "get parse block: TREE\n" ); break; case INPUT_IGNORE: - debug( REALM_INPUT, "get data: IGNORE\n" ); + debug( REALM_INPUT, "get parse block: IGNORE\n" ); break; case INPUT_LANG_EL: - debug( REALM_INPUT, "get data: LANG_EL\n" ); + debug( REALM_INPUT, "get parse block: LANG_EL\n" ); break; } #endif @@ -542,7 +559,63 @@ int _getData( FsmRun *fsmRun, StreamImpl *is, int skip, char *dest, int length, return ret; } -int _consumeData( StreamImpl *is, int length ) +static int _getData( StreamImpl *is, char *dest, int length ) +{ + int copied = 0; + + /* Move over skip bytes. */ + RunBuf *buf = is->queue; + while ( true ) { + if ( buf == 0 ) { + /* Got through the in-mem buffers without copying anything. */ + break; + } + + if ( buf->type == RunBufSourceType ) { + Stream *stream = (Stream*)buf->tree; + int glen = stream->in->funcs->getData( stream->in, dest+copied, length ); + + if ( glen == 0 ) { + debug( REALM_INPUT, "skipping over input\n" ); + buf = buf->next; + continue; + } + + copied += glen; + length -= glen; + } + else if ( buf->type == RunBufTokenType ) + break; + else if ( buf->type == RunBufIgnoreType ) + break; + else { + int avail = buf->length - buf->offset; + + /* Anything available in the current buffer. */ + if ( avail > 0 ) { + /* The source data from the current buffer. */ + char *src = &buf->data[buf->offset]; + + int slen = avail <= length ? avail : length; + memcpy( dest+copied, src, slen ) ; + + copied += slen; + length -= slen; + } + } + + if ( length == 0 ) { + debug( REALM_INPUT, "exiting get data\n", length ); + break; + } + + buf = buf->next; + } + + return copied; +} + +static int _consumeData( StreamImpl *is, int length ) { debug( REALM_INPUT, "consuming %d bytes\n", length ); @@ -558,6 +631,7 @@ int _consumeData( StreamImpl *is, int length ) if ( buf->type == RunBufSourceType ) { Stream *stream = (Stream*)buf->tree; int slen = stream->in->funcs->consumeData( stream->in, length ); + debug( REALM_INPUT, " got %d bytes from source\n", slen ); consumed += slen; length -= slen; @@ -578,8 +652,10 @@ int _consumeData( StreamImpl *is, int length ) } } - if ( length == 0 ) + if ( length == 0 ) { + debug( REALM_INPUT, "exiting consume\n", length ); break; + } RunBuf *runBuf = inputStreamPopHead( is ); free( runBuf ); @@ -588,16 +664,13 @@ int _consumeData( StreamImpl *is, int length ) return consumed; } -int _undoConsumeData( FsmRun *fsmRun, StreamImpl *is, const char *data, int length ) +static int _undoConsumeData( StreamImpl *is, const char *data, int length ) { debug( REALM_INPUT, "undoing consume of %ld bytes\n", length ); if ( isSourceStream( is ) ) { Stream *stream = (Stream*)is->queue->tree; - int len = stream->in->funcs->undoConsumeData( fsmRun, stream->in, data, length ); - - if ( stream->in->attached != 0 ) - detachStream( stream->in->attached, stream->in ); + int len = stream->in->funcs->undoConsumeData( stream->in, data, length ); return len; } @@ -607,14 +680,11 @@ int _undoConsumeData( FsmRun *fsmRun, StreamImpl *is, const char *data, int leng memcpy( newBuf->data, data, length ); inputStreamPrepend( is, newBuf ); - if ( is->attached != 0 ) - detachStream( is->attached, is ); - return length; } } -Tree *_consumeTree( StreamImpl *is ) +static Tree *_consumeTree( StreamImpl *is ) { while ( is->queue != 0 && is->queue->type == RunBufDataType && is->queue->offset == is->queue->length ) { RunBuf *runBuf = inputStreamPopHead( is ); @@ -633,11 +703,8 @@ Tree *_consumeTree( StreamImpl *is ) return 0; } -void _undoConsumeTree( StreamImpl *is, Tree *tree, int ignore ) +static void _undoConsumeTree( StreamImpl *is, Tree *tree, int ignore ) { - if ( is->attached != 0 ) - detachStream( is->attached, is ); - /* Create a new buffer for the data. This is the easy implementation. * Something better is needed here. It puts a max on the amount of * data that can be pushed back to the inputStream. */ @@ -647,7 +714,7 @@ void _undoConsumeTree( StreamImpl *is, Tree *tree, int ignore ) inputStreamPrepend( is, newBuf ); } -struct LangEl *_consumeLangEl( StreamImpl *is, long *bindId, char **data, long *length ) +static struct LangEl *_consumeLangEl( StreamImpl *is, long *bindId, char **data, long *length ) { if ( isSourceStream( is ) ) { Stream *stream = (Stream*)is->queue->tree; @@ -658,7 +725,7 @@ struct LangEl *_consumeLangEl( StreamImpl *is, long *bindId, char **data, long * } } -void _undoConsumeLangEl( StreamImpl *is ) +static void _undoConsumeLangEl( StreamImpl *is ) { if ( isSourceStream( is ) ) { Stream *stream = (Stream*)is->queue->tree; @@ -669,11 +736,8 @@ void _undoConsumeLangEl( StreamImpl *is ) } } -void _prependData( StreamImpl *is, const char *data, long length ) +static void _prependData( StreamImpl *is, const char *data, long length ) { - if ( is->attached != 0 ) - detachStream( is->attached, is ); - if ( isSourceStream( is ) && ((Stream*)is->queue->tree)->in->funcs == &streamFuncs ) { Stream *stream = (Stream*)is->queue->tree; @@ -693,11 +757,8 @@ void _prependData( StreamImpl *is, const char *data, long length ) } } -void _prependTree( StreamImpl *is, Tree *tree, int ignore ) +static void _prependTree( StreamImpl *is, Tree *tree, int ignore ) { - if ( is->attached != 0 ) - detachStream( is->attached, is ); - /* Create a new buffer for the data. This is the easy implementation. * Something better is needed here. It puts a max on the amount of * data that can be pushed back to the inputStream. */ @@ -707,7 +768,7 @@ void _prependTree( StreamImpl *is, Tree *tree, int ignore ) inputStreamPrepend( is, newBuf ); } -void _prependStream( StreamImpl *in, struct ColmTree *tree ) +static void _prependStream( StreamImpl *in, struct ColmTree *tree ) { /* Create a new buffer for the data. This is the easy implementation. * Something better is needed here. It puts a max on the amount of @@ -718,11 +779,8 @@ void _prependStream( StreamImpl *in, struct ColmTree *tree ) inputStreamPrepend( in, newBuf ); } -int _undoPrependData( StreamImpl *is, int length ) +static int _undoPrependData( StreamImpl *is, int length ) { - if ( is->attached != 0 ) - detachStream( is->attached, is ); - debug( REALM_INPUT, "consuming %d bytes\n", length ); int consumed = 0; @@ -767,11 +825,8 @@ int _undoPrependData( StreamImpl *is, int length ) return consumed; } -Tree *_undoPrependTree( StreamImpl *is ) +static Tree *_undoPrependTree( StreamImpl *is ) { - if ( is->attached != 0 ) - detachStream( is->attached, is ); - while ( is->queue != 0 && is->queue->type == RunBufDataType && is->queue->offset == is->queue->length ) { RunBuf *runBuf = inputStreamPopHead( is ); free( runBuf ); @@ -789,7 +844,7 @@ Tree *_undoPrependTree( StreamImpl *is ) return 0; } -void _appendData( StreamImpl *is, const char *data, long len ) +static void _appendData( StreamImpl *is, const char *data, long len ) { while ( len > 0 ) { RunBuf *ad = newRunBuf(); @@ -807,11 +862,8 @@ void _appendData( StreamImpl *is, const char *data, long len ) } } -Tree *_undoAppendData( StreamImpl *is, int length ) +static Tree *_undoAppendData( StreamImpl *is, int length ) { - if ( is->attached != 0 ) - detachStream( is->attached, is ); - int consumed = 0; /* Move over skip bytes. */ @@ -847,7 +899,7 @@ Tree *_undoAppendData( StreamImpl *is, int length ) return 0; } -void _appendTree( StreamImpl *is, Tree *tree ) +static void _appendTree( StreamImpl *is, Tree *tree ) { RunBuf *ad = newRunBuf(); @@ -858,7 +910,7 @@ void _appendTree( StreamImpl *is, Tree *tree ) ad->length = 0; } -void _appendStream( StreamImpl *in, struct ColmTree *tree ) +static void _appendStream( StreamImpl *in, struct ColmTree *tree ) { RunBuf *ad = newRunBuf(); @@ -869,24 +921,49 @@ void _appendStream( StreamImpl *in, struct ColmTree *tree ) ad->length = 0; } -Tree *_undoAppendStream( StreamImpl *is ) +static Tree *_undoAppendTree( StreamImpl *is ) { - if ( is->attached != 0 ) - detachStream( is->attached, is ); - RunBuf *runBuf = inputStreamPopTail( is ); Tree *tree = runBuf->tree; free( runBuf ); return tree; } -Tree *_undoAppendTree( StreamImpl *is ) +static Tree *_undoAppendStream( StreamImpl *is ) { - if ( is->attached != 0 ) - detachStream( is->attached, is ); - RunBuf *runBuf = inputStreamPopTail( is ); Tree *tree = runBuf->tree; free( runBuf ); return tree; } + +void initStreamFuncs() +{ + memset( &streamFuncs, 0, sizeof(struct StreamFuncs) ); + streamFuncs.getData = &_getData; + streamFuncs.getParseBlock = &_getParseBlock; + streamFuncs.consumeData = &_consumeData; + streamFuncs.undoConsumeData = &_undoConsumeData; + streamFuncs.consumeTree = &_consumeTree; + streamFuncs.undoConsumeTree = &_undoConsumeTree; + streamFuncs.consumeLangEl = &_consumeLangEl; + streamFuncs.undoConsumeLangEl = &_undoConsumeLangEl; + + streamFuncs.setEof = &_setEof; + streamFuncs.unsetEof = &_unsetEof; + + streamFuncs.prependData = &_prependData; + streamFuncs.prependTree = &_prependTree; + streamFuncs.prependStream = &_prependStream; + streamFuncs.undoPrependData = &_undoPrependData; + streamFuncs.undoPrependTree = &_undoPrependTree; + + streamFuncs.appendData = &_appendData; + streamFuncs.appendTree = &_appendTree; + streamFuncs.appendStream = &_appendStream; + streamFuncs.undoAppendData = &_undoAppendData; + streamFuncs.undoAppendTree = &_undoAppendTree; + streamFuncs.undoAppendStream = &_undoAppendStream; +} + + diff --git a/colm/input.h b/colm/input.h index 3bfc43c5..14cdd465 100644 --- a/colm/input.h +++ b/colm/input.h @@ -35,9 +35,10 @@ extern "C" { /* This is for data sources to return, not for the wrapper. */ #define INPUT_EOD 2 #define INPUT_EOF 3 -#define INPUT_LANG_EL 4 -#define INPUT_TREE 5 -#define INPUT_IGNORE 6 +#define INPUT_EOS 4 +#define INPUT_LANG_EL 5 +#define INPUT_TREE 6 +#define INPUT_IGNORE 7 /* * pdaRun <- fsmRun <- stream @@ -87,11 +88,12 @@ typedef struct _StreamImpl StreamImpl; struct StreamFuncs { - /* Data. */ - int (*getData)( struct _FsmRun *fsmRun, StreamImpl *ss, int offset, char *dest, int length, int *copied ); + int (*getParseBlock)( StreamImpl *ss, int skip, char **pdp, int *copied ); + + int (*getData)( StreamImpl *ss, char *dest, int length ); int (*consumeData)( StreamImpl *ss, int length ); - int (*undoConsumeData)( struct _FsmRun *fsmRun, StreamImpl *ss, const char *data, int length ); + int (*undoConsumeData)( StreamImpl *ss, const char *data, int length ); struct ColmTree *(*consumeTree)( StreamImpl *ss ); void (*undoConsumeTree)( StreamImpl *ss, struct ColmTree *tree, int ignore ); @@ -127,10 +129,10 @@ struct StreamFuncs struct _StreamImpl { struct StreamFuncs *funcs; - struct _FsmRun *attached; char eofSent; char eof; + char eosSent; RunBuf *queue; RunBuf *queueTail; @@ -152,8 +154,6 @@ struct _StreamImpl struct ConsItem *consItem; }; -typedef struct _StreamImpl StreamImpl; - StreamImpl *newSourceStreamPat( struct Pattern *pattern ); StreamImpl *newSourceStreamCons( struct Constructor *constructor ); StreamImpl *newSourceStreamFile( FILE *file ); @@ -164,35 +164,6 @@ void initStaticFuncs(); void initPatFuncs(); void initConsFuncs(); -/* The input stream interface. */ - -int _getData( struct _FsmRun *fsmRun, StreamImpl *in, int offset, char *dest, int length, int *copied ); -int _consumeData( StreamImpl *in, int length ); -int _undoConsumeData( struct _FsmRun *fsmRun, StreamImpl *is, const char *data, int length ); - -struct ColmTree *_consumeTree( StreamImpl *in ); -void _undoConsumeTree( StreamImpl *in, struct ColmTree *tree, int ignore ); - -struct LangEl *_consumeLangEl( StreamImpl *in, long *bindId, char **data, long *length ); -void _undoConsumeLangEl( StreamImpl *in ); - -void _setEof( StreamImpl *is ); -void _unsetEof( StreamImpl *is ); - -void _prependData( StreamImpl *in, const char *data, long len ); -void _prependTree( StreamImpl *is, struct ColmTree *tree, int ignore ); -void _prependStream( StreamImpl *in, struct ColmTree *tree ); -int _undoPrependData( StreamImpl *is, int length ); -struct ColmTree *_undoPrependTree( StreamImpl *is ); -struct ColmTree *_undoPrependStream( StreamImpl *in ); - -void _appendData( StreamImpl *in, const char *data, long len ); -void _appendTree( StreamImpl *in, struct ColmTree *tree ); -void _appendStream( StreamImpl *in, struct ColmTree *tree ); -struct ColmTree *_undoAppendData( StreamImpl *in, int length ); -struct ColmTree *_undoAppendTree( StreamImpl *in ); -struct ColmTree *_undoAppendStream( StreamImpl *in ); - #ifdef __cplusplus } #endif diff --git a/colm/lmparse.kl b/colm/lmparse.kl index 5f3adb38..bbfd9b6f 100644 --- a/colm/lmparse.kl +++ b/colm/lmparse.kl @@ -26,7 +26,6 @@ #include "lmparse.h" #include "global.h" #include "input.h" -#include "fsmrun.h" using std::cout; using std::cerr; diff --git a/colm/parsetree.cc b/colm/parsetree.cc index 02d8f68f..4c82410e 100644 --- a/colm/parsetree.cc +++ b/colm/parsetree.cc @@ -22,7 +22,6 @@ #include "lmparse.h" #include "parsetree.h" #include "input.h" -#include "fsmrun.h" #include <iostream> #include <iomanip> diff --git a/colm/parsetree.h b/colm/parsetree.h index b9841c7e..9e65bf6d 100644 --- a/colm/parsetree.h +++ b/colm/parsetree.h @@ -35,7 +35,6 @@ #include "astring.h" #include "bytecode.h" #include "avlbasic.h" -#include "fsmrun.h" /* Operators that are represented with single symbol characters. */ #define OP_DoubleEql 'e' diff --git a/colm/pdabuild.cc b/colm/pdabuild.cc index 1dbd649e..6eb929f7 100644 --- a/colm/pdabuild.cc +++ b/colm/pdabuild.cc @@ -32,7 +32,6 @@ #include "redfsm.h" #include "fsmcodegen.h" #include "redbuild.h" -#include "fsmrun.h" /* Dumping the fsm. */ #include "mergesort.h" diff --git a/colm/pdarun.c b/colm/pdarun.c index e4cb9e1e..3ed94374 100644 --- a/colm/pdarun.c +++ b/colm/pdarun.c @@ -22,7 +22,6 @@ #include "config.h" #include "debug.h" #include "pdarun.h" -#include "fsmrun.h" #include "bytecode.h" #include "tree.h" #include "pool.h" @@ -57,27 +56,24 @@ i = (Tree*)w; \ } while(0) -void initFsmRun( FsmRun *fsmRun, Program *prg ) +static void initFsmRun( Program *prg, FsmRun *fsmRun ) { fsmRun->tables = prg->rtd->fsmTables; - fsmRun->runBuf = 0; - /* Run buffers need to stick around because - * token strings point into them. */ - fsmRun->runBuf = newRunBuf(); - fsmRun->runBuf->next = 0; + fsmRun->consumeBuf = 0; - fsmRun->p = fsmRun->pe = fsmRun->runBuf->data; - fsmRun->peof = 0; + fsmRun->p = fsmRun->pe = 0; + fsmRun->toklen = 0; + fsmRun->eof = 0; fsmRun->preRegion = -1; } void clearFsmRun( Program *prg, FsmRun *fsmRun ) { - if ( fsmRun->runBuf != 0 ) { + if ( fsmRun->consumeBuf != 0 ) { /* Transfer the run buf list to the program */ - RunBuf *head = fsmRun->runBuf; + RunBuf *head = fsmRun->consumeBuf; RunBuf *tail = head; while ( tail->next != 0 ) tail = tail->next; @@ -129,26 +125,108 @@ void decrementSteps( PdaRun *pdaRun ) debug( REALM_PARSE, "steps down to %ld\n", pdaRun->steps ); } -/* Load up a token, starting from tokstart if it is set. If not set then - * start it at data. */ -Head *streamPull( Program *prg, FsmRun *fsmRun, StreamImpl *is, long length ) +Head *extractMatch( Program *prg, FsmRun *fsmRun, StreamImpl *is ) { - /* We should not be in the midst of getting a token. */ - assert( fsmRun->tokstart == 0 ); + long length = fsmRun->toklen; + + debug( REALM_PARSE, "extracting token of length: %ld\n", length ); + + RunBuf *runBuf = fsmRun->consumeBuf; + if ( runBuf == 0 || length > ( FSM_BUFSIZE - runBuf->length ) ) { + runBuf = newRunBuf(); + runBuf->next = fsmRun->consumeBuf; + fsmRun->consumeBuf = runBuf; + } - RunBuf *runBuf = newRunBuf(); - runBuf->next = fsmRun->runBuf; - fsmRun->runBuf = runBuf; + char *dest = runBuf->data + runBuf->length; - int len = 0; - is->funcs->getData( fsmRun, is, 0, runBuf->data, length, &len ); + is->funcs->getData( is, dest, length ); is->funcs->consumeData( is, length ); - fsmRun->p = fsmRun->pe = runBuf->data + length; - Head *tokdata = stringAllocPointer( prg, runBuf->data, length ); - updatePosition( is, runBuf->data, length ); + runBuf->length += length; + + fsmRun->p = fsmRun->pe = 0; + fsmRun->toklen = 0; + fsmRun->tokstart = 0; + + Head *head = stringAllocPointer( prg, dest, length ); + + head->location = locationAllocate( prg ); + head->location->line = is->line; + head->location->column = is->column; + head->location->byte = is->byte; + + debug( REALM_PARSE, "location byte: %d\n", is->byte ); + + return head; +} + +Head *peekMatch( Program *prg, FsmRun *fsmRun, StreamImpl *is ) +{ + long length = fsmRun->toklen; + + RunBuf *runBuf = fsmRun->consumeBuf; + if ( runBuf == 0 || length > ( FSM_BUFSIZE - runBuf->length ) ) { + runBuf = newRunBuf(); + runBuf->next = fsmRun->consumeBuf; + fsmRun->consumeBuf = runBuf; + } + + char *dest = runBuf->data + runBuf->length; + + is->funcs->getData( is, dest, length ); + + fsmRun->p = fsmRun->pe = 0; + fsmRun->toklen = 0; + + Head *head = stringAllocPointer( prg, dest, length ); + + head->location = locationAllocate( prg ); + head->location->line = is->line; + head->location->column = is->column; + head->location->byte = is->byte; + + debug( REALM_PARSE, "location byte: %d\n", is->byte ); + + return head; +} + +Head *streamPull( Program *prg, PdaRun *pdaRun, StreamImpl *is, long length ) +{ + if ( pdaRun != 0 ) { + FsmRun *fsmRun = pdaRun->fsmRun; + RunBuf *runBuf = fsmRun->consumeBuf; + if ( length > ( FSM_BUFSIZE - runBuf->length ) ) { + runBuf = newRunBuf(); + runBuf->next = fsmRun->consumeBuf; + fsmRun->consumeBuf = runBuf; + } + + char *dest = runBuf->data + runBuf->length; + + is->funcs->getData( is, dest, length ); + is->funcs->consumeData( is, length ); + + runBuf->length += length; + + fsmRun->p = fsmRun->pe = 0; + fsmRun->toklen = 0; + + Head *tokdata = stringAllocPointer( prg, dest, length ); + updatePosition( is, dest, length ); + + return tokdata; + } + else { + Head *head = initStrSpace( length ); + char *dest = (char*)head->data; + + is->funcs->getData( is, dest, length ); + is->funcs->consumeData( is, length ); - return tokdata; + updatePosition( is, dest, length ); + return head; + } } void undoStreamPull( StreamImpl *is, const char *data, long length ) @@ -158,22 +236,22 @@ void undoStreamPull( StreamImpl *is, const char *data, long length ) is->funcs->prependData( is, data, length ); } -void streamPushText( FsmRun *fsmRun, StreamImpl *is, const char *data, long length ) +void streamPushText( StreamImpl *is, const char *data, long length ) { is->funcs->prependData( is, data, length ); } -void streamPushTree( FsmRun *fsmRun, StreamImpl *is, Tree *tree, int ignore ) +void streamPushTree( StreamImpl *is, Tree *tree, int ignore ) { is->funcs->prependTree( is, tree, ignore ); } -void streamPushStream( FsmRun *fsmRun, StreamImpl *is, Tree *tree ) +void streamPushStream( StreamImpl *is, Tree *tree ) { is->funcs->prependStream( is, tree ); } -void undoStreamPush( Program *prg, Tree **sp, FsmRun *fsmRun, StreamImpl *is, long length ) +void undoStreamPush( Program *prg, Tree **sp, StreamImpl *is, long length ) { if ( length < 0 ) { Tree *tree = is->funcs->undoPrependTree( is ); @@ -184,7 +262,7 @@ void undoStreamPush( Program *prg, Tree **sp, FsmRun *fsmRun, StreamImpl *is, lo } } -void undoStreamAppend( Program *prg, Tree **sp, FsmRun *fsmRun, StreamImpl *is, Tree *input, long length ) +void undoStreamAppend( Program *prg, Tree **sp, StreamImpl *is, Tree *input, long length ) { if ( input->id == LEL_ID_STR ) is->funcs->undoAppendData( is, length ); @@ -209,7 +287,7 @@ static void sendBackText( FsmRun *fsmRun, StreamImpl *is, const char *data, long debug( REALM_PARSE, "sending back text: %.*s\n", (int)length, data ); - is->funcs->undoConsumeData( fsmRun, is, data, length ); + is->funcs->undoConsumeData( is, data, length ); undoPosition( is, data, length ); } @@ -250,54 +328,31 @@ static void sendBackIgnore( Program *prg, Tree **sp, PdaRun *pdaRun, FsmRun *fsm debug( REALM_PARSE, "trigger parse stop, steps = target = %d\n", pdaRun->targetSteps ); pdaRun->stop = true; } - -} - -void attachStream( FsmRun *fsmRun, StreamImpl *is ) -{ - if ( is->attached != 0 && is->attached != fsmRun ) - detachStream( is->attached, is ); - - if ( is->attached != fsmRun ) { - debug( REALM_INPUT, "attaching FsmRun to stream: %p %p\n", fsmRun, is ); - is->attached = fsmRun; - } -} - -void detachStream( FsmRun *fsmRun, StreamImpl *is ) -{ - debug( REALM_INPUT, "detaching FsmRun from stream: %p %p\n", fsmRun, is ); - - is->attached = 0; - clearBuffered( fsmRun ); -} - -void detachSource( FsmRun *fsmRun, StreamImpl *is ) -{ - debug( REALM_INPUT, "detaching fsm run from source stream: %p %p\n", fsmRun, is ); - - is->attached = 0; - clearBuffered( fsmRun ); } void clearBuffered( FsmRun *fsmRun ) { if ( fsmRun->tokstart != 0 ) { - fsmRun->p = fsmRun->pe = fsmRun->tokstart; - fsmRun->tokstart = 0; + //fsmRun->p = fsmRun->pe = fsmRun->tokstart; + //fsmRun->tokstart = 0; + + fsmRun->pe = fsmRun->p; } else { fsmRun->pe = fsmRun->p; } } -void resetToken( FsmRun *fsmRun ) +void resetToken( PdaRun *pdaRun ) { + FsmRun *fsmRun = pdaRun->fsmRun; + /* If there is a token started, but never finished for a lack of data, we * must first backup over it. */ if ( fsmRun->tokstart != 0 ) { - fsmRun->p = fsmRun->tokstart; - fsmRun->tokstart = 0; + fsmRun->p = fsmRun->pe = 0; + fsmRun->toklen = 0; + fsmRun->eof = 0; } } @@ -768,7 +823,7 @@ void sendIgnore( Program *prg, Tree **sp, StreamImpl *is, FsmRun *fsmRun, PdaRun /* Make the ignore string. */ Head *ignoreStr = extractMatch( prg, fsmRun, is ); - updatePosition( is, fsmRun->tokstart, ignoreStr->length ); + updatePosition( is, ignoreStr->data, ignoreStr->length ); debug( REALM_PARSE, "ignoring: %.*s\n", ignoreStr->length, ignoreStr->data ); @@ -782,38 +837,6 @@ void sendIgnore( Program *prg, Tree **sp, StreamImpl *is, FsmRun *fsmRun, PdaRun } -/* Doesn't consume. */ -Head *peekMatch( Program *prg, FsmRun *fsmRun, StreamImpl *is ) -{ - long length = fsmRun->p - fsmRun->tokstart; - Head *head = stringAllocPointer( prg, fsmRun->tokstart, length ); - head->location = locationAllocate( prg ); - head->location->line = is->line; - head->location->column = is->column; - head->location->byte = is->byte; - - debug( REALM_PARSE, "location byte: %d\n", is->byte ); - - return head; -} - -/* Consumes. */ -Head *extractMatch( Program *prg, FsmRun *fsmRun, StreamImpl *is ) -{ - long length = fsmRun->p - fsmRun->tokstart; - Head *head = stringAllocPointer( prg, fsmRun->tokstart, length ); - head->location = locationAllocate( prg ); - head->location->line = is->line; - head->location->column = is->column; - head->location->byte = is->byte; - - debug( REALM_PARSE, "location byte: %d\n", is->byte ); - - is->funcs->consumeData( is, length ); - - return head; -} - static void sendToken( Program *prg, Tree **sp, StreamImpl *is, FsmRun *fsmRun, PdaRun *pdaRun, long id ) { int emptyIgnore = pdaRun->accumIgnore == 0; @@ -825,7 +848,7 @@ static void sendToken( Program *prg, Tree **sp, StreamImpl *is, FsmRun *fsmRun, prg->rtd->lelInfo[id].name, stringLength(tokdata), stringData(tokdata) ); - updatePosition( is, fsmRun->tokstart, tokdata->length ); + updatePosition( is, stringData(tokdata), stringLength(tokdata) ); Kid *input = makeTokenWithData( prg, pdaRun, fsmRun, is, id, tokdata ); @@ -882,7 +905,7 @@ static void sendCi( Program *prg, Tree **sp, StreamImpl *is, FsmRun *fsmRun, Pda prg->rtd->lelInfo[id].name, stringLength(tokdata), stringData(tokdata) ); - updatePosition( is, fsmRun->tokstart, tokdata->length ); + updatePosition( is, stringData(tokdata), stringLength(tokdata) ); Kid *input = makeTokenWithData( prg, pdaRun, fsmRun, is, id, tokdata ); @@ -931,8 +954,12 @@ static void sendEof( Program *prg, Tree **sp, StreamImpl *is, FsmRun *fsmRun, Pd pdaRun->parseInput = parseTree; } -void newToken( Program *prg, PdaRun *pdaRun, FsmRun *fsmRun ) +static void newToken( Program *prg, PdaRun *pdaRun, FsmRun *fsmRun ) { + fsmRun->p = fsmRun->pe = 0; + fsmRun->toklen = 0; + fsmRun->eof = 0; + /* Init the scanner vars. */ fsmRun->act = 0; fsmRun->tokstart = 0; @@ -996,140 +1023,89 @@ long scanToken( Program *prg, PdaRun *pdaRun, FsmRun *fsmRun, StreamImpl *is ) return SCAN_UNDO; while ( true ) { - fsmExecute( fsmRun, is ); - - /* First check if scanning stopped because we have a token. */ - if ( fsmRun->matchedToken > 0 ) { - /* If the token has a marker indicating the end (due to trailing - * context) then adjust data now. */ - LangElInfo *lelInfo = prg->rtd->lelInfo; - if ( lelInfo[fsmRun->matchedToken].markId >= 0 ) - fsmRun->p = fsmRun->mark[lelInfo[fsmRun->matchedToken].markId]; - - return fsmRun->matchedToken; - } - - /* Check for error. */ - if ( fsmRun->cs == fsmRun->tables->errorState ) { - /* If a token was started, but not finished (tokstart != 0) then - * restore data to the beginning of that token. */ - if ( fsmRun->tokstart != 0 ) - fsmRun->p = fsmRun->tokstart; - - /* Check for a default token in the region. If one is there - * then send it and continue with the processing loop. */ - if ( prg->rtd->regionInfo[fsmRun->region].defaultToken >= 0 ) { - fsmRun->tokstart = fsmRun->tokend = fsmRun->p; - return prg->rtd->regionInfo[fsmRun->region].defaultToken; - } - - return SCAN_ERROR; - } - - /* Got here because the state machine didn't match a token or - * encounter an error. Must be because we got to the end of the buffer - * data. */ - assert( fsmRun->p == fsmRun->pe ); - - /* There may be space left in the current buffer. If not then we need - * to make some. */ - long space = fsmRun->runBuf->data + FSM_BUFSIZE - fsmRun->pe; - if ( space == 0 ) { - /* Create a new run buf. */ - RunBuf *newBuf = newRunBuf(); - - /* If partway through a token then preserve the prefix. */ - long have = 0; - - if ( fsmRun->tokstart == 0 ) { - /* No prefix. We filled the previous buffer. */ - fsmRun->runBuf->length = FSM_BUFSIZE; - } - else { - int i; - - debug( REALM_SCAN, "copying data over to new buffer\n" ); - assert( fsmRun->runBuf->offset == 0 ); - - if ( fsmRun->tokstart == fsmRun->runBuf->data ) { - /* A token is started and it is already at the beginning - * of the current buffer. This means buffer is full and it - * must be grown. Probably need to do this sooner. */ - fatal( "OUT OF BUFFER SPACE\n" ); - } - - /* There is data that needs to be shifted over. */ - have = fsmRun->pe - fsmRun->tokstart; - memcpy( newBuf->data, fsmRun->tokstart, have ); - - /* Compute the length of the previous buffer. */ - fsmRun->runBuf->length = FSM_BUFSIZE - have; - - /* Compute tokstart and tokend. */ - long dist = fsmRun->tokstart - newBuf->data; - - fsmRun->tokend -= dist; - fsmRun->tokstart = newBuf->data; - - /* Shift any markers. */ - for ( i = 0; i < MARK_SLOTS; i++ ) { - if ( fsmRun->mark[i] != 0 ) - fsmRun->mark[i] -= dist; - } - } - - fsmRun->p = fsmRun->pe = newBuf->data + have; - fsmRun->peof = 0; - - newBuf->next = fsmRun->runBuf; - fsmRun->runBuf = newBuf; - } - - /* We don't have any data. What is next in the input inputStream? */ - space = fsmRun->runBuf->data + FSM_BUFSIZE - fsmRun->pe; - assert( space > 0 ); - - /* Get more data. */ - int have = fsmRun->tokstart != 0 ? fsmRun->p - fsmRun->tokstart : 0; + char *pd = 0; int len = 0; - debug( REALM_SCAN, "fetching data: have: %d space: %d\n", have, space ); - int type = is->funcs->getData( fsmRun, is, have, fsmRun->p, space, &len ); + int type = is->funcs->getParseBlock( is, fsmRun->toklen, &pd, &len ); switch ( type ) { case INPUT_DATA: - fsmRun->pe = fsmRun->p + len; + fsmRun->p = pd; + fsmRun->pe = pd + len; + break; + + case INPUT_EOS: + fsmRun->p = fsmRun->pe = 0; + if ( fsmRun->tokstart != 0 ) + fsmRun->eof = 1; + debug( REALM_SCAN, "EOS *******************\n" ); break; case INPUT_EOF: + fsmRun->p = fsmRun->pe = 0; if ( fsmRun->tokstart != 0 ) - fsmRun->peof = fsmRun->pe; + fsmRun->eof = 1; else return SCAN_EOF; break; case INPUT_EOD: + fsmRun->p = fsmRun->pe = 0; return SCAN_TRY_AGAIN_LATER; case INPUT_LANG_EL: if ( fsmRun->tokstart != 0 ) - fsmRun->peof = fsmRun->pe; + fsmRun->eof = 1; else return SCAN_LANG_EL; break; case INPUT_TREE: if ( fsmRun->tokstart != 0 ) - fsmRun->peof = fsmRun->pe; + fsmRun->eof = 1; else return SCAN_TREE; break; case INPUT_IGNORE: if ( fsmRun->tokstart != 0 ) - fsmRun->peof = fsmRun->pe; + fsmRun->eof = 1; else return SCAN_IGNORE; break; } + + fsmExecute( fsmRun, is ); + + /* First check if scanning stopped because we have a token. */ + if ( fsmRun->matchedToken > 0 ) { + /* If the token has a marker indicating the end (due to trailing + * context) then adjust data now. */ + LangElInfo *lelInfo = prg->rtd->lelInfo; + if ( lelInfo[fsmRun->matchedToken].markId >= 0 ) + fsmRun->p = fsmRun->mark[lelInfo[fsmRun->matchedToken].markId]; + + return fsmRun->matchedToken; + } + + /* Check for error. */ + if ( fsmRun->cs == fsmRun->tables->errorState ) { + /* If a token was started, but not finished (tokstart != 0) then + * restore data to the beginning of that token. */ + if ( fsmRun->tokstart != 0 ) + fsmRun->p = fsmRun->tokstart; + + /* Check for a default token in the region. If one is there + * then send it and continue with the processing loop. */ + if ( prg->rtd->regionInfo[fsmRun->region].defaultToken >= 0 ) { + fsmRun->toklen = 0; + return prg->rtd->regionInfo[fsmRun->region].defaultToken; + } + + return SCAN_ERROR; + } + + /* Got here because the state machine didn't match a token or encounter + * an error. Must be because we got to the end of the buffer data. */ + assert( fsmRun->p == fsmRun->pe ); } /* Should not be reached. */ @@ -1147,8 +1123,9 @@ long scanToken( Program *prg, PdaRun *pdaRun, FsmRun *fsmRun, StreamImpl *is ) */ long parseLoop( Program *prg, Tree **sp, PdaRun *pdaRun, - FsmRun *fsmRun, StreamImpl *is, long entry ) + StreamImpl *is, long entry ) { + FsmRun *fsmRun = pdaRun->fsmRun; LangElInfo *lelInfo = prg->rtd->lelInfo; switch ( entry ) { @@ -1269,8 +1246,9 @@ case PcrPreEof: /* Note that we don't update the position now. It is done when the token * data is pulled from the inputStream. */ - fsmRun->p = fsmRun->tokstart; - fsmRun->tokstart = 0; + fsmRun->p = fsmRun->pe = 0; + fsmRun->toklen = 0; + fsmRun->eof = 0; pdaRun->fi = &prg->rtd->frameInfo[prg->rtd->lelInfo[pdaRun->tokenId].frameId]; pdaRun->frameId = prg->rtd->lelInfo[pdaRun->tokenId].frameId; @@ -1484,8 +1462,8 @@ int isParserStopFinished( PdaRun *pdaRun ) return done; } -void initPdaRun( PdaRun *pdaRun, Program *prg, PdaTables *tables, - FsmRun *fsmRun, int parserId, long stopTarget, int revertOn, Tree *context ) +void initPdaRun( Program *prg, PdaRun *pdaRun, FsmRun *fsmRun, PdaTables *tables, + int parserId, long stopTarget, int revertOn, Tree *context ) { memset( pdaRun, 0, sizeof(PdaRun) ); pdaRun->tables = tables; @@ -1533,6 +1511,11 @@ void initPdaRun( PdaRun *pdaRun, Program *prg, PdaTables *tables, pdaRun->reject = false; pdaRun->rcBlockCount = 0; + + pdaRun->fsmRun = fsmRun; + + initFsmRun( prg, fsmRun ); + newToken( prg, pdaRun, fsmRun ); } long stackTopTarget( Program *prg, PdaRun *pdaRun ) diff --git a/colm/pdarun.h b/colm/pdarun.h index 4b37c5cd..00f07885 100644 --- a/colm/pdarun.h +++ b/colm/pdarun.h @@ -23,7 +23,6 @@ #define __COLM_PDARUN_H #include <colm/input.h> -#include <colm/fsmrun.h> #include <colm/defs.h> #include <colm/tree.h> @@ -73,26 +72,32 @@ typedef struct _FsmRun { FsmTables *tables; - RunBuf *runBuf; + RunBuf *consumeBuf; /* FsmRun State. */ long region, preRegion; long cs, ncs, act; - char *tokstart, *tokend; - char *p, *pe, *peof; - int returnResult; + char *start; + char *tokstart; + long tokend; + long toklen; + char *p, *pe; + + /* Bits. */ + char eof; + char returnResult; + char skipToklen; + char *mark[MARK_SLOTS]; long matchedToken; } FsmRun; -void initFsmRun( FsmRun *fsmRun, struct ColmProgram *prg ); void clearFsmRun( struct ColmProgram *prg, FsmRun *fsmRun ); void updatePosition( StreamImpl *inputStream, const char *data, long length ); void undoPosition( StreamImpl *inputStream, const char *data, long length ); void sendBackRunBufHead( FsmRun *fsmRun, StreamImpl *inputStream ); void undoStreamPull( StreamImpl *inputStream, const char *data, long length ); - #if SIZEOF_LONG != 4 && SIZEOF_LONG != 8 #error "SIZEOF_LONG contained an unexpected value" #endif @@ -341,6 +346,8 @@ typedef struct _PdaRun int rcBlockCount; Tree *parseErrorText; + + FsmRun *fsmRun; } PdaRun; void rtCodeVectReplace( RtCodeVect *vect, long pos, const Code *val, long len ); @@ -393,8 +400,8 @@ void decrementSteps( PdaRun *pdaRun ); int makeReverseCode( PdaRun *pdaRun ); void transferReverseCode( PdaRun *pdaRun, ParseTree *tree ); -void initPdaRun( PdaRun *pdaRun, struct ColmProgram *prg, PdaTables *tables, - FsmRun *fsmRun, int parserId, long stopTarget, int revertOn, Tree *context ); +void initPdaRun( struct ColmProgram *prg, PdaRun *pdaRun, FsmRun *fsmRun, PdaTables *tables, + int parserId, long stopTarget, int revertOn, Tree *context ); void clearPdaRun( struct ColmProgram *prg, Tree **root, PdaRun *pdaRun ); void initStreamImpl( StreamImpl *inputStream ); @@ -426,14 +433,14 @@ long parseToken( struct ColmProgram *prg, Tree **sp, PdaRun *pdaRun, long undoParse( Tree **sp, PdaRun *pdaRun, FsmRun *fsmRun, StreamImpl *inputStream, Tree *tree ); -Head *streamPull( struct ColmProgram *prg, FsmRun *fsmRun, StreamImpl *inputStream, long length ); +Head *streamPull( struct ColmProgram *prg, PdaRun *pdaRun, StreamImpl *is, long length ); Head *stringAllocPointer( struct ColmProgram *prg, const char *data, long length ); -void streamPushText( FsmRun *fsmRun, StreamImpl *inputStream, const char *data, long length ); -void streamPushTree( FsmRun *fsmRun, StreamImpl *inputStream, Tree *tree, int ignore ); -void streamPushStream( FsmRun *fsmRun, StreamImpl *inputStream, Tree *tree ); -void undoStreamPush( struct ColmProgram *prg, Tree **sp, FsmRun *fsmRun, StreamImpl *inputStream, long length ); -void undoStreamAppend( struct ColmProgram *prg, Tree **sp, FsmRun *fsmRun, StreamImpl *inputStream, struct ColmTree *tree, long length ); +void streamPushText( StreamImpl *inputStream, const char *data, long length ); +void streamPushTree( StreamImpl *inputStream, Tree *tree, int ignore ); +void streamPushStream( StreamImpl *inputStream, Tree *tree ); +void undoStreamPush( struct ColmProgram *prg, Tree **sp, StreamImpl *inputStream, long length ); +void undoStreamAppend( struct ColmProgram *prg, Tree **sp, StreamImpl *inputStream, struct ColmTree *tree, long length ); Kid *makeTokenWithData( struct ColmProgram *prg, PdaRun *pdaRun, FsmRun *fsmRun, StreamImpl *inputStream, int id, Head *tokdata ); @@ -448,21 +455,17 @@ long sendBackQueuedIgnore( struct ColmProgram *prg, Tree **sp, StreamImpl *input void clearIgnoreList( struct ColmProgram *prg, Tree **sp, Kid *kid ); Head *extractMatch( struct ColmProgram *prg, FsmRun *fsmRun, StreamImpl *inputStream ); Head *extractMatch( struct ColmProgram *prg, FsmRun *fsmRun, StreamImpl *inputStream ); -void newToken( struct ColmProgram *prg, PdaRun *pdaRun, FsmRun *fsmRun ); void fsmExecute( FsmRun *fsmRun, StreamImpl *inputStream ); void sendNamedLangEl( struct ColmProgram *prg, Tree **sp, PdaRun *pdaRun, FsmRun *fsmRun, StreamImpl *inputStream ); long parseLoop( struct ColmProgram *prg, Tree **sp, PdaRun *pdaRun, - FsmRun *fsmRun, StreamImpl *inputStream, long entry ); + StreamImpl *inputStream, long entry ); void initBindings( PdaRun *pdaRun ); Tree *getParsedRoot( PdaRun *pdaRun, int stop ); void undoParseStream( struct ColmProgram *prg, Tree **sp, StreamImpl *inputStream, FsmRun *fsmRun, PdaRun *pdaRun, long steps ); void clearBuffered( FsmRun *fsmRun ); -void resetToken( FsmRun *fsmRun ); - -void detachStream( FsmRun *fsmRun, StreamImpl *is ); -void attachStream( FsmRun *fsmRun, StreamImpl *is ); +void resetToken( PdaRun *pdaRun ); #ifdef __cplusplus } diff --git a/colm/program.c b/colm/program.c index 412473e5..b604fabe 100644 --- a/colm/program.c +++ b/colm/program.c @@ -20,7 +20,6 @@ */ #include <colm/pdarun.h> -#include <colm/fsmrun.h> #include <colm/tree.h> #include <colm/bytecode.h> #include <colm/pool.h> diff --git a/colm/redbuild.h b/colm/redbuild.h index dbbb3e19..5ae75406 100644 --- a/colm/redbuild.h +++ b/colm/redbuild.h @@ -26,7 +26,6 @@ #include "avltree.h" #include "fsmgraph.h" #include "parsedata.h" -#include "fsmrun.h" /* Forwards. */ struct FsmTrans; diff --git a/colm/redfsm.cc b/colm/redfsm.cc index d8e4a983..5ec075ce 100644 --- a/colm/redfsm.cc +++ b/colm/redfsm.cc @@ -26,7 +26,6 @@ #include "mergesort.h" #include "fsmgraph.h" #include "parsetree.h" -#include "fsmrun.h" using std::ostringstream; diff --git a/colm/resolve.cc b/colm/resolve.cc index bf639738..6fc4b53e 100644 --- a/colm/resolve.cc +++ b/colm/resolve.cc @@ -21,7 +21,6 @@ #include "bytecode.h" #include "parsedata.h" -#include "fsmrun.h" #include <iostream> #include <assert.h> diff --git a/colm/string.c b/colm/string.c index d670b68c..21c3aac4 100644 --- a/colm/string.c +++ b/colm/string.c @@ -89,8 +89,6 @@ Head *initStrSpace( long length ) { /* Find the length and allocate the space for the shared string. */ Head *head = (Head*) malloc( sizeof(Head) + length ); - //if ( head == 0 ) - // throw std::bad_alloc(); /* Init the header. */ head->data = (char*)(head+1); diff --git a/colm/synthesis.cc b/colm/synthesis.cc index 2c7e7e8b..f164e1ed 100644 --- a/colm/synthesis.cc +++ b/colm/synthesis.cc @@ -21,7 +21,6 @@ #include "bytecode.h" #include "parsedata.h" -#include "fsmrun.h" #include "pdarun.h" #include "input.h" #include <iostream> @@ -2532,7 +2531,7 @@ void Compiler::initStreamObject( ) streamLangEl->objectDef = streamObj; initFunction( uniqueTypeStr, streamObj, "pull", - IN_INPUT_PULL_WV, IN_INPUT_PULL_WV, uniqueTypeInt, false ); + IN_INPUT_PULL_WV, IN_INPUT_PULL_WC, uniqueTypeInt, false ); initFunction( uniqueTypeStr, streamObj, "push", IN_INPUT_PUSH_WV, IN_INPUT_PUSH_WV, uniqueTypeAny, false ); initFunction( uniqueTypeStr, streamObj, "push_ignore", @@ -3248,7 +3247,9 @@ void Compiler::addStdin() el->beenInitialized = true; el->isConst = true; el->useOffset = false; - el->inGetR = IN_GET_STDIN; + el->inGetR = IN_GET_STDIN; + el->inGetWC = IN_GET_STDIN; + el->inGetWV = IN_GET_STDIN; globalObjectDef->insertField( el->name, el ); } @@ -3264,6 +3265,8 @@ void Compiler::addStdout() el->isConst = true; el->useOffset = false; el->inGetR = IN_GET_STDOUT; + el->inGetWC = IN_GET_STDOUT; + el->inGetWV = IN_GET_STDOUT; globalObjectDef->insertField( el->name, el ); } @@ -3279,6 +3282,8 @@ void Compiler::addStderr() el->isConst = true; el->useOffset = false; el->inGetR = IN_GET_STDERR; + el->inGetWC = IN_GET_STDERR; + el->inGetWV = IN_GET_STDERR; globalObjectDef->insertField( el->name, el ); } diff --git a/colm/tree.c b/colm/tree.c index edd0dc79..341aad8d 100644 --- a/colm/tree.c +++ b/colm/tree.c @@ -986,14 +986,12 @@ Tree *createGeneric( Program *prg, long genericId ) Parser *parser = (Parser*)mapElAllocate( prg ); parser->id = genericInfo->langElId; parser->genericInfo = genericInfo; - parser->fsmRun = malloc( sizeof(FsmRun) ); parser->pdaRun = malloc( sizeof(PdaRun) ); + parser->pdaRun->fsmRun = malloc( sizeof(FsmRun) ); /* Start off the parsing process. */ - initPdaRun( parser->pdaRun, prg, prg->rtd->pdaTables, - parser->fsmRun, genericInfo->parserId, false, false, 0 ); - initFsmRun( parser->fsmRun, prg ); - newToken( prg, parser->pdaRun, parser->fsmRun ); + initPdaRun( prg, parser->pdaRun, parser->pdaRun->fsmRun, prg->rtd->pdaTables, + genericInfo->parserId, false, false, 0 ); newGeneric = (Tree*) parser; break; @@ -1045,10 +1043,10 @@ free_tree: } else if ( generic->type == GEN_PARSER ) { Parser *parser = (Parser*)tree; - clearFsmRun( prg, parser->fsmRun ); + clearFsmRun( prg, parser->pdaRun->fsmRun ); clearPdaRun( prg, sp, parser->pdaRun ); + free( parser->pdaRun->fsmRun ); free( parser->pdaRun ); - free( parser->fsmRun ); treeDownref( prg, sp, (Tree*)parser->input ); mapElFree( prg, (MapEl*)parser ); } @@ -2054,7 +2052,10 @@ void appendFile( struct ColmPrintArgs *args, const char *data, int length ) void appendFd( struct ColmPrintArgs *args, const char *data, int length ) { - write( (long)args->arg, data, length ); + int res = write( (long)args->arg, data, length ); + if ( res != 0 ) { + message( "write error\n" ); + } } Tree *treeTrim( struct ColmProgram *prg, Tree **sp, Tree *tree ) diff --git a/colm/tree.h b/colm/tree.h index 8b6d509d..529c0185 100644 --- a/colm/tree.h +++ b/colm/tree.h @@ -203,7 +203,6 @@ typedef struct _Parser GenericInfo *genericInfo; struct _PdaRun *pdaRun; - struct _FsmRun *fsmRun; struct _Stream *input; Tree *result; } Parser; diff --git a/test/include1.exp b/test/include1.exp new file mode 100644 index 00000000..502194e3 --- /dev/null +++ b/test/include1.exp @@ -0,0 +1,19 @@ +opening include1a.in +opening include1b.in +opening include1c.in +hello; + +a; +b; + +there; + +c; +d; + +dude; + +e; +f; + +and dudettes; diff --git a/test/include1.in b/test/include1.in new file mode 100644 index 00000000..9c7aa806 --- /dev/null +++ b/test/include1.in @@ -0,0 +1,14 @@ + +hello; + +#include "include1a.in" + +there; + +#include "include1b.in" + +dude; + +#include "include1c.in" + +and dudettes; diff --git a/test/include1.lm b/test/include1.lm new file mode 100644 index 00000000..03a3b046 --- /dev/null +++ b/test/include1.lm @@ -0,0 +1,98 @@ +namespace string + lex + literal '"' + token data /[^"\\]+/ + token escape /'\\' any/ + end + + def string_data + [data] + | [escape] + + def string + ['"' string_data* '"'] + + str unquote( S: string ) + { + match S ['"' DL: string_data* '"'] + for E: escape in DL + E.data = 'x' + return $DL + } + +end string + +namespace hash + + lex + literal 'define', 'include' + literal '#', '\n' ni + + token id /[a-zA-Z_][a-zA-Z_0-9]*/ + token number /[0-9]+/ + + ignore /[ \t]/ + end + + def hash + ['#' 'define' Id: id number '\n'] + | ['#' 'include' Inc: string::string '\n'] + +end hash + +token rest_of_line /[^\n]* '\n'/ + +namespace lang + + lex + ignore /space/ + literal '*', '(', ')', ';' + token id /[a-zA-Z_][a-zA-Z_0-9]*/ + token number /[0-9]+/ + + token hash /'#'/ { + parse_stop H: hash::hash[ input ] + if ( H.tree ) { + if ( H.tree.Inc ) { + FN: str = unquote( H.tree.Inc ) + print( 'opening ' FN '\n' ) + IS: stream = open( FN 'r' ) + if ( ! IS ) { + print( 'ERROR: failed to open ' FN '\n' ) + exit(1) + } + input.push( IS ) + } + } + else { + parse_stop L: rest_of_line[ input ] + if ! L.tree { + print( "ERROR: stuck: " L.error ) + exit(1) + } + print( "ERROR: failed to parse # directive: " L.tree ) + } + } + end + + def item + [id] + | ['(' item* ')'] + + def statement + [item* ';'] + + def start + [statement*] + +end lang + +parse Input: lang::start[ stdin ] + +if ! Input.tree + print( Input.error '\n' ) +else { + #print( Input.tree '\n' ) + S: lang::start = Input.tree + print( Input.tree '\n' ) +} diff --git a/test/include1a.in b/test/include1a.in new file mode 100644 index 00000000..26da0afa --- /dev/null +++ b/test/include1a.in @@ -0,0 +1,2 @@ +a; +b; diff --git a/test/include1b.in b/test/include1b.in new file mode 100644 index 00000000..6c574323 --- /dev/null +++ b/test/include1b.in @@ -0,0 +1,2 @@ +c; +d; diff --git a/test/include1c.in b/test/include1c.in new file mode 100644 index 00000000..5373832d --- /dev/null +++ b/test/include1c.in @@ -0,0 +1,2 @@ +e; +f; diff --git a/test/pull1.exp b/test/pull1.exp new file mode 100644 index 00000000..aa3a0fe0 --- /dev/null +++ b/test/pull1.exp @@ -0,0 +1 @@ +this is in diff --git a/test/pull1.in b/test/pull1.in new file mode 100644 index 00000000..f4d2e4a0 --- /dev/null +++ b/test/pull1.in @@ -0,0 +1 @@ +this is input for a non-parse pull diff --git a/test/pull1.lm b/test/pull1.lm new file mode 100644 index 00000000..bc559671 --- /dev/null +++ b/test/pull1.lm @@ -0,0 +1,2 @@ +String: str = stdin.pull( 10 ) +print( String '\n' ) diff --git a/test/pull2.exp b/test/pull2.exp new file mode 100644 index 00000000..aa3a0fe0 --- /dev/null +++ b/test/pull2.exp @@ -0,0 +1 @@ +this is in diff --git a/test/pull2.in b/test/pull2.in new file mode 100644 index 00000000..f4d2e4a0 --- /dev/null +++ b/test/pull2.in @@ -0,0 +1 @@ +this is input for a non-parse pull diff --git a/test/pull2.lm b/test/pull2.lm new file mode 100644 index 00000000..1a18b829 --- /dev/null +++ b/test/pull2.lm @@ -0,0 +1,3 @@ +Stream: stream = open( 'pull2.in' 'r' ) +String: str = Stream.pull( 10 ) +print( String '\n' ) |