diff options
author | Adrian Thurston <thurston@colm.net> | 2018-07-04 13:23:21 +0700 |
---|---|---|
committer | Adrian Thurston <thurston@colm.net> | 2018-07-04 13:23:21 +0700 |
commit | 00b1dd7777079036321afb8b3dcff0a229815517 (patch) | |
tree | 74f4d3acf8c043b03b1182e0453659cd8dca4f8a | |
parent | e87efd2f55958dc4f915890091ecf436d33d8c4f (diff) | |
download | colm-00b1dd7777079036321afb8b3dcff0a229815517.tar.gz |
reusing stream impls when parsing from 'input'
To detect parsing from a top-level input, and to avoid appending it to a
parser's top level (thus creating a tree), this change adds a new type for
input so that we can replace the parser's top level. This forces us into a
two-level structure where the stream seq is at the top and the stream data is
underneath. It also requires us to hack the destructor so that we don't delete
the stream impl multiple times.
-rw-r--r-- | src/bytecode.c | 16 | ||||
-rw-r--r-- | src/bytecode.h | 2 | ||||
-rw-r--r-- | src/compiler.cc | 2 | ||||
-rw-r--r-- | src/compiler.h | 8 | ||||
-rw-r--r-- | src/declare.cc | 39 | ||||
-rw-r--r-- | src/input.c | 10 | ||||
-rw-r--r-- | src/parser.cc | 13 | ||||
-rw-r--r-- | src/pdabuild.cc | 2 | ||||
-rw-r--r-- | src/pdacodegen.cc | 1 | ||||
-rw-r--r-- | src/pdarun.h | 1 | ||||
-rw-r--r-- | src/program.h | 1 | ||||
-rw-r--r-- | src/struct.h | 1 | ||||
-rw-r--r-- | src/synthesis.cc | 14 |
13 files changed, 102 insertions, 8 deletions
diff --git a/src/bytecode.c b/src/bytecode.c index 4e8d8e61..d79231f5 100644 --- a/src/bytecode.c +++ b/src/bytecode.c @@ -2453,6 +2453,22 @@ again: break; } + case IN_REPLACE_STREAM: { + debug( prg, REALM_BYTECODE, "IN_REPLACE_STREAM\n" ); + + stream_t *to_replace_with = vm_pop_stream(); + stream_t *stream = vm_pop_stream(); + + stream->impl = to_replace_with->impl; + stream->not_owner = true; + + vm_push_stream( stream ); + + exec->steps = stream->parser->pda_run->steps; + exec->pcr = PCR_START; + + break; + } case IN_SEND_STREAM_W: { debug( prg, REALM_BYTECODE, "IN_SEND_STREAM_W\n" ); diff --git a/src/bytecode.h b/src/bytecode.h index e303e675..6cb01c39 100644 --- a/src/bytecode.h +++ b/src/bytecode.h @@ -267,6 +267,8 @@ typedef unsigned char uchar; #define IN_SEND_TREE_W 0xa9 #define IN_SEND_TREE_BKT 0xaa +#define IN_REPLACE_STREAM 0x88 + #define IN_SEND_STREAM_W 0x90 #define IN_SEND_STREAM_BKT 0x1c diff --git a/src/compiler.cc b/src/compiler.cc index 79e36627..4ceb2721 100644 --- a/src/compiler.cc +++ b/src/compiler.cc @@ -377,6 +377,7 @@ Compiler::Compiler( ) argv(0), stream(0), + inputSel(0), streamSel(0), uniqueTypeNil(0), @@ -386,6 +387,7 @@ Compiler::Compiler( ) uniqueTypeStr(0), uniqueTypeIgnore(0), uniqueTypeAny(0), + uniqueTypeInput(0), uniqueTypeStream(0), nextPatConsId(0), nextGenericId(1), diff --git a/src/compiler.h b/src/compiler.h index 0f67c17b..02b75916 100644 --- a/src/compiler.h +++ b/src/compiler.h @@ -910,6 +910,7 @@ struct Compiler int firstNonTermId; int firstStructElId; int structInbuiltId; + int structInputId; int structStreamId; LangEl **langElIndex; @@ -933,7 +934,9 @@ struct Compiler StructEl *argvElSel; StructEl *stdsElSel; + StructDef *input; StructDef *stream; + StructEl *inputSel; StructEl *streamSel; VectorTypeIdMap vectorTypeIdMap; @@ -958,6 +961,7 @@ struct Compiler UniqueType *uniqueTypeIgnore; UniqueType *uniqueTypeAny; + UniqueType *uniqueTypeInput; UniqueType *uniqueTypeStream; UniqueTypeMap uniqeTypeMap; @@ -967,6 +971,9 
@@ struct Compiler void declareGlobalFields(); void declareStrFields(); + void declareInputField( ObjectDef *objDef, code_t getLength ); + void declareInputFields(); + void declareStreamField( ObjectDef *objDef, code_t getLength ); void declareStreamFields(); @@ -975,6 +982,7 @@ struct Compiler ObjectDef *intObj; ObjectDef *strObj; + ObjectDef *inputObj; ObjectDef *streamObj; struct fsm_tables *fsmTables; diff --git a/src/declare.cc b/src/declare.cc index 0bd98e8b..b2efc2b0 100644 --- a/src/declare.cc +++ b/src/declare.cc @@ -38,6 +38,7 @@ void Compiler::initUniqueTypes( ) uniqueTypeIgnore = new UniqueType( TYPE_TREE, ignoreLangEl ); uniqueTypeAny = new UniqueType( TYPE_TREE, anyLangEl ); + uniqueTypeInput = new UniqueType( TYPE_STRUCT, inputSel ); uniqueTypeStream = new UniqueType( TYPE_STRUCT, streamSel ); uniqeTypeMap.insert( uniqueTypeNil ); @@ -761,7 +762,7 @@ void Compiler::addMatchText( ObjectDef *frame, LangEl *lel ) void Compiler::addInput( ObjectDef *frame ) { /* Make the type ref. */ - TypeRef *typeRef = TypeRef::cons( internal, uniqueTypeStream ); + TypeRef *typeRef = TypeRef::cons( internal, uniqueTypeInput ); /* Create the field and insert it into the map. */ ObjectField *el = ObjectField::cons( internal, @@ -842,6 +843,19 @@ void Compiler::declareStrFields( ) addLengthField( strObj, IN_STR_LENGTH ); } +void Compiler::declareInputField( ObjectDef *objDef, code_t getLength ) +{ + /* Create the "length" field. */ + TypeRef *typeRef = TypeRef::cons( internal, uniqueTypeStr ); + ObjectField *el = ObjectField::cons( internal, + ObjectField::InbuiltFieldType, typeRef, "tree" ); + el->isConst = true; + el->inGetR = IN_GET_COLLECT_STRING; + el->inGetValR = IN_GET_COLLECT_STRING; + + objDef->rootScope->insertField( el->name, el ); +} + void Compiler::declareStreamField( ObjectDef *objDef, code_t getLength ) { /* Create the "length" field. 
*/ @@ -855,6 +869,28 @@ void Compiler::declareStreamField( ObjectDef *objDef, code_t getLength ) objDef->rootScope->insertField( el->name, el ); } +void Compiler::declareInputFields( ) +{ + inputObj = inputSel->structDef->objectDef; + + initFunction( uniqueTypeStr, inputObj, ObjectMethod::Call, "pull", + IN_INPUT_PULL_WV, IN_INPUT_PULL_WC, uniqueTypeInt, false ); + + initFunction( uniqueTypeStr, inputObj, ObjectMethod::Call, "push", + IN_INPUT_PUSH_WV, IN_INPUT_PUSH_WV, uniqueTypeAny, false ); + + initFunction( uniqueTypeStr, inputObj, ObjectMethod::Call, "push_ignore", + IN_INPUT_PUSH_IGNORE_WV, IN_INPUT_PUSH_IGNORE_WV, uniqueTypeAny, false ); + + initFunction( uniqueTypeStr, inputObj, ObjectMethod::Call, "push_stream", + IN_INPUT_PUSH_STREAM_WV, IN_INPUT_PUSH_STREAM_WV, uniqueTypeStream, false ); + + initFunction( uniqueTypeVoid, inputObj, ObjectMethod::Call, "close", + IN_INPUT_CLOSE_WC, IN_INPUT_CLOSE_WC, false ); + + declareInputField( inputObj, 0 ); +} + void Compiler::declareStreamFields( ) { streamObj = streamSel->structDef->objectDef; @@ -1536,6 +1572,7 @@ void Compiler::declarePass() declareIntFields(); declareStrFields(); + declareInputFields(); declareStreamFields(); declareTokenFields(); declareGlobalFields(); diff --git a/src/input.c b/src/input.c index dcead096..8360d1de 100644 --- a/src/input.c +++ b/src/input.c @@ -168,7 +168,8 @@ void colm_stream_destroy( program_t *prg, tree_t **sp, struct_t *s ) stream_t *stream = (stream_t*) s; struct stream_impl *si = stream->impl; - si->funcs->destructor( prg, sp, si ); + if ( !stream->not_owner ) + si->funcs->destructor( prg, sp, si ); } /* Keep the position up to date after consuming text. 
*/ @@ -759,7 +760,12 @@ static int stream_get_parse_block( struct colm_program *prg, struct stream_impl_ #if DEBUG switch ( ret ) { case INPUT_DATA: - debug( prg, REALM_INPUT, "get parse block: DATA: %d\n", *copied ); + if ( *pdp != 0 ) { + debug( prg, REALM_INPUT, "get parse block: DATA: %d %.*s\n", *copied, (int)(*copied), *pdp ); + } + else { + debug( prg, REALM_INPUT, "get parse block: DATA: %d\n", *copied ); + } break; case INPUT_EOD: debug( prg, REALM_INPUT, "get parse block: EOD\n" ); diff --git a/src/parser.cc b/src/parser.cc index 78f59c2e..23e60ec2 100644 --- a/src/parser.cc +++ b/src/parser.cc @@ -133,15 +133,24 @@ void BaseParser::init() pd->global = new StructDef( internal, global, pd->globalObjectDef ); pd->globalSel = declareStruct( pd, 0, global, pd->global ); + /* Setup the input object. */ + global = "_input"; + ObjectDef *objectDef = ObjectDef::cons( ObjectDef::BuiltinType, + global, pd->nextObjectId++ ); + + pd->input = new StructDef( internal, global, objectDef ); + pd->inputSel = declareStruct( pd, pd->rootNamespace, + pd->input->name, pd->input ); + /* Setup the stream object. */ global = "stream"; - ObjectDef *objectDef = ObjectDef::cons( ObjectDef::BuiltinType, + objectDef = ObjectDef::cons( ObjectDef::BuiltinType, global, pd->nextObjectId++ ); pd->stream = new StructDef( internal, global, objectDef ); pd->streamSel = declareStruct( pd, pd->rootNamespace, pd->stream->name, pd->stream ); - + /* Initialize the dictionary of graphs. This is our symbol table. 
The * initialization needs to be done on construction which happens at the * beginning of a machine spec so any assignment operators can reference diff --git a/src/pdabuild.cc b/src/pdabuild.cc index 1760c9ba..ba4850df 100644 --- a/src/pdabuild.cc +++ b/src/pdabuild.cc @@ -250,6 +250,7 @@ void Compiler::makeStructElIds() sel->id = nextId++; structInbuiltId = nextId++; + structInputId = nextId++; structStreamId = nextId++; } @@ -1700,6 +1701,7 @@ void Compiler::makeRuntimeData() runtimeData->stds_el_id = stdsElSel->id; runtimeData->struct_inbuilt_id = structInbuiltId; runtimeData->struct_stream_id = structStreamId; + runtimeData->struct_input_id = structInputId; runtimeData->fsm_execute = &internalFsmExecute; runtimeData->send_named_lang_el = &internalSendNamedLangEl; diff --git a/src/pdacodegen.cc b/src/pdacodegen.cc index fdb68852..e5c07cf1 100644 --- a/src/pdacodegen.cc +++ b/src/pdacodegen.cc @@ -514,6 +514,7 @@ void PdaCodeGen::writeRuntimeData( colm_sections *runtimeData, struct pda_tables " " << runtimeData->argv_el_id << ",\n" " " << runtimeData->stds_el_id << ",\n" " " << runtimeData->struct_inbuilt_id << ",\n" + " " << runtimeData->struct_inbuilt_id << ",\n" " " << runtimeData->struct_stream_id << ",\n" " &fsm_execute,\n" " &sendNamedLangEl,\n" diff --git a/src/pdarun.h b/src/pdarun.h index 9190b430..41020ebb 100644 --- a/src/pdarun.h +++ b/src/pdarun.h @@ -317,7 +317,6 @@ struct pda_run long steps; long target_steps; - /* The shift count simply tracks the number of shifts that have happend. * The commit shift count is the shift count when the last commit occurred. * If we back up to this number of shifts then we decide we cannot proceed. 
diff --git a/src/program.h b/src/program.h index dc96ef16..4c5f1834 100644 --- a/src/program.h +++ b/src/program.h @@ -102,6 +102,7 @@ struct colm_sections long argv_el_id; long stds_el_id; long struct_inbuilt_id; + long struct_input_id; long struct_stream_id; void (*fsm_execute)( struct pda_run *pda_run, struct stream_impl *input_stream ); diff --git a/src/struct.h b/src/struct.h index 29c707d7..724eb6cf 100644 --- a/src/struct.h +++ b/src/struct.h @@ -71,6 +71,7 @@ typedef struct colm_stream struct stream_impl *impl; parser_t *parser; + char not_owner; } stream_t; #define COLM_LIST_EL_SIZE 2 diff --git a/src/synthesis.cc b/src/synthesis.cc index a45076d9..d6f5cf4d 100644 --- a/src/synthesis.cc +++ b/src/synthesis.cc @@ -1524,6 +1524,7 @@ UniqueType *LangTerm::evaluateParse( Compiler *pd, CodeVect &code, code.append( IN_GET_PARSER_STREAM ); for ( ConsItemList::Iter item = *parserText->list; item.lte(); item++ ) { + bool isInput = false; bool isStream = false; switch ( item->type ) { case ConsItem::LiteralType: { @@ -1564,13 +1565,17 @@ UniqueType *LangTerm::evaluateParse( Compiler *pd, CodeVect &code, if ( ut->typeId == TYPE_INT || ut->typeId == TYPE_BOOL ) code.append( IN_INT_TO_STR ); + if ( ut == pd->uniqueTypeInput ) + isInput = true; if ( ut == pd->uniqueTypeStream ) isStream = true; break; }} - if ( isStream ) + if ( isInput ) + code.append( IN_REPLACE_STREAM ); + else if ( isStream ) code.append( IN_SEND_STREAM_W ); else if ( tree ) code.append( IN_SEND_TREE_W ); @@ -1626,6 +1631,7 @@ void LangTerm::evaluateSendParser( Compiler *pd, CodeVect &code, bool strings ) /* Assign bind ids to the variables in the replacement. 
*/ for ( ConsItemList::Iter item = *parserText->list; item.lte(); item++ ) { + bool isInput = false; bool isStream = false; switch ( item->type ) { case ConsItem::LiteralType: { @@ -1662,6 +1668,8 @@ void LangTerm::evaluateSendParser( Compiler *pd, CodeVect &code, bool strings ) continue; } + if ( ut == pd->uniqueTypeInput ) + isInput = true; if ( ut == pd->uniqueTypeStream ) isStream = true; @@ -1671,7 +1679,9 @@ void LangTerm::evaluateSendParser( Compiler *pd, CodeVect &code, bool strings ) break; } - if ( isStream ) + if ( isInput ) + code.append( IN_REPLACE_STREAM ); + else if ( isStream ) code.append( IN_SEND_STREAM_W ); else if ( !strings ) code.append( IN_SEND_TREE_W ); |