From db03a8019692eac96f33890e5a7a775606f16263 Mon Sep 17 00:00:00 2001 From: Adrian Thurston Date: Sun, 15 Jul 2012 21:33:52 -0400 Subject: a new parse statement syntax Added a parse statement constructs the parser, sends some text in the style of the send statement, then returns the parser. More can be sent. When done finish is called. The goal here is to eliminate the parse statements that return the parsed tree because there is no way to get the parse error. The parser is immediately destroyed. --- src/lmparse.kl | 39 ++++++++++++++++ src/parsetree.h | 26 ++++++++++- src/resolve.cc | 8 ++++ src/synthesis.cc | 137 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 209 insertions(+), 1 deletion(-) diff --git a/src/lmparse.kl b/src/lmparse.kl index cd8f9255..9f46de95 100644 --- a/src/lmparse.kl +++ b/src/lmparse.kl @@ -1857,6 +1857,45 @@ code_factor: pd->curLocalFrame->insertField( $2->objField->name, $2->objField ); } }; +code_factor: + KW_Parse opt_capture type_ref opt_field_init accumulate + final { + Namespace *nspace = namespaceStack.top(); + TokenRegion *region = regionStack.top(); + + ReplItemList *emptyReplItemList = new ReplItemList; + + Replacement *replacement = Replacement::cons( $1->loc, nspace, region, + emptyReplItemList, pd->nextPatReplId++ ); + pd->replList.append( replacement ); + + LangVarRef *varRef = 0; + if ( $2->objField != 0 ) + varRef = new LangVarRef( $2->objField->loc, new QualItemVect, $2->objField->name ); + + NamespaceQual *nspaceQual = new NamespaceQual( + namespaceStack.top(), regionStack.top() ); + TypeRef *parserTypeRef = TypeRef::cons( TypeRef::Parser, + InputLoc(), nspaceQual, $3->typeRef, 0 ); + + ParserText *parserText = ParserText::cons( $2->loc, nspace, region, replItemList ); + pd->parserTextList.append( parserText ); + + $$->expr = LangExpr::cons( LangTerm::cons( $1->loc, LangTerm::Parser2Type, + varRef, $2->objField, parserTypeRef, $4->fieldInitVect, replacement, parserText ) ); + + /* Check for 
redeclaration of the capture variable in the current local frame. */ + if ( $2->objField != 0 ) { + if ( pd->curLocalFrame->checkRedecl( $2->objField->name ) != 0 ) { + error( $2->objField->loc ) << "variable " << $2->objField->name << + " redeclared" << endp; + } + + /* Give the capture variable the parser type ref and insert it into the current local frame. */ + $2->objField->typeRef = parserTypeRef; + pd->curLocalFrame->insertField( $2->objField->name, $2->objField ); + } + }; code_factor: KW_Parse opt_capture type_ref '(' opt_code_expr_list ')' final { String parserName = $3->typeRef->typeName + "_parser"; diff --git a/src/parsetree.h b/src/parsetree.h index db4e2900..d813a3ee 100644 --- a/src/parsetree.h +++ b/src/parsetree.h @@ -2341,9 +2341,15 @@ struct LangTerm ParseStopType, MakeTreeType, MakeTokenType, - EmbedStringType + EmbedStringType, + Parser2Type, }; + LangTerm() + : + parserText(0) + {} + static LangTerm *cons( Type type, LangVarRef *varRef ) { LangTerm *t = new LangTerm; @@ -2463,6 +2469,22 @@ struct LangTerm return t; } + static LangTerm *cons( const InputLoc &loc, Type type, LangVarRef *varRef, ObjField *objField, + TypeRef *typeRef, FieldInitVect *fieldInitArgs, Replacement *replacement, + ParserText *parserText ) + { + LangTerm *t = new LangTerm; + t->loc = (loc); + t->type = (type); + t->varRef = (varRef); + t->objField = (objField); + t->typeRef = (typeRef); + t->fieldInitArgs = (fieldInitArgs); + t->replacement = (replacement); + t->parserText = (parserText); + return t; + } + static LangTerm *cons( Type type, LangExpr *expr ) { LangTerm *t = new LangTerm; @@ -2500,6 +2522,7 @@ struct LangTerm UniqueType *evaluateParse( Compiler *pd, CodeVect &code, bool stop ) const; UniqueType *evaluateNew( Compiler *pd, CodeVect &code ) const; UniqueType *evaluateConstruct( Compiler *pd, CodeVect &code ) const; + UniqueType *evaluateParse2( Compiler *pd, CodeVect &code ) const; UniqueType *evaluateMatch( Compiler *pd, CodeVect &code ) const; UniqueType *evaluate( Compiler *pd, CodeVect &code ) const; void assignFieldArgs( Compiler *pd, CodeVect &code, UniqueType 
*replUT ) const; @@ -2520,6 +2543,7 @@ struct LangTerm GenericType *generic; TypeRef *parserTypeRef; Replacement *replacement; + ParserText *parserText; LangExpr *expr; ReplItemList *replItemList; }; diff --git a/src/resolve.cc b/src/resolve.cc index a661e68e..e3e29950 100644 --- a/src/resolve.cc +++ b/src/resolve.cc @@ -327,6 +327,14 @@ void LangTerm::resolve( Compiler *pd ) case ConstructType: typeRef->lookupType( pd ); + /* Evaluate the initialization expressions. */ + if ( fieldInitArgs != 0 ) { + for ( FieldInitVect::Iter pi = *fieldInitArgs; pi.lte(); pi++ ) + (*pi)->expr->resolve( pd ); + } + break; + case Parser2Type: + typeRef->lookupType( pd ); /* Evaluate the initialization expressions. */ if ( fieldInitArgs != 0 ) { for ( FieldInitVect::Iter pi = *fieldInitArgs; pi.lte(); pi++ ) diff --git a/src/synthesis.cc b/src/synthesis.cc index 15918e58..90766227 100644 --- a/src/synthesis.cc +++ b/src/synthesis.cc @@ -1288,6 +1288,141 @@ UniqueType *LangTerm::evaluateConstruct( Compiler *pd, CodeVect &code ) const return replUT; } +UniqueType *LangTerm::evaluateParse2( Compiler *pd, CodeVect &code ) const +{ + /* Evaluate the initialization expressions, saving each result type in the exprUT of its field init. */ + if ( fieldInitArgs != 0 && fieldInitArgs->length() > 0 ) { + for ( FieldInitVect::Iter pi = *fieldInitArgs; pi.lte(); pi++ ) { + FieldInit *fieldInit = *pi; + fieldInit->exprUT = fieldInit->expr->evaluate( pd, code ); + } + } + + /* Assign bind ids to the replacement items that carry an expression. */ + for ( ReplItemList::Iter item = *replacement->list; item.lte(); item++ ) { + if ( item->expr != 0 ) + item->bindId = replacement->nextBindId++; + } + + /* Evaluate variable references, walking the replacement list from last to first. 
*/ + for ( ReplItemList::Iter item = replacement->list->last(); item.gtb(); item-- ) { + if ( item->type == ReplItem::ExprType ) { + UniqueType *ut = item->expr->evaluate( pd, code ); + + if ( ut->typeId != TYPE_TREE ) + error() << "variables used in replacements must be trees" << endp; + + item->langEl = ut->langEl; + } + } + + /* Construct the tree using the tree information stored in the compiled + * code. */ + code.append( IN_CONSTRUCT ); + code.appendHalf( replacement->patRepId ); + + /* Dup for the send. */ + code.append( IN_DUP_TOP ); + + /* Lookup the type of the replacement and store it in the replacement + * object so that replacement parsing has a target. */ + UniqueType *replUT = typeRef->uniqueType; + if ( replUT->typeId != TYPE_TREE ) + error(loc) << "don't know how to construct this type" << endp; + + if ( replUT->langEl->generic != 0 && replUT->langEl->generic->typeId == GEN_PARSER ) { + code.append( IN_CONSTRUCT_INPUT ); + code.append( IN_DUP_TOP_OFF ); + code.appendHalf( 1 ); + code.append( IN_SET_INPUT ); + } + + replacement->langEl = replUT->langEl; + assignFieldArgs( pd, code, replUT ); + + if ( varRef != 0 ) { + code.append( IN_DUP_TOP ); + + /* Get the type of the variable being assigned to. */ + VarRefLookup lookup = varRef->lookupField( pd ); + + varRef->loadObj( pd, code, lookup.lastPtrInQual, false ); + varRef->setField( pd, code, lookup.inObject, replUT, false ); + } + +/*****************************/ + + /* For each parser text item: load or evaluate it, append it to the input (IN_GET_INPUT, IN_INPUT_APPEND_*), then emit the parse fragment sequence (IN_PARSE_FRAG_*). */ + for ( ReplItemList::Iter item = *parserText->list; item.lte(); item++ ) { + switch ( item->type ) { + case ReplItem::FactorType: { + String result; + bool unusedCI; + prepareLitString( result, unusedCI, + item->factor->typeRef->pdaLiteral->token.data, + item->factor->typeRef->pdaLiteral->token.loc ); + + /* Make sure we have this string. 
*/ + StringMapEl *mapEl = 0; + if ( pd->literalStrings.insert( result, &mapEl ) ) + mapEl->value = pd->literalStrings.length()-1; + + code.append( IN_LOAD_STR ); + code.appendWord( mapEl->value ); + break; + } + case ReplItem::InputText: { + /* Make sure we have this string. */ + StringMapEl *mapEl = 0; + if ( pd->literalStrings.insert( item->data, &mapEl ) ) + mapEl->value = pd->literalStrings.length()-1; + + code.append( IN_LOAD_STR ); + code.appendWord( mapEl->value ); + break; + } + case ReplItem::ExprType: + item->expr->evaluate( pd, code ); + break; + } + + code.append( IN_DUP_TOP_OFF ); + code.appendHalf( 1 ); + + /* Not a stream. Get the input first. */ + code.append( IN_GET_INPUT ); + if ( pd->revertOn ) + code.append( IN_INPUT_APPEND_WV ); + else + code.append( IN_INPUT_APPEND_WC ); + code.append( IN_POP ); + + code.append( IN_DUP_TOP ); + + /* Parse instruction, dependent on whether or not we are producing + * revert or commit code. */ + if ( pd->revertOn ) { + code.append( IN_PARSE_SAVE_STEPS ); + code.append( IN_PARSE_LOAD_START ); + code.append( IN_PARSE_FRAG_WV ); + code.appendHalf( 0 ); + code.append( IN_PCR_CALL ); + code.append( IN_PARSE_FRAG_WV3 ); + } + else { + code.append( IN_PARSE_SAVE_STEPS ); + code.append( IN_PARSE_LOAD_START ); + code.append( IN_PARSE_FRAG_WC ); + code.appendHalf( 0 ); + code.append( IN_PCR_CALL ); + code.append( IN_PARSE_FRAG_WC3 ); + } + } + code.append( IN_POP ); + + return replUT; +} + UniqueType *LangTerm::evaluateParse( Compiler *pd, CodeVect &code, bool stop ) const { UniqueType *ut = typeRef->uniqueType; @@ -1521,6 +1656,8 @@ UniqueType *LangTerm::evaluate( Compiler *pd, CodeVect &code ) const return evaluateParse( pd, code, true ); case ConstructType: return evaluateConstruct( pd, code ); + case Parser2Type: + return evaluateParse2( pd, code ); case NewType: return evaluateNew( pd, code ); case TypeIdType: { -- cgit v1.2.1