diff options
Diffstat (limited to 'colm/lmparse.kl')
-rw-r--r-- | colm/lmparse.kl | 2694 |
1 files changed, 2694 insertions, 0 deletions
diff --git a/colm/lmparse.kl b/colm/lmparse.kl new file mode 100644 index 00000000..e94a1ac8 --- /dev/null +++ b/colm/lmparse.kl @@ -0,0 +1,2694 @@ +/* + * Copyright 2006-2012 Adrian Thurston <thurston@complang.org> + */ + +/* This file is part of Colm. + * + * Colm is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Colm is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Colm; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <iostream> +#include <errno.h> + +#include "config.h" +#include "lmparse.h" +#include "global.h" +#include "input.h" +#include "fsmrun.h" + +using std::cout; +using std::cerr; +using std::endl; + +ParserDict parserDict; + +%%{ + +parser ColmParser; + +include "lmparse.kh"; + +start: root_item_list + final { + if ( colm_log_compile ) { + cerr << "parsing complete" << endl; + } + + pd->rootCodeBlock = new CodeBlock( $1->stmtList ); + }; + +nonterm root_item_list uses lang_stmt_list; + +root_item_list: root_item_list root_item + final { + $$->stmtList = $1->stmtList; + + /* Maybe a statement. 
*/ + if ( $2->stmt != 0 ) + $$->stmtList->append( $2->stmt ); + }; + +root_item_list: + final { + /* Empty list: start from a fresh statement list. */ + $$->stmtList = new StmtList; + }; + +nonterm root_item uses statement; + +root_item: literal_def commit final { $$->stmt = 0; }; +root_item: rl_def commit final { $$->stmt = 0; }; +root_item: token_def commit final { $$->stmt = 0; }; +root_item: cfl_def commit final { $$->stmt = 0; }; +root_item: region_def commit final { $$->stmt = 0; }; +root_item: context_def commit final { $$->stmt = 0; }; +root_item: namespace_def commit final { $$->stmt = 0; }; +root_item: function_def commit final { $$->stmt = 0; }; +root_item: iter_def commit final { $$->stmt = 0; }; +root_item: global_def commit final { $$->stmt = $1->stmt; }; +root_item: statement commit final { $$->stmt = $1->stmt; }; +root_item: pre_eof commit final { $$->stmt = 0; }; +root_item: precedence commit final { $$->stmt = 0; }; +root_item: typedef commit final { $$->stmt = 0; }; + +nonterm block_open +{ + /* Frame object created for the block being opened. */ + ObjectDef *localFrame; +}; + +block_open: '{' + final { + /* Init the object representing the local frame. */ + $$->localFrame = new ObjectDef( ObjectDef::FrameType, + "local", pd->nextObjectId++ ); + + pd->curLocalFrame = $$->localFrame; + + /* Add captures to the local frame. We depend on these becoming the + * first local variables so we can compute their location. */ + + /* Make local variables corresponding to the local capture vector. */ + for ( ReCaptureVect::Iter c = reCaptureVect; c.lte(); c++ ) + { + /* Each frame gets its own ObjField built from the capture's + * loc, typeRef and name. */ + ObjField *objField = new ObjField( c->objField->loc, + c->objField->typeRef, c->objField->name ); + + /* Insert it into the field map. */ + pd->curLocalFrame->insertField( objField->name, objField ); + } + }; + +block_close: '}' + final { + /* Pop the cur local frame, back to the root. 
*/ + pd->curLocalFrame = pd->rootLocalFrame; + }; + + +iter_def: + KW_Iter TK_Word '(' opt_param_list ')' block_open lang_stmt_list block_close + final { + /* Wrap the body statements in a code block and hand it the frame + * that block_open created. */ + CodeBlock *codeBlock = new CodeBlock( $7->stmtList ); + codeBlock->localFrame = $6->localFrame; + /* No type ref is passed (0) and the last argument is true; + * function_def passes its type_ref and false. */ + Function *newFunction = new Function( 0, $2->data, + $4->paramList, codeBlock, pd->nextFuncId++, true ); + pd->functionList.append( newFunction ); + /* NOTE(review): unlike function_def, inContext is not set here even + * though context_item admits iter_def -- confirm this is intended. */ + }; + +function_def: + type_ref TK_Word '(' opt_param_list ')' block_open lang_stmt_list block_close + final { + /* Wrap the body statements in a code block and hand it the frame + * that block_open created. */ + CodeBlock *codeBlock = new CodeBlock( $7->stmtList ); + codeBlock->localFrame = $6->localFrame; + Function *newFunction = new Function( $1->typeRef, $2->data, + $4->paramList, codeBlock, pd->nextFuncId++, false ); + pd->functionList.append( newFunction ); + + /* Record the enclosing context when defined inside one. */ + if ( contextStack.length() > 0 ) + newFunction->inContext = contextStack.top(); + }; + +nonterm opt_param_list uses param_list; + +opt_param_list: param_list + final { + $$->paramList = $1->paramList; + }; + +opt_param_list: + final { + /* No parameters given: use an empty list rather than null. */ + $$->paramList = new ParameterList; + }; + +nonterm param_list +{ + ParameterList *paramList; +}; + +param_list: param_list param_var_def + final { + /* Extend the list built so far with the next parameter. */ + $$->paramList = $1->paramList; + $$->paramList->append( $2->objField ); + }; + +param_list: param_var_def + final { + /* Create the map and insert the first item. 
*/ + $$->paramList = new ParameterList; + $$->paramList->append( $1->objField ); + }; + +nonterm param_var_def uses var_def; + +param_var_def: TK_Word ':' type_ref + final { + $$->objField = new ObjField( $1->loc, $3->typeRef, $1->data ); + $$->objField->isParam = true; + }; +param_var_def: TK_Word ':' reference_type_ref + final { + $$->objField = new ObjField( $1->loc, $3->typeRef, $1->data ); + $$->objField->isParam = true; + }; + +nonterm reference_type_ref uses type_ref; + +reference_type_ref: KW_Ref type_ref + final { + $$->typeRef = TypeRef::cons( $1->loc, TypeRef::Ref, $2->typeRef ); + }; + +nonterm global_def uses statement; + +global_def: KW_Export var_def opt_def_init + final { + $$->stmt = 0; + + if ( contextStack.length() != 0 ) + error($2->objField->loc) << "cannot export parser context variables" << endp; + + ObjectDef *object = pd->globalObjectDef; + + if ( object->checkRedecl( $2->objField->name ) != 0 ) + error($2->objField->loc) << "object field renamed" << endp; + + object->insertField( $2->objField->name, $2->objField ); + $2->objField->isExport = true; + + if ( $3->expr != 0 ) { + LangVarRef *varRef = LangVarRef::cons( $2->objField->loc, + $2->objField->name ); + + $$->stmt = LangStmt::cons( $2->objField->loc, + $3->assignType, varRef, $3->expr ); + } + }; + +global_def: KW_Global var_def opt_def_init + final { + $$->stmt = 0; + + ObjectDef *object; + if ( contextStack.length() == 0 ) + object = pd->globalObjectDef; + else { + Context *context = contextStack.top(); + $2->objField->context = context; + object = context->contextObjDef; + } + + if ( object->checkRedecl( $2->objField->name ) != 0 ) + error($2->objField->loc) << "object field renamed" << endp; + + object->insertField( $2->objField->name, $2->objField ); + + if ( $3->expr != 0 ) { + LangVarRef *varRef = LangVarRef::cons( $2->objField->loc, + $2->objField->name ); + + $$->stmt = LangStmt::cons( $2->objField->loc, + $3->assignType, varRef, $3->expr ); + } + }; + +precedence: pred_type 
pred_token_list final { pd->predValue++; }; + +pred_type: KW_Left final { predType = PredLeft; }; +pred_type: KW_Right final { predType = PredRight; }; +pred_type: KW_Nonassoc final { predType = PredNonassoc; }; + +pred_token_list: pred_token_list ',' pred_token + final { + }; + +pred_token_list: pred_token; + +nonterm pred_token +{ + ProdEl *factor; + TypeRef *typeRef; +}; + +pred_token: + region_qual TK_Word + final { + TypeRef *typeRef = TypeRef::cons( $2->loc, $1->nspaceQual, $2->data ); + + PredDecl *predDecl = new PredDecl( typeRef, predType, pd->predValue ); + pd->predDeclList.append( predDecl ); + }; + +pred_token: + region_qual TK_Literal + final { + PdaLiteral *literal = new PdaLiteral( $2->loc, $2->data ); + TypeRef *typeRef = TypeRef::cons( $2->loc, $1->nspaceQual, literal ); + + PredDecl *predDecl = new PredDecl( typeRef, predType, pd->predValue ); + pd->predDeclList.append( predDecl ); + }; + +typedef: + KW_Alias TK_Word type_ref + final { + Namespace *nspace = namespaceStack.top(); + TypeAlias *typeAlias = new TypeAlias( + $1->loc, nspace, $2->data, $3->typeRef ); + nspace->typeAliasList.append( typeAlias ); + }; + +cfl_def: cfl_def_head obj_var_list properties_list cfl_prod_list + final { + Namespace *nspace = namespaceStack.top(); + NtDef *ntDef = new NtDef( + curDefineId, + nspace, + $4->defList, + pd->objectDef, + contextStack.length() > 0 ? 
contextStack.top() : 0, + $3->property & PROPERTY_REDUCE_FIRST ); + + nspace->ntDefList.append( ntDef ); + }; + +cfl_def_head: KW_Def TK_Word + final { + /* Remember the name being defined and start a fresh production list. */ + curDefineId = $2->data; + curDefList = new LelDefList; + }; + +nonterm cfl_prod_list +{ + LelDefList *defList; +}; + +cfl_prod_list: cfl_prod_list '|' define_prod + final { + $$->defList = $1->defList; + /* The production number is its position in the list. */ + $3->definition->prodNum = $$->defList->length(); + $$->defList->append( $3->definition ); + }; +cfl_prod_list: define_prod + final { + /* First production: use the list created by cfl_def_head. */ + $$->defList = curDefList; + $1->definition->prodNum = $$->defList->length(); + $$->defList->append( $1->definition ); + }; + +nonterm property +{ + long property; +}; + +nonterm properties_list uses property; + +properties_list: properties_list property + final { + /* Properties are bit flags; accumulate them. */ + $$->property = $1->property | $2->property; + }; +properties_list: + final { + $$->property = 0; + }; + +property: + KW_ReduceFirst + final { + $$->property = PROPERTY_REDUCE_FIRST; + }; + +nonterm opt_prec +{ + LangEl *predOf; +}; + +opt_prec: + final { + $$->predOf = 0; + }; + +opt_prec: + KW_Prec pred_token + final { + /* NOTE(review): the pred_token actions visible in this file never + * assign the factor member -- confirm it is valid before this + * dereference. */ + $$->predOf = $2->factor->langEl; + }; + +nonterm define_prod +{ + Production *definition; +}; + +define_prod: '[' prod_el_list ']' opt_commit opt_reduce_code opt_prec + final { + /* The element list was accumulated in curProdElList by prod_el_list. */ + ProdElList *prodElList = curProdElList; + + Production *newDef = Production::cons( $1->loc, 0, + prodElList, $4->commit, $5->codeBlock, + pd->prodList.length(), 0 ); + newDef->predOf = $6->predOf; + + pd->prodList.append( newDef ); + + $$->definition = newDef; + }; + +obj_var_list: obj_var_list var_def + final { + /* Report the redeclaration at the field's own location, as the + * global_def actions do for the same message. */ + if ( pd->objectDef->checkRedecl( $2->objField->name ) != 0 ) + error($2->objField->loc) << "object field renamed" << endp; + + pd->objectDef->insertField( $2->objField->name, $2->objField ); + }; + +obj_var_list: + final { + /* Start of the list: create the object definition that the + * following var_defs are inserted into. */ + pd->objectDef = new ObjectDef( ObjectDef::UserType, + curDefineId, pd->nextObjectId++ ); + }; + + +nonterm type_ref +{ + TypeRef *typeRef; +}; + +type_ref: basic_type_ref + final { + $$->typeRef = $1->typeRef; + 
}; + +type_ref: KW_Map '<' type_ref type_ref '>' + final { + NamespaceQual *nspaceQual = new NamespaceQual( + namespaceStack.top(), regionStack.top() ); + /* Use the keyword's location, as the KW_Ptr and KW_Ref productions + * do, instead of an empty InputLoc. */ + $$->typeRef = TypeRef::cons( $1->loc, TypeRef::Map, + nspaceQual, $3->typeRef, $4->typeRef ); + }; + +type_ref: KW_List '<' type_ref '>' + final { + NamespaceQual *nspaceQual = new NamespaceQual( + namespaceStack.top(), regionStack.top() ); + $$->typeRef = TypeRef::cons( $1->loc, TypeRef::List, + nspaceQual, $3->typeRef, 0 ); + }; +type_ref: KW_Vector '<' type_ref '>' + final { + NamespaceQual *nspaceQual = new NamespaceQual( + namespaceStack.top(), regionStack.top() ); + $$->typeRef = TypeRef::cons( $1->loc, TypeRef::Vector, + nspaceQual, $3->typeRef, 0 ); + }; +type_ref: KW_Parser '<' type_ref '>' + final { + NamespaceQual *nspaceQual = new NamespaceQual( + namespaceStack.top(), regionStack.top() ); + $$->typeRef = TypeRef::cons( $1->loc, TypeRef::Parser, + nspaceQual, $3->typeRef, 0 ); + }; + +nonterm basic_type_ref uses type_ref; + +basic_type_ref: region_qual TK_Word opt_repeat + final { + $$->typeRef = TypeRef::cons( $2->loc, $1->nspaceQual, $2->data, $3->repeatType ); + }; + +basic_type_ref: KW_Ptr region_qual TK_Word opt_repeat + final { + /* Build the named type first, then wrap it in a Ptr type ref. */ + $$->typeRef = TypeRef::cons( $1->loc, $2->nspaceQual, $3->data, $4->repeatType ); + $$->typeRef = TypeRef::cons( $1->loc, TypeRef::Ptr, $$->typeRef ); + }; + + +nonterm var_def +{ + InputLoc loc; + ObjField *objField; +}; + +var_def: TK_Word ':' type_ref + final { + /* Record where the name appeared so that users of this nonterminal + * can report errors against loc. */ + $$->loc = $1->loc; + + /* Return an object field object. The user of this nonterminal must + * load it into the appropriate map and do error checking. */ + $$->objField = new ObjField( $1->loc, $3->typeRef, $1->data ); + }; + +region_def: + region_head '{' root_item_list '}' + final { + /* Pop the top of the stack. */ + regionStack.pop(); + }; + +region_head: + KW_Lex TK_Word + final { + /* Just for ignores. 
*/ + String scannerNameIgn( $2->data.length() + 2, "<%s>-ign", $2->data.data ); + TokenRegion *tokenRegionIgn = createRegion( scannerNameIgn ); + + /* Just for collect ignores. Will use the ignore-only start state. */ + String scannerNameCi( $2->data.length() + 2, "<%s>-ci", $2->data.data ); + TokenRegion *tokenRegionCi = createRegion( scannerNameCi ); + + /* Just for tokens. */ + String scannerNameTok( $2->data.length() + 2, "<%s>-tok", $2->data.data ); + TokenRegion *tokenRegionTok = createRegion( scannerNameTok ); + + /* Make the new token region. */ + String scannerName( $2->data.length() + 2, "<%s>", $2->data.data ); + TokenRegion *tokenRegion = createRegion( scannerName ); + + regionStack.push( tokenRegion ); + + tokenRegion->ignoreOnlyRegion = tokenRegionIgn; + tokenRegion->tokenOnlyRegion = tokenRegionTok; + tokenRegion->ciRegion = tokenRegionCi; + + tokenRegion->isFullRegion = true; + tokenRegionIgn->isIgnoreOnly = true; + tokenRegionCi->isCiOnly = true; + tokenRegionTok->isTokenOnly = true; + + tokenRegionIgn->derivedFrom = tokenRegion; + tokenRegionCi->derivedFrom = tokenRegion; + tokenRegionTok->derivedFrom = tokenRegion; + }; + +namespace_def: + namespace_head '{' root_item_list '}' + final { + namespaceStack.pop(); + }; + +namespace_head: + KW_Namespace TK_Word + final { + /* Make the new namespace. 
*/ + Namespace *nspace = new Namespace( InputLoc(), $2->data, + pd->namespaceList.length(), namespaceStack.top() ); + namespaceStack.top()->childNamespaces.append( nspace ); + pd->namespaceList.append( nspace ); + namespaceStack.push( nspace ); + }; + +context_var_def: + var_def + final { + /* Declares a variable inside a context: the field is tagged with the + * current context and inserted into that context's object definition. */ + ObjectDef *object; + /* Report against the field's own location, as the redeclaration + * check below does. */ + if ( contextStack.length() == 0 ) + error($1->objField->loc) << "internal error: no context stack items found" << endp; + + Context *context = contextStack.top(); + $1->objField->context = context; + object = context->contextObjDef; + + if ( object->checkRedecl( $1->objField->name ) != 0 ) + error($1->objField->loc) << "object field renamed" << endp; + + object->insertField( $1->objField->name, $1->objField ); + }; + + +context_item: context_var_def commit; +context_item: literal_def commit; +context_item: rl_def commit; +context_item: token_def commit; +context_item: cfl_def commit; +context_item: region_def commit; +context_item: context_def commit; +context_item: function_def commit; +context_item: iter_def commit; +context_item: pre_eof commit; +context_item: precedence commit; + +context_item_list: + context_item_list context_item; +context_item_list: + ; + +context_def: + context_head '{' context_item_list '}' + final { + /* Undo both pushes made by context_head. */ + contextStack.pop(); + namespaceStack.pop(); + }; + +context_head: + KW_Context TK_Word + final { + /* Make the new namespace. 
*/ + Namespace *nspace = new Namespace( InputLoc(), $2->data, + pd->namespaceList.length(), namespaceStack.top() ); + namespaceStack.top()->childNamespaces.append( nspace ); + pd->namespaceList.append( nspace ); + namespaceStack.push( nspace ); + + /* Create the context and make it current for the items that follow. */ + Context *context = new Context( $1->loc, 0 ); + contextStack.push( context ); + + ContextDef *contextDef = new ContextDef( $2->data, context, nspace ); + nspace->contextDefList.append( contextDef ); + + /* Object definition that context_var_def inserts fields into. */ + context->contextObjDef = new ObjectDef( ObjectDef::UserType, + $2->data, pd->nextObjectId++ ); + }; + +pattern_list: pattern_list pattern; +pattern_list: init_pattern_list pattern; + +init_pattern_list: + final { + /* Fresh item list that the pattern element actions append to. */ + patternItemList = new PatternItemList; + }; + +pattern: '"' litpat_el_list '"'; +pattern: '[' pattern_el_list ']'; + +litpat_el_list: litpat_el_list litpat_el; +litpat_el_list: ; + +litpat_el: TK_LitPat + final { + /* Literal text inside a quoted pattern becomes an InputText item. */ + PatternItem *patternItem = PatternItem::cons( $1->loc, $1->data, + PatternItem::InputText ); + patternItemList->append( patternItem ); + }; + +litpat_el: '[' pattern_el_list ']'; + +pattern_el_list: pattern_el_list pattern_el; +pattern_el_list: ; + +pattern_el: opt_label pattern_el_type_or_lit + final { + /* Store the variable reference in the pattern item. */ + $2->patternItem->varRef = $1->varRef; + + /* A labelled element also declares a local variable with the + * element's type in the current local frame. */ + if ( $1->varRef != 0 ) { + if ( pd->curLocalFrame->checkRedecl( $1->varRef->name ) != 0 ) { + error( $1->varRef->loc ) << "variable " << $1->varRef->name << + " redeclared" << endp; + } + + TypeRef *typeRef = $2->patternItem->factor->typeRef; + ObjField *objField = new ObjField( InputLoc(), typeRef, $1->varRef->name ); + + /* Insert it into the field map. */ + pd->curLocalFrame->insertField( $1->varRef->name, objField ); + } + }; + +pattern_el: '"' litpat_el_list '"'; +pattern_el: '?' 
TK_Word + final { + /* FIXME: Implement */ + assert(false); + }; + +nonterm pattern_el_type_or_lit +{ + PatternItem *patternItem; +}; + +pattern_el_type_or_lit: region_qual TK_Word opt_repeat + final { + TypeRef *typeRef = TypeRef::cons( $2->loc, $1->nspaceQual, $2->data, $3->repeatType ); + ProdEl *factor = new ProdEl( ProdEl::ReferenceType, $2->loc, 0, false, typeRef, 0 ); + $$->patternItem = PatternItem::cons( $2->loc, factor, PatternItem::FactorType ); + patternItemList->append( $$->patternItem ); + }; + +pattern_el_type_or_lit: region_qual TK_Literal opt_repeat + final { + PdaLiteral *literal = new PdaLiteral( $2->loc, $2->data ); + TypeRef *typeRef = TypeRef::cons( $2->loc, $1->nspaceQual, literal, $3->repeatType ); + + ProdEl *factor = new ProdEl( ProdEl::ReferenceType, $2->loc, 0, false, typeRef, 0 ); + $$->patternItem = PatternItem::cons( $2->loc, factor, PatternItem::FactorType ); + patternItemList->append( $$->patternItem ); + }; + +nonterm opt_label +{ + /* Variable reference. 
*/ + LangVarRef *varRef; +}; + +opt_label: TK_Word ':' + final { + $$->varRef = LangVarRef::cons( $1->loc, $1->data ); + }; +opt_label: + final { + $$->varRef = 0; + }; + +# +# Replacement List (constructor) +# + +replacement: init_repl_list repl_list; + +repl_list: repl_top_el repl_list; +repl_list: repl_top_el; + +init_repl_list: + final { + replItemList = new ReplItemList; + }; + +repl_top_el: '"' lit_repl_el_list '"'; +repl_top_el: '[' repl_el_list ']'; + +lit_repl_el_list: lit_repl_el_list lit_repl_el; +lit_repl_el_list: ; + +lit_repl_el: TK_LitPat + final { + ReplItem *replItem = ReplItem::cons( $1->loc, ReplItem::InputText, $1->data ); + replItemList->append( replItem ); + }; + +lit_repl_el: '[' repl_el_list ']'; + +repl_el_list: repl_el_list repl_el; +repl_el_list: ; + +repl_el: region_qual TK_Literal + final { + PdaLiteral *literal = new PdaLiteral( $2->loc, $2->data ); + TypeRef *typeRef = TypeRef::cons( $2->loc, $1->nspaceQual, literal ); + ProdEl *factor = new ProdEl( ProdEl::LiteralType, $2->loc, 0, false, typeRef, 0 ); + ReplItem *replItem = ReplItem::cons( $2->loc, ReplItem::FactorType, factor ); + replItemList->append( replItem ); + }; + +repl_el: '"' lit_repl_el_list '"'; + +repl_el: code_expr + final { + ReplItem *replItem = ReplItem::cons( $1->expr->loc, ReplItem::ExprType, $1->expr ); + replItemList->append( replItem ); + }; + + +# +# Accumulate List +# +accumulate: + init_accum_list accum_list; + +init_accum_list: + final { + replItemList = new ReplItemList; + }; + +accum_list: accum_top_el accum_list; +accum_list: accum_top_el; + +accum_top_el: '"' lit_accum_el_list '"'; +accum_top_el: '[' accum_el_list ']'; + +lit_accum_el_list: lit_accum_el_list lit_accum_el; +lit_accum_el_list: ; + +lit_accum_el: TK_LitPat + final { + ReplItem *replItem = ReplItem::cons( $1->loc, ReplItem::InputText, $1->data ); + replItemList->append( replItem ); + }; + +lit_accum_el: '[' accum_el_list ']'; + +accum_el_list: accum_el_list accum_el; +accum_el_list: ; + 
+accum_el: code_expr + final { + ReplItem *replItem = ReplItem::cons( $1->expr->loc, ReplItem::ExprType, $1->expr ); + replItemList->append( replItem ); + }; + +accum_el: '"' lit_accum_el_list '"'; + + +# +# String List +# + +string: init_string_list string_list; + +init_string_list: + final { + replItemList = new ReplItemList; + }; + +string_list: string_top_el string_list; +string_list: string_top_el; + +string_top_el: '"' lit_string_el_list '"'; +string_top_el: '[' string_el_list ']'; + +lit_string_el_list: lit_string_el_list lit_string_el; +lit_string_el_list: ; + +lit_string_el: TK_LitPat + final { + ReplItem *replItem = ReplItem::cons( $1->loc, ReplItem::InputText, $1->data ); + replItemList->append( replItem ); + }; + +lit_string_el: '[' string_el_list ']'; + +string_el_list: string_el_list string_el; +string_el_list: ; + +string_el: code_expr + final { + ReplItem *replItem = ReplItem::cons( $1->expr->loc, ReplItem::ExprType, $1->expr ); + replItemList->append( replItem ); + }; + +string_el: '"' lit_string_el_list '"'; + +# +# Production Lists. +# + +prod_el_list: + prod_el_list prod_el + final { + curProdElList->append( $2->factor ); + }; + +prod_el_list: + final { curProdElList = new ProdElList; }; + +nonterm opt_no_ignore { bool value; }; + +opt_no_ignore: KW_Ni final { $$->value = true; }; +opt_no_ignore: final { $$->value = false; }; + +nonterm prod_el +{ + ProdEl *factor; +}; + +prod_el: + opt_capture opt_commit region_qual TK_Word opt_repeat + final { + TypeRef *typeRef = TypeRef::cons( $4->loc, $3->nspaceQual, $4->data, $5->repeatType ); + $$->factor = new ProdEl( ProdEl::ReferenceType, $4->loc, $1->objField, $2->commit, typeRef, 0 ); + + /* If there is a capture, create the field. */ + if ( $1->objField != 0 ) { + /* Might already exist. 
*/ + ObjField *objField = pd->objectDef->checkRedecl( $1->objField->name ); + if ( objField == 0 ) { + objField = $1->objField; + objField->typeRef = typeRef; + pd->objectDef->insertField( objField->name, objField ); + } + else { + /* FIXME: check the types are the same. */ + //error() << "object field renamed" << endp; + } + + /* Record this use of the capture, built from the current lengths + * of curDefList and curProdElList. */ + objField->isRhsGet = true; + RhsVal rhsVal( curDefList->length(), curProdElList->length() ); + objField->rhsVal.append( rhsVal ); + } + }; + +prod_el: + opt_capture opt_commit region_qual TK_Literal opt_repeat + final { + /* Create a new factor node going to a concat literal. */ + PdaLiteral *literal = new PdaLiteral( $4->loc, $4->data ); + TypeRef *typeRef = TypeRef::cons( $4->loc, $3->nspaceQual, literal, $5->repeatType ); + $$->factor = new ProdEl( ProdEl::LiteralType, $4->loc, $1->objField, $2->commit, typeRef, 0 ); + + /* If there is a capture, create the field. */ + if ( $1->objField != 0 ) { + $1->objField->typeRef = typeRef; + /* Report the redeclaration at the capture's own location. */ + if ( pd->objectDef->checkRedecl( $1->objField->name ) != 0 ) + error($1->objField->loc) << "object field renamed" << endp; + + pd->objectDef->insertField( $1->objField->name, $1->objField ); + } + }; + +nonterm opt_repeat +{ + bool opt; + bool repeat; + RepeatType repeatType; +}; + +opt_repeat: '*' final { $$->opt = false; $$->repeat = true; $$->repeatType = RepeatRepeat; }; +opt_repeat: '+' final { $$->opt = false; $$->repeat = false; $$->repeatType = RepeatList; }; +opt_repeat: '?' 
final { $$->opt = true; $$->repeat = false; $$->repeatType = RepeatOpt; }; +opt_repeat: final { $$->opt = false; $$->repeat = false; $$->repeatType = RepeatNone; }; + +nonterm region_qual +{ + NamespaceQual *nspaceQual; +}; + +region_qual: region_qual TK_Word TK_DoubleColon + final { + $$->nspaceQual = $1->nspaceQual; + $$->nspaceQual->qualNames.append( $2->data ); + }; + +region_qual: + final { + $$->nspaceQual = new NamespaceQual( namespaceStack.top(), regionStack.top() ); + }; + +literal_def: KW_Literal literal_list; + +literal_list: literal_list ',' literal_item; +literal_list: literal_item; + +literal_item: opt_no_ignore TK_Literal opt_no_ignore + final { + /* Create a name for the literal. */ + String name( 32, "_literal_%.4x", pd->nextTokenId ); + + bool insideRegion = regionStack.top() != pd->rootRegion; + if ( !insideRegion ) { + /* Just for ignores. */ + String scannerNameIgn( name.length() + 2, "<%s>-ign", name.data ); + TokenRegion *tokenRegionIgn = createRegion( scannerNameIgn ); + + /* Just for collect ignores. Will use the ignore-only start state. */ + String scannerNameCi( name.length() + 2, "<%s>-ci", name.data ); + TokenRegion *tokenRegionCi = createRegion( scannerNameCi ); + + /* Just for tokens. */ + String scannerNameTok( name.length() + 2, "<%s>-tok", name.data ); + TokenRegion *tokenRegionTok = createRegion( scannerNameTok ); + + /* Make a new token region just for the token. 
*/ + String scannerName( name.length() + 2, "<%s>", name.data ); + TokenRegion *tokenRegion = createRegion( scannerName ); + + regionStack.push( tokenRegion ); + + tokenRegion->ignoreOnlyRegion = tokenRegionIgn; + tokenRegion->tokenOnlyRegion = tokenRegionTok; + tokenRegion->ciRegion = tokenRegionCi; + + tokenRegion->isFullRegion = true; + tokenRegionIgn->isIgnoreOnly = true; + tokenRegionCi->isCiOnly = true; + tokenRegionTok->isTokenOnly = true; + + tokenRegionIgn->derivedFrom = tokenRegion; + tokenRegionCi->derivedFrom = tokenRegion; + tokenRegionTok->derivedFrom = tokenRegion; + } + + bool unusedCI; + String interp; + prepareLitString( interp, unusedCI, $2->data, $2->loc ); + + /* Look for the production's associated region. */ + Namespace *nspace = namespaceStack.top(); + TokenRegion *region = regionStack.top(); + + LiteralDictEl *ldel = nspace->literalDict.find( interp ); + if ( ldel != 0 ) + error( $2->loc ) << "literal already defined in this namespace" << endp; + else { + Join *join = new Join( Expression::cons( Term::cons( FactorWithAug::cons( + FactorWithRep::cons( $2->loc, FactorWithNeg::cons( $2->loc, Factor::cons( + Literal::cons( $2->loc, $2->data, + Literal::LitString ) ) ) ) ) ) ) ); + + if ( strcmp( interp.data, "" ) == 0 ) { + TokenDef *tokenDef = new TokenDef( name, $2->data, true, false, join, + 0, $2->loc, pd->nextTokenId++, nspace, region, 0, 0, 0 ); + + //region->tokenDefList.append( tokenDef ); + + ldel = nspace->literalDict.insert( interp, tokenDef ); + nspace->tokenDefList.append( tokenDef ); + + tokenDef->isZero = true; + } + else { + TokenDef *tokenDef = new TokenDef( name, $2->data, true, false, join, + 0, $2->loc, pd->nextTokenId++, nspace, region, 0, 0, 0 ); + region->tokenDefList.append( tokenDef ); + ldel = nspace->literalDict.insert( interp, tokenDef ); + nspace->tokenDefList.append( tokenDef ); + + if ( $1->value ) + tokenDef->noPreIgnore = true; + if ( $3->value ) + tokenDef->noPostIgnore = true; + + TokenDef *tokenDefTok = new 
TokenDef( name + "_tok", $2->data, true, false, join, + 0, $2->loc, pd->nextTokenId++, nspace, region->tokenOnlyRegion, 0, 0, 0 ); + tokenDefTok->dupOf = tokenDef; + region->tokenOnlyRegion->tokenDefList.append( tokenDefTok ); + ldel = nspace->literalDict.insert( "|" + interp + "_tok", tokenDefTok ); + nspace->tokenDefList.append( tokenDefTok ); + } + } + + if ( !insideRegion ) { + /* Leave the region just for this token. */ + regionStack.pop(); + } + }; + + +# These two productions are responsible for setting and unsetting the Regular +# language scanning context. +enter_rl: + try { + enterRl = true; + } + undo { + enterRl = false; + }; +leave_rl: + try { + enterRl = false; + } + undo { + enterRl = true; + }; + +token_def: + token_or_ignore token_def_name obj_var_list + enter_rl opt_no_ignore '/' opt_rl_join leave_rl '/' opt_no_ignore + opt_translate + final { + bool ignore = $1->ignore; + String name = $2->name; + Join *join = $7->join; + CodeBlock *transBlock = $11->transBlock; + + /* Check the region if this is for an ignore. */ + if ( ignore && !pd->insideRegion ) + error($1->loc) << "ignore tokens can only appear inside scanners" << endp; + + /* Check the name if this is a token. */ + if ( !ignore && name == 0 ) + error($1->loc) << "tokens must have a name" << endp; + + /* Give a default name to ignores. */ + if ( name == 0 ) + name.setAs( 32, "_ignore_%.4x", pd->nextTokenId ); + + Namespace *nspace = namespaceStack.top(); + TokenRegion *region = regionStack.top(); + + TokenDef *tokenDef = new TokenDef( name, String(), false, ignore, join, + transBlock, $1->loc, pd->nextTokenId++, nspace, region, + &reCaptureVect, pd->objectDef, + contextStack.length() > 0 ? contextStack.top() : 0 ); + + region->tokenDefList.append( tokenDef ); + nspace->tokenDefList.append( tokenDef ); + + if ( $5->value ) + tokenDef->noPreIgnore = true; + if ( $10->value ) + tokenDef->noPostIgnore = true; + + /* All again for the ignore. 
*/ + if ( ignore ) { + TokenDef *tokenDefIgn = new TokenDef( name + "_ign", String(), false, ignore, join, + 0, $1->loc, pd->nextTokenId++, nspace, region->ignoreOnlyRegion, + &reCaptureVect, pd->objectDef, + contextStack.length() > 0 ? contextStack.top() : 0 ); + + tokenDefIgn->dupOf = tokenDef; + + region->ignoreOnlyRegion->tokenDefList.append( tokenDefIgn ); + nspace->tokenDefList.append( tokenDefIgn ); + } + else { + TokenDef *tokenDefTok = new TokenDef( name + "_tok", String(), false, ignore, join, + 0, $1->loc, pd->nextTokenId++, nspace, region->tokenOnlyRegion, + &reCaptureVect, pd->objectDef, + contextStack.length() > 0 ? contextStack.top() : 0 ); + + tokenDefTok->dupOf = tokenDef; + + region->tokenOnlyRegion->tokenDefList.append( tokenDefTok ); + nspace->tokenDefList.append( tokenDefTok ); + } + + /* This is created and pushed in the name. */ + if ( !pd->insideRegion ) { + /* Leave the region that we made just for this token. */ + regionStack.pop(); + } + + if ( join != 0 ) { + /* Create a regular language definition so the token can be used to + * make other tokens */ + addRegularDef( $1->loc, namespaceStack.top(), name, join ); + } + + + reCaptureVect.empty(); + }; + +nonterm token_or_ignore +{ + InputLoc loc; + bool ignore; +}; + +token_or_ignore: KW_Token + final { $$->loc = $1->loc; $$->ignore = false; }; + +token_or_ignore: KW_Ignore + final { $$->loc = $1->loc; $$->ignore = true; }; + +nonterm class token_def_name +{ + String name; +}; + +token_def_name: + opt_name + final { + String name = $1->name; + + $$->name = name; + pd->insideRegion = regionStack.top() != pd->rootRegion; + curDefineId = name; + + if ( !pd->insideRegion ) { + /* For just ignores. */ + String scannerNameIgn( name.length() + 2, "<%s>-ign", name.data ); + TokenRegion *tokenRegionIgn = createRegion( scannerNameIgn ); + + /* Just for explicitly collecting ignores. 
*/ + String scannerNameCi( name.length() + 2, "<%s>-ci", name.data ); + TokenRegion *tokenRegionCi = createRegion( scannerNameCi ); + + /* Just for tokens. */ + String scannerNameTok( name.length() + 2, "<%s>-tok", name.data ); + TokenRegion *tokenRegionTok = createRegion( scannerNameTok ); + + /* If not inside a region, make one for the token. */ + String scannerName( name.length() + 2, "<%s>", name.data ); + TokenRegion *tokenRegion = createRegion( scannerName ); + + /* token_def pops this region again once the token is built. */ + regionStack.push( tokenRegion ); + + /* Link the full region to its three derived regions. */ + tokenRegion->ignoreOnlyRegion = tokenRegionIgn; + tokenRegion->tokenOnlyRegion = tokenRegionTok; + tokenRegion->ciRegion = tokenRegionCi; + + tokenRegion->isFullRegion = true; + tokenRegionIgn->isIgnoreOnly = true; + tokenRegionCi->isCiOnly = true; + tokenRegionTok->isTokenOnly = true; + + tokenRegionIgn->derivedFrom = tokenRegion; + tokenRegionCi->derivedFrom = tokenRegion; + tokenRegionTok->derivedFrom = tokenRegion; + } + + /* Reset the label id counter. */ + pd->nextLabelId = 0; + }; + +nonterm class opt_name +{ + String name; +}; + +opt_name: TK_Word final { $$->name = $1->data; }; +opt_name: ; + +nonterm opt_translate +{ + CodeBlock *transBlock; +}; + +opt_translate: + block_open lang_stmt_list block_close + final { + /* Build the translation block, giving it the frame from block_open + * and the enclosing context, if any. */ + $$->transBlock = new CodeBlock( $2->stmtList ); + $$->transBlock->localFrame = $1->localFrame; + $$->transBlock->context = contextStack.length() == 0 ? 0 : contextStack.top(); + }; + +opt_translate: + final { + $$->transBlock = 0; + }; + +pre_eof: + KW_Preeof block_open lang_stmt_list block_close + final { + bool insideRegion = regionStack.top() != pd->rootRegion; + /* NOTE(review): this error ends with endl where most error sites in + * this file use endp -- confirm that continuing is intended. */ + if ( !insideRegion ) + error($1->loc) << "preeof must be used inside an existing region" << endl; + + CodeBlock *codeBlock = new CodeBlock( $3->stmtList ); + codeBlock->localFrame = $2->localFrame; + codeBlock->context = contextStack.length() == 0 ? 
0 : contextStack.top(); + + /* Attach the block to the region on top of the region stack. */ + TokenRegion *region = regionStack.top(); + region->preEofBlock = codeBlock; + }; + +rl_def: + KW_Rl machine_name enter_rl '/' rl_join leave_rl '/' + final { + /* Generic creation of machine for instantiation and assignment. */ + addRegularDef( $2->loc, namespaceStack.top(), $2->data, $5->join ); + + /* NOTE(review): this error ends with endl where most error sites in + * this file use endp, and reCaptureVect is not emptied here as it is + * at the end of token_def -- confirm both are intended. */ + if ( reCaptureVect.length() > 0 ) + error($1->loc) << "rl definitions cannot capture vars" << endl; + }; + +type class token_data +{ + InputLoc loc; + String data; +}; + +nonterm machine_name uses token_data; + +machine_name: + TK_Word + final { + /* Make/get the priority key. The name may have already been referenced + * and therefore exist. */ + PriorDictEl *priorDictEl; + /* The next-key counter only advances when insert reports success. */ + if ( pd->priorDict.insert( $1->data, pd->nextPriorKey, &priorDictEl ) ) + pd->nextPriorKey += 1; + pd->curDefPriorKey = priorDictEl->value; + + /* Make/get the local error key. */ + LocalErrDictEl *localErrDictEl; + if ( pd->localErrDict.insert( $1->data, pd->nextLocalErrKey, &localErrDictEl ) ) + pd->nextLocalErrKey += 1; + pd->curDefLocalErrKey = localErrDictEl->value; + + /* Pass the name and its location up to rl_def. */ + $$->loc = $1->loc; + $$->data = $1->data; + }; + +# +# Reduce statements +# + +nonterm opt_reduce_code +{ + CodeBlock *codeBlock; +}; + +opt_reduce_code: + final { $$->codeBlock = 0; }; + +opt_reduce_code: + start_reduce lang_stmt_list block_close + final { + /* Build the reduction block, giving it the frame passed up by + * start_reduce and the enclosing context, if any. */ + $$->codeBlock = new CodeBlock( $2->stmtList ); + $$->codeBlock->localFrame = $1->localFrame; + $$->codeBlock->context = contextStack.length() == 0 ? 
0 : contextStack.top(); + }; + +nonterm start_reduce uses block_open; + +start_reduce: + block_open + final { + /* Pass the frame created by block_open up to the reduce-code rule. */ + $$->localFrame = $1->localFrame; + }; + +nonterm lang_stmt_list +{ + StmtList *stmtList; +}; + +lang_stmt_list: rec_stmt_list opt_require_stmt + final { + /* Reuse the list built by rec_stmt_list and append the trailing require statement when there is one. */ + $$->stmtList = $1->stmtList; + if ( $2->stmt != 0 ) + $$->stmtList->append( $2->stmt ); + }; + +nonterm rec_stmt_list uses lang_stmt_list; + +rec_stmt_list: rec_stmt_list statement + final { + $$->stmtList = $1->stmtList; + + /* Maybe a statement was generated. */ + if ( $2->stmt != 0 ) + $$->stmtList->append( $2->stmt ); + }; + +rec_stmt_list: + final { + /* Empty list: this is where the StmtList is allocated. */ + $$->stmtList = new StmtList; + }; + +nonterm opt_def_init +{ + LangExpr *expr; + LangStmt::Type assignType; +}; + +opt_def_init: '=' code_expr + final { + $$->expr = $2->expr; + $$->assignType = LangStmt::AssignType; + }; +opt_def_init: + final { + /* NOTE(review): assignType is left unset here; the var_def statement rule only reads it when expr is non-null. */ + $$->expr = 0; + }; + +scope_push: + final { + pd->curLocalFrame->pushScope(); + //cout << "push scope" << endl; + }; + +scope_pop: + final { + pd->curLocalFrame->popScope(); + //cout << "pop scope" << endl; + }; + +nonterm statement +{ + LangStmt *stmt; +}; +nonterm for_scope uses statement; + +statement: var_def opt_def_init + final { + /* By default no statement here. Maybe will add an initialization. */ + $$->stmt = 0; + + /* Check for redeclaration. */ + if ( pd->curLocalFrame->checkRedecl( $1->objField->name ) != 0 ) { + error( $1->objField->loc ) << "variable " << $1->objField->name << + " redeclared" << endp; + } + + /* Insert it into the field map. 
*/ + pd->curLocalFrame->insertField( $1->objField->name, $1->objField ); + + //cout << "var def " << $1->objField->name << endl; + + /* An initializer turns the declaration into a statement assigning to the new variable. */ + if ( $2->expr != 0 ) { + LangVarRef *varRef = LangVarRef::cons( $1->objField->loc, + $1->objField->name ); + + $$->stmt = LangStmt::cons( $1->objField->loc, + $2->assignType, varRef, $2->expr ); + } + }; +statement: var_ref '=' code_expr + final { + /* The location recorded is that of the '=' token. */ + $$->stmt = LangStmt::cons( $2->loc, LangStmt::AssignType, $1->varRef, $3->expr ); + }; +statement: KW_Print '(' code_expr_list ')' + final { + $$->stmt = LangStmt::cons( $1->loc, LangStmt::PrintType, $3->exprVect ); + }; +statement: KW_PrintXMLAC '(' code_expr_list ')' + final { + $$->stmt = LangStmt::cons( $1->loc, LangStmt::PrintXMLACType, $3->exprVect ); + }; +statement: KW_PrintXML '(' code_expr_list ')' + final { + $$->stmt = LangStmt::cons( $1->loc, LangStmt::PrintXMLType, $3->exprVect ); + }; +statement: KW_PrintStream '(' code_expr_list ')' + final { + $$->stmt = LangStmt::cons( $1->loc, LangStmt::PrintStreamType, $3->exprVect ); + }; +statement: code_expr + final { + /* A bare expression statement is given a default-constructed InputLoc. */ + $$->stmt = LangStmt::cons( InputLoc(), LangStmt::ExprType, $1->expr ); + }; +statement: if_stmt + final { + $$->stmt = $1->stmt; + }; +statement: KW_Reject + final { + $$->stmt = LangStmt::cons( $1->loc, LangStmt::RejectType ); + }; +statement: KW_While scope_push code_expr block_or_single scope_pop + final { + /* The condition and body are parsed between scope_push and scope_pop. */ + $$->stmt = LangStmt::cons( LangStmt::WhileType, $3->expr, $4->stmtList ); + }; + +for_scope: TK_Word ':' type_ref KW_In iter_call block_or_single + final { + /* Check for redeclaration. */ + if ( pd->curLocalFrame->checkRedecl( $1->data ) != 0 ) + error( $1->loc ) << "variable " << $1->data << " redeclared" << endp; + + /* Note that we pass in a null type reference. This type is dependent + * on the result of the iter_call lookup since it must contain a reference + * to the iterator that is called. This lookup is done at compile time. 
*/ + ObjField *iterField = new ObjField( $1->loc, (TypeRef*)0, $1->data ); + pd->curLocalFrame->insertField( $1->data, iterField ); + + $$->stmt = LangStmt::cons( $1->loc, LangStmt::ForIterType, + iterField, $3->typeRef, $5->langTerm, $6->stmtList ); + }; + +statement: KW_For scope_push for_scope scope_pop + final { + /* for_scope builds the statement; this rule only brackets it with a scope. */ + $$->stmt = $3->stmt; + }; + +statement: KW_Return code_expr + final { + $$->stmt = LangStmt::cons( $1->loc, LangStmt::ReturnType, $2->expr ); + }; +statement: KW_Break + final { + $$->stmt = LangStmt::cons( LangStmt::BreakType ); + }; +statement: KW_Yield var_ref + final { + $$->stmt = LangStmt::cons( LangStmt::YieldType, $2->varRef ); + }; + +nonterm opt_require_stmt uses statement; + +opt_require_stmt: + scope_push require_pattern lang_stmt_list scope_pop + final { + /* A require is built as an if statement with no else part: the pattern match is the condition and the statements that follow it are the body. */ + $$->stmt = LangStmt::cons( LangStmt::IfType, $2->expr, $3->stmtList, 0 ); + }; +opt_require_stmt: + final { + $$->stmt = 0; + }; + +nonterm require_pattern uses code_expr; + +require_pattern: + KW_Require var_ref pattern_list + final { + /* Register a new pattern built from the current patternItemList, then produce a match expression against the referenced variable. */ + Namespace *nspace = namespaceStack.top(); + TokenRegion *region = regionStack.top(); + Pattern *pattern = Pattern::cons( $1->loc, nspace, region, + patternItemList, pd->nextPatReplId++ ); + pd->patternList.append( pattern ); + + $$->expr = LangExpr::cons( LangTerm::cons( + InputLoc(), LangTerm::MatchType, $2->varRef, pattern ) ); + }; + +nonterm block_or_single uses lang_stmt_list; + +block_or_single: '{' lang_stmt_list '}' + final { + $$->stmtList = $2->stmtList; + }; +block_or_single: statement + final { + /* Wrap the single statement in a fresh list. A declaration without an initializer produces a null stmt; skip it, as the other statement-list rules do, instead of appending a null pointer. */ + $$->stmtList = new StmtList; + if ( $1->stmt != 0 ) + $$->stmtList->append( $1->stmt ); + }; + +nonterm iter_call +{ + LangTerm *langTerm; +}; + +iter_call: var_ref '(' opt_code_expr_list ')' + final { + /* Call form: the argument vector may be null when no arguments were given. */ + $$->langTerm = LangTerm::cons( InputLoc(), $1->varRef, $3->exprVect ); + }; +iter_call: TK_Word + final { + /* Bare word form: becomes a plain variable reference term. */ + $$->langTerm = LangTerm::cons( InputLoc(), LangTerm::VarRefType, + LangVarRef::cons( $1->loc, $1->data ) ); + }; + +# +# If Statements +# + +nonterm if_stmt uses 
statement; + +if_stmt: KW_If scope_push code_expr block_or_single scope_pop elsif_list + final { + /* The elsif_list result (an elsif chain, an else, or null) becomes the else part. */ + $$->stmt = LangStmt::cons( LangStmt::IfType, $3->expr, $4->stmtList, $6->stmt ); + }; + +nonterm elsif_list +{ + LangStmt *stmt; +}; + +elsif_list: + elsif_clause elsif_list + final { + /* Put any of the following elsif part, an else, or null into the elsePart. */ + $$->stmt = $1->stmt; + $$->stmt->elsePart = $2->stmt; + }; +elsif_list: + optional_else + final { + $$->stmt = $1->stmt; + }; + +nonterm elsif_clause +{ + LangStmt *stmt; +}; + +elsif_clause: + KW_Elsif scope_push code_expr block_or_single scope_pop + final { + /* Built as an if statement with a null else part; elsif_list fills the else part in afterwards. */ + $$->stmt = LangStmt::cons( LangStmt::IfType, $3->expr, $4->stmtList, 0 ); + }; + +nonterm optional_else +{ + LangStmt *stmt; +}; + +optional_else: + KW_Else scope_push block_or_single scope_pop + final { + $$->stmt = LangStmt::cons( LangStmt::ElseType, $3->stmtList ); + }; + +optional_else: + final { + /* No else clause. */ + $$->stmt = 0; + }; + +# +# Code Expression Lists. +# +nonterm code_expr_list +{ + ExprVect *exprVect; +}; + +code_expr_list: code_expr_list code_expr + final { + /* Items are simply juxtaposed; the grammar has no separator token between them. */ + $$->exprVect = $1->exprVect; + $$->exprVect->append( $2->expr ); + }; +code_expr_list: code_expr + final { + /* First item: this is where the vector is allocated. */ + $$->exprVect = new ExprVect; + $$->exprVect->append( $1->expr ); + }; + +nonterm opt_code_expr_list uses code_expr_list; + +opt_code_expr_list: code_expr_list + final { + $$->exprVect = $1->exprVect; + }; + +opt_code_expr_list: + final { + /* An absent list is a null pointer, not an empty vector. */ + $$->exprVect = 0; + }; + +# +# Type list +# + +nonterm type_list +{ + TypeRefVect *typeRefVect; +}; + +type_list: type_list ',' type_ref + final { + $$->typeRefVect = $1->typeRefVect; + $$->typeRefVect->append( $3->typeRef ); + }; +type_list: type_ref + final { + /* First item: this is where the vector is allocated. */ + $$->typeRefVect = new TypeRefVect; + $$->typeRefVect->append( $1->typeRef ); + }; + +nonterm opt_type_list uses type_list; + +opt_type_list: type_list + final { + $$->typeRefVect = $1->typeRefVect; + }; + +opt_type_list: + final { + /* An absent list is a null pointer, not an empty vector. */ + $$->typeRefVect = 0; + }; + + +# +# Variable reference +# + 
+nonterm var_ref +{ + LangVarRef *varRef; +}; + +var_ref: qual TK_Word + final { + /* The final word is the variable name; qual holds the qualification items that precede it. */ + $$->varRef = LangVarRef::cons( $2->loc, $1->qual, $2->data ); + }; + +nonterm qual +{ + QualItemVect *qual; +}; + +qual: qual TK_Word '.' + final { + $$->qual = $1->qual; + $$->qual->append( QualItem( $2->loc, $2->data, QualItem::Dot ) ); + }; +qual: qual TK_Word TK_RightArrow + final { + $$->qual = $1->qual; + $$->qual->append( QualItem( $2->loc, $2->data, QualItem::Arrow ) ); + }; +qual: + final { + /* Empty qualification: this is where the vector is allocated. */ + $$->qual = new QualItemVect; + }; + +# +# Code expression +# + +nonterm code_expr +{ + LangExpr *expr; +}; + +code_expr: code_expr TK_AmpAmp code_relational + final { + /* Binary expressions record the location of the operator token. */ + $$->expr = LangExpr::cons( $2->loc, $1->expr, OP_LogicalAnd, $3->expr ); + }; + +code_expr: code_expr TK_BarBar code_relational + final { + $$->expr = LangExpr::cons( $2->loc, $1->expr, OP_LogicalOr, $3->expr ); + }; + +code_expr: code_relational + final { + $$->expr = $1->expr; + }; + +nonterm code_relational uses code_expr; + +code_relational: code_relational TK_DoubleEql code_additive + final { + $$->expr = LangExpr::cons( $2->loc, $1->expr, OP_DoubleEql, $3->expr ); + }; + +code_relational: code_relational TK_NotEql code_additive + final { + $$->expr = LangExpr::cons( $2->loc, $1->expr, OP_NotEql, $3->expr ); + }; + +code_relational: code_relational '<' code_additive + final { + /* Single-character operators are passed as their character value. */ + $$->expr = LangExpr::cons( $2->loc, $1->expr, '<', $3->expr ); + }; + +code_relational: code_relational '>' code_additive + final { + $$->expr = LangExpr::cons( $2->loc, $1->expr, '>', $3->expr ); + }; + +code_relational: code_relational TK_LessEql code_additive + final { + $$->expr = LangExpr::cons( $2->loc, $1->expr, OP_LessEql, $3->expr ); + }; + +code_relational: code_relational TK_GrtrEql code_additive + final { + $$->expr = LangExpr::cons( $2->loc, $1->expr, OP_GrtrEql, $3->expr ); + }; + + +code_relational: code_additive + final { + $$->expr = $1->expr; + }; + +nonterm code_additive uses code_expr; + +code_additive: code_additive '+' 
code_multiplicitive + final { + /* Binary expressions record the location of the operator token. */ + $$->expr = LangExpr::cons( $2->loc, $1->expr, '+', $3->expr ); + }; + +code_additive: code_additive '-' code_multiplicitive + final { + $$->expr = LangExpr::cons( $2->loc, $1->expr, '-', $3->expr ); + }; + +code_additive: code_multiplicitive + final { + $$->expr = $1->expr; + }; + +nonterm code_multiplicitive uses code_expr; + +code_multiplicitive: code_multiplicitive '*' code_unary + final { + $$->expr = LangExpr::cons( $2->loc, $1->expr, '*', $3->expr ); + }; + +code_multiplicitive: code_multiplicitive '/' code_unary + final { + $$->expr = LangExpr::cons( $2->loc, $1->expr, '/', $3->expr ); + }; + +code_multiplicitive: code_unary + final { + $$->expr = $1->expr; + }; + +nonterm code_unary uses code_expr; +code_unary: '!' code_factor + final { + /* Unary expressions record the operator's location and pass the operator as its character value. */ + $$->expr = LangExpr::cons( $1->loc, '!', $2->expr ); + }; +code_unary: '$' code_factor + final { + $$->expr = LangExpr::cons( $1->loc, '$', $2->expr ); + }; +code_unary: '^' code_factor + final { + $$->expr = LangExpr::cons( $1->loc, '^', $2->expr ); + }; +code_unary: '%' code_factor + final { + $$->expr = LangExpr::cons( $1->loc, '%', $2->expr ); + }; +code_unary: code_factor + final { + $$->expr = $1->expr; + }; + +nonterm opt_capture uses var_def; + +opt_capture: TK_Word ':' + final { + /* The field is created with a null type reference; the rules that use opt_capture assign objField->typeRef afterwards. */ + $$->objField = new ObjField( $1->loc, 0, $1->data ); + }; +opt_capture: + final { + /* No capture variable. */ + $$->objField = 0; + }; + +nonterm code_factor uses code_expr; + +code_factor: TK_Number + final { + /* Terms built in the next four rules are given a default-constructed InputLoc. */ + $$->expr = LangExpr::cons( LangTerm::cons( InputLoc(), LangTerm::NumberType, $1->data ) ); + }; +code_factor: TK_Literal + final { + $$->expr = LangExpr::cons( LangTerm::cons( InputLoc(), LangTerm::StringType, $1->data ) ); + }; +code_factor: var_ref '(' opt_code_expr_list ')' + final { + /* Call form: the argument vector is null when no arguments were given. */ + $$->expr = LangExpr::cons( LangTerm::cons( InputLoc(), $1->varRef, $3->exprVect ) ); + }; +code_factor: var_ref + final { + $$->expr = LangExpr::cons( LangTerm::cons( InputLoc(), LangTerm::VarRefType, $1->varRef ) ); + }; +code_factor: 
KW_Match var_ref pattern_list + final { + /* Register a new pattern built from the current patternItemList, then produce a match expression against the referenced variable. */ + Namespace *nspace = namespaceStack.top(); + TokenRegion *region = regionStack.top(); + Pattern *pattern = Pattern::cons( $1->loc, nspace, region, + patternItemList, pd->nextPatReplId++ ); + pd->patternList.append( pattern ); + + $$->expr = LangExpr::cons( LangTerm::cons( InputLoc(), LangTerm::MatchType, $2->varRef, pattern ) ); + }; +code_factor: KW_New code_factor + final { + $$->expr = LangExpr::cons( LangTerm::cons( InputLoc(), LangTerm::NewType, $2->expr ) ); + }; +code_factor: + KW_Construct opt_capture type_ref opt_field_init replacement + final { + /* Register a new replacement built from the current replItemList. Patterns and replacements draw ids from the same nextPatReplId counter. */ + Namespace *nspace = namespaceStack.top(); + TokenRegion *region = regionStack.top(); + Replacement *replacement = Replacement::cons( $1->loc, nspace, region, + replItemList, pd->nextPatReplId++ ); + pd->replList.append( replacement ); + + /* A variable reference is only made when a capture name was given. */ + LangVarRef *varRef = 0; + if ( $2->objField != 0 ) + varRef = LangVarRef::cons( $2->objField->loc, $2->objField->name ); + + $$->expr = LangExpr::cons( LangTerm::cons( $1->loc, LangTerm::ConstructType, + varRef, $2->objField, $3->typeRef, $4->fieldInitVect, replacement ) ); + + /* Check for redeclaration. */ + if ( $2->objField != 0 ) { + if ( pd->curLocalFrame->checkRedecl( $2->objField->name ) != 0 ) { + error( $2->objField->loc ) << "variable " << $2->objField->name << + " redeclared" << endp; + } + + /* Insert it into the field map. 
*/ + $2->objField->typeRef = $3->typeRef; + pd->curLocalFrame->insertField( $2->objField->name, $2->objField ); + } + }; +code_factor: + KW_Parse opt_capture type_ref accumulate + final { + Namespace *nspace = namespaceStack.top(); + TokenRegion *region = regionStack.top(); + + /* The replacement registered for this form is built from an empty item list; the current replItemList goes into the ParserText below instead. */ + ReplItemList *emptyReplItemList = new ReplItemList; + + Replacement *replacement = Replacement::cons( $1->loc, nspace, region, + emptyReplItemList, pd->nextPatReplId++ ); + pd->replList.append( replacement ); + + /* A variable reference is only made when a capture name was given. */ + LangVarRef *varRef = 0; + if ( $2->objField != 0 ) + varRef = LangVarRef::cons( $2->objField->loc, $2->objField->name ); + + /* Build a Parser type reference wrapping the parsed type. */ + NamespaceQual *nspaceQual = new NamespaceQual( + namespaceStack.top(), regionStack.top() ); + TypeRef *parserTypeRef = TypeRef::cons( InputLoc(), + TypeRef::Parser, nspaceQual, $3->typeRef, 0 ); + + /* NOTE(review): $2 is opt_capture here, whereas the send rules pass a token location to ParserText::cons; confirm $2->loc is the intended location. */ + ParserText *parserText = ParserText::cons( $2->loc, nspace, region, replItemList ); + pd->parserTextList.append( parserText ); + + $$->expr = LangExpr::cons( LangTerm::cons( $1->loc, LangTerm::ParseType, + varRef, $2->objField, parserTypeRef, 0, replacement, parserText ) ); + + /* Check for redeclaration. */ + if ( $2->objField != 0 ) { + if ( pd->curLocalFrame->checkRedecl( $2->objField->name ) != 0 ) { + error( $2->objField->loc ) << "variable " << $2->objField->name << + " redeclared" << endp; + } + + /* Insert it into the field map. In this form the captured variable gets the parser type. */ + $2->objField->typeRef = parserTypeRef; + pd->curLocalFrame->insertField( $2->objField->name, $2->objField ); + } + }; +code_factor: + KW_Parse opt_capture type_ref '(' opt_code_expr_list ')' + final { + /* NOTE(review): parserName does not appear to be used by the rest of this action; confirm before removing. */ + String parserName = $3->typeRef->typeName + "_parser"; + + /* Get the language element. 
*/ + Namespace *nspace = namespaceStack.top(); + + /* Build a Parser type reference wrapping the parsed type. */ + NamespaceQual *nspaceQual = new NamespaceQual( + namespaceStack.top(), regionStack.top() ); + TypeRef *parserTypeRef = TypeRef::cons( InputLoc(), + TypeRef::Parser, nspaceQual, $3->typeRef, 0 ); + + /* The replacement is registered against the root region and uses a fresh, empty item list. */ + Replacement *replacement = Replacement::cons( $1->loc, nspace, pd->rootRegion, + new ReplItemList, pd->nextPatReplId++ ); + pd->replList.append( replacement ); + + /* A variable reference is only made when a capture name was given. */ + LangVarRef *varRef = 0; + if ( $2->objField != 0 ) + varRef = LangVarRef::cons( $2->objField->loc, $2->objField->name ); + + $$->expr = LangExpr::cons( LangTerm::cons( $1->loc, LangTerm::OrigParseType, + varRef, $2->objField, $3->typeRef, parserTypeRef, replacement ) ); + /* The call arguments are attached to the term after construction; the vector is null when none were given. */ + $$->expr->term->args = $5->exprVect; + + /* Check for redeclaration. */ + if ( $2->objField != 0 ) { + if ( pd->curLocalFrame->checkRedecl( $2->objField->name ) != 0 ) { + error( $2->objField->loc ) << "variable " << $2->objField->name << + " redeclared" << endp; + } + + /* Insert it into the field map. In this form the captured variable gets the parsed type itself. */ + $2->objField->typeRef = $3->typeRef; + pd->curLocalFrame->insertField( $2->objField->name, $2->objField ); + } + }; +code_factor: + KW_ParseStop opt_capture type_ref '(' opt_code_expr_list ')' + final { + /* This is a silly clone. To be fixed later. */ + /* NOTE(review): parserName does not appear to be used by the rest of this action; confirm before removing. */ + String parserName = $3->typeRef->typeName + "_parser"; + + /* Get the language element. 
*/ + Namespace *nspace = namespaceStack.top(); + + /* Build a Parser type reference wrapping the parsed type. */ + NamespaceQual *nspaceQual = new NamespaceQual( + namespaceStack.top(), regionStack.top() ); + TypeRef *parserTypeRef = TypeRef::cons( InputLoc(), + TypeRef::Parser, nspaceQual, $3->typeRef, 0 ); + + /* The replacement is registered against the root region and uses a fresh, empty item list. */ + Replacement *replacement = Replacement::cons( $1->loc, nspace, pd->rootRegion, + new ReplItemList, pd->nextPatReplId++ ); + pd->replList.append( replacement ); + + /* A variable reference is only made when a capture name was given. */ + LangVarRef *varRef = 0; + if ( $2->objField != 0 ) + varRef = LangVarRef::cons( $2->objField->loc, $2->objField->name ); + + /* Same construction as the plain parse-call rule apart from the OrigParseStopType term type. */ + $$->expr = LangExpr::cons( LangTerm::cons( $1->loc, LangTerm::OrigParseStopType, + varRef, $2->objField, $3->typeRef, parserTypeRef, replacement ) ); + $$->expr->term->args = $5->exprVect; + + /* Check for redeclaration. */ + if ( $2->objField != 0 ) { + if ( pd->curLocalFrame->checkRedecl( $2->objField->name ) != 0 ) { + error( $2->objField->loc ) << "variable " << $2->objField->name << + " redeclared" << endp; + } + + /* Insert it into the field map. */ + $2->objField->typeRef = $3->typeRef; + pd->curLocalFrame->insertField( $2->objField->name, $2->objField ); + } + + }; +code_factor: + var_ref TK_LtLt accumulate + final { + /* Operator form of send: register the accumulated text and send it to the referenced variable. */ + Namespace *nspace = namespaceStack.top(); + TokenRegion *region = regionStack.top(); + ParserText *parserText = ParserText::cons( $2->loc, nspace, region, replItemList ); + pd->parserTextList.append( parserText ); + + $$->expr = LangExpr::cons( LangTerm::cons( InputLoc(), LangTerm::SendType, $1->varRef, parserText ) ); + }; +code_factor: + KW_Send var_ref accumulate + final { + /* Keyword form of send: builds the same SendType term as the operator form. */ + Namespace *nspace = namespaceStack.top(); + TokenRegion *region = regionStack.top(); + ParserText *parserText = ParserText::cons( $1->loc, nspace, region, replItemList ); + pd->parserTextList.append( parserText ); + + $$->expr = LangExpr::cons( LangTerm::cons( InputLoc(), LangTerm::SendType, $2->varRef, parserText ) ); + }; +code_factor: KW_TypeId '<' type_ref '>' + final { + $$->expr = LangExpr::cons( LangTerm::cons( $1->loc, + 
LangTerm::TypeIdType, $3->typeRef ) ); + }; +code_factor: type_ref KW_In var_ref + final { + /* Search term: the location recorded is that of the 'in' keyword. */ + $$->expr = LangExpr::cons( LangTerm::cons( $2->loc, + LangTerm::SearchType, $1->typeRef, $3->varRef ) ); + }; +code_factor: KW_Nil + final { + $$->expr = LangExpr::cons( LangTerm::cons( $1->loc, + LangTerm::NilType ) ); + }; +code_factor: KW_True + final { + $$->expr = LangExpr::cons( LangTerm::cons( $1->loc, + LangTerm::TrueType ) ); + }; +code_factor: KW_False + final { + $$->expr = LangExpr::cons( LangTerm::cons( $1->loc, + LangTerm::FalseType ) ); + }; +code_factor: '(' code_expr ')' + final { + /* Parentheses only group; the inner expression is passed through. */ + $$->expr = $2->expr; + }; +code_factor: KW_MakeTree '(' opt_code_expr_list ')' + final { + $$->expr = LangExpr::cons( LangTerm::cons( $1->loc, + LangTerm::MakeTreeType, $3->exprVect ) ); + }; +code_factor: KW_MakeToken '(' opt_code_expr_list ')' + final { + $$->expr = LangExpr::cons( LangTerm::cons( $1->loc, + LangTerm::MakeTokenType, $3->exprVect ) ); + }; +code_factor: KW_Deref code_expr + final { + $$->expr = LangExpr::cons( $1->loc, OP_Deref, $2->expr ); + }; +code_factor: string + final { + /* The term is built from the current replItemList. */ + $$->expr = LangExpr::cons( LangTerm::cons( InputLoc(), replItemList ) ); + }; + +nonterm opt_field_init uses field_init_list; + +opt_field_init: '(' opt_field_init_list ')' + final { + $$->fieldInitVect = $2->fieldInitVect; + }; +opt_field_init: + final { + /* No parenthesised initializer list: null pointer. */ + $$->fieldInitVect = 0; + }; + +nonterm opt_field_init_list uses field_init_list; + +opt_field_init_list: field_init_list + final { + $$->fieldInitVect = $1->fieldInitVect; + }; +opt_field_init_list: + final { + /* Empty parentheses also yield a null pointer, not an empty vector. */ + $$->fieldInitVect = 0; + }; + +nonterm field_init_list +{ + FieldInitVect *fieldInitVect; +}; + +field_init_list: field_init_list field_init + final { + $$->fieldInitVect = $1->fieldInitVect; + $$->fieldInitVect->append( $2->fieldInit ); + }; +field_init_list: field_init + final { + /* First item: this is where the vector is allocated. */ + $$->fieldInitVect = new FieldInitVect; + $$->fieldInitVect->append( $1->fieldInit ); + }; + +nonterm field_init +{ + FieldInit *fieldInit; +}; + 
+field_init: code_expr + final { + /* Every initializer is created with the fixed name "_name" and a default-constructed InputLoc. */ + $$->fieldInit = new FieldInit( InputLoc(), "_name", $1->expr ); + }; + +# +# Regular Expressions +# + +nonterm opt_rl_join uses rl_join; + +opt_rl_join: rl_join opt_context + final { + $$->join = $1->join; + $$->context = $2->context; + + if ( $2->context != 0 ) { + /* Create the mark action that is stored on the join together with the trailing context. */ + Action *mark = Action::cons( MarkMark, pd->nextMatchEndNum++ ); + pd->actionList.append( mark ); + + $$->join->context = $2->context; + $$->join->mark = mark; + } + }; + +opt_rl_join: + final { + /* No join given: both results are null. */ + $$->join = 0; + $$->context = 0; + }; + +nonterm rl_join +{ + Join *join; + Join *context; +}; + +rl_join: + rl_join ',' rl_expr + final { + /* Append the expression to the list and return it. */ + $1->join->exprList.append( $3->expression ); + $$->join = $1->join; + }; +rl_join: + rl_expr + final { + /* First expression: this is where the Join is allocated. */ + $$->join = new Join( $1->expression ); + }; + +# Context at the end of a pattern that is not included in the match +nonterm opt_context uses rl_join; + +opt_context: '@' rl_join final { $$->context = $2->join; }; +opt_context: final { $$->context = 0; }; + +nonterm rl_expr +{ + Expression *expression; +}; + +rl_expr: + rl_expr '|' rl_term_short final { + /* Each binary form combines the expression so far with the next term under the named operator type. */ + $$->expression = Expression::cons( $1->expression, + $3->term, Expression::OrType ); + }; +rl_expr: + rl_expr '&' rl_term_short final { + $$->expression = Expression::cons( $1->expression, + $3->term, Expression::IntersectType ); + }; +# This priority specification overrides the innermost parsing strategy which +# results ordered choice interpretation of the grammar. 
+rl_expr: + rl_expr '-' rl_term_short final { + $$->expression = Expression::cons( $1->expression, + $3->term, Expression::SubtractType ); + }; +rl_expr: + rl_expr TK_DashDash rl_term_short final { + $$->expression = Expression::cons( $1->expression, + $3->term, Expression::StrongSubtractType ); + }; +rl_expr: + rl_term_short final { + /* A lone term becomes an expression on its own. */ + $$->expression = Expression::cons( $1->term ); + }; + +nonterm rl_term_short +{ + Term *term; +}; + +shortest rl_term_short; + +rl_term_short: rl_term + final { $$->term = $1->term; }; + +nonterm rl_term +{ + Term *term; +}; + +rl_term: + rl_term factor_with_label final { + /* Juxtaposition and '.' build the same two-argument Term. */ + $$->term = Term::cons( $1->term, $2->factorWithAug ); + }; +rl_term: + rl_term '.' factor_with_label final { + $$->term = Term::cons( $1->term, $3->factorWithAug ); + }; +rl_term: + rl_term TK_ColonGt factor_with_label final { + /* The remaining operators pass an explicit Term type. */ + $$->term = Term::cons( $1->term, $3->factorWithAug, Term::RightStartType ); + }; +rl_term: + rl_term TK_ColonGtGt factor_with_label final { + $$->term = Term::cons( $1->term, $3->factorWithAug, Term::RightFinishType ); + }; +rl_term: + rl_term TK_LtColon factor_with_label final { + $$->term = Term::cons( $1->term, + $3->factorWithAug, Term::LeftType ); + }; +rl_term: + factor_with_label final { + $$->term = Term::cons( $1->factorWithAug ); + }; + +nonterm factor_with_label +{ + FactorWithAug *factorWithAug; +}; + +factor_with_label: + factor_with_ep final { + $$->factorWithAug = $1->factorWithAug; + }; + +factor_with_label: + TK_Word ':' factor_with_label final { + /* A label reuses the inner factor and adds a capture field and mark actions to it. */ + $$->factorWithAug = $3->factorWithAug; + + if ( pd->objectDef->checkRedecl( $1->data ) != 0 ) + error($1->loc) << "label name \"" << $1->data << "\" already in use" << endp; + + /* Create the object field. Its type is a reference to the type named "str". */ + NamespaceQual *qual = new NamespaceQual( namespaceStack.top(), regionStack.top() ); + TypeRef *typeRef = TypeRef::cons( $1->loc, qual, "str" ); + ObjField *objField = new ObjField( $1->loc, typeRef, $1->data ); + + /* Insert it into the map. 
*/ + pd->objectDef->insertField( $1->data, objField ); + + /* Create the enter and leaving actions that will mark the substring. Each takes its own number from nextMatchEndNum. */ + Action *enter = Action::cons( MarkMark, pd->nextMatchEndNum++ ); + Action *leave = Action::cons( MarkMark, pd->nextMatchEndNum++ ); + pd->actionList.append( enter ); + pd->actionList.append( leave ); + + /* Add entering and leaving actions. */ + $$->factorWithAug->actions.append( ParserAction( $1->loc, at_start, 0, enter ) ); + $$->factorWithAug->actions.append( ParserAction( $1->loc, at_leave, 0, leave ) ); + + /* Record the capture: both actions plus the field. */ + reCaptureVect.append( ReCapture( enter, leave, objField ) ); + }; + +nonterm factor_with_ep +{ + FactorWithAug *factorWithAug; +}; + +factor_with_ep: + factor_with_aug final { + $$->factorWithAug = $1->factorWithAug; + }; + +nonterm factor_with_aug +{ + FactorWithAug *factorWithAug; +}; + +factor_with_aug: + factor_with_rep final { + /* This is where the FactorWithAug wrapper is created. */ + $$->factorWithAug = FactorWithAug::cons( $1->factorWithRep ); + }; + + +# The fourth level of precedence. These are the trailing unary operators that +# allow for repetition. + +nonterm factor_with_rep +{ + FactorWithRep *factorWithRep; +}; + +factor_with_rep: + factor_with_rep '*' final { + /* The two integer arguments are passed as 0 by the forms that take no repetition count. */ + $$->factorWithRep = FactorWithRep::cons( $2->loc, $1->factorWithRep, + 0, 0, FactorWithRep::StarType ); + }; +factor_with_rep: + factor_with_rep TK_StarStar final { + $$->factorWithRep = FactorWithRep::cons( $2->loc, $1->factorWithRep, + 0, 0, FactorWithRep::StarStarType ); + }; +factor_with_rep: + factor_with_rep '?' 
final { + $$->factorWithRep = FactorWithRep::cons( $2->loc, $1->factorWithRep, + 0, 0, FactorWithRep::OptionalType ); + }; +factor_with_rep: + factor_with_rep '+' final { + $$->factorWithRep = FactorWithRep::cons( $2->loc, $1->factorWithRep, + 0, 0, FactorWithRep::PlusType ); + }; +factor_with_rep: + factor_with_rep '{' factor_rep_num '}' final { + $$->factorWithRep = FactorWithRep::cons( $2->loc, $1->factorWithRep, + $3->rep, 0, FactorWithRep::ExactType ); + }; +factor_with_rep: + factor_with_rep '{' ',' factor_rep_num '}' final { + $$->factorWithRep = FactorWithRep::cons( $2->loc, $1->factorWithRep, + 0, $4->rep, FactorWithRep::MaxType ); + }; +factor_with_rep: + factor_with_rep '{' factor_rep_num ',' '}' final { + $$->factorWithRep = FactorWithRep::cons( $2->loc, $1->factorWithRep, + $3->rep, 0, FactorWithRep::MinType ); + }; +factor_with_rep: + factor_with_rep '{' factor_rep_num ',' factor_rep_num '}' final { + $$->factorWithRep = FactorWithRep::cons( $2->loc, $1->factorWithRep, + $3->rep, $5->rep, FactorWithRep::RangeType ); + }; +factor_with_rep: + factor_with_neg final { + $$->factorWithRep = FactorWithRep::cons( + $1->factorWithNeg->loc, $1->factorWithNeg ); + }; + +nonterm factor_rep_num +{ + int rep; +}; + +factor_rep_num: + TK_UInt final { + // Convert the priority number to a long. Check for overflow. + errno = 0; + int rep = strtol( $1->data, 0, 10 ); + if ( errno == ERANGE && rep == LONG_MAX ) { + // Repetition too large. Recover by returing repetition 1. */ + error($1->loc) << "repetition number " << $1->data << " overflows" << endl; + $$->rep = 1; + } + else { + // Cannot be negative, so no overflow. + $$->rep = rep; + } + }; + + +# +# The fifth level up in precedence. Negation. +# + +nonterm factor_with_neg +{ + FactorWithNeg *factorWithNeg; +}; + +factor_with_neg: + '!' 
factor_with_neg final { + $$->factorWithNeg = FactorWithNeg::cons( $1->loc, + $2->factorWithNeg, FactorWithNeg::NegateType ); + }; +factor_with_neg: + '^' factor_with_neg final { + $$->factorWithNeg = FactorWithNeg::cons( $1->loc, + $2->factorWithNeg, FactorWithNeg::CharNegateType ); + }; +factor_with_neg: + rl_factor final { + $$->factorWithNeg = FactorWithNeg::cons( $1->factor->loc, $1->factor ); + }; + +nonterm rl_factor +{ + Factor *factor; +}; + +rl_factor: + TK_Literal final { + /* Create a new factor node going to a concat literal. */ + $$->factor = Factor::cons( Literal::cons( $1->loc, $1->data, Literal::LitString ) ); + }; +rl_factor: + alphabet_num final { + /* Create a new factor node going to a literal number. */ + $$->factor = Factor::cons( Literal::cons( $1->loc, + $1->data, Literal::Number ) ); + }; +rl_factor: + TK_Word final { + /* Find the named graph. */ + Namespace *nspace = namespaceStack.top(); + + while ( nspace != 0 ) { + GraphDictEl *gdNode = nspace->rlMap.find( $1->data ); + if ( gdNode != 0 ) { + if ( gdNode->isInstance ) { + /* Recover by retuning null as the factor node. */ + error($1->loc) << "references to graph instantiations not allowed " + "in expressions" << endl; + $$->factor = 0; + } + else { + /* Create a factor node that is a lookup of an expression. */ + $$->factor = Factor::cons( $1->loc, gdNode->value ); + } + break; + } + + nspace = nspace->parentNamespace; + } + + if ( nspace == 0 ) { + /* Recover by returning null as the factor node. */ + error($1->loc) << "graph lookup of \"" << $1->data << "\" failed" << endl; + $$->factor = 0; + } + }; +rl_factor: + TK_SqOpen regular_expr_or_data TK_SqClose final { + /* Create a new factor node going to an OR expression. */ + $$->factor = Factor::cons( ReItem::cons( $1->loc, $2->reOrBlock, ReItem::OrBlock ) ); + }; +rl_factor: + TK_SqOpenNeg regular_expr_or_data TK_SqClose final { + /* Create a new factor node going to a negated OR expression. 
*/ + $$->factor = Factor::cons( ReItem::cons( $1->loc, $2->reOrBlock, ReItem::NegOrBlock ) ); + }; +rl_factor: + range_lit TK_DotDot range_lit final { + /* Create a new factor node going to a range. */ + $$->factor = Factor::cons( Range::cons( $1->literal, $3->literal ) ); + }; +rl_factor: + '(' rl_join ')' final { + /* Create a new factor going to a parenthesized join. */ + $$->factor = Factor::cons( $2->join ); + }; + +nonterm range_lit +{ + Literal *literal; +}; + +# Literals which can be the end points of ranges. +range_lit: + TK_Literal final { + /* Range literals must have only one char. We restrict this in the parse tree. */ + $$->literal = Literal::cons( $1->loc, $1->data, Literal::LitString ); + }; +range_lit: + alphabet_num final { + /* Create a new literal number. */ + $$->literal = Literal::cons( $1->loc, $1->data, Literal::Number ); + }; + +nonterm alphabet_num uses token_data; + +# Any form of a number that can be used as a basic machine. */ +alphabet_num: + TK_UInt final { + /* Unsigned decimal: pass the token through unchanged. */ + $$->loc = $1->loc; + $$->data = $1->data; + }; +alphabet_num: + '-' TK_UInt final { + /* Negative number: the text is the minus sign followed by the digits, located at the sign. The sign must be kept so the literal stays negative. */ + $$->loc = $1->loc; + $$->data = '-'; + $$->data += $2->data; + }; +alphabet_num: + TK_Hex final { + /* Hex number: pass the token through unchanged. */ + $$->loc = $1->loc; + $$->data = $1->data; + }; + +# +# Regular Expressions. +# + + +# The data inside of a [] expression in a regular expression. Accepts any +# number of characters or ranges. */ +nonterm regular_expr_or_data +{ + ReOrBlock *reOrBlock; +}; + +regular_expr_or_data: + regular_expr_or_data regular_expr_or_char final { + /* An optimization to lessen the tree size. If an or char is directly + * under the left side on the right and the right side is another or + * char then paste them together and return the left side. Otherwise + * just put the two under a new or data node. 
*/ + if ( $2->reOrItem->type == ReOrItem::Data && + $1->reOrBlock->type == ReOrBlock::RecurseItem && + $1->reOrBlock->item->type == ReOrItem::Data ) + { + /* Append the right side to right side of the left and toss the + * right side. */ + $1->reOrBlock->item->data += $2->reOrItem->data; + delete $2->reOrItem; + $$->reOrBlock = $1->reOrBlock; + } + else { + /* Can't optimize, put the left and right under a new node. */ + $$->reOrBlock = ReOrBlock::cons( $1->reOrBlock, $2->reOrItem ); + } + }; +regular_expr_or_data: + final { + /* Empty data: start from a block constructed with no arguments. */ + $$->reOrBlock = ReOrBlock::cons(); + }; + +# A single character inside of an or expression. Can either be a character or a +# set of characters. +nonterm regular_expr_or_char +{ + ReOrItem *reOrItem; +}; + +regular_expr_or_char: + TK_ReChar final { + $$->reOrItem = ReOrItem::cons( $1->loc, $1->data ); + }; +regular_expr_or_char: + TK_ReChar TK_Dash TK_ReChar final { + /* Range form: only the first character of each end point is used, and the location is that of the dash. */ + $$->reOrItem = ReOrItem::cons( $2->loc, $1->data[0], $3->data[0] ); + }; + +# A local state reference. Cannot have :: prefix. +local_state_ref: + no_name_sep state_ref_names; + +# Clear the name ref structure. +no_name_sep: + final { + nameRef.empty(); + }; + +# A qualified state reference. +state_ref: opt_name_sep state_ref_names; + +# Optional leading name separator. +opt_name_sep: + TK_NameSep + final { + /* Insert an initial null pointer val to indicate the existence of the + * initial name separator. */ + nameRef.setAs( 0 ); + }; +opt_name_sep: + final { + nameRef.empty(); + }; + +# List of names separated by :: +state_ref_names: + state_ref_names TK_NameSep TK_Word + final { + nameRef.append( $3->data ); + }; +state_ref_names: + TK_Word + final { + nameRef.append( $1->data ); + }; + +nonterm opt_commit +{ + bool commit; +}; + +opt_commit: final { $$->commit = false; }; +opt_commit: KW_Commit final { $$->commit = true; }; + +# +# Grammar Finished +# + + write types; + write data; +}%% + +void ColmParser::init() +{ + /* Set up the root namespace. 
*/
+	const char *rootNamespaceName = "___ROOT_NAMESPACE";
+	Namespace *rootNamespace = new Namespace( InputLoc(),
+			rootNamespaceName, pd->namespaceList.length(), 0 );
+	pd->namespaceList.append( rootNamespace );
+	namespaceStack.push( rootNamespace );
+	pd->rootNamespace = rootNamespace;
+
+	/* Set up the root token region. It is appended to pd->regionList,
+	 * registered under its name in the namespace on top of namespaceStack
+	 * (the root namespace pushed just above) and pushed onto regionStack. */
+	const char *rootRegionName = "___ROOT_REGION";
+
+	TokenRegion *rootRegion = new TokenRegion( InputLoc(), rootRegionName,
+			pd->regionList.length(), 0 );
+	pd->regionList.append( rootRegion );
+	addRegionDef( InputLoc(), namespaceStack.top(), rootRegionName, rootRegion );
+
+	regionStack.push( rootRegion );
+
+	pd->rootRegion = rootRegion;
+
+	/* Set up the global object. Its id is taken from pd->nextObjectId,
+	 * which is post-incremented for each object created in this function,
+	 * so the order of these statements determines the ids. */
+	String global = "global";
+	pd->globalObjectDef = new ObjectDef( ObjectDef::UserType,
+			global, pd->nextObjectId++ );
+
+	/* The eofTokenRegion defaults to the root region. */
+	pd->eofTokenRegion = rootRegion;
+
+	/* Initialize the dictionary of graphs. This is our symbol table. The
+	 * initialization needs to be done on construction which happens at the
+	 * beginning of a machine spec so any assignment operators can reference
+	 * the builtins. */
+	pd->initGraphDict();
+
+	pd->rootLocalFrame = new ObjectDef( ObjectDef::FrameType,
+			"local", pd->nextObjectId++ );
+	pd->curLocalFrame = pd->rootLocalFrame;	/* the root frame starts out as the current frame */
+
+	%% write init;
+
+	addArgvList();	/* sets pd->argvTypeRef */
+}
+
+void ColmParser::addArgvList()
+{	/* Build pd->argvTypeRef as a TypeRef::List whose element type is a
+	 * reference to "str". Each of the two TypeRefs gets its own
+	 * NamespaceQual, both made from the current tops of namespaceStack and
+	 * regionStack. */
+	NamespaceQual *nspaceQual1 = new NamespaceQual(
+			namespaceStack.top(), regionStack.top() );
+	TypeRef *typeRef = TypeRef::cons( InputLoc(), nspaceQual1, "str" );
+
+	NamespaceQual *nspaceQual2 = new NamespaceQual(
+			namespaceStack.top(), regionStack.top() );
+
+	pd->argvTypeRef = TypeRef::cons( InputLoc(), TypeRef::List,
+			nspaceQual2, typeRef, 0 );
+}
+
+int ColmParser::parseLangEl( int type, const Token *token )
+{	/* The "%% write exec" directive below marks where the parser generator
+	 * places its code (presumably the parse step for this token -- confirm
+	 * against the generator's documentation). The result is 0 while
+	 * errCount is zero and -1 otherwise. */
+	%% write exec;
+	return errCount == 0 ?
0 : -1; +} + +void ColmParser::addRegularDef( const InputLoc &loc, Namespace *nspace, + const String &name, Join *join ) +{ + GraphDictEl *newEl = nspace->rlMap.insert( name ); + if ( newEl != 0 ) { + /* New element in the dict, all good. */ + newEl->value = new VarDef( name, join ); + newEl->isInstance = false; + newEl->loc = loc; + } + else { + // Recover by ignoring the duplicate. + error(loc) << "regular definition \"" << name << "\" already exists" << endl; + } +} + +TokenRegion *ColmParser::createRegion( String &scannerName ) +{ + TokenRegion *tokenRegion = new TokenRegion( InputLoc(), scannerName, + pd->regionList.length(), regionStack.top() ); + + regionStack.top()->childRegions.append( tokenRegion ); + + pd->regionList.append( tokenRegion ); + + addRegionDef( InputLoc(), namespaceStack.top(), scannerName, tokenRegion ); + + return tokenRegion; +} + + +void ColmParser::addRegionDef( const InputLoc &loc, Namespace *nspace, + const String &name, TokenRegion *tokenRegion ) +{ + RegionGraphDictEl *newEl = nspace->graphDict.insert( name ); + if ( newEl != 0 ) { + /* New element in the dict, all good. */ + newEl->value = new RegionDef( name, tokenRegion ); + newEl->isInstance = true; + newEl->loc = loc; + + /* It it is an instance, put on the instance list. */ + pd->instanceList.append( newEl ); + } + else { + // Recover by ignoring the duplicate. + error(loc) << "regular definition \"" << name << "\" already exists" << endl; + } +} + +ostream &ColmParser::parse_error( int tokId, Token &token ) +{ + /* Maintain the error count. 
*/ + gblErrorCount += 1; + + cerr << token.loc.fileName << ":" << token.loc.line << ":" << token.loc.col << ": "; + cerr << "at token "; + if ( tokId < 128 ) + cerr << "\"" << ColmParser_lelNames[tokId] << "\""; + else + cerr << ColmParser_lelNames[tokId]; + if ( token.data != 0 ) + cerr << " with data \"" << token.data << "\""; + cerr << ": "; + + return cerr; +} + +int ColmParser::token( InputLoc &loc, int tokId, char *tokstart, int toklen ) +{ + Token token; + + if ( toklen > 0 ) + token.data.setAs( tokstart, toklen ); + + token.loc = loc; + int res = parseLangEl( tokId, &token ); + if ( res < 0 ) { + parse_error(tokId, token) << "parse error" << endl; + exit(1); + } + return res; +} |