/* * Copyright 2001-2006 Adrian Thurston */ /* This file is part of Colm. * * Colm is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * Colm is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with Colm; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef _PARSETREE_H #define _PARSETREE_H #include #include #include "colm.h" #include "avlmap.h" #include "bstmap.h" #include "bstset.h" #include "vector.h" #include "dlist.h" #include "astring.h" #include "bytecode.h" #include "avlbasic.h" /* Operators that are represented with single symbol characters. */ #define OP_DoubleEql 'e' #define OP_NotEql 'q' #define OP_LessEql 'l' #define OP_GrtrEql 'g' #define OP_LogicalAnd 'a' #define OP_LogicalOr 'o' #define OP_Deref 'd' struct NameInst; struct FsmGraph; struct RedFsm; struct FsmTables; struct FsmRun; struct ObjectDef; struct ElementOf; struct UniqueType; struct ObjField; struct TransBlock; struct CodeBlock; /* Types of builtin machines. */ enum BuiltinMachine { BT_Any, BT_Ascii, BT_Extend, BT_Alpha, BT_Digit, BT_Alnum, BT_Lower, BT_Upper, BT_Cntrl, BT_Graph, BT_Print, BT_Punct, BT_Space, BT_Xdigit, BT_Lambda, BT_Empty }; typedef BstSet CharSet; struct ParseData; struct TypeRef; /* Leaf type. */ struct Literal; /* Tree nodes. */ struct Term; struct FactorWithAug; struct FactorWithRep; struct FactorWithNeg; struct Factor; struct Expression; struct Join; struct JoinOrLm; struct TokenRegion; struct Namespace; struct TokenDef; struct TokenDefList; struct Range; struct KlangEl; /* Type of augmentation. Describes locations in the machine. */ enum AugType { /* Transition actions/priorities. */ at_start, at_all, at_finish, at_leave, /* Global error actions. */ at_start_gbl_error, at_all_gbl_error, at_final_gbl_error, at_not_start_gbl_error, at_not_final_gbl_error, at_middle_gbl_error, /* Local error actions. */ at_start_local_error, at_all_local_error, at_final_local_error, at_not_start_local_error, at_not_final_local_error, at_middle_local_error, /* To State Action embedding. */ at_start_to_state, at_all_to_state, at_final_to_state, at_not_start_to_state, at_not_final_to_state, at_middle_to_state, /* From State Action embedding. */ at_start_from_state, at_all_from_state, at_final_from_state, at_not_start_from_state, at_not_final_from_state, at_middle_from_state, /* EOF Action embedding. */ at_start_eof, at_all_eof, at_final_eof, at_not_start_eof, at_not_final_eof, at_middle_eof }; /* IMPORTANT: These must follow the same order as the state augs in AugType * since we will be using this to compose AugType. */ enum StateAugType { sat_start = 0, sat_all, sat_final, sat_not_start, sat_not_final, sat_middle }; struct Action; struct PriorDesc; struct RegExpr; struct ReItem; struct ReOrBlock; struct ReOrItem; struct ExplicitMachine; struct InlineItem; struct InlineList; /* Reference to a named state. */ typedef Vector NameRef; typedef Vector NameRefList; typedef Vector NameTargList; /* Structure for storing location of epsilon transitons. */ struct EpsilonLink { EpsilonLink( const InputLoc &loc, NameRef &target ) : loc(loc), target(target) { } InputLoc loc; NameRef target; }; struct Label { Label( const InputLoc &loc, const String &data, ObjField *objField ) : loc(loc), data(data), objField(objField) { } InputLoc loc; String data; ObjField *objField; }; /* Structure represents an action assigned to some FactorWithAug node. The * factor with aug will keep an array of these. */ struct ParserAction { ParserAction( const InputLoc &loc, AugType type, int localErrKey, Action *action ) : loc(loc), type(type), localErrKey(localErrKey), action(action) { } InputLoc loc; AugType type; int localErrKey; Action *action; }; struct Token { String data; InputLoc loc; }; void prepareLitString( String &result, bool &caseInsensitive, const String &srcString, const InputLoc &loc ); std::ostream &operator<<(std::ostream &out, const Token &token ); typedef AvlMap< String, KlangEl*, CmpStr > LiteralDict; typedef AvlMapEl< String, KlangEl* > LiteralDictEl; /* Store the value and type of a priority augmentation. */ struct PriorityAug { PriorityAug( AugType type, int priorKey, int priorValue ) : type(type), priorKey(priorKey), priorValue(priorValue) { } AugType type; int priorKey; int priorValue; }; /* * A Variable Definition */ struct VarDef { VarDef( const String &name, JoinOrLm *joinOrLm ) : name(name), joinOrLm(joinOrLm) { } /* Parse tree traversal. */ FsmGraph *walk( ParseData *pd ); void makeNameTree( const InputLoc &loc, ParseData *pd ); void resolveNameRefs( ParseData *pd ); String name; JoinOrLm *joinOrLm; }; typedef Vector StringVect; typedef CmpTable CmpStrVect; struct NamespaceQual { NamespaceQual( Namespace *declInNspace, TokenRegion *declInRegion ) : cachedNspaceQual(0), declInNspace(declInNspace) {} Namespace *cachedNspaceQual; Namespace *declInNspace; StringVect qualNames; Namespace *searchFrom( Namespace *from, StringVect::Iter &qualPart ); Namespace *getQual( ParseData *pd ); }; struct TokenDef { TokenDef( Join *join, KlangEl *token, InputLoc &semiLoc, int longestMatchId, Namespace *nspace, TokenRegion *tokenRegion ) : join(join), action(0), token(token), semiLoc(semiLoc), longestMatchId(longestMatchId), inLmSelect(false), nspace(nspace), tokenRegion(tokenRegion) {} InputLoc getLoc(); Join *join; Action *action; KlangEl *token; InputLoc semiLoc; Action *setActId; Action *actOnLast; Action *actOnNext; Action *actLagBehind; int longestMatchId; bool inLmSelect; Namespace *nspace; TokenRegion *tokenRegion; TokenDef *prev, *next; }; /* Declare a new type so that ptreetypes.h need not include dlist.h. */ struct TokenDefList : DList {}; /* Symbol Map. */ typedef AvlMap< String, KlangEl*, CmpStr > SymbolMap; typedef AvlMapEl< String, KlangEl* > SymbolMapEl; typedef Vector RegionVect; struct TokenRegion { /* Construct with a list of joins */ TokenRegion( const InputLoc &loc, const String &name, int id, TokenRegion *parentRegion ) : loc(loc), name(name), id(id), lmSwitchHandlesError(false), regionNameInst(0), parentRegion(parentRegion), defaultTokenDef(0), preEofBlock(0) { } /* Tree traversal. */ FsmGraph *walk( ParseData *pd ); void makeNameTree( ParseData *pd ); void resolveNameRefs( ParseData *pd ); void runLongestMatch( ParseData *pd, FsmGraph *graph ); void transferScannerLeavingActions( FsmGraph *graph ); Action *newAction( ParseData *pd, const InputLoc &loc, const String &name, InlineList *inlineList ); void makeActions( ParseData *pd ); void findName( ParseData *pd ); void restart( FsmGraph *graph, FsmTrans *trans ); InputLoc loc; TokenDefList tokenDefList; String name; int id; Action *lmActSelect; bool lmSwitchHandlesError; /* This gets saved off during the name walk. Can save it off because token * regions are referenced once only. */ NameInst *regionNameInst; TokenRegion *parentRegion; RegionVect childRegions; TokenDef *defaultTokenDef; TokenRegion *next, *prev; CodeBlock *preEofBlock; }; typedef DList RegionList; typedef BstSet< TokenRegion*, CmpOrd > RegionSet; typedef Vector NamespaceVect; struct GenericType : public DListEl { GenericType( const String &name, long typeId, long id, KlangEl *langEl, TypeRef *typeArg ) : name(name), typeId(typeId), id(id), langEl(langEl), typeArg(typeArg), keyTypeArg(0), utArg(0), keyUT(0), objDef(0) {} const String &getKey() const { return name; }; String name; long typeId; long id; KlangEl *langEl; TypeRef *typeArg; TypeRef *keyTypeArg; UniqueType *utArg; UniqueType *keyUT; ObjectDef *objDef; }; typedef DList GenericList; struct UserIter; typedef AvlMap UserIterMap; typedef AvlMapEl UserIterMapEl; /* Graph dictionary. */ struct GraphDictEl : public AvlTreeEl, public DListEl { GraphDictEl( const String &key ) : key(key), value(0), isInstance(false) { } GraphDictEl( const String &key, VarDef *value ) : key(key), value(value), isInstance(false) { } const String &getKey() { return key; } String key; VarDef *value; bool isInstance; /* Location info of graph definition. Points to variable name of assignment. */ InputLoc loc; }; typedef AvlTree GraphDict; typedef DList GraphList; struct Namespace { /* Construct with a list of joins */ Namespace( const InputLoc &loc, const String &name, int id, Namespace *parentNamespace ) : loc(loc), name(name), id(id), parentNamespace(parentNamespace) { } /* Tree traversal. */ Namespace *findNamespace( const String &name ); InputLoc loc; String name; int id; /* Literal patterns and the dictionary mapping literals to the underlying * tokens. */ LiteralDict literalDict; /* Dictionary of symbols within the region. */ SymbolMap symbolMap; GenericList genericList; /* Dictionary of graphs. Both instances and non-instances go here. */ GraphDict graphDict; Namespace *parentNamespace; NamespaceVect childNamespaces; Namespace *next, *prev; }; typedef DList NamespaceList; typedef BstSet< Namespace*, CmpOrd > NamespaceSet; /* List of Expressions. */ typedef DList ExprList; struct JoinOrLm { enum Type { JoinType, LongestMatchType }; JoinOrLm( Join *join ) : join(join), type(JoinType) {} JoinOrLm( TokenRegion *tokenRegion ) : tokenRegion(tokenRegion), type(LongestMatchType) {} FsmGraph *walk( ParseData *pd ); void makeNameTree( ParseData *pd ); void resolveNameRefs( ParseData *pd ); Join *join; TokenRegion *tokenRegion; Type type; }; /* * Join */ struct Join { /* Construct with the first expression. */ Join( Expression *expr ); Join( const InputLoc &loc, Expression *expr ); /* Tree traversal. */ FsmGraph *walk( ParseData *pd ); FsmGraph *walkJoin( ParseData *pd ); void makeNameTree( ParseData *pd ); void resolveNameRefs( ParseData *pd ); /* Data. */ InputLoc loc; ExprList exprList; }; /* * Expression */ struct Expression { enum Type { OrType, IntersectType, SubtractType, StrongSubtractType, TermType, BuiltinType }; /* Construct with an expression on the left and a term on the right. */ Expression( Expression *expression, Term *term, Type type ) : expression(expression), term(term), builtin(builtin), type(type), prev(this), next(this) { } /* Construct with only a term. */ Expression( Term *term ) : expression(0), term(term), builtin(builtin), type(TermType) , prev(this), next(this) { } /* Construct with a builtin type. */ Expression( BuiltinMachine builtin ) : expression(0), term(0), builtin(builtin), type(BuiltinType), prev(this), next(this) { } ~Expression(); /* Tree traversal. */ FsmGraph *walk( ParseData *pd, bool lastInSeq = true ); void makeNameTree( ParseData *pd ); void resolveNameRefs( ParseData *pd ); /* Node data. */ Expression *expression; Term *term; BuiltinMachine builtin; Type type; Expression *prev, *next; }; /* * Term */ struct Term { enum Type { ConcatType, RightStartType, RightFinishType, LeftType, FactorWithAugType }; Term( Term *term, FactorWithAug *factorWithAug ) : term(term), factorWithAug(factorWithAug), type(ConcatType) { } Term( Term *term, FactorWithAug *factorWithAug, Type type ) : term(term), factorWithAug(factorWithAug), type(type) { } Term( FactorWithAug *factorWithAug ) : term(0), factorWithAug(factorWithAug), type(FactorWithAugType) { } ~Term(); FsmGraph *walk( ParseData *pd, bool lastInSeq = true ); void makeNameTree( ParseData *pd ); void resolveNameRefs( ParseData *pd ); Term *term; FactorWithAug *factorWithAug; Type type; /* Priority descriptor for RightFinish type. */ PriorDesc priorDescs[2]; }; /* Third level of precedence. Augmenting nodes with actions and priorities. */ struct FactorWithAug { FactorWithAug( FactorWithRep *factorWithRep ) : priorDescs(0), factorWithRep(factorWithRep) { } ~FactorWithAug(); /* Tree traversal. */ FsmGraph *walk( ParseData *pd ); void makeNameTree( ParseData *pd ); void resolveNameRefs( ParseData *pd ); void assignActions( ParseData *pd, FsmGraph *graph, int *actionOrd ); void assignPriorities( FsmGraph *graph, int *priorOrd ); void assignConditions( FsmGraph *graph ); /* Actions and priorities assigned to the factor node. */ Vector actions; Vector priorityAugs; PriorDesc *priorDescs; Vector