/* * Copyright 2006-2011 Adrian Thurston */ /* This file is part of Colm. * * Colm is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * Colm is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with Colm; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef _PARSETREE_H #define _PARSETREE_H #include #include #include "colm.h" #include "avlmap.h" #include "bstmap.h" #include "bstset.h" #include "vector.h" #include "dlist.h" #include "dlistval.h" #include "dlistmel.h" #include "astring.h" #include "bytecode.h" #include "avlbasic.h" #include "fsmrun.h" /* Operators that are represented with single symbol characters. */ #define OP_DoubleEql 'e' #define OP_NotEql 'q' #define OP_LessEql 'l' #define OP_GrtrEql 'g' #define OP_LogicalAnd 'a' #define OP_LogicalOr 'o' #define OP_Deref 'd' #if SIZEOF_LONG != 4 && SIZEOF_LONG != 8 #error "SIZEOF_LONG contained an unexpected value" #endif struct NameInst; struct FsmGraph; struct RedFsm; struct _FsmRun; struct ObjectDef; struct ElementOf; struct UniqueType; struct ObjField; struct TransBlock; struct CodeBlock; struct PdaLiteral; struct TypeAlias; typedef struct _PdaRun PdaRun; /* * Code Vector */ struct CodeVect : public Vector { void appendHalf( Half half ) { /* not optimal. */ append( half & 0xff ); append( (half>>8) & 0xff ); } void appendWord( Word word ) { /* not optimal. */ append( word & 0xff ); append( (word>>8) & 0xff ); append( (word>>16) & 0xff ); append( (word>>24) & 0xff ); #if SIZEOF_LONG == 8 append( (word>>32) & 0xff ); append( (word>>40) & 0xff ); append( (word>>48) & 0xff ); append( (word>>56) & 0xff ); #endif } void setHalf( long pos, Half half ) { /* not optimal. */ data[pos] = half & 0xff; data[pos+1] = (half>>8) & 0xff; } void insertHalf( long pos, Half half ) { /* not optimal. */ insert( pos, half & 0xff ); insert( pos+1, (half>>8) & 0xff ); } void insertWord( long pos, Word word ) { /* not at all optimal. */ insert( pos, word & 0xff ); insert( pos+1, (word>>8) & 0xff ); insert( pos+2, (word>>16) & 0xff ); insert( pos+3, (word>>24) & 0xff ); #if SIZEOF_LONG == 8 insert( pos+4, (word>>32) & 0xff ); insert( pos+5, (word>>40) & 0xff ); insert( pos+6, (word>>48) & 0xff ); insert( pos+7, (word>>56) & 0xff ); #endif } void insertTree( long pos, Tree *tree ) { insertWord( pos, (Word) tree ); } }; /* Types of builtin machines. */ enum BuiltinMachine { BT_Any, BT_Ascii, BT_Extend, BT_Alpha, BT_Digit, BT_Alnum, BT_Lower, BT_Upper, BT_Cntrl, BT_Graph, BT_Print, BT_Punct, BT_Space, BT_Xdigit, BT_Lambda, BT_Empty }; typedef BstSet CharSet; struct ParseData; struct TypeRef; /* Leaf type. */ struct Literal; /* Tree nodes. */ struct Term; struct FactorWithAug; struct FactorWithRep; struct FactorWithNeg; struct Factor; struct Expression; struct Join; struct JoinOrLm; struct TokenRegion; struct Namespace; struct Context; struct TokenDef; struct TokenDefListReg; struct TokenDefListNs; struct Range; struct LangEl; /* Type of augmentation. Describes locations in the machine. */ enum AugType { /* Transition actions/priorities. */ at_start, at_all, at_finish, at_leave, /* Global error actions. */ at_start_gbl_error, at_all_gbl_error, at_final_gbl_error, at_not_start_gbl_error, at_not_final_gbl_error, at_middle_gbl_error, /* Local error actions. */ at_start_local_error, at_all_local_error, at_final_local_error, at_not_start_local_error, at_not_final_local_error, at_middle_local_error, /* To State Action embedding. */ at_start_to_state, at_all_to_state, at_final_to_state, at_not_start_to_state, at_not_final_to_state, at_middle_to_state, /* From State Action embedding. */ at_start_from_state, at_all_from_state, at_final_from_state, at_not_start_from_state, at_not_final_from_state, at_middle_from_state, /* EOF Action embedding. */ at_start_eof, at_all_eof, at_final_eof, at_not_start_eof, at_not_final_eof, at_middle_eof }; /* IMPORTANT: These must follow the same order as the state augs in AugType * since we will be using this to compose AugType. */ enum StateAugType { sat_start = 0, sat_all, sat_final, sat_not_start, sat_not_final, sat_middle }; struct Action; struct PriorDesc; struct RegExpr; struct ReItem; struct ReOrBlock; struct ReOrItem; struct ExplicitMachine; struct InlineItem; struct InlineList; /* Reference to a named state. */ typedef Vector NameRef; typedef Vector NameRefList; typedef Vector NameTargList; /* Structure for storing location of epsilon transitons. */ struct EpsilonLink { EpsilonLink( const InputLoc &loc, NameRef &target ) : loc(loc), target(target) { } InputLoc loc; NameRef target; }; struct Label { Label( const InputLoc &loc, const String &data, ObjField *objField ) : loc(loc), data(data), objField(objField) { } InputLoc loc; String data; ObjField *objField; }; /* Structure represents an action assigned to some FactorWithAug node. The * factor with aug will keep an array of these. */ struct ParserAction { ParserAction( const InputLoc &loc, AugType type, int localErrKey, Action *action ) : loc(loc), type(type), localErrKey(localErrKey), action(action) { } InputLoc loc; AugType type; int localErrKey; Action *action; }; struct Token { String data; InputLoc loc; }; void prepareLitString( String &result, bool &caseInsensitive, const String &srcString, const InputLoc &loc ); std::ostream &operator<<(std::ostream &out, const Token &token ); typedef AvlMap< String, TokenDef*, CmpStr > LiteralDict; typedef AvlMapEl< String, TokenDef* > LiteralDictEl; /* Store the value and type of a priority augmentation. */ struct PriorityAug { PriorityAug( AugType type, int priorKey, int priorValue ) : type(type), priorKey(priorKey), priorValue(priorValue) { } AugType type; int priorKey; int priorValue; }; /* * A Variable Definition */ struct VarDef { VarDef( const String &name, JoinOrLm *joinOrLm ) : name(name), joinOrLm(joinOrLm) { } /* Parse tree traversal. */ FsmGraph *walk( ParseData *pd ); void makeNameTree( const InputLoc &loc, ParseData *pd ); void resolveNameRefs( ParseData *pd ); String name; JoinOrLm *joinOrLm; }; typedef Vector StringVect; typedef CmpTable CmpStrVect; struct NamespaceQual { NamespaceQual( Namespace *declInNspace, TokenRegion *declInRegion ) : cachedNspaceQual(0), declInNspace(declInNspace) {} Namespace *cachedNspaceQual; Namespace *declInNspace; StringVect qualNames; Namespace *searchFrom( Namespace *from, StringVect::Iter &qualPart ); Namespace *getQual( ParseData *pd ); }; struct ReCapture { ReCapture( Action *markEnter, Action *markLeave, ObjField *objField ) : markEnter(markEnter), markLeave(markLeave), objField(objField) {} Action *markEnter; Action *markLeave; ObjField *objField; }; typedef Vector ContextVect; struct Context { Context( InputLoc &loc, LangEl *lel ) : loc(loc), lel(lel) {} InputLoc loc; LangEl *lel; ObjectDef *contextObjDef; }; typedef Vector ReCaptureVect; struct TokenDefPtr1 { TokenDef *prev, *next; }; struct TokenDefPtr2 { TokenDef *prev, *next; }; struct TokenDef : public TokenDefPtr1, public TokenDefPtr2 { TokenDef( const String &name, const String &literal, bool isLiteral, bool ignore, Join *join, CodeBlock *codeBlock, InputLoc &semiLoc, int longestMatchId, Namespace *nspace, TokenRegion *tokenRegion, ReCaptureVect *pReCaptureVect, ObjectDef *objectDef, Context *contextIn ) : name(name), literal(literal), isLiteral(isLiteral), ignore(ignore), join(join), action(0), codeBlock(codeBlock), token(0), semiLoc(semiLoc), longestMatchId(longestMatchId), inLmSelect(false), nspace(nspace), tokenRegion(tokenRegion), objectDef(objectDef), contextIn(contextIn) { if ( pReCaptureVect != 0 ) reCaptureVect = *pReCaptureVect; } InputLoc getLoc(); String name; String literal; bool isLiteral; bool ignore; Join *join; Action *action; CodeBlock *codeBlock; LangEl *token; InputLoc semiLoc; Action *setActId; Action *actOnLast; Action *actOnNext; Action *actLagBehind; int longestMatchId; bool inLmSelect; Namespace *nspace; TokenRegion *tokenRegion; ReCaptureVect reCaptureVect; ObjectDef *objectDef; Context *contextIn; }; struct LelDefList; struct NtDef { NtDef( const String &name, Namespace *nspace, LelDefList *defList, ObjectDef *objectDef, Context *contextIn, bool reduceFirst ) : name(name), nspace(nspace), defList(defList), objectDef(objectDef), contextIn(contextIn), reduceFirst(reduceFirst) {} String name; Namespace *nspace; LelDefList *defList; ObjectDef *objectDef; Context *contextIn; bool reduceFirst; NtDef *prev, *next; }; struct NtDefList : DList {}; /* Declare a new type so that ptreetypes.h need not include dlist.h. */ struct TokenDefListReg : DListMel {}; struct TokenDefListNs : DListMel {}; struct ContextDef { ContextDef( const String &name, Context *context, Namespace *nspace ) : name(name), context(context), nspace(nspace) {} String name; Context *context; Namespace *nspace; ContextDef *prev, *next; }; struct ContextDefList : DList {}; struct TypeMapEl : public AvlTreeEl { enum Type { TypeAliasType = 1, LangElType }; const String &getKey() { return key; } TypeMapEl( const String &key, TypeRef *typeRef ) : type(TypeAliasType), key(key), value(0), typeRef(typeRef) {} TypeMapEl( const String &key, LangEl *value ) : type(LangElType), key(key), value(value), typeRef(0) {} Type type; String key; LangEl *value; TypeRef *typeRef; TypeMapEl *prev, *next; }; /* Symbol Map. */ typedef AvlTree< TypeMapEl, String, CmpStr > TypeMap; typedef Vector RegionVect; struct TokenRegion { /* Construct with a list of joins */ TokenRegion( const InputLoc &loc, const String &name, int id, TokenRegion *parentRegion ) : loc(loc), name(name), id(id), lmSwitchHandlesError(false), regionNameInst(0), parentRegion(parentRegion), defaultTokenDef(0), preEofBlock(0) { } /* Tree traversal. */ FsmGraph *walk( ParseData *pd ); void makeNameTree( ParseData *pd ); void resolveNameRefs( ParseData *pd ); void runLongestMatch( ParseData *pd, FsmGraph *graph ); void transferScannerLeavingActions( FsmGraph *graph ); Action *newAction( ParseData *pd, const InputLoc &loc, const String &name, InlineList *inlineList ); void makeActions( ParseData *pd ); void findName( ParseData *pd ); void restart( FsmGraph *graph, FsmTrans *trans ); InputLoc loc; TokenDefListReg tokenDefList; String name; int id; Action *lmActSelect; bool lmSwitchHandlesError; /* This gets saved off during the name walk. Can save it off because token * regions are referenced once only. */ NameInst *regionNameInst; TokenRegion *parentRegion; RegionVect childRegions; TokenDef *defaultTokenDef; TokenRegion *next, *prev; CodeBlock *preEofBlock; }; typedef DList RegionList; typedef BstSet< TokenRegion*, CmpOrd > RegionSet; typedef Vector NamespaceVect; struct GenericType : public DListEl { GenericType( const String &name, long typeId, long id, LangEl *langEl, TypeRef *typeArg ) : name(name), typeId(typeId), id(id), langEl(langEl), typeArg(typeArg), keyTypeArg(0), utArg(0), keyUT(0), objDef(0) {} const String &getKey() const { return name; }; void declare( ParseData *pd, Namespace *nspace ); String name; long typeId; long id; LangEl *langEl; TypeRef *typeArg; TypeRef *keyTypeArg; UniqueType *utArg; UniqueType *keyUT; ObjectDef *objDef; }; typedef DList GenericList; typedef struct _UserIter UserIter; typedef AvlMap UserIterMap; typedef AvlMapEl UserIterMapEl; /* Graph dictionary. */ struct GraphDictEl : public AvlTreeEl, public DListEl { GraphDictEl( const String &key ) : key(key), value(0), isInstance(false) { } GraphDictEl( const String &key, VarDef *value ) : key(key), value(value), isInstance(false) { } const String &getKey() { return key; } String key; VarDef *value; bool isInstance; /* Location info of graph definition. Points to variable name of assignment. */ InputLoc loc; }; typedef AvlTree GraphDict; typedef DList GraphList; struct TypeAlias { TypeAlias( const InputLoc &loc, Namespace *nspace, const String &name, TypeRef *typeRef ) : loc(loc), nspace(nspace), name(name), typeRef(typeRef) {} InputLoc loc; Namespace *nspace; String name; TypeRef *typeRef; TypeAlias *prev, *next; }; typedef DList TypeAliasList; struct Namespace { /* Construct with a list of joins */ Namespace( const InputLoc &loc, const String &name, int id, Namespace *parentNamespace ) : loc(loc), name(name), id(id), parentNamespace(parentNamespace) { } /* Tree traversal. */ Namespace *findNamespace( const String &name ); InputLoc loc; String name; int id; /* Literal patterns and the dictionary mapping literals to the underlying * tokens. */ LiteralDict literalDict; /* List of tokens defs in the namespace. */ TokenDefListNs tokenDefList; /* List of nonterminal defs in the namespace. */ NtDefList ntDefList; /* List of context definitions for encapsulating the data of a parser. */ ContextDefList contextDefList; /* Dictionary of symbols within the region. */ TypeMap typeMap; GenericList genericList; /* Dictionary of graphs. Both instances and non-instances go here. */ GraphDict graphDict; TypeAliasList typeAliasList; Namespace *parentNamespace; NamespaceVect childNamespaces; Namespace *next, *prev; void declare( ParseData *pd ); }; typedef DList NamespaceList; typedef BstSet< Namespace*, CmpOrd > NamespaceSet; /* List of Expressions. */ typedef DList ExprList; struct JoinOrLm { enum Type { JoinType, LongestMatchType }; JoinOrLm( Join *join ) : join(join), type(JoinType) {} JoinOrLm( TokenRegion *tokenRegion ) : tokenRegion(tokenRegion), type(LongestMatchType) {} FsmGraph *walk( ParseData *pd ); void makeNameTree( ParseData *pd ); void resolveNameRefs( ParseData *pd ); Join *join; TokenRegion *tokenRegion; Type type; }; /* * Join */ struct Join { /* Construct with the first expression. */ Join( Expression *expr ); /* Tree traversal. */ FsmGraph *walk( ParseData *pd ); void makeNameTree( ParseData *pd ); void resolveNameRefs( ParseData *pd ); /* Data. */ ExprList exprList; Join *context; Action *mark; }; /* * Expression */ struct Expression { enum Type { OrType, IntersectType, SubtractType, StrongSubtractType, TermType, BuiltinType }; /* Construct with an expression on the left and a term on the right. */ Expression( Expression *expression, Term *term, Type type ) : expression(expression), term(term), builtin(builtin), type(type), prev(this), next(this) { } /* Construct with only a term. */ Expression( Term *term ) : expression(0), term(term), builtin(builtin), type(TermType) , prev(this), next(this) { } /* Construct with a builtin type. */ Expression( BuiltinMachine builtin ) : expression(0), term(0), builtin(builtin), type(BuiltinType), prev(this), next(this) { } ~Expression(); /* Tree traversal. */ FsmGraph *walk( ParseData *pd, bool lastInSeq = true ); void makeNameTree( ParseData *pd ); void resolveNameRefs( ParseData *pd ); /* Node data. */ Expression *expression; Term *term; BuiltinMachine builtin; Type type; Expression *prev, *next; }; /* * Term */ struct Term { enum Type { ConcatType, RightStartType, RightFinishType, LeftType, FactorWithAugType }; Term( Term *term, FactorWithAug *factorWithAug ) : term(term), factorWithAug(factorWithAug), type(ConcatType) { } Term( Term *term, FactorWithAug *factorWithAug, Type type ) : term(term), factorWithAug(factorWithAug), type(type) { } Term( FactorWithAug *factorWithAug ) : term(0), factorWithAug(factorWithAug), type(FactorWithAugType) { } ~Term(); FsmGraph *walk( ParseData *pd, bool lastInSeq = true ); void makeNameTree( ParseData *pd ); void resolveNameRefs( ParseData *pd ); Term *term; FactorWithAug *factorWithAug; Type type; /* Priority descriptor for RightFinish type. */ PriorDesc priorDescs[2]; }; /* Third level of precedence. Augmenting nodes with actions and priorities. */ struct FactorWithAug { FactorWithAug( FactorWithRep *factorWithRep ) : priorDescs(0), factorWithRep(factorWithRep) { } ~FactorWithAug(); /* Tree traversal. */ FsmGraph *walk( ParseData *pd ); void makeNameTree( ParseData *pd ); void resolveNameRefs( ParseData *pd ); void assignActions( ParseData *pd, FsmGraph *graph, int *actionOrd ); void assignPriorities( FsmGraph *graph, int *priorOrd ); void assignConditions( FsmGraph *graph ); /* Actions and priorities assigned to the factor node. */ Vector actions; Vector priorityAugs; PriorDesc *priorDescs; Vector