/* * Copyright 2001-2018 Adrian Thurston * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to * deal in the Software without restriction, including without limitation the * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or * sell copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #ifndef _PARSETREE_H #define _PARSETREE_H #include "ragel.h" #include "avlmap.h" #include "bstmap.h" #include "vector.h" #include "dlist.h" #include "fsmgraph.h" struct NameInst; /* Types of builtin machines. */ enum BuiltinMachine { BT_Any, BT_Ascii, BT_Extend, BT_Alpha, BT_Digit, BT_Alnum, BT_Lower, BT_Upper, BT_Cntrl, BT_Graph, BT_Print, BT_Punct, BT_Space, BT_Xdigit, BT_Lambda, BT_Empty }; struct ParseData; /* Leaf type. */ struct Literal; /* Tree nodes. */ struct Term; struct FactorWithAug; struct FactorWithRep; struct FactorWithNeg; struct Factor; struct Expression; struct Join; struct NfaUnion; struct MachineDef; struct LongestMatch; struct LongestMatchPart; struct LmPartList; struct Range; struct LengthDef; struct colm_data; struct colm_location; /* Type of augmentation. Describes locations in the machine. */ enum AugType { /* Transition actions/priorities. */ at_start, at_all, at_finish, at_leave, /* Global error actions. */ at_start_gbl_error, at_all_gbl_error, at_final_gbl_error, at_not_start_gbl_error, at_not_final_gbl_error, at_middle_gbl_error, /* Local error actions. */ at_start_local_error, at_all_local_error, at_final_local_error, at_not_start_local_error, at_not_final_local_error, at_middle_local_error, /* To State Action embedding. */ at_start_to_state, at_all_to_state, at_final_to_state, at_not_start_to_state, at_not_final_to_state, at_middle_to_state, /* From State Action embedding. */ at_start_from_state, at_all_from_state, at_final_from_state, at_not_start_from_state, at_not_final_from_state, at_middle_from_state, /* EOF Action embedding. */ at_start_eof, at_all_eof, at_final_eof, at_not_start_eof, at_not_final_eof, at_middle_eof }; /* IMPORTANT: These must follow the same order as the state augs in AugType * since we will be using this to compose AugType. */ enum StateAugType { sat_start = 0, sat_all, sat_final, sat_not_start, sat_not_final, sat_middle }; struct Action; struct PriorDesc; struct RegExpr; struct ReItem; struct ReOrBlock; struct ReOrItem; struct ExplicitMachine; struct InlineItem; struct InlineList; /* Reference to a named state. */ struct NameRef : public Vector {}; typedef Vector NameRefList; typedef Vector NameTargList; /* Structure for storing location of epsilon transitons. */ struct EpsilonLink { EpsilonLink( const InputLoc &loc, NameRef *target ) : loc(loc), target(target) { } InputLoc loc; NameRef *target; }; struct Label { Label( const InputLoc &loc, std::string data ) : loc(loc), data(data), cut(false) { } InputLoc loc; std::string data; bool cut; }; /* Structrue represents an action assigned to some FactorWithAug node. The * factor with aug will keep an array of these. */ struct ParserAction { ParserAction( const InputLoc &loc, AugType type, int localErrKey, Action *action ) : loc(loc), type(type), localErrKey(localErrKey), action(action) { } InputLoc loc; AugType type; int localErrKey; Action *action; }; struct ConditionTest { ConditionTest( const InputLoc &loc, AugType type, Action *action, bool sense ) : loc(loc), type(type), action(action), sense(sense) { } InputLoc loc; AugType type; Action *action; bool sense; }; struct Token { char *data; int length; ParserLoc loc; void set( const char *str, int len, colm_location *cl); void set( colm_data *cd, colm_location *cl); void set( const char *str, int len, const InputLoc &loc ); void set( const char *str, int len, const ParserLoc &loc ); private: void _set( const char *str, int len ); }; struct RedToken { const char *data; int length; ParserLoc loc; void set( colm_data *cd, colm_location *cl); }; /* Store the value and type of a priority augmentation. */ struct PriorityAug { PriorityAug( AugType type, int priorKey, int priorValue ) : type(type), priorKey(priorKey), priorValue(priorValue) { } AugType type; int priorKey; int priorValue; }; /* * A Variable Definition */ struct VarDef { VarDef( std::string name, MachineDef *machineDef ) : name(name), machineDef(machineDef), isExport(false) { } ~VarDef(); /* Parse tree traversal. */ FsmRes walk( ParseData *pd ); void makeNameTree( const InputLoc &loc, ParseData *pd ); void resolveNameRefs( ParseData *pd ); std::string name; MachineDef *machineDef; bool isExport; }; /* * LongestMatch * * Wherever possible the item match will execute on the character. If not * possible the item match will execute on a lookahead character and either * hold the current char (if one away) or backup. * * How to handle the problem of backing up over a buffer break? * * Don't want to use pending out transitions for embedding item match because * the role of item match action is different: it may sometimes match on the * final transition, or may match on a lookahead character. * * Don't want to invent a new operator just for this. So just trail action * after machine, this means we can only use literal actions. * * The item action may * * What states of the machine will be final. The item actions that wrap around * on the last character will go straight to the start state. * * Some transitions will be lookahead transitions, they will hold the current * character. Crossing them with regular transitions must be restricted * because it does not make sense. The transition cannot simultaneously hold * and consume the current character. */ struct LongestMatchPart { LongestMatchPart( Join *join, Action *action, const InputLoc &semiLoc, int longestMatchId ) : join(join), action(action), semiLoc(semiLoc), longestMatchId(longestMatchId), inLmSelect(false) { } InputLoc getLoc(); Join *join; Action *action; InputLoc semiLoc; Action *setActId; Action *actOnLast; Action *actOnNext; Action *actLagBehind; Action *actNfaOnLast; Action *actNfaOnNext; Action *actNfaOnEof; int longestMatchId; bool inLmSelect; LongestMatch *longestMatch; LongestMatchPart *prev, *next; }; /* Declare a new type so that ptreetypes.h need not include dlist.h. */ struct LmPartList : DList {}; struct LongestMatch { /* Construct with a list of joins */ LongestMatch( const InputLoc &loc, LmPartList *longestMatchList ) : loc(loc), longestMatchList(longestMatchList), lmSwitchHandlesError(false), nfaConstruction(false) { } InputLoc loc; LmPartList *longestMatchList; std::string name; Action *lmActSelect; bool lmSwitchHandlesError; bool nfaConstruction; LongestMatch *next, *prev; /* Tree traversal. */ FsmRes walkClassic( ParseData *pd ); FsmRes walk( ParseData *pd ); FsmRes mergeNfaStates( ParseData *pd, FsmAp *fsm ); bool onlyOneNfa( ParseData *pd, FsmAp *fsm, StateAp *st, NfaTrans *in ); bool matchCanFail( ParseData *pd, FsmAp *fsm, StateAp *st ); void eliminateNfaActions( ParseData *pd, FsmAp *fsm ); void advanceNfaActions( ParseData *pd, FsmAp *fsm ); FsmRes buildBaseNfa( ParseData *pd ); FsmRes walkNfa( ParseData *pd ); void makeNameTree( ParseData *pd ); void resolveNameRefs( ParseData *pd ); void transferScannerLeavingActions( FsmAp *graph ); void runLongestMatch( ParseData *pd, FsmAp *graph ); Action *newLmAction( ParseData *pd, const InputLoc &loc, const char *name, InlineList *inlineList ); void makeActions( ParseData *pd ); void findName( ParseData *pd ); void restart( FsmAp *graph, TransAp *trans ); void restart( FsmAp *graph, CondAp *cond ); }; /* List of Expressions. */ typedef DList ExprList; struct MachineDef { enum Type { JoinType, LongestMatchType, LengthDefType, NfaUnionType }; MachineDef( Join *join ) : join(join), longestMatch(0), lengthDef(0), nfaUnion(0), type(JoinType) {} MachineDef( LongestMatch *longestMatch ) : join(0), longestMatch(longestMatch), lengthDef(0), nfaUnion(0), type(LongestMatchType) {} MachineDef( LengthDef *lengthDef ) : join(0), longestMatch(0), lengthDef(lengthDef), nfaUnion(0), type(LengthDefType) {} MachineDef( NfaUnion *nfaUnion ) : join(0), longestMatch(0), lengthDef(0), nfaUnion(nfaUnion), type(NfaUnionType) {} ~MachineDef(); FsmRes walk( ParseData *pd ); void makeNameTree( ParseData *pd ); void resolveNameRefs( ParseData *pd ); Join *join; LongestMatch *longestMatch; LengthDef *lengthDef; NfaUnion *nfaUnion; Type type; }; /* * Join */ struct Join { /* Construct with the first expression. */ Join( Expression *expr ); Join( const InputLoc &loc, Expression *expr ); ~Join() { exprList.empty(); } /* Tree traversal. */ FsmRes walk( ParseData *pd ); FsmRes walkJoin( ParseData *pd ); void makeNameTree( ParseData *pd ); void resolveNameRefs( ParseData *pd ); /* Data. */ InputLoc loc; ExprList exprList; }; /* * Expression */ struct Expression { enum Type { OrType, IntersectType, SubtractType, StrongSubtractType, TermType, BuiltinType }; /* Construct with an expression on the left and a term on the right. */ Expression( Expression *expression, Term *term, Type type ) : expression(expression), term(term), type(type), prev(this), next(this) { } /* Construct with only a term. */ Expression( Term *term ) : expression(0), term(term), type(TermType) , prev(this), next(this) { } /* Construct with a builtin type. */ Expression( BuiltinMachine builtin ) : expression(0), term(0), builtin(builtin), type(BuiltinType), prev(this), next(this) { } ~Expression(); /* Tree traversal. */ FsmRes walk( ParseData *pd, bool lastInSeq = true ); void makeNameTree( ParseData *pd ); void resolveNameRefs( ParseData *pd ); /* Node data. */ Expression *expression; Term *term; BuiltinMachine builtin; Type type; Expression *prev, *next; }; typedef Vector TermVect; /* * NfaUnion */ struct NfaUnion { /* Construct with only a term. */ NfaUnion() : roundsList(0) { } ~NfaUnion(); /* Tree traversal. */ FsmRes walk( ParseData *pd ); void makeNameTree( ParseData *pd ); void resolveNameRefs( ParseData *pd ); /* Node data. */ TermVect terms; NfaRoundVect *roundsList; }; /* * Term */ struct Term { enum Type { ConcatType, RightStartType, RightFinishType, LeftType, FactorWithAugType }; Term( Term *term, FactorWithAug *factorWithAug ) : term(term), factorWithAug(factorWithAug), type(ConcatType) { } Term( Term *term, FactorWithAug *factorWithAug, Type type ) : term(term), factorWithAug(factorWithAug), type(type) { } Term( Action *action1, Action *action2, Action *action3, Term *term, FactorWithAug *factorWithAug, FactorWithAug *factorWithAug2, Type type ) : action1(action1), action2(action2), action3(action3), term(term), factorWithAug(factorWithAug), factorWithAug2(factorWithAug2), type(type) { } Term( FactorWithAug *factorWithAug ) : term(0), factorWithAug(factorWithAug), type(FactorWithAugType) { } ~Term(); FsmRes walk( ParseData *pd, bool lastInSeq = true ); void makeNameTree( ParseData *pd ); void resolveNameRefs( ParseData *pd ); Action *action1; Action *action2; Action *action3; Term *term; FactorWithAug *factorWithAug; FactorWithAug *factorWithAug2; Type type; /* Priority descriptor for RightFinish type. */ PriorDesc priorDescs[2]; }; /* Third level of precedence. Augmenting nodes with actions and priorities. */ struct FactorWithAug { FactorWithAug( FactorWithRep *factorWithRep ) : priorDescs(0), factorWithRep(factorWithRep) {} ~FactorWithAug(); /* Tree traversal. */ FsmRes walk( ParseData *pd ); void makeNameTree( ParseData *pd ); void resolveNameRefs( ParseData *pd ); void assignActions( ParseData *pd, FsmAp *graph, int *actionOrd ); void assignPriorities( FsmAp *graph, int *priorOrd ); void assignConditions( FsmAp *graph ); /* Actions and priorities assigned to the factor node. */ Vector actions; Vector priorityAugs; PriorDesc *priorDescs; std::vector