diff options
Diffstat (limited to 'libfsm/parsetree.h')
-rw-r--r-- | libfsm/parsetree.h | 873 |
1 files changed, 0 insertions, 873 deletions
diff --git a/libfsm/parsetree.h b/libfsm/parsetree.h deleted file mode 100644 index 1d4f7e6b..00000000 --- a/libfsm/parsetree.h +++ /dev/null @@ -1,873 +0,0 @@ -/* - * Copyright 2001-2018 Adrian Thurston <thurston@colm.net> - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef _PARSETREE_H -#define _PARSETREE_H - -#include "ragel.h" -#include "avlmap.h" -#include "bstmap.h" -#include "vector.h" -#include "dlist.h" -#include "fsmgraph.h" - -struct NameInst; - -/* Types of builtin machines. */ -enum BuiltinMachine -{ - BT_Any, - BT_Ascii, - BT_Extend, - BT_Alpha, - BT_Digit, - BT_Alnum, - BT_Lower, - BT_Upper, - BT_Cntrl, - BT_Graph, - BT_Print, - BT_Punct, - BT_Space, - BT_Xdigit, - BT_Lambda, - BT_Empty -}; - - -struct ParseData; - -/* Leaf type. */ -struct Literal; - -/* Tree nodes. */ - -struct Term; -struct FactorWithAug; -struct FactorWithRep; -struct FactorWithNeg; -struct Factor; -struct Expression; -struct Join; -struct NfaUnion; -struct MachineDef; -struct LongestMatch; -struct LongestMatchPart; -struct LmPartList; -struct Range; -struct LengthDef; -struct colm_data; -struct colm_location; - -/* Type of augmentation. Describes locations in the machine. */ -enum AugType -{ - /* Transition actions/priorities. */ - at_start, - at_all, - at_finish, - at_leave, - - /* Global error actions. */ - at_start_gbl_error, - at_all_gbl_error, - at_final_gbl_error, - at_not_start_gbl_error, - at_not_final_gbl_error, - at_middle_gbl_error, - - /* Local error actions. */ - at_start_local_error, - at_all_local_error, - at_final_local_error, - at_not_start_local_error, - at_not_final_local_error, - at_middle_local_error, - - /* To State Action embedding. */ - at_start_to_state, - at_all_to_state, - at_final_to_state, - at_not_start_to_state, - at_not_final_to_state, - at_middle_to_state, - - /* From State Action embedding. */ - at_start_from_state, - at_all_from_state, - at_final_from_state, - at_not_start_from_state, - at_not_final_from_state, - at_middle_from_state, - - /* EOF Action embedding. */ - at_start_eof, - at_all_eof, - at_final_eof, - at_not_start_eof, - at_not_final_eof, - at_middle_eof -}; - -/* IMPORTANT: These must follow the same order as the state augs in AugType - * since we will be using this to compose AugType. */ -enum StateAugType -{ - sat_start = 0, - sat_all, - sat_final, - sat_not_start, - sat_not_final, - sat_middle -}; - -struct Action; -struct PriorDesc; -struct RegExpr; -struct ReItem; -struct ReOrBlock; -struct ReOrItem; -struct ExplicitMachine; -struct InlineItem; -struct InlineList; - -/* Reference to a named state. */ -struct NameRef : public Vector<std::string> {}; -typedef Vector<NameRef*> NameRefList; -typedef Vector<NameInst*> NameTargList; - -/* Structure for storing location of epsilon transitons. */ -struct EpsilonLink -{ - EpsilonLink( const InputLoc &loc, NameRef *target ) - : loc(loc), target(target) { } - - InputLoc loc; - NameRef *target; -}; - -struct Label -{ - Label( const InputLoc &loc, std::string data ) - : loc(loc), data(data), cut(false) { } - - InputLoc loc; - std::string data; - bool cut; -}; - -/* Structrue represents an action assigned to some FactorWithAug node. The - * factor with aug will keep an array of these. */ -struct ParserAction -{ - ParserAction( const InputLoc &loc, AugType type, int localErrKey, Action *action ) - : loc(loc), type(type), localErrKey(localErrKey), action(action) { } - - InputLoc loc; - AugType type; - int localErrKey; - Action *action; -}; - -struct ConditionTest -{ - ConditionTest( const InputLoc &loc, AugType type, Action *action, bool sense ) : - loc(loc), type(type), action(action), sense(sense) { } - - InputLoc loc; - AugType type; - Action *action; - bool sense; -}; - -struct Token -{ - char *data; - int length; - ParserLoc loc; - - void set( const char *str, int len, colm_location *cl); - void set( colm_data *cd, colm_location *cl); - void set( const char *str, int len, const InputLoc &loc ); - void set( const char *str, int len, const ParserLoc &loc ); - -private: - void _set( const char *str, int len ); -}; - - -struct RedToken -{ - const char *data; - int length; - ParserLoc loc; - - void set( colm_data *cd, colm_location *cl); -}; - - -/* Store the value and type of a priority augmentation. */ -struct PriorityAug -{ - PriorityAug( AugType type, int priorKey, int priorValue ) : - type(type), priorKey(priorKey), priorValue(priorValue) { } - - AugType type; - int priorKey; - int priorValue; -}; - -/* - * A Variable Definition - */ -struct VarDef -{ - VarDef( std::string name, MachineDef *machineDef ) - : name(name), machineDef(machineDef), isExport(false) { } - - ~VarDef(); - - /* Parse tree traversal. */ - FsmRes walk( ParseData *pd ); - void makeNameTree( const InputLoc &loc, ParseData *pd ); - void resolveNameRefs( ParseData *pd ); - - std::string name; - MachineDef *machineDef; - bool isExport; -}; - - -/* - * LongestMatch - * - * Wherever possible the item match will execute on the character. If not - * possible the item match will execute on a lookahead character and either - * hold the current char (if one away) or backup. - * - * How to handle the problem of backing up over a buffer break? - * - * Don't want to use pending out transitions for embedding item match because - * the role of item match action is different: it may sometimes match on the - * final transition, or may match on a lookahead character. - * - * Don't want to invent a new operator just for this. So just trail action - * after machine, this means we can only use literal actions. - * - * The item action may - * - * What states of the machine will be final. The item actions that wrap around - * on the last character will go straight to the start state. - * - * Some transitions will be lookahead transitions, they will hold the current - * character. Crossing them with regular transitions must be restricted - * because it does not make sense. The transition cannot simultaneously hold - * and consume the current character. - */ -struct LongestMatchPart -{ - LongestMatchPart( Join *join, Action *action, - const InputLoc &semiLoc, int longestMatchId ) - : - join(join), action(action), semiLoc(semiLoc), - longestMatchId(longestMatchId), inLmSelect(false) { } - - InputLoc getLoc(); - - Join *join; - Action *action; - InputLoc semiLoc; - - Action *setActId; - Action *actOnLast; - Action *actOnNext; - Action *actLagBehind; - Action *actNfaOnLast; - Action *actNfaOnNext; - Action *actNfaOnEof; - int longestMatchId; - bool inLmSelect; - LongestMatch *longestMatch; - - LongestMatchPart *prev, *next; -}; - -/* Declare a new type so that ptreetypes.h need not include dlist.h. */ -struct LmPartList : DList<LongestMatchPart> {}; - -struct LongestMatch -{ - /* Construct with a list of joins */ - LongestMatch( const InputLoc &loc, LmPartList *longestMatchList ) - : - loc(loc), - longestMatchList(longestMatchList), - lmSwitchHandlesError(false), - nfaConstruction(false) - { } - - InputLoc loc; - LmPartList *longestMatchList; - std::string name; - Action *lmActSelect; - bool lmSwitchHandlesError; - bool nfaConstruction; - - LongestMatch *next, *prev; - - /* Tree traversal. */ - FsmRes walkClassic( ParseData *pd ); - FsmRes walk( ParseData *pd ); - - FsmRes mergeNfaStates( ParseData *pd, FsmAp *fsm ); - bool onlyOneNfa( ParseData *pd, FsmAp *fsm, StateAp *st, NfaTrans *in ); - bool matchCanFail( ParseData *pd, FsmAp *fsm, StateAp *st ); - void eliminateNfaActions( ParseData *pd, FsmAp *fsm ); - void advanceNfaActions( ParseData *pd, FsmAp *fsm ); - FsmRes buildBaseNfa( ParseData *pd ); - FsmRes walkNfa( ParseData *pd ); - - void makeNameTree( ParseData *pd ); - void resolveNameRefs( ParseData *pd ); - void transferScannerLeavingActions( FsmAp *graph ); - void runLongestMatch( ParseData *pd, FsmAp *graph ); - Action *newLmAction( ParseData *pd, const InputLoc &loc, const char *name, - InlineList *inlineList ); - void makeActions( ParseData *pd ); - void findName( ParseData *pd ); - void restart( FsmAp *graph, TransAp *trans ); - void restart( FsmAp *graph, CondAp *cond ); -}; - - -/* List of Expressions. */ -typedef DList<Expression> ExprList; - -struct MachineDef -{ - enum Type { - JoinType, - LongestMatchType, - LengthDefType, - NfaUnionType - }; - - MachineDef( Join *join ) - : join(join), longestMatch(0), lengthDef(0), nfaUnion(0), - type(JoinType) {} - - MachineDef( LongestMatch *longestMatch ) - : join(0), longestMatch(longestMatch), lengthDef(0), nfaUnion(0), - type(LongestMatchType) {} - - MachineDef( LengthDef *lengthDef ) - : join(0), longestMatch(0), lengthDef(lengthDef), nfaUnion(0), - type(LengthDefType) {} - - MachineDef( NfaUnion *nfaUnion ) - : join(0), longestMatch(0), lengthDef(0), nfaUnion(nfaUnion), - type(NfaUnionType) {} - - ~MachineDef(); - - FsmRes walk( ParseData *pd ); - void makeNameTree( ParseData *pd ); - void resolveNameRefs( ParseData *pd ); - - Join *join; - LongestMatch *longestMatch; - LengthDef *lengthDef; - NfaUnion *nfaUnion; - Type type; -}; - -/* - * Join - */ -struct Join -{ - /* Construct with the first expression. */ - Join( Expression *expr ); - Join( const InputLoc &loc, Expression *expr ); - - ~Join() - { - exprList.empty(); - } - - /* Tree traversal. */ - FsmRes walk( ParseData *pd ); - FsmRes walkJoin( ParseData *pd ); - void makeNameTree( ParseData *pd ); - void resolveNameRefs( ParseData *pd ); - - /* Data. */ - InputLoc loc; - ExprList exprList; -}; - -/* - * Expression - */ -struct Expression -{ - enum Type { - OrType, - IntersectType, - SubtractType, - StrongSubtractType, - TermType, - BuiltinType - }; - - /* Construct with an expression on the left and a term on the right. */ - Expression( Expression *expression, Term *term, Type type ) : - expression(expression), term(term), - type(type), prev(this), next(this) { } - - /* Construct with only a term. */ - Expression( Term *term ) : - expression(0), term(term), - type(TermType) , prev(this), next(this) { } - - /* Construct with a builtin type. */ - Expression( BuiltinMachine builtin ) : - expression(0), term(0), builtin(builtin), - type(BuiltinType), prev(this), next(this) { } - - ~Expression(); - - /* Tree traversal. */ - FsmRes walk( ParseData *pd, bool lastInSeq = true ); - void makeNameTree( ParseData *pd ); - void resolveNameRefs( ParseData *pd ); - - /* Node data. */ - Expression *expression; - Term *term; - BuiltinMachine builtin; - Type type; - - Expression *prev, *next; -}; - -typedef Vector<Term*> TermVect; - -/* - * NfaUnion - */ -struct NfaUnion -{ - /* Construct with only a term. */ - NfaUnion() : roundsList(0) { } - ~NfaUnion(); - - /* Tree traversal. */ - FsmRes walk( ParseData *pd ); - void makeNameTree( ParseData *pd ); - void resolveNameRefs( ParseData *pd ); - - /* Node data. */ - TermVect terms; - NfaRoundVect *roundsList; -}; - - -/* - * Term - */ -struct Term -{ - enum Type { - ConcatType, - RightStartType, - RightFinishType, - LeftType, - FactorWithAugType - }; - - Term( Term *term, FactorWithAug *factorWithAug ) : - term(term), factorWithAug(factorWithAug), type(ConcatType) { } - - Term( Term *term, FactorWithAug *factorWithAug, Type type ) : - term(term), factorWithAug(factorWithAug), type(type) { } - - Term( Action *action1, Action *action2, Action *action3, - Term *term, FactorWithAug *factorWithAug, - FactorWithAug *factorWithAug2, Type type ) - : - action1(action1), action2(action2), action3(action3), - term(term), factorWithAug(factorWithAug), - factorWithAug2(factorWithAug2), type(type) - { } - - Term( FactorWithAug *factorWithAug ) : - term(0), factorWithAug(factorWithAug), type(FactorWithAugType) { } - - ~Term(); - - FsmRes walk( ParseData *pd, bool lastInSeq = true ); - void makeNameTree( ParseData *pd ); - void resolveNameRefs( ParseData *pd ); - - Action *action1; - Action *action2; - Action *action3; - - Term *term; - FactorWithAug *factorWithAug; - FactorWithAug *factorWithAug2; - Type type; - - /* Priority descriptor for RightFinish type. */ - PriorDesc priorDescs[2]; -}; - - -/* Third level of precedence. Augmenting nodes with actions and priorities. */ -struct FactorWithAug -{ - FactorWithAug( FactorWithRep *factorWithRep ) - : - priorDescs(0), - factorWithRep(factorWithRep) - {} - - ~FactorWithAug(); - - /* Tree traversal. */ - FsmRes walk( ParseData *pd ); - void makeNameTree( ParseData *pd ); - void resolveNameRefs( ParseData *pd ); - - void assignActions( ParseData *pd, FsmAp *graph, int *actionOrd ); - void assignPriorities( FsmAp *graph, int *priorOrd ); - - void assignConditions( FsmAp *graph ); - - /* Actions and priorities assigned to the factor node. */ - Vector<ParserAction> actions; - Vector<PriorityAug> priorityAugs; - PriorDesc *priorDescs; - std::vector<Label> labels; - Vector<EpsilonLink> epsilonLinks; - Vector<ConditionTest> conditions; - - FactorWithRep *factorWithRep; -}; - -/* Fourth level of precedence. Trailing unary operators. Provide kleen star, - * optional and plus. */ -struct FactorWithRep -{ - enum Type { - StarType, - StarStarType, - OptionalType, - PlusType, - ExactType, - MaxType, - MinType, - RangeType, - FactorWithNegType - }; - - FactorWithRep( const InputLoc &loc, FactorWithRep *factorWithRep, - int lowerRep, int upperRep, Type type ) - : - loc(loc), repId(0), factorWithRep(factorWithRep), - factorWithNeg(0), lowerRep(lowerRep), - upperRep(upperRep), type(type) - {} - - FactorWithRep( FactorWithNeg *factorWithNeg ) - : factorWithNeg(factorWithNeg), type(FactorWithNegType) - {} - - ~FactorWithRep(); - - /* Tree traversal. */ - FsmRes walk( ParseData *pd ); - void makeNameTree( ParseData *pd ); - void resolveNameRefs( ParseData *pd ); - - InputLoc loc; - long long repId; - FactorWithRep *factorWithRep; - FactorWithNeg *factorWithNeg; - int lowerRep, upperRep; - Type type; - - /* Priority descriptor for StarStar type. */ - PriorDesc priorDescs[4]; -}; - -/* Fifth level of precedence. Provides Negation. */ -struct FactorWithNeg -{ - enum Type { - NegateType, - CharNegateType, - FactorType - }; - - FactorWithNeg( const InputLoc &loc, FactorWithNeg *factorWithNeg, Type type) : - loc(loc), factorWithNeg(factorWithNeg), factor(0), type(type) { } - - FactorWithNeg( Factor *factor ) : - factorWithNeg(0), factor(factor), type(FactorType) { } - - ~FactorWithNeg(); - - /* Tree traversal. */ - FsmRes walk( ParseData *pd ); - void makeNameTree( ParseData *pd ); - void resolveNameRefs( ParseData *pd ); - - InputLoc loc; - FactorWithNeg *factorWithNeg; - Factor *factor; - Type type; -}; - -/* - * Factor - */ -struct Factor -{ - /* Language elements a factor node can be. */ - enum Type { - LiteralType, - RangeType, - OrExprType, - RegExprType, - ReferenceType, - ParenType, - LongestMatchType, - NfaRep, - NfaWrap, - CondStar, - CondPlus - }; - - enum NfaRepeatMode { - NfaLegacy = 1, - NfaGreedy, - NfaLazy - }; - - /* Construct with a literal fsm. */ - Factor( Literal *literal ) : - literal(literal), type(LiteralType) { } - - /* Construct with a range. */ - Factor( Range *range ) : - range(range), type(RangeType) { } - - /* Construct with the or part of a regular expression. */ - Factor( ReItem *reItem ) : - reItem(reItem), type(OrExprType) { } - - /* Construct with a regular expression. */ - Factor( RegExpr *regExpr ) : - regExpr(regExpr), type(RegExprType) { } - - /* Construct with a reference to a var def. */ - Factor( const InputLoc &loc, VarDef *varDef ) : - loc(loc), varDef(varDef), type(ReferenceType) {} - - /* Construct with a parenthesized join. */ - Factor( Join *join ) : - join(join), type(ParenType) {} - - /* Construct with a longest match operator. */ - Factor( LongestMatch *longestMatch ) : - longestMatch(longestMatch), type(LongestMatchType) {} - - Factor( const InputLoc &loc, long long repId, Expression *expression, - Action *action1, Action *action2, Action *action3, - Action *action4, Action *action5, Action *action6, Type type ) - : - loc(loc), repId(repId), expression(expression), - action1(action1), action2(action2), action3(action3), - action4(action4), action5(action5), action6(action6), - type(type) - {} - - /* Cleanup. */ - ~Factor(); - - /* Tree traversal. */ - FsmRes walk( ParseData *pd ); - void makeNameTree( ParseData *pd ); - void resolveNameRefs( ParseData *pd ); - - InputLoc loc; - Literal *literal; - Range *range; - ReItem *reItem; - RegExpr *regExpr; - VarDef *varDef; - Join *join; - LongestMatch *longestMatch; - int lower, upper; - long repId; - Expression *expression; - Action *action1; - Action *action2; - Action *action3; - Action *action4; - Action *action5; - Action *action6; - PriorDesc priorDescs[4]; - NfaRepeatMode mode; - - Type type; -}; - -/* A range machine. Only ever composed of two literals. */ -struct Range -{ - Range( Literal *lowerLit, Literal *upperLit, bool caseIndep ) - : lowerLit(lowerLit), upperLit(upperLit), caseIndep(caseIndep) { } - - ~Range(); - FsmAp *walk( ParseData *pd ); - - Literal *lowerLit; - Literal *upperLit; - bool caseIndep; -}; - -/* Some literal machine. Can be a number or literal string. */ -struct Literal -{ - enum LiteralType { Number, LitString, HexString }; - - Literal( const InputLoc &loc, bool neg, const char *_data, int len, LiteralType type ) - : loc(loc), neg(neg), type(type) - { - data.append( _data, len ); - } - - FsmAp *walk( ParseData *pd ); - - InputLoc loc; - bool neg; - Vector<char> data; - LiteralType type; -}; - -/* Regular expression. */ -struct RegExpr -{ - enum RegExpType { RecurseItem, Empty }; - - /* Constructors. */ - RegExpr() : - type(Empty), caseInsensitive(false) { } - RegExpr(RegExpr *regExpr, ReItem *item) : - regExpr(regExpr), item(item), - type(RecurseItem), caseInsensitive(false) { } - - ~RegExpr(); - FsmAp *walk( ParseData *pd, RegExpr *rootRegex ); - - RegExpr *regExpr; - ReItem *item; - RegExpType type; - bool caseInsensitive; -}; - -/* An item in a regular expression. */ -struct ReItem -{ - enum ReItemType { Data, Dot, OrBlock, NegOrBlock }; - - ReItem( const InputLoc &loc, const char *_data, int len ) - : - loc(loc), star(false), type(Data) - { - data.append( _data, len ); - } - - ReItem( const InputLoc &loc, ReItemType type ) - : loc(loc), star(false), type(type) { } - - ReItem( const InputLoc &loc, ReOrBlock *orBlock, ReItemType type ) - : loc(loc), orBlock(orBlock), star(false), type(type) { } - - ~ReItem(); - FsmRes walk( ParseData *pd, RegExpr *rootRegex ); - - InputLoc loc; - Vector<char> data; - ReOrBlock *orBlock; - bool star; - ReItemType type; -}; - -/* An or block item. */ -struct ReOrBlock -{ - enum ReOrBlockType { RecurseItem, Empty }; - - /* Constructors. */ - ReOrBlock() - : type(Empty) { } - ReOrBlock(ReOrBlock *orBlock, ReOrItem *item) - : orBlock(orBlock), item(item), type(RecurseItem) { } - - ~ReOrBlock(); - FsmAp *walk( ParseData *pd, RegExpr *rootRegex ); - - ReOrBlock *orBlock; - ReOrItem *item; - ReOrBlockType type; -}; - -/* An item in an or block. */ -struct ReOrItem -{ - enum ReOrItemType { Data, Range }; - - ReOrItem( const InputLoc &loc, const char *_data, int len ) - : - loc(loc), type(Data) - { - data.append( _data, len ); - } - - ReOrItem( const InputLoc &loc, char lower, char upper ) - : loc(loc), lower(lower), upper(upper), type(Range) { } - - FsmAp *walk( ParseData *pd, RegExpr *rootRegex ); - - InputLoc loc; - Vector<char> data; - char lower; - char upper; - ReOrItemType type; -}; - - -#endif |