/* * Copyright 2001-2006 Adrian Thurston */ /* This file is part of Ragel. * * Ragel is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * Ragel is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with Ragel; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef _PARSETREE_H #define _PARSETREE_H #include "ragel.h" #include "avlmap.h" #include "bstmap.h" #include "vector.h" #include "dlist.h" struct NameInst; /* Types of builtin machines. */ enum BuiltinMachine { BT_Any, BT_Ascii, BT_Extend, BT_Alpha, BT_Digit, BT_Alnum, BT_Lower, BT_Upper, BT_Cntrl, BT_Graph, BT_Print, BT_Punct, BT_Space, BT_Xdigit, BT_Lambda, BT_Empty }; struct ParseData; /* Leaf type. */ struct Literal; /* Tree nodes. */ struct Term; struct FactorWithAug; struct FactorWithRep; struct FactorWithNeg; struct Factor; struct Expression; struct Join; struct MachineDef; struct LongestMatch; struct LongestMatchPart; struct LmPartList; struct Range; struct LengthDef; /* Type of augmentation. Describes locations in the machine. */ enum AugType { /* Transition actions/priorities. */ at_start, at_all, at_finish, at_leave, /* Global error actions. */ at_start_gbl_error, at_all_gbl_error, at_final_gbl_error, at_not_start_gbl_error, at_not_final_gbl_error, at_middle_gbl_error, /* Local error actions. */ at_start_local_error, at_all_local_error, at_final_local_error, at_not_start_local_error, at_not_final_local_error, at_middle_local_error, /* To State Action embedding. */ at_start_to_state, at_all_to_state, at_final_to_state, at_not_start_to_state, at_not_final_to_state, at_middle_to_state, /* From State Action embedding. */ at_start_from_state, at_all_from_state, at_final_from_state, at_not_start_from_state, at_not_final_from_state, at_middle_from_state, /* EOF Action embedding. */ at_start_eof, at_all_eof, at_final_eof, at_not_start_eof, at_not_final_eof, at_middle_eof }; /* IMPORTANT: These must follow the same order as the state augs in AugType * since we will be using this to compose AugType. */ enum StateAugType { sat_start = 0, sat_all, sat_final, sat_not_start, sat_not_final, sat_middle }; struct Action; struct PriorDesc; struct RegExpr; struct ReItem; struct ReOrBlock; struct ReOrItem; struct ExplicitMachine; struct InlineItem; struct InlineList; /* Reference to a named state. */ typedef Vector NameRef; typedef Vector NameRefList; typedef Vector NameTargList; /* Structure for storing location of epsilon transitons. */ struct EpsilonLink { EpsilonLink( const InputLoc &loc, NameRef &target ) : loc(loc), target(target) { } InputLoc loc; NameRef target; }; struct Label { Label( const InputLoc &loc, char *data ) : loc(loc), data(data) { } InputLoc loc; char *data; }; /* Structrue represents an action assigned to some FactorWithAug node. The * factor with aug will keep an array of these. */ struct ParserAction { ParserAction( const InputLoc &loc, AugType type, int localErrKey, Action *action ) : loc(loc), type(type), localErrKey(localErrKey), action(action) { } InputLoc loc; AugType type; int localErrKey; Action *action; }; struct ConditionTest { ConditionTest( const InputLoc &loc, AugType type, Action *action, bool sense ) : loc(loc), type(type), action(action), sense(sense) { } InputLoc loc; AugType type; Action *action; bool sense; }; struct Token { char *data; int length; InputLoc loc; void append( const Token &other ); void set( const char *str, int len ); }; char *prepareLitString( const InputLoc &loc, const char *src, long length, long &resLen, bool &caseInsensitive ); /* Store the value and type of a priority augmentation. */ struct PriorityAug { PriorityAug( AugType type, int priorKey, int priorValue ) : type(type), priorKey(priorKey), priorValue(priorValue) { } AugType type; int priorKey; int priorValue; }; /* * A Variable Definition */ struct VarDef { VarDef( const char *name, MachineDef *machineDef ) : name(name), machineDef(machineDef), isExport(false) { } /* Parse tree traversal. */ FsmAp *walk( ParseData *pd ); void makeNameTree( const InputLoc &loc, ParseData *pd ); void resolveNameRefs( ParseData *pd ); const char *name; MachineDef *machineDef; bool isExport; }; /* * LongestMatch * * Wherever possible the item match will execute on the character. If not * possible the item match will execute on a lookahead character and either * hold the current char (if one away) or backup. * * How to handle the problem of backing up over a buffer break? * * Don't want to use pending out transitions for embedding item match because * the role of item match action is different: it may sometimes match on the * final transition, or may match on a lookahead character. * * Don't want to invent a new operator just for this. So just trail action * after machine, this means we can only use literal actions. * * The item action may * * What states of the machine will be final. The item actions that wrap around * on the last character will go straight to the start state. * * Some transitions will be lookahead transitions, they will hold the current * character. Crossing them with regular transitions must be restricted * because it does not make sense. The transition cannot simultaneously hold * and consume the current character. */ struct LongestMatchPart { LongestMatchPart( Join *join, Action *action, InputLoc &semiLoc, int longestMatchId ) : join(join), action(action), semiLoc(semiLoc), longestMatchId(longestMatchId), inLmSelect(false) { } InputLoc getLoc(); Join *join; Action *action; InputLoc semiLoc; Action *setActId; Action *actOnLast; Action *actOnNext; Action *actLagBehind; int longestMatchId; bool inLmSelect; LongestMatch *longestMatch; LongestMatchPart *prev, *next; }; /* Declare a new type so that ptreetypes.h need not include dlist.h. */ struct LmPartList : DList {}; struct LongestMatch { /* Construct with a list of joins */ LongestMatch( const InputLoc &loc, LmPartList *longestMatchList ) : loc(loc), longestMatchList(longestMatchList), name(0), lmSwitchHandlesError(false) { } /* Tree traversal. */ FsmAp *walk( ParseData *pd ); void makeNameTree( ParseData *pd ); void resolveNameRefs( ParseData *pd ); void transferScannerLeavingActions( FsmAp *graph ); void runLongestMatch( ParseData *pd, FsmAp *graph ); Action *newAction( ParseData *pd, const InputLoc &loc, const char *name, InlineList *inlineList ); void makeActions( ParseData *pd ); void findName( ParseData *pd ); void restart( FsmAp *graph, TransAp *trans ); InputLoc loc; LmPartList *longestMatchList; const char *name; Action *lmActSelect; bool lmSwitchHandlesError; LongestMatch *next, *prev; }; /* List of Expressions. */ typedef DList ExprList; struct MachineDef { enum Type { JoinType, LongestMatchType, LengthDefType }; MachineDef( Join *join ) : join(join), longestMatch(0), lengthDef(0), type(JoinType) {} MachineDef( LongestMatch *longestMatch ) : join(0), longestMatch(longestMatch), lengthDef(0), type(LongestMatchType) {} MachineDef( LengthDef *lengthDef ) : join(0), longestMatch(0), lengthDef(lengthDef), type(LengthDefType) {} FsmAp *walk( ParseData *pd ); void makeNameTree( ParseData *pd ); void resolveNameRefs( ParseData *pd ); Join *join; LongestMatch *longestMatch; LengthDef *lengthDef; Type type; }; /* * Join */ struct Join { /* Construct with the first expression. */ Join( Expression *expr ); Join( const InputLoc &loc, Expression *expr ); /* Tree traversal. */ FsmAp *walk( ParseData *pd ); FsmAp *walkJoin( ParseData *pd ); void makeNameTree( ParseData *pd ); void resolveNameRefs( ParseData *pd ); /* Data. */ InputLoc loc; ExprList exprList; }; /* * Expression */ struct Expression { enum Type { OrType, IntersectType, SubtractType, StrongSubtractType, TermType, BuiltinType }; /* Construct with an expression on the left and a term on the right. */ Expression( Expression *expression, Term *term, Type type ) : expression(expression), term(term), type(type), prev(this), next(this) { } /* Construct with only a term. */ Expression( Term *term ) : expression(0), term(term), type(TermType) , prev(this), next(this) { } /* Construct with a builtin type. */ Expression( BuiltinMachine builtin ) : expression(0), term(0), builtin(builtin), type(BuiltinType), prev(this), next(this) { } ~Expression(); /* Tree traversal. */ FsmAp *walk( ParseData *pd, bool lastInSeq = true ); void makeNameTree( ParseData *pd ); void resolveNameRefs( ParseData *pd ); /* Node data. */ Expression *expression; Term *term; BuiltinMachine builtin; Type type; Expression *prev, *next; }; /* * Term */ struct Term { enum Type { ConcatType, RightStartType, RightFinishType, LeftType, FactorWithAugType }; Term( Term *term, FactorWithAug *factorWithAug ) : term(term), factorWithAug(factorWithAug), type(ConcatType) { } Term( Term *term, FactorWithAug *factorWithAug, Type type ) : term(term), factorWithAug(factorWithAug), type(type) { } Term( FactorWithAug *factorWithAug ) : term(0), factorWithAug(factorWithAug), type(FactorWithAugType) { } ~Term(); FsmAp *walk( ParseData *pd, bool lastInSeq = true ); void makeNameTree( ParseData *pd ); void resolveNameRefs( ParseData *pd ); Term *term; FactorWithAug *factorWithAug; Type type; /* Priority descriptor for RightFinish type. */ PriorDesc priorDescs[2]; }; /* Third level of precedence. Augmenting nodes with actions and priorities. */ struct FactorWithAug { FactorWithAug( FactorWithRep *factorWithRep ) : priorDescs(0), factorWithRep(factorWithRep) { } ~FactorWithAug(); /* Tree traversal. */ FsmAp *walk( ParseData *pd ); void makeNameTree( ParseData *pd ); void resolveNameRefs( ParseData *pd ); void assignActions( ParseData *pd, FsmAp *graph, int *actionOrd ); void assignPriorities( FsmAp *graph, int *priorOrd ); void assignConditions( FsmAp *graph ); /* Actions and priorities assigned to the factor node. */ Vector actions; Vector priorityAugs; PriorDesc *priorDescs; Vector