/* * Copyright 2001-2018 Adrian Thurston * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to * deal in the Software without restriction, including without limitation the * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or * sell copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #ifndef _FSMGRAPH_H #define _FSMGRAPH_H #include "config.h" #include "ragel.h" #include "common.h" #include "vector.h" #include "bstset.h" #include "compare.h" #include "avltree.h" #include "dlist.h" #include "dlistmel.h" #include "bstmap.h" #include "sbstmap.h" #include "sbstset.h" #include "sbsttable.h" #include "avlset.h" #include "avlmap.h" #include #include #include #include /* Flags that control merging. */ #define STB_GRAPH1 0x01 #define STB_GRAPH2 0x02 #define STB_BOTH 0x03 #define STB_ISFINAL 0x04 #define STB_ISMARKED 0x08 #define STB_ONLIST 0x10 #define STB_NFA_REP 0x20 using std::ostream; struct TransAp; struct StateAp; struct FsmAp; struct Action; struct FsmLongestMatchPart; struct CondSpace; struct FsmCtx; struct InlineBlock; struct InlineList; struct TooManyStates {}; struct PriorInteraction { PriorInteraction( long long id ) : id(id) {} long long id; }; struct NfaRound { NfaRound( long depth, long groups ) : depth(depth), groups(groups) {} long depth; long groups; }; typedef Vector NfaRoundVect; struct CondCostTooHigh { CondCostTooHigh( long long costId ) : costId(costId) {} long long costId; }; /* State list element for unambiguous access to list element. */ struct FsmListEl { StateAp *prev, *next; }; /* This is the marked index for a state pair. Used in minimization. It keeps * track of whether or not the state pair is marked. */ struct MarkIndex { MarkIndex(int states); ~MarkIndex(); void markPair(int state1, int state2); bool isPairMarked(int state1, int state2); private: int numStates; bool *array; }; /* Transistion Action Element. */ typedef SBstMapEl< int, Action* > ActionTableEl; /* Nodes in the tree that use this action. */ struct NameInst; struct InlineList; typedef Vector NameInstVect; struct ActionParam { ActionParam( std::string name ) : name(name) {} std::string name; }; typedef Vector ActionParamList; typedef Vector ActionArgList; struct CmpActionArgList { static inline int compare( const ActionArgList *list1, const ActionArgList *list2 ) { return CmpTable::compare( *list1, *list2 ); } }; typedef BstMap ActionArgListMap; typedef BstMapEl ActionArgListMapEl; /* Element in list of actions. Contains the string for the code to exectute. */ struct Action : public DListEl, public AvlTreeEl { public: Action( const InputLoc &loc, std::string name, InlineList *inlineList, int condId ) : loc(loc), name(name), inlineList(inlineList), actionId(-1), numTransRefs(0), numToStateRefs(0), numFromStateRefs(0), numEofRefs(0), numCondRefs(0), numNfaRefs(0), anyCall(false), isLmAction(false), condId(condId), costMark(false), costId(0), paramList(0), argListMap(0), substOf(0), argList(0) { } ~Action(); static Action *cons( const InputLoc &loc, Action *substOf, ActionArgList *argList, int condId ) { Action *action = new Action( loc, std::string(), 0, condId ); action->substOf = substOf; action->argList = argList; action->inlineList = substOf->inlineList; return action; } /* Key for action dictionary. */ std::string getKey() const { return name; } /* Data collected during parse. */ InputLoc loc; std::string name; InlineList *inlineList; int actionId; void actionName( ostream &out ) { if ( name.empty() ) out << loc.line << ":" << loc.col; else out << name; } /* Nodes in the name tree where the action is embedded. This serves as the * root for name searches. Since actions can be used multiple times we use * a vector. Name resolver deals with contracts. */ NameInstVect embedRoots; /* Number of references in the final machine. */ int numRefs() { return numTransRefs + numToStateRefs + numFromStateRefs + numEofRefs + numNfaRefs; } int numTransRefs; int numToStateRefs; int numFromStateRefs; int numEofRefs; int numCondRefs; int numNfaRefs; bool anyCall; bool isLmAction; int condId; bool costMark; long long costId; ActionParamList *paramList; ActionArgListMap *argListMap; Action *substOf; ActionArgList *argList; }; struct CmpCondId { static inline int compare( const Action *cond1, const Action *cond2 ) { if ( cond1->condId < cond2->condId ) return -1; else if ( cond1->condId > cond2->condId ) return 1; return 0; } }; /* A list of actions. */ typedef DList ActionList; typedef AvlTree ActionDict; /* Structure for reverse action mapping. */ struct RevActionMapEl { char *name; InputLoc location; }; /* Transition Action Table. */ struct ActionTable : public SBstMap< int, Action*, CmpOrd > { void setAction( int ordering, Action *action ); void setActions( int *orderings, Action **actions, int nActs ); void setActions( const ActionTable &other ); bool hasAction( Action *action ); }; typedef SBstSet< Action*, CmpOrd > ActionSet; typedef CmpSTable< Action*, CmpOrd > CmpActionSet; /* Transistion Action Element. */ typedef SBstMapEl< int, FsmLongestMatchPart* > LmActionTableEl; /* Transition Action Table. */ struct LmActionTable : public SBstMap< int, FsmLongestMatchPart*, CmpOrd > { void setAction( int ordering, FsmLongestMatchPart *action ); void setActions( const LmActionTable &other ); }; /* Compare of a whole action table element (key & value). */ struct CmpActionTableEl { static int compare( const ActionTableEl &action1, const ActionTableEl &action2 ) { if ( action1.key < action2.key ) return -1; else if ( action1.key > action2.key ) return 1; else if ( action1.value < action2.value ) return -1; else if ( action1.value > action2.value ) return 1; return 0; } }; /* Compare for ActionTable. */ typedef CmpSTable< ActionTableEl, CmpActionTableEl > CmpActionTable; /* Compare of a whole lm action table element (key & value). */ struct CmpLmActionTableEl { static int compare( const LmActionTableEl &lmAction1, const LmActionTableEl &lmAction2 ) { if ( lmAction1.key < lmAction2.key ) return -1; else if ( lmAction1.key > lmAction2.key ) return 1; else if ( lmAction1.value < lmAction2.value ) return -1; else if ( lmAction1.value > lmAction2.value ) return 1; return 0; } }; /* Compare for ActionTable. */ typedef CmpSTable< LmActionTableEl, CmpLmActionTableEl > CmpLmActionTable; /* Action table element for error action tables. Adds the encoding of transfer * point. */ struct ErrActionTableEl { ErrActionTableEl( Action *action, int ordering, int transferPoint ) : ordering(ordering), action(action), transferPoint(transferPoint) { } /* Ordering and id of the action embedding. */ int ordering; Action *action; /* Id of point of transfere from Error action table to transtions and * eofActionTable. */ int transferPoint; int getKey() const { return ordering; } }; struct ErrActionTable : public SBstTable< ErrActionTableEl, int, CmpOrd > { void setAction( int ordering, Action *action, int transferPoint ); void setActions( const ErrActionTable &other ); }; /* Compare of an error action table element (key & value). */ struct CmpErrActionTableEl { static int compare( const ErrActionTableEl &action1, const ErrActionTableEl &action2 ) { if ( action1.ordering < action2.ordering ) return -1; else if ( action1.ordering > action2.ordering ) return 1; else if ( action1.action < action2.action ) return -1; else if ( action1.action > action2.action ) return 1; else if ( action1.transferPoint < action2.transferPoint ) return -1; else if ( action1.transferPoint > action2.transferPoint ) return 1; return 0; } }; /* Compare for ErrActionTable. */ typedef CmpSTable< ErrActionTableEl, CmpErrActionTableEl > CmpErrActionTable; /* Descibe a priority, shared among PriorEls. * Has key and whether or not used. */ struct PriorDesc { PriorDesc() : key(0), priority(0), guarded(false), guardId(0), other(0) {} int key; int priority; bool guarded; long long guardId; PriorDesc *other; PriorDesc *prev, *next; }; typedef DList PriorDescList; /* Element in the arrays of priorities for transitions and arrays. Ordering is * unique among instantiations of machines, desc is shared. */ struct PriorEl { PriorEl( int ordering, PriorDesc *desc ) : ordering(ordering), desc(desc) { } int ordering; PriorDesc *desc; }; /* Compare priority elements, which are ordered by the priority descriptor * key. */ struct PriorElCmp { static inline int compare( const PriorEl &pel1, const PriorEl &pel2 ) { if ( pel1.desc->key < pel2.desc->key ) return -1; else if ( pel1.desc->key > pel2.desc->key ) return 1; else return 0; } }; /* Priority Table. */ struct PriorTable : public SBstSet< PriorEl, PriorElCmp > { void setPrior( int ordering, PriorDesc *desc ); void setPriors( const PriorTable &other ); }; /* Compare of prior table elements for distinguising state data. */ struct CmpPriorEl { static inline int compare( const PriorEl &pel1, const PriorEl &pel2 ) { if ( pel1.desc < pel2.desc ) return -1; else if ( pel1.desc > pel2.desc ) return 1; else if ( pel1.ordering < pel2.ordering ) return -1; else if ( pel1.ordering > pel2.ordering ) return 1; return 0; } }; /* Compare of PriorTable distinguising state data. Using a compare of the * pointers is a little more strict than it needs be. It requires that * prioritiy tables have the exact same set of priority assignment operators * (from the input lang) to be considered equal. * * Really only key-value pairs need be tested and ordering be merged. However * this would require that in the fuseing of states, priority descriptors be * chosen for the new fused state based on priority. Since the out transition * lists and ranges aren't necessarily going to line up, this is more work for * little gain. Final compression resets all priorities first, so this would * only be useful for compression at every operator, which is only an * undocumented test feature. */ typedef CmpSTable CmpPriorTable; /* Plain action list that imposes no ordering. */ typedef Vector TransFuncList; /* Comparison for TransFuncList. */ typedef CmpTable< int, CmpOrd > TransFuncListCompare; /* In transition list. Like DList except only has head pointers, which is all * that is required. Insertion and deletion is handled by the graph. This class * provides the iterator of a single list. */ template struct InList { InList() : head(0) { } Element *head; struct Iter { /* Default construct. */ Iter() : ptr(0) { } /* Construct, assign from a list. */ Iter( const InList &il ) : ptr(il.head) { } Iter &operator=( const InList &dl ) { ptr = dl.head; return *this; } /* At the end */ bool lte() const { return ptr != 0; } bool end() const { return ptr == 0; } /* At the first, last element. */ bool first() const { return ptr && ptr->ilprev == 0; } bool last() const { return ptr && ptr->ilnext == 0; } /* Cast, dereference, arrow ops. */ operator Element*() const { return ptr; } Element &operator *() const { return *ptr; } Element *operator->() const { return ptr; } /* Increment, decrement. */ inline void operator++(int) { ptr = ptr->ilnext; } inline void operator--(int) { ptr = ptr->ilprev; } /* The iterator is simply a pointer. */ Element *ptr; }; }; struct TransData { TransData() : fromState(0), toState(0) {} TransData( const TransData &other ) : fromState(0), toState(0), actionTable(other.actionTable), priorTable(other.priorTable), lmActionTable(other.lmActionTable) { } StateAp *fromState; StateAp *toState; /* The function table and priority for the transition. */ ActionTable actionTable; PriorTable priorTable; LmActionTable lmActionTable; }; /* The element for the sub-list within a TransAp. These specify the transitions * and are keyed by the condition expressions. */ struct CondAp : public TransData { CondAp( TransAp *transAp ) : TransData(), transAp(transAp), key(0) {} CondAp( const CondAp &other, TransAp *transAp ) : TransData( other ), transAp(transAp), key(other.key) { } /* Owning transition. */ TransAp *transAp; CondKey key; /* Pointers for outlist. */ CondAp *prev, *next; /* Pointers for in-list. */ CondAp *ilprev, *ilnext; }; typedef DList CondList; struct TransCondAp; struct TransDataAp; /* Transition class that implements actions and priorities. */ struct TransAp { TransAp() : condSpace(0) {} TransAp( const TransAp &other ) : lowKey(other.lowKey), highKey(other.highKey), condSpace(other.condSpace) { } ~TransAp() { // delete condList.head; // condList.abandon(); } bool plain() const { return condSpace == 0; } TransCondAp *tcap(); TransDataAp *tdap(); long condFullSize(); Key lowKey, highKey; /* Which conditions are tested on this range. */ CondSpace *condSpace; /* Pointers for outlist. */ TransAp *prev, *next; }; struct TransCondAp : public TransAp { TransCondAp() : TransAp() {} TransCondAp( const TransCondAp &other ) : TransAp( other ), condList() {} ~TransCondAp() { condList.empty(); } /* Cond trans list. Sorted by key value. */ CondList condList; }; struct TransDataAp : public TransAp, public TransData { TransDataAp() : TransAp(), TransData() {} TransDataAp( const TransDataAp &other ) : TransAp( other ), TransData( other ) {} /* Pointers for in-list. */ TransDataAp *ilprev, *ilnext; }; inline TransCondAp *TransAp::tcap() { return this->condSpace != 0 ? static_cast( this ) : 0; } inline TransDataAp *TransAp::tdap() { return this->condSpace == 0 ? static_cast( this ) : 0; } typedef DList TransList; /* Need the base vector type for accessing underlying remove function. */ typedef BstSet CondKeySet; typedef Vector CondKeyVect; /* State class that implements actions and priorities. */ struct NfaActions { NfaActions( Action *push, Action *pop, int order ) : push(push), pop(pop), order(order) {} Action *push; Action *pop; int order; ActionTable pushTable; ActionTable popTable; }; struct NfaTrans { NfaTrans( int order ) : fromState(0), toState(0), order(order), popCondSpace(0) { } NfaTrans( const ActionTable &pushTable, const ActionTable &restoreTable, const ActionTable &popFrom, CondSpace *popCondSpace, const CondKeySet popCondKeys, const ActionTable &popAction, const ActionTable &popTable, int order ) : fromState(0), toState(0), order(order), pushTable(pushTable), restoreTable(restoreTable), popFrom(popFrom), popCondSpace(popCondSpace), popCondKeys(popCondKeys), popAction(popAction), popTest(popTable) {} NfaTrans( const NfaTrans &other ) : fromState(0), toState(0), order(other.order), pushTable(other.pushTable), restoreTable(other.restoreTable), popCondSpace(other.popCondSpace), popCondKeys(other.popCondKeys), popAction(other.popAction), popTest(other.popTest), priorTable(other.priorTable) {} StateAp *fromState; StateAp *toState; int order; ActionTable pushTable; ActionTable restoreTable; /* * 1. Conditions transferred (always tested first) * 2. Actions transferred * 3. Pop actions created during epsilon draw. */ ActionTable popFrom; CondSpace *popCondSpace; CondKeySet popCondKeys; ActionTable popAction; ActionTable popTest; PriorTable priorTable; NfaTrans *prev, *next; NfaTrans *ilprev, *ilnext; }; typedef BstMap NfaStateMap; typedef BstMapEl NfaStateMapEl; typedef DList NfaTransList; typedef InList NfaInList; struct CmpNfaTrans { static int compare( NfaTrans *t1, NfaTrans *t2 ) { /* This comparison is too strong. (okay to use something too strong -- * we just don't find minimal). * */ if ( t1->toState < t2->toState ) return -1; else if ( t1->toState > t2->toState ) return 1; else if ( t1->order < t2->order ) return -1; else if ( t1->order > t2->order ) return 1; else { int r = CmpActionTable::compare( t1->pushTable, t2->pushTable ); if ( r != 0 ) return r; r = CmpActionTable::compare( t1->restoreTable, t2->restoreTable ); if ( r != 0 ) return r; if ( t1->popCondSpace < t2->popCondSpace ) return -1; else if ( t1->popCondSpace > t2->popCondSpace ) return 1; r = CmpTable::compare( t1->popCondKeys, t2->popCondKeys ); if ( r != 0 ) return r; r = CmpActionTable::compare( t1->popTest, t2->popTest ); if ( r != 0 ) return r; r = CmpActionTable::compare( t1->popAction, t2->popAction ); if ( r != 0 ) return r; } return 0; } }; struct CmpNfaTransList { static int compare( const NfaTransList &l1, const NfaTransList &l2 ) { if ( l1.length() < l2.length() ) return -1; else if ( l1.length() > l2.length() ) return 1; else { NfaTransList::Iter i1 = l1; NfaTransList::Iter i2 = l2; while ( i1.lte() ) { int r = CmpNfaTrans::compare( i1, i2 ); if ( r != 0 ) return r; i1++, i2++; } } return 0; } }; struct CmpNfaStateMapEl { static int compare( const NfaStateMapEl &el1, const NfaStateMapEl &el2 ) { if ( el1.key < el2.key ) return -1; else if ( el1.key > el2.key ) return 1; else if ( el1.value.push < el2.value.push ) return -1; else if ( el1.value.push > el2.value.push ) return 1; else if ( el1.value.pop < el2.value.pop ) return -1; else if ( el1.value.pop > el2.value.pop ) return 1; else if ( el1.value.order < el2.value.order ) return -1; else if ( el1.value.order > el2.value.order ) return 1; return 0; } }; /* Set of states, list of states. */ typedef BstSet StateSet; typedef DList StateList; /* A element in a state dict. */ struct StateDictEl : public AvlTreeEl { StateDictEl(const StateSet &stateSet) : stateSet(stateSet) { } const StateSet &getKey() { return stateSet; } StateSet stateSet; StateAp *targState; }; /* Dictionary mapping a set of states to a target state. */ typedef AvlTree< StateDictEl, StateSet, CmpTable > StateDict; struct TransEl { /* Constructors. */ TransEl() { } TransEl( Key lowKey, Key highKey ) : lowKey(lowKey), highKey(highKey) { } TransEl( Key lowKey, Key highKey, TransAp *value ) : lowKey(lowKey), highKey(highKey), value(value) { } Key lowKey, highKey; TransAp *value; }; struct CmpKey { CmpKey() : keyOps(0) {} KeyOps *keyOps; int compare( const Key key1, const Key key2 ) { if ( keyOps->lt( key1, key2 ) ) return -1; else if ( keyOps->gt( key1, key2 ) ) return 1; else return 0; } }; /* Vector based set of key items. */ struct KeySet : public BstSet { KeySet( KeyOps *keyOps ) { CmpKey::keyOps = keyOps; } }; struct MinPartition { MinPartition() : active(false) { } StateList list; bool active; MinPartition *prev, *next; }; /* Epsilon transition stored in a state. Specifies the target */ typedef Vector EpsilonTrans; /* List of states that are to be drawn into this. */ struct EptVectEl { EptVectEl( StateAp *targ, bool leaving ) : targ(targ), leaving(leaving) { } StateAp *targ; bool leaving; }; typedef Vector EptVect; /* Set of entry ids that go into this state. */ typedef BstSet EntryIdSet; /* Set of longest match items that may be active in a given state. */ typedef BstSet LmItemSet; /* A Conditions which is to be * transfered on pending out transitions. */ struct OutCond { OutCond( Action *action, bool sense ) : action(action), sense(sense) {} Action *action; bool sense; }; struct CmpOutCond { static int compare( const OutCond &outCond1, const OutCond &outCond2 ) { if ( outCond1.action < outCond2.action ) return -1; else if ( outCond1.action > outCond2.action ) return 1; else if ( outCond1.sense < outCond2.sense ) return -1; else if ( outCond1.sense > outCond2.sense ) return 1; return 0; } }; /* Conditions. */ typedef BstSet< Action*, CmpCondId > CondSet; typedef CmpTable< Action*, CmpCondId > CmpCondSet; struct CondSpace : public AvlTreeEl { CondSpace( const CondSet &condSet ) : condSet(condSet) {} const CondSet &getKey() { return condSet; } long fullSize() { return ( 1 << condSet.length() ); } CondSet condSet; long condSpaceId; }; typedef Vector CondSpaceVect; typedef AvlTree CondSpaceMap; typedef Vector LongVect; struct CondData { CondSpaceMap condSpaceMap; ~CondData() { condSpaceMap.empty(); } }; struct FsmGbl { FsmGbl() : printStatistics(false), errorCount(0), displayPrintables(false), stringTables(false), checkPriorInteraction(0), wantDupsRemoved(true), minimizeLevel(MinimizePartition2), minimizeOpt(MinimizeMostOps) {} bool printStatistics; /* * Error reporting. */ /* PROGNAME: txt */ std::ostream &error(); /* file:loc: txt */ std::ostream &error( const InputLoc &loc ); /* txt */ std::ostream &error_plain(); /* file:loc: warning: txt */ std::ostream &warning( const InputLoc &loc ); /* Stats reporting. */ std::ostream &stats(); /* Requested info. */ std::ostream &info(); std::stringstream libcerr; std::stringstream libcout; int errorCount; void abortCompile( int code ); bool displayPrintables; bool stringTables; bool checkPriorInteraction; bool wantDupsRemoved; MinimizeLevel minimizeLevel; MinimizeOpt minimizeOpt; }; /* All FSM operations must be between machines that have been created using the * same context object. */ struct FsmCtx { FsmCtx( FsmGbl *fsmGbl ); ~FsmCtx(); KeyOps *keyOps; CondData *condData; MinimizeLevel minimizeLevel; MinimizeOpt minimizeOpt; static const int STATE_UNLIMITED = 0; long stateLimit; bool printStatistics; bool checkPriorInteraction; bool unionOp; long condsCheckDepth; /* Counting the action and priority ordering. */ int curActionOrd; int curPriorOrd; int nextPriorKey; int nextCondId; PriorDesc *allocPriorDesc() { PriorDesc *priorDesc = new PriorDesc(); priorDescList.append( priorDesc ); return priorDesc; } PriorDescList priorDescList; FsmGbl *fsmGbl; /* List of actions. Will be pasted into a switch statement. */ ActionList actionList; ExportList exportList; bool generatingSectionSubset; bool lmRequiresErrorState; /* Make name ids to name inst pointers. */ NameInst **nameIndex; /* Element type and get key expression. */ InlineList *getKeyExpr; InlineList *accessExpr; /* Stack management */ InlineBlock *prePushExpr; InlineBlock *postPopExpr; /* Nfa stack managment. */ InlineBlock *nfaPrePushExpr; InlineBlock *nfaPostPopExpr; /* Overriding variables. */ InlineList *pExpr; InlineList *peExpr; InlineList *eofExpr; InlineList *csExpr; InlineList *topExpr; InlineList *stackExpr; InlineList *actExpr; InlineList *tokstartExpr; InlineList *tokendExpr; InlineList *dataExpr; Action *newNfaWrapAction( const char *name, InlineList *inlineList, Action *optWrap ); void createNfaActions( FsmAp *fsm ); /* Checking the contents of actions. */ void checkAction( Action *action ); void checkInlineList( Action *act, InlineList *inlineList ); void analyzeAction( Action *action, InlineList *inlineList ); void analyzeGraph( FsmAp *graph ); void finalizeInstance( FsmAp *graph ); void prepareReduction( FsmAp *sectionGraph ); }; typedef InList CondInList; typedef InList TransInList; struct NfaStateEl { StateAp *prev, *next; }; typedef DListMel NfaStateList; struct StateAp : public NfaStateEl { StateAp(); StateAp(const StateAp &other); ~StateAp(); /* Is the state final? */ bool isFinState() { return stateBits & STB_ISFINAL; } /* Out transition list and the pointer for the default out trans. */ TransList outList; /* In transition Lists. */ TransInList inTrans; CondInList inCond; /* Set only during scanner construction when actions are added. NFA to DFA * code can ignore this. */ StateAp *eofTarget; /* Entry points into the state. */ EntryIdSet entryIds; /* Epsilon transitions. */ EpsilonTrans epsilonTrans; /* Number of in transitions from states other than ourselves. */ int foreignInTrans; /* Temporary data for various algorithms. */ union { /* When duplicating the fsm we need to map each * state to the new state representing it. */ StateAp *stateMap; /* When minimizing machines by partitioning, this maps to the group * the state is in. */ MinPartition *partition; /* Identification for printing and stable minimization. */ int stateNum; } alg; /* Data used in epsilon operation, maybe fit into alg? */ StateAp *isolatedShadow; int owningGraph; /* A pointer to a dict element that contains the set of states this state * represents. This cannot go into alg, because alg.next is used during * the merging process. */ StateDictEl *stateDictEl; StateSet *stateDictIn; NfaTransList *nfaOut; NfaInList *nfaIn; /* When drawing epsilon transitions, holds the list of states to merge * with. */ EptVect *eptVect; /* Bits controlling the behaviour of the state during collapsing to dfa. */ int stateBits; /* State list elements. */ StateAp *next, *prev; /* * Priority and Action data. */ /* Out priorities transfered to out transitions. */ PriorTable outPriorTable; /* The following two action tables are distinguished by the fact that when * toState actions are executed immediatly after transition actions of * incoming transitions and the current character will be the same as the * one available then. The fromState actions are executed immediately * before the transition actions of outgoing transitions and the current * character is same as the one available then. */ /* Actions to execute upon entering into a state. */ ActionTable toStateActionTable; /* Actions to execute when going from the state to the transition. */ ActionTable fromStateActionTable; /* Actions to add to any future transitions that leave via this state. */ ActionTable outActionTable; /* Conditions to add to any future transiions that leave via this state. */ CondSpace *outCondSpace; CondKeySet outCondKeys; /* Error action tables. */ ErrActionTable errActionTable; /* Actions to execute on eof. */ ActionTable eofActionTable; /* Set of longest match items that may be active in this state. */ LmItemSet lmItemSet; PriorTable guardedInTable; /* Used by the NFA-based scanner to track the origin of final states. We * only use it in cases where just one match is possible, starting with the * final state duplicates that are drawn using NFA transitions. */ LmItemSet lmNfaParts; }; /* Return and re-entry for the co-routine iterators. This should ALWAYS be * used inside of a block. */ #define CO_RETURN(label) \ itState = label; \ return; \ entry##label: {} /* Return and re-entry for the co-routine iterators. This should ALWAYS be * used inside of a block. */ #define CO_RETURN2(label, uState) \ itState = label; \ userState = uState; \ return; \ entry##label: {} template struct PiList { PiList() : ptr(0) {} PiList( const DList &l ) : ptr(l.head) {} PiList( Item *ptr ) : ptr(ptr) {} operator Item *() const { return ptr; } Item *operator->() const { return ptr; } bool end() { return ptr == 0; } void clear() { ptr = 0; } PiList next() { return PiList( ptr->next ); } Item *ptr; }; template struct PiSingle { PiSingle() : ptr(0) {} PiSingle( Item *ptr ) : ptr(ptr) {} operator Item *() const { return ptr; } Item *operator->() const { return ptr; } bool end() { return ptr == 0; } void clear() { ptr = 0; } /* Next is always nil. */ PiSingle next() { return PiSingle( 0 ); } Item *ptr; }; template struct PiVector { PiVector() : ptr(0), length(0) {} PiVector( const Vector &v ) : ptr(v.data), length(v.length()) {} PiVector( Item *ptr, long length ) : ptr(ptr), length(length) {} operator Item *() const { return ptr; } Item *operator->() const { return ptr; } bool end() { return length == 0; } void clear() { ptr = 0; length = 0; } PiVector next() { return PiVector( ptr + 1, length - 1 ); } Item *ptr; long length; }; template struct ValPairIter { /* Encodes the states that are meaningful to the of caller the iterator. */ enum UserState { RangeInS1, RangeInS2, RangeOverlap, }; /* Encodes the different states that an fsm iterator can be in. */ enum IterState { Begin, ConsumeS1Range, ConsumeS2Range, OnlyInS1Range, OnlyInS2Range, ExactOverlap, End }; ValPairIter( const ItemIter1 &list1, const ItemIter2 &list2 ); template struct NextTrans { CondKey key; ItemIter trans; ItemIter next; NextTrans() { key = 0; } void load() { if ( trans.end() ) next.clear(); else { next = trans->next; key = trans->key; } } void set( const ItemIter &t ) { trans = t; load(); } void increment() { trans = next; load(); } }; /* Query iterator. */ bool lte() { return itState != End; } bool end() { return itState == End; } void operator++(int) { findNext(); } void operator++() { findNext(); } /* Iterator state. */ ItemIter1 list1; ItemIter2 list2; IterState itState; UserState userState; NextTrans s1Tel; NextTrans s2Tel; Key bottomLow, bottomHigh; ItemIter1 *bottomTrans1; ItemIter2 *bottomTrans2; private: void findNext(); }; /* Init the iterator by advancing to the first item. */ template ValPairIter:: ValPairIter( const ItemIter1 &list1, const ItemIter2 &list2 ) : list1(list1), list2(list2), itState(Begin) { findNext(); } /* Advance to the next transition. When returns, trans points to the next * transition, unless there are no more, in which case end() returns true. */ template void ValPairIter::findNext() { /* Jump into the iterator routine base on the iterator state. */ switch ( itState ) { case Begin: goto entryBegin; case ConsumeS1Range: goto entryConsumeS1Range; case ConsumeS2Range: goto entryConsumeS2Range; case OnlyInS1Range: goto entryOnlyInS1Range; case OnlyInS2Range: goto entryOnlyInS2Range; case ExactOverlap: goto entryExactOverlap; case End: goto entryEnd; } entryBegin: /* Set up the next structs at the head of the transition lists. */ s1Tel.set( list1 ); s2Tel.set( list2 ); /* Concurrently scan both out ranges. */ while ( true ) { if ( s1Tel.trans.end() ) { /* We are at the end of state1's ranges. Process the rest of * state2's ranges. */ while ( !s2Tel.trans.end() ) { /* Range is only in s2. */ CO_RETURN2( ConsumeS2Range, RangeInS2 ); s2Tel.increment(); } break; } else if ( s2Tel.trans.end() ) { /* We are at the end of state2's ranges. Process the rest of * state1's ranges. */ while ( !s1Tel.trans.end() ) { /* Range is only in s1. */ CO_RETURN2( ConsumeS1Range, RangeInS1 ); s1Tel.increment(); } break; } /* Both state1's and state2's transition elements are good. * The signiture of no overlap is a back key being in front of a * front key. */ else if ( s1Tel.key < s2Tel.key ) { /* A range exists in state1 that does not overlap with state2. */ CO_RETURN2( OnlyInS1Range, RangeInS1 ); s1Tel.increment(); } else if ( s2Tel.key < s1Tel.key ) { /* A range exists in state2 that does not overlap with state1. */ CO_RETURN2( OnlyInS2Range, RangeInS2 ); s2Tel.increment(); } else { /* There is an exact overlap. */ CO_RETURN2( ExactOverlap, RangeOverlap ); s1Tel.increment(); s2Tel.increment(); } } /* Done, go into end state. */ CO_RETURN( End ); } template struct RangePairIter { /* Encodes the states that are meaningful to the of caller the iterator. */ enum UserState { RangeInS1, RangeInS2, RangeOverlap, BreakS1, BreakS2 }; /* Encodes the different states that an fsm iterator can be in. */ enum IterState { Begin, ConsumeS1Range, ConsumeS2Range, OnlyInS1Range, OnlyInS2Range, S1SticksOut, S1SticksOutBreak, S2SticksOut, S2SticksOutBreak, S1DragsBehind, S1DragsBehindBreak, S2DragsBehind, S2DragsBehindBreak, ExactOverlap, End }; RangePairIter( FsmCtx *ctx, const ItemIter1 &list1, const ItemIter2 &list2 ); template struct NextTrans { Key lowKey, highKey; ItemIter trans; ItemIter next; NextTrans() { highKey = 0; lowKey = 0; } void load() { if ( trans.end() ) next.clear(); else { next = trans.next(); lowKey = trans->lowKey; highKey = trans->highKey; } } void set( const ItemIter &t ) { trans = t; load(); } void increment() { trans = next; load(); } }; /* Query iterator. */ bool lte() { return itState != End; } bool end() { return itState == End; } void operator++(int) { findNext(); } void operator++() { findNext(); } FsmCtx *ctx; /* Iterator state. */ ItemIter1 list1; ItemIter2 list2; IterState itState; UserState userState; NextTrans s1Tel; NextTrans s2Tel; Key bottomLow, bottomHigh; ItemIter1 bottomTrans1; ItemIter2 bottomTrans2; private: void findNext(); }; /* Init the iterator by advancing to the first item. */ template RangePairIter:: RangePairIter( FsmCtx *ctx, const ItemIter1 &list1, const ItemIter2 &list2 ) : ctx(ctx), list1(list1), list2(list2), itState(Begin) { bottomLow = 0; bottomHigh = 0; findNext(); } /* Advance to the next transition. When returns, trans points to the next * transition, unless there are no more, in which case end() returns true. */ template void RangePairIter::findNext() { /* Jump into the iterator routine base on the iterator state. */ switch ( itState ) { case Begin: goto entryBegin; case ConsumeS1Range: goto entryConsumeS1Range; case ConsumeS2Range: goto entryConsumeS2Range; case OnlyInS1Range: goto entryOnlyInS1Range; case OnlyInS2Range: goto entryOnlyInS2Range; case S1SticksOut: goto entryS1SticksOut; case S1SticksOutBreak: goto entryS1SticksOutBreak; case S2SticksOut: goto entryS2SticksOut; case S2SticksOutBreak: goto entryS2SticksOutBreak; case S1DragsBehind: goto entryS1DragsBehind; case S1DragsBehindBreak: goto entryS1DragsBehindBreak; case S2DragsBehind: goto entryS2DragsBehind; case S2DragsBehindBreak: goto entryS2DragsBehindBreak; case ExactOverlap: goto entryExactOverlap; case End: goto entryEnd; } entryBegin: /* Set up the next structs at the head of the transition lists. */ s1Tel.set( list1 ); s2Tel.set( list2 ); /* Concurrently scan both out ranges. */ while ( true ) { if ( s1Tel.trans.end() ) { /* We are at the end of state1's ranges. Process the rest of * state2's ranges. */ while ( !s2Tel.trans.end() ) { /* Range is only in s2. */ CO_RETURN2( ConsumeS2Range, RangeInS2 ); s2Tel.increment(); } break; } else if ( s2Tel.trans.end() ) { /* We are at the end of state2's ranges. Process the rest of * state1's ranges. */ while ( !s1Tel.trans.end() ) { /* Range is only in s1. */ CO_RETURN2( ConsumeS1Range, RangeInS1 ); s1Tel.increment(); } break; } /* Both state1's and state2's transition elements are good. * The signiture of no overlap is a back key being in front of a * front key. */ else if ( ctx->keyOps->lt( s1Tel.highKey, s2Tel.lowKey ) ) { /* A range exists in state1 that does not overlap with state2. */ CO_RETURN2( OnlyInS1Range, RangeInS1 ); s1Tel.increment(); } else if ( ctx->keyOps->lt( s2Tel.highKey, s1Tel.lowKey ) ) { /* A range exists in state2 that does not overlap with state1. */ CO_RETURN2( OnlyInS2Range, RangeInS2 ); s2Tel.increment(); } /* There is overlap, must mix the ranges in some way. */ else if ( ctx->keyOps->lt( s1Tel.lowKey, s2Tel.lowKey ) ) { /* Range from state1 sticks out front. Must break it into * non-overlaping and overlaping segments. */ bottomLow = s2Tel.lowKey; bottomHigh = s1Tel.highKey; s1Tel.highKey = s2Tel.lowKey; ctx->keyOps->decrement( s1Tel.highKey ); bottomTrans1 = s1Tel.trans; /* Notify the caller that we are breaking s1. This gives them a * chance to duplicate s1Tel[0,1].value. */ CO_RETURN2( S1SticksOutBreak, BreakS1 ); /* Broken off range is only in s1. */ CO_RETURN2( S1SticksOut, RangeInS1 ); /* Advance over the part sticking out front. */ s1Tel.lowKey = bottomLow; s1Tel.highKey = bottomHigh; s1Tel.trans = bottomTrans1; } else if ( ctx->keyOps->lt( s2Tel.lowKey, s1Tel.lowKey ) ) { /* Range from state2 sticks out front. Must break it into * non-overlaping and overlaping segments. */ bottomLow = s1Tel.lowKey; bottomHigh = s2Tel.highKey; s2Tel.highKey = s1Tel.lowKey; ctx->keyOps->decrement( s2Tel.highKey ); bottomTrans2 = s2Tel.trans; /* Notify the caller that we are breaking s2. This gives them a * chance to duplicate s2Tel[0,1].value. */ CO_RETURN2( S2SticksOutBreak, BreakS2 ); /* Broken off range is only in s2. */ CO_RETURN2( S2SticksOut, RangeInS2 ); /* Advance over the part sticking out front. */ s2Tel.lowKey = bottomLow; s2Tel.highKey = bottomHigh; s2Tel.trans = bottomTrans2; } /* Low ends are even. Are the high ends even? */ else if ( ctx->keyOps->lt( s1Tel.highKey, s2Tel.highKey ) ) { /* Range from state2 goes longer than the range from state1. We * must break the range from state2 into an evenly overlaping * segment. */ bottomLow = s1Tel.highKey; ctx->keyOps->increment( bottomLow ); bottomHigh = s2Tel.highKey; s2Tel.highKey = s1Tel.highKey; bottomTrans2 = s2Tel.trans; /* Notify the caller that we are breaking s2. This gives them a * chance to duplicate s2Tel[0,1].value. */ CO_RETURN2( S2DragsBehindBreak, BreakS2 ); /* Breaking s2 produces exact overlap. */ CO_RETURN2( S2DragsBehind, RangeOverlap ); /* Advance over the front we just broke off of range 2. */ s2Tel.lowKey = bottomLow; s2Tel.highKey = bottomHigh; s2Tel.trans = bottomTrans2; /* Advance over the entire s1Tel. We have consumed it. */ s1Tel.increment(); } else if ( ctx->keyOps->lt( s2Tel.highKey, s1Tel.highKey ) ) { /* Range from state1 goes longer than the range from state2. We * must break the range from state1 into an evenly overlaping * segment. */ bottomLow = s2Tel.highKey; ctx->keyOps->increment( bottomLow ); bottomHigh = s1Tel.highKey; s1Tel.highKey = s2Tel.highKey; bottomTrans1 = s1Tel.trans; /* Notify the caller that we are breaking s1. This gives them a * chance to duplicate s2Tel[0,1].value. */ CO_RETURN2( S1DragsBehindBreak, BreakS1 ); /* Breaking s1 produces exact overlap. */ CO_RETURN2( S1DragsBehind, RangeOverlap ); /* Advance over the front we just broke off of range 1. */ s1Tel.lowKey = bottomLow; s1Tel.highKey = bottomHigh; s1Tel.trans = bottomTrans1; /* Advance over the entire s2Tel. We have consumed it. */ s2Tel.increment(); } else { /* There is an exact overlap. */ CO_RETURN2( ExactOverlap, RangeOverlap ); s1Tel.increment(); s2Tel.increment(); } } /* Done, go into end state. */ CO_RETURN( End ); } /* Compare lists of epsilon transitions. Entries are name ids of targets. */ typedef CmpTable< int, CmpOrd > CmpEpsilonTrans; /* Compare class for the Approximate minimization. */ class ApproxCompare { public: ApproxCompare( FsmCtx *ctx = 0 ) : ctx(ctx) { } int compare( const StateAp *pState1, const StateAp *pState2 ); FsmCtx *ctx; }; /* Compare class for the initial partitioning of a partition minimization. */ class InitPartitionCompare { public: InitPartitionCompare( FsmCtx *ctx = 0 ) : ctx(ctx) { } int compare( const StateAp *pState1, const StateAp *pState2 ); FsmCtx *ctx; }; /* Compare class for the regular partitioning of a partition minimization. */ class PartitionCompare { public: PartitionCompare( FsmCtx *ctx = 0 ) : ctx(ctx) { } int compare( const StateAp *pState1, const StateAp *pState2 ); FsmCtx *ctx; }; /* Compare class for a minimization that marks pairs. Provides the shouldMark * routine. */ class MarkCompare { public: MarkCompare( FsmCtx *ctx ) : ctx(ctx) { } bool shouldMark( MarkIndex &markIndex, const StateAp *pState1, const StateAp *pState2 ); FsmCtx *ctx; }; /* List of partitions. */ typedef DList< MinPartition > PartitionList; /* List of transtions out of a state. */ typedef Vector TransListVect; /* Entry point map used for keeping track of entry points in a machine. */ typedef BstSet< int > EntryIdSet; typedef BstMapEl< int, StateAp* > EntryMapEl; typedef BstMap< int, StateAp* > EntryMap; typedef Vector EntryMapBase; struct BreadthCost { BreadthCost( std::string name, double cost ) : name(name), cost(cost) {} std::string name; double cost; }; struct BreadthResult { BreadthResult( double start ) : start(start) {} double start; Vector costs; }; /* Result of an operation. */ struct FsmRes { struct Fsm {}; struct TooManyStates {}; struct PriorInteraction {}; struct CondCostTooHigh {}; struct InternalError {}; enum Type { TypeFsm = 1, TypeTooManyStates, TypePriorInteraction, TypeCondCostTooHigh, TypeInternalError, }; FsmRes( const Fsm &, FsmAp *fsm ) : fsm(fsm), type(TypeFsm) {} FsmRes( const TooManyStates & ) : fsm(0), type(TypeTooManyStates) {} FsmRes( const PriorInteraction &, long long guardId ) : fsm(0), type(TypePriorInteraction), id(guardId) {} FsmRes( const CondCostTooHigh &, long long costId ) : fsm(0), type(TypeCondCostTooHigh), id(costId) {} FsmRes( const InternalError & ) : fsm(0), type(TypeInternalError) {} bool success() { return fsm != 0; } operator FsmAp*() { return type == TypeFsm ? fsm : 0; } FsmAp *operator->() { return type == TypeFsm ? fsm : 0; } FsmAp *fsm; Type type; long long id; }; /* Graph class that implements actions and priorities. */ struct FsmAp { /* Constructors/Destructors. */ FsmAp( FsmCtx *ctx ); FsmAp( const FsmAp &graph ); ~FsmAp(); FsmCtx *ctx; bool priorInteraction; int guardId; /* The list of states. */ StateList stateList; StateList misfitList; NfaStateList nfaList; StateDict stateDict; /* The map of entry points. */ EntryMap entryPoints; /* The start state. */ StateAp *startState; /* Error state, possibly created only when the final machine has been * created and the XML machine is about to be written. No transitions * point to this state. */ StateAp *errState; /* The set of final states. */ StateSet finStateSet; /* Misfit Accounting. Are misfits put on a separate list. */ bool misfitAccounting; /* * Transition actions and priorities. */ /* Set priorities on transtions. */ void startFsmPrior( int ordering, PriorDesc *prior ); void allTransPrior( int ordering, PriorDesc *prior ); void finishFsmPrior( int ordering, PriorDesc *prior ); void leaveFsmPrior( int ordering, PriorDesc *prior ); /* Action setting support. */ void transferOutActions( StateAp *state ); void transferErrorActions( StateAp *state, int transferPoint ); void setErrorActions( StateAp *state, const ActionTable &other ); void setErrorAction( StateAp *state, int ordering, Action *action ); /* Fill all spaces in a transition list with an error transition. */ void fillGaps( StateAp *state ); /* Similar to setErrorAction, instead gives a state to go to on error. */ void setErrorTarget( StateAp *state, StateAp *target, int *orderings, Action **actions, int nActs ); /* Set actions to execute. */ void startFsmAction( int ordering, Action *action ); void allTransAction( int ordering, Action *action ); void finishFsmAction( int ordering, Action *action ); void leaveFsmAction( int ordering, Action *action ); void longMatchAction( int ordering, FsmLongestMatchPart *lmPart ); /* Set conditions. */ CondSpace *addCondSpace( const CondSet &condSet ); void convertToCondAp( StateAp *state ); private: /* Can generate states. */ void doEmbedCondition( StateAp *state, const CondSet &set, const CondKeySet &vals ); public: static FsmRes embedCondition( FsmAp *fsm, StateAp *state, const CondSet &set, const CondKeySet &vals ); FsmRes startFsmCondition( Action *condAction, bool sense ); void allTransCondition( Action *condAction, bool sense ); void leaveFsmCondition( Action *condAction, bool sense ); /* Set error actions to execute. */ void startErrorAction( int ordering, Action *action, int transferPoint ); void allErrorAction( int ordering, Action *action, int transferPoint ); void finalErrorAction( int ordering, Action *action, int transferPoint ); void notStartErrorAction( int ordering, Action *action, int transferPoint ); void notFinalErrorAction( int ordering, Action *action, int transferPoint ); void middleErrorAction( int ordering, Action *action, int transferPoint ); /* Set EOF actions. */ void startEOFAction( int ordering, Action *action ); void allEOFAction( int ordering, Action *action ); void finalEOFAction( int ordering, Action *action ); void notStartEOFAction( int ordering, Action *action ); void notFinalEOFAction( int ordering, Action *action ); void middleEOFAction( int ordering, Action *action ); /* Set To State actions. */ void startToStateAction( int ordering, Action *action ); void allToStateAction( int ordering, Action *action ); void finalToStateAction( int ordering, Action *action ); void notStartToStateAction( int ordering, Action *action ); void notFinalToStateAction( int ordering, Action *action ); void middleToStateAction( int ordering, Action *action ); /* Set From State actions. */ void startFromStateAction( int ordering, Action *action ); void allFromStateAction( int ordering, Action *action ); void finalFromStateAction( int ordering, Action *action ); void notStartFromStateAction( int ordering, Action *action ); void notFinalFromStateAction( int ordering, Action *action ); void middleFromStateAction( int ordering, Action *action ); /* Shift the action ordering of the start transitions to start at * fromOrder and increase in units of 1. Useful before kleene star * operation. */ int shiftStartActionOrder( int fromOrder ); /* Clear all priorities from the fsm to so they won't affcet minimization * of the final fsm. */ void clearAllPriorities(); /* Zero out all the function keys. */ void nullActionKeys(); /* Walk the list of states and verify state properties. */ void verifyStates(); /* Misfit Accounting. Are misfits put on a separate list. */ void setMisfitAccounting( bool val ) { misfitAccounting = val; } /* Set and Unset a state as final. */ void setFinState( StateAp *state ); void unsetFinState( StateAp *state ); void setStartState( StateAp *state ); void unsetStartState( ); /* Set and unset a state as an entry point. */ void setEntry( int id, StateAp *state ); void changeEntry( int id, StateAp *to, StateAp *from ); void unsetEntry( int id, StateAp *state ); void unsetEntry( int id ); void unsetAllEntryPoints(); /* Epsilon transitions. */ void epsilonTrans( int id ); void checkEpsilonRegularInteraction( const PriorTable &t1, const PriorTable &t2 ); private: /* Can generate staes. */ void shadowReadWriteStates(); void afterOpMinimize( bool lastInSeq = true ); void removeDups( ActionTable &table ); public: void removeActionDups(); /* * Basic attaching and detaching. */ /* Common to attaching/detaching list and default. */ template < class Head > void attachToInList( StateAp *from, StateAp *to, Head *&head, Head *trans ); template < class Head > void detachFromInList( StateAp *from, StateAp *to, Head *&head, Head *trans ); void attachToNfa( StateAp *from, StateAp *to, NfaTrans *nfaTrans ); void detachFromNfa( StateAp *from, StateAp *to, NfaTrans *nfaTrans ); void attachStateDict( StateAp *from, StateAp *to ); void detachStateDict( StateAp *from, StateAp *to ); /* Attach with a new transition. */ CondAp *attachNewCond( TransAp *trans, StateAp *from, StateAp *to, CondKey onChar ); TransAp *attachNewTrans( StateAp *from, StateAp *to, Key onChar1, Key onChar2 ); /* Attach with an existing transition that already in an out list. */ void attachTrans( StateAp *from, StateAp *to, TransDataAp *trans ); void attachTrans( StateAp *from, StateAp *to, CondAp *trans ); /* Redirect a transition away from error and towards some state. */ void redirectErrorTrans( StateAp *from, StateAp *to, TransDataAp *trans ); void redirectErrorTrans( StateAp *from, StateAp *to, CondAp *trans ); /* Detach a transition from a target state. */ void detachTrans( StateAp *from, StateAp *to, TransDataAp *trans ); void detachTrans( StateAp *from, StateAp *to, CondAp *trans ); /* Detach a state from the graph. */ void detachState( StateAp *state ); /* * NFA to DFA conversion routines. */ /* Duplicate a transition that will dropin to a free spot. */ TransDataAp *dupTransData( StateAp *from, TransDataAp *srcTrans ); TransAp *dupTrans( StateAp *from, TransAp *srcTrans ); CondAp *dupCondTrans( StateAp *from, TransAp *destParent, CondAp *srcTrans ); private: /* In crossing, two transitions both go to real states. Can generate * states. */ template< class Trans > Trans *fsmAttachStates( StateAp *from, Trans *destTrans, Trans *srcTrans ); public: void expandConds( StateAp *fromState, TransAp *trans, CondSpace *fromSpace, CondSpace *mergedSpace ); TransAp *copyTransForExpansion( StateAp *fromState, TransAp *srcTrans ); StateAp *copyStateForExpansion( StateAp *srcState ); void freeEffectiveTrans( TransAp *srcTrans ); private: /* Two transitions are to be crossed, handle the possibility of either * going to the error state. Can generate states. */ template< class Trans > Trans *mergeTrans( StateAp *from, Trans *destTrans, Trans *srcTrans ); public: /* Compare deterimne relative priorities of two transition tables. */ int comparePrior( const PriorTable &priorTable1, const PriorTable &priorTable2 ); void addOutCondition( StateAp *state, Action *condAction, bool sense ); void expandCondKeys( CondKeySet &condKeys, CondSpace *fromSpace, CondSpace *mergedSpace ); /* Back to trans ap (minimmization) */ TransDataAp *convertToTransAp( StateAp *from, CondAp *cond ); /* Cross a src transition with one that is already occupying a spot. */ TransCondAp *convertToCondAp( StateAp *state, TransDataAp *trans ); CondSpace *expandCondSpace( TransAp *destTrans, TransAp *srcTrans ); private: /* Can generate states. */ TransAp *crossTransitions( StateAp *from, TransAp *destTrans, TransAp *srcTrans ); TransDataAp *crossTransitionsBothPlain( StateAp *from, TransDataAp *destTrans, TransDataAp *srcTrans ); CondAp *crossCondTransitions( StateAp *from, TransAp *destParent, CondAp *destTrans, CondAp *srcTrans ); public: void prepareNfaRound(); void finalizeNfaRound(); void outTransCopy( StateAp *dest, TransAp *srcList ); void nfaMergeStates( StateAp *destState, StateAp **srcStates, int numSrc ); void mergeOutConds( StateAp *destState, StateAp *srcState, bool leaving = false ); void checkPriorInteractions( StateAp *destState, StateAp *srcState ); void mergeNfaTransitions( StateAp *destState, StateAp *srcState ); void mergeStateProperties( StateAp *destState, StateAp *srcState ); void mergeStatesLeaving( StateAp *destState, StateAp *srcState ); void mergeStateBits( StateAp *destState, StateAp *srcState ); void mergeStates( StateAp *destState, StateAp *srcState, bool leaving = false ); /* Merge a set of states into destState. */ void mergeStateList( StateAp *destState, StateAp **srcStates, int numSrc ); /* Make all states that are combinations of other states and that * have not yet had their out transitions filled in. This will * empty out stateDict and stFil. */ void cleanAbortedFill( StateAp *state ); void cleanAbortedFill(); bool overStateLimit(); void nfaFillInStates(); /* * Transition Comparison. */ template< class Trans > int compareCondBitElim( Trans *trans1, Trans *trans2 ); template< class Trans > int compareCondBitElimPtr( Trans *trans1, Trans *trans2 ); int compareCondListBitElim( const CondList &condList1, const CondList &condList2 ); /* Compare priority and function table of transitions. */ static int compareTransData( TransAp *trans1, TransAp *trans2 ); template< class Trans > static int compareCondData( Trans *trans1, Trans *trans2 ); /* Compare transition data. Either of the pointers may be null. */ static int compareTransDataPtr( TransAp *trans1, TransAp *trans2 ); template< class Trans > static int compareCondDataPtr( Trans *trans1, Trans *trans2 ); /* Compare target state and transition data. Either pointer may be null. */ static int compareFullPtr( TransAp *trans1, TransAp *trans2 ); /* Compare target partitions. Either pointer may be null. */ static int compareTransPartPtr( TransAp *trans1, TransAp *trans2 ); template< class Trans > static int compareCondPartPtr( Trans *trans1, Trans *trans2 ); static int comparePart( TransAp *trans1, TransAp *trans2 ); /* Check marked status of target states. Either pointer may be null. */ static bool shouldMarkPtr( MarkIndex &markIndex, TransAp *trans1, TransAp *trans2 ); /* * Callbacks. */ /* Add in the properties of srcTrans into this. */ template< class Trans > void addInTrans( Trans *destTrans, Trans *srcTrans ); /* Compare states on data stored in the states. */ static int compareStateData( const StateAp *state1, const StateAp *state2 ); /* Out transition data. */ void clearOutData( StateAp *state ); bool hasOutData( StateAp *state ); void transferOutData( StateAp *destState, StateAp *srcState ); /* * Allocation. */ /* New up a state and add it to the graph. */ StateAp *addState(); /* * Building basic machines */ static FsmAp *concatFsm( FsmCtx *ctx, Key c ); static FsmAp *concatFsmCI( FsmCtx *ctx, Key c ); static FsmAp *concatFsm( FsmCtx *ctx, Key *str, int len ); static FsmAp *concatFsmCI( FsmCtx *ctx, Key *str, int len ); static FsmAp *orFsm( FsmCtx *ctx, Key *set, int len ); static FsmAp *rangeFsm( FsmCtx *ctx, Key low, Key high ); static FsmAp *rangeFsmCI( FsmCtx *ctx, Key low, Key high ); static FsmAp *rangeStarFsm( FsmCtx *ctx, Key low, Key high ); static FsmAp *emptyFsm( FsmCtx *ctx ); static FsmAp *lambdaFsm( FsmCtx *ctx ); static FsmAp *dotFsm( FsmCtx *ctx ); static FsmAp *dotStarFsm( FsmCtx *ctx ); static FsmAp *notRangeFsm( FsmCtx *ctx, Key low, Key high ); /* * Fsm operators. */ static FsmRes starOp( FsmAp *fsm ); static FsmRes plusOp( FsmAp *fsm ); static FsmRes questionOp( FsmAp *fsm ); static FsmRes exactRepeatOp( FsmAp *fsm, int times ); static FsmRes maxRepeatOp( FsmAp *fsm, int times ); static FsmRes minRepeatOp( FsmAp *fsm, int times ); static FsmRes rangeRepeatOp( FsmAp *fsm, int lower, int upper ); static FsmRes concatOp( FsmAp *fsm, FsmAp *other, bool lastInSeq = true, StateSet *fromStates = 0, bool optional = false ); static FsmRes unionOp( FsmAp *fsm, FsmAp *other, bool lastInSeq = true ); static FsmRes intersectOp( FsmAp *fsm, FsmAp *other, bool lastInSeq = true ); static FsmRes subtractOp( FsmAp *fsm, FsmAp *other, bool lastInSeq = true ); static FsmRes epsilonOp( FsmAp *fsm ); static FsmRes joinOp( FsmAp *fsm, int startId, int finalId, FsmAp **others, int numOthers ); static FsmRes rightStartConcatOp( FsmAp *fsm, FsmAp *other, bool lastInSeq = true ); void transferOutToNfaTrans( NfaTrans *trans, StateAp *state ); enum NfaRepeatMode { NfaLegacy = 1, NfaGreedy, NfaLazy }; static FsmRes applyNfaTrans( FsmAp *fsm, StateAp *fromState, StateAp *toState, NfaTrans *nfaTrans ); /* Results in an NFA. */ static FsmRes nfaUnionOp( FsmAp *fsm, FsmAp **others, int n, int depth, std::ostream &stats ); static FsmRes nfaRepeatOp( FsmAp *fsm, Action *push, Action *pop, Action *init, Action *stay, Action *repeat, Action *exit ); static FsmRes nfaRepeatOp2( FsmAp *fsm, Action *push, Action *pop, Action *init, Action *stay, Action *repeat, Action *exit, NfaRepeatMode mode = NfaGreedy ); static FsmRes nfaWrap( FsmAp *fsm, Action *push, Action *pop, Action *init, Action *stay, Action *exit, NfaRepeatMode mode = NfaGreedy ); static FsmRes nfaUnion( const NfaRoundVect &roundsList, FsmAp **machines, int numMachines, std::ostream &stats, bool printStatistics ); static FsmRes condPlus( FsmAp *fsm, long repId, Action *ini, Action *inc, Action *min, Action *max ); static FsmRes condStar( FsmAp *fsm, long repId, Action *ini, Action *inc, Action *min, Action *max ); /* Make a new start state that has no entry points. Will not change the * meaning of the fsm. */ static FsmRes isolateStartState( FsmAp *fsm ); /* * Analysis Functions */ static FsmRes condCostFromState( FsmAp *fsm, StateAp *state, long depth ); static FsmRes condCostSearch( FsmAp *fsm ); static void breadthFromEntry( double &total, int &minDepth, double *histogram, FsmAp *fsm, StateAp *state ); static void breadthFromState( double &total, int &minDepth, double *histogram, FsmAp *fsm, StateAp *state, long depth, int maxDepth, double stateScore); /* * Operator workers */ void globOp( FsmAp **others, int numOthers ); void deterministicEntry(); /* Determine if there are any entry points into a start state other than * the start state. */ bool isStartStateIsolated(); /* Make a new start state that has no entry points. Will not change the * meaning of the fsm. */ StateAp *dupStartState(); /* Workers for resolving epsilon transitions. */ bool inEptVect( EptVect *eptVect, StateAp *targ ); void epsilonFillEptVectFrom( StateAp *root, StateAp *from, bool parentLeaving ); void resolveEpsilonTrans(); static bool fillAbort( FsmRes &res, FsmAp *fsm ); static FsmRes fillInStates( FsmAp *fsm ); /* Workers for concatenation and union. */ static FsmRes doUnion( FsmAp *fsm, FsmAp *other ); static FsmRes doConcat( FsmAp *fsm, FsmAp *other, StateSet *fromStates, bool optional ); static void condCost( Action *action, long repId ); static void applyEntryPriorGuard( FsmAp *fsm, long repId ); static void applyRepeatPriorGuard( FsmAp *fsm, long repId ); /* * Final states */ /* Unset any final states that are no longer to be final * due to final bits. */ void unsetIncompleteFinals(); void unsetKilledFinals(); /* Bring in other's entry points. Assumes others states are going to be * copied into this machine. */ void copyInEntryPoints( FsmAp *other ); /* Ordering states. */ void depthFirstOrdering( StateAp *state ); void depthFirstOrdering(); void sortStatesByFinal(); /* Set sqequential state numbers starting at 0. */ void setStateNumbers( int base ); /* Unset all final states. */ void unsetAllFinStates(); /* Set the bits of final states and clear the bits of non final states. */ void setFinBits( int finStateBits ); void unsetFinBits( int finStateBits ); /* * Self-consistency checks. */ /* Run a sanity check on the machine. */ void verifyIntegrity(); /* Verify that there are no unreachable states, or dead end states. */ void verifyReachability(); void verifyNoDeadEndStates(); /* * Path pruning */ /* Mark all states reachable from state. */ void markReachableFromHereReverse( StateAp *state ); /* Mark all states reachable from state. */ void markReachableFromHere( StateAp *state ); void markReachableFromHereStopFinal( StateAp *state ); /* Any transitions to another state? */ bool anyRegularTransitions( StateAp *state ); /* Removes states that cannot be reached by any path in the fsm and are * thus wasted silicon. */ void removeDeadEndStates(); /* Removes states that cannot be reached by any path in the fsm and are * thus wasted silicon. */ long removeUnreachableStates(); /* Remove error actions from states on which the error transition will * never be taken. */ bool outListCovers( StateAp *state ); bool anyErrorRange( StateAp *state ); /* Remove states that are on the misfit list. */ void removeMisfits(); /* * FSM Minimization */ /* Minimization by partitioning. */ void minimizePartition1(); void minimizePartition2(); /* Minimize the final state Machine. The result is the minimal fsm. Slow * but stable, correct minimization. Uses n^2 space (lookout) and average * n^2 time. Worst case n^3 time, but a that is a very rare case. */ void minimizeStable(); /* Minimize the final state machine. Does not find the minimal fsm, but a * pretty good approximation. Does not use any extra space. Average n^2 * time. Worst case n^3 time, but a that is a very rare case. */ void minimizeApproximate(); /* This is the worker for the minimize approximate solution. It merges * states that have identical out transitions. */ bool minimizeRound( ); /* Given an intial partioning of states, split partitions that have out trans * to differing partitions. */ int partitionRound( StateAp **statePtrs, MinPartition *parts, int numParts ); /* Split partitions that have a transition to a previously split partition, until * there are no more partitions to split. */ int splitCandidates( StateAp **statePtrs, MinPartition *parts, int numParts ); /* Fuse together states in the same partition. */ void fusePartitions( MinPartition *parts, int numParts ); /* Mark pairs where out final stateness differs, out trans data differs, * trans pairs go to a marked pair or trans data differs. Should get * alot of pairs. */ void initialMarkRound( MarkIndex &markIndex ); /* One marking round on all state pairs. Considers if trans pairs go * to a marked state only. Returns whether or not a pair was marked. */ bool markRound( MarkIndex &markIndex ); /* Move the in trans into src into dest. */ void moveInwardTrans(StateAp *dest, StateAp *src); /* Make state src and dest the same state. */ void fuseEquivStates( StateAp *dest, StateAp *src ); /* Find any states that didn't get marked by the marking algorithm and * merge them into the primary states of their equivalence class. */ void fuseUnmarkedPairs( MarkIndex &markIndex ); /* Merge neighboring transitions go to the same state and have the same * transitions data. */ void compressTransitions(); /* Returns true if there is a transtion (either explicit or by a gap) to * the error state. */ bool checkErrTrans( StateAp *state, TransAp *trans ); bool checkErrTrans( StateAp *state, CondAp *trans ); bool checkErrTransFinish( StateAp *state ); bool hasErrorTrans(); /* Check if a machine defines a single character. This is useful in * validating ranges and machines to export. */ bool checkSingleCharMachine( ); bool elimCondBits(); }; /* Callback invoked when another trans (or possibly this) is added into this * transition during the merging process. Draw in any properties of srcTrans * into this transition. AddInTrans is called when a new transitions is made * that will be a duplicate of another transition or a combination of several * other transitions. AddInTrans will be called for each transition that the * new transition is to represent. */ template< class Trans > void FsmAp::addInTrans( Trans *destTrans, Trans *srcTrans ) { /* Protect against adding in from ourselves. */ if ( srcTrans == destTrans ) { /* Adding in ourselves, need to make a copy of the source transitions. * The priorities are not copied in as that would have no effect. */ destTrans->lmActionTable.setActions( LmActionTable(srcTrans->lmActionTable) ); destTrans->actionTable.setActions( ActionTable(srcTrans->actionTable) ); } else { /* Not a copy of ourself, get the functions and priorities. */ destTrans->lmActionTable.setActions( srcTrans->lmActionTable ); destTrans->actionTable.setActions( srcTrans->actionTable ); destTrans->priorTable.setPriors( srcTrans->priorTable ); } } /* Compares two transition pointers according to priority and functions. * Either pointer may be null. Does not consider to state or from state. */ template< class Trans > int FsmAp::compareCondDataPtr( Trans *trans1, Trans *trans2 ) { if ( trans1 == 0 && trans2 != 0 ) return -1; else if ( trans1 != 0 && trans2 == 0 ) return 1; else if ( trans1 != 0 ) { /* Both of the transition pointers are set. */ int compareRes = compareCondData( trans1, trans2 ); if ( compareRes != 0 ) return compareRes; } return 0; } /* Compares two transition pointers according to priority and functions. * Either pointer may be null. Does not consider to state or from state. */ template< class Trans > int FsmAp::compareCondBitElimPtr( Trans *trans1, Trans *trans2 ) { if ( trans1 == 0 && trans2 != 0 ) return -1; else if ( trans1 != 0 && trans2 == 0 ) return 1; else if ( trans1 != 0 ) { /* Both of the transition pointers are set. */ int compareRes = compareCondBitElim( trans1, trans2 ); if ( compareRes != 0 ) return compareRes; } return 0; } struct NameInst; /* Tree nodes. */ struct NfaUnion; struct MachineDef; struct FsmLongestMatch; struct FsmLongestMatchPart; struct FsmLmPartList; struct Range; struct LengthDef; struct Action; struct InlineList; /* Reference to a named state. */ struct NameRef : public Vector {}; typedef Vector NameRefList; typedef Vector NameTargList; /* * FsmLongestMatch * * Wherever possible the item match will execute on the character. If not * possible the item match will execute on a lookahead character and either * hold the current char (if one away) or backup. * * How to handle the problem of backing up over a buffer break? * * Don't want to use pending out transitions for embedding item match because * the role of item match action is different: it may sometimes match on the * final transition, or may match on a lookahead character. * * Don't want to invent a new operator just for this. So just trail action * after machine, this means we can only use literal actions. * * The item action may * * What states of the machine will be final. The item actions that wrap around * on the last character will go straight to the start state. * * Some transitions will be lookahead transitions, they will hold the current * character. Crossing them with regular transitions must be restricted * because it does not make sense. The transition cannot simultaneously hold * and consume the current character. */ struct FsmLongestMatchPart { FsmLongestMatchPart( Action *action, int longestMatchId ) : action(action), longestMatchId(longestMatchId), inLmSelect(false) { } Action *action; Action *setActId; Action *actOnLast; Action *actOnNext; Action *actLagBehind; Action *actNfaOnLast; Action *actNfaOnNext; Action *actNfaOnEof; int longestMatchId; bool inLmSelect; FsmLongestMatch *longestMatch; FsmLongestMatchPart *prev, *next; }; /* Declare a new type so that ptreetypes.h need not include dlist.h. */ struct FsmLmPartList : DList {}; struct FsmLongestMatch { /* Construct with a list of joins */ FsmLongestMatch( FsmLmPartList *longestMatchList ) : longestMatchList(longestMatchList), lmSwitchHandlesError(false) { } FsmLmPartList *longestMatchList; bool lmSwitchHandlesError; void restart( FsmAp *graph, TransAp *trans ); void restart( FsmAp *graph, CondAp *cond ); }; struct NameMapVal { Vector vals; }; /* Tree of instantiated names. */ typedef AvlMapEl NameMapEl; typedef AvlMap NameMap; typedef Vector NameVect; /* Node in the tree of instantiated names. */ struct NameInst { NameInst( const InputLoc &loc, NameInst *parent, std::string name, int id, bool isLabel ) : loc(loc), parent(parent), name(name), id(id), isLabel(isLabel), isLongestMatch(false), numRefs(0), numUses(0), start(0), final(0) {} ~NameInst(); InputLoc loc; /* Keep parent pointers in the name tree to retrieve * fully qulified names. */ NameInst *parent; std::string name; int id; bool isLabel; bool isLongestMatch; int numRefs; int numUses; /* Names underneath us, excludes anonymous names. */ NameMap children; /* All names underneath us in order of appearance. */ NameVect childVect; /* Join scopes need an implicit "final" target. */ NameInst *start, *final; /* During a fsm generation walk, lists the names that are referenced by * epsilon operations in the current scope. After the link is made by the * epsilon reference and the join operation is complete, the label can * have its refcount decremented. Once there are no more references the * entry point can be removed from the fsm returned. */ NameVect referencedNames; /* Pointers for the name search queue. */ NameInst *prev, *next; /* Check if this name inst or any name inst below is referenced. */ bool anyRefsRec(); }; typedef DList NameInstList; extern const int ORD_PUSH; extern const int ORD_RESTORE; extern const int ORD_COND; extern const int ORD_COND2; extern const int ORD_TEST; #endif