/* * Copyright 2001, 2002, 2006 Adrian Thurston */ /* This file is part of Ragel. * * Ragel is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * Ragel is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with Ragel; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include "fsmgraph.h" #include "mergesort.h" #include "parsedata.h" using std::cerr; using std::endl; /* Make a new state. The new state will be put on the graph's * list of state. The new state can be created final or non final. */ StateAp *FsmAp::addState() { /* Make the new state to return. */ StateAp *state = new StateAp(); if ( misfitAccounting ) { /* Create the new state on the misfit list. All states are created * with no foreign in transitions. */ misfitList.append( state ); } else { /* Create the new state. */ stateList.append( state ); } return state; } /* Construct an FSM that is the concatenation of an array of characters. A new * machine will be made that has len+1 states with one transition between each * state for each integer in str. IsSigned determines if the integers are to * be considered as signed or unsigned ints. */ void FsmAp::concatFsm( Key *str, int len ) { /* Make the first state and set it as the start state. */ StateAp *last = addState(); setStartState( last ); /* Attach subsequent states. */ for ( int i = 0; i < len; i++ ) { StateAp *newState = addState(); attachNewTrans( last, newState, str[i], str[i] ); last = newState; } /* Make the last state the final state. */ setFinState( last ); } /* Case insensitive version of concatFsm. */ void FsmAp::concatFsmCI( Key *str, int len ) { /* Make the first state and set it as the start state. */ StateAp *last = addState(); setStartState( last ); /* Attach subsequent states. */ for ( int i = 0; i < len; i++ ) { StateAp *newState = addState(); KeySet keySet; if ( str[i].isLower() ) keySet.insert( str[i].toUpper() ); if ( str[i].isUpper() ) keySet.insert( str[i].toLower() ); keySet.insert( str[i] ); for ( int i = 0; i < keySet.length(); i++ ) attachNewTrans( last, newState, keySet[i], keySet[i] ); last = newState; } /* Make the last state the final state. */ setFinState( last ); } /* Construct a machine that matches one character. A new machine will be made * that has two states with a single transition between the states. IsSigned * determines if the integers are to be considered as signed or unsigned ints. */ void FsmAp::concatFsm( Key chr ) { /* Two states first start, second final. */ setStartState( addState() ); StateAp *end = addState(); setFinState( end ); /* Attach on the character. */ attachNewTrans( startState, end, chr, chr ); } /* Construct a machine that matches any character in set. A new machine will * be made that has two states and len transitions between the them. The set * should be ordered correctly accroding to KeyOps and should not contain * any duplicates. */ void FsmAp::orFsm( Key *set, int len ) { /* Two states first start, second final. */ setStartState( addState() ); StateAp *end = addState(); setFinState( end ); for ( int i = 1; i < len; i++ ) assert( set[i-1] < set[i] ); /* Attach on all the integers in the given string of ints. */ for ( int i = 0; i < len; i++ ) attachNewTrans( startState, end, set[i], set[i] ); } /* Construct a machine that matches a range of characters. A new machine will * be made with two states and a range transition between them. The range will * match any characters from low to high inclusive. Low should be less than or * equal to high otherwise undefined behaviour results. IsSigned determines * if the integers are to be considered as signed or unsigned ints. */ void FsmAp::rangeFsm( Key low, Key high ) { /* Two states first start, second final. */ setStartState( addState() ); StateAp *end = addState(); setFinState( end ); /* Attach using the range of characters. */ attachNewTrans( startState, end, low, high ); } /* Construct a machine that a repeated range of characters. */ void FsmAp::rangeStarFsm( Key low, Key high) { /* One state which is final and is the start state. */ setStartState( addState() ); setFinState( startState ); /* Attach start to start using range of characters. */ attachNewTrans( startState, startState, low, high ); } /* Construct a machine that matches the empty string. A new machine will be * made with only one state. The new state will be both a start and final * state. IsSigned determines if the machine has a signed or unsigned * alphabet. Fsm operations must be done on machines with the same alphabet * signedness. */ void FsmAp::lambdaFsm( ) { /* Give it one state with no transitions making it * the start state and final state. */ setStartState( addState() ); setFinState( startState ); } /* Construct a machine that matches nothing at all. A new machine will be * made with only one state. It will not be final. */ void FsmAp::emptyFsm( ) { /* Give it one state with no transitions making it * the start state and final state. */ setStartState( addState() ); } void FsmAp::transferOutData( StateAp *destState, StateAp *srcState ) { for ( TransList::Iter trans = destState->outList; trans.lte(); trans++ ) { if ( trans->toState != 0 ) { /* Get the actions data from the outActionTable. */ trans->actionTable.setActions( srcState->outActionTable ); /* Get the priorities from the outPriorTable. */ trans->priorTable.setPriors( srcState->outPriorTable ); } } } /* Kleene star operator. Makes this machine the kleene star of itself. Any * transitions made going out of the machine and back into itself will be * notified that they are leaving transitions by having the leavingFromState * callback invoked. */ void FsmAp::starOp( ) { /* For the merging process. */ MergeData md; /* Turn on misfit accounting to possibly catch the old start state. */ setMisfitAccounting( true ); /* Create the new new start state. It will be set final after the merging * of the final states with the start state is complete. */ StateAp *prevStartState = startState; unsetStartState(); setStartState( addState() ); /* Merge the new start state with the old one to isolate it. */ mergeStates( md, startState, prevStartState ); /* Merge the start state into all final states. Except the start state on * the first pass. If the start state is set final we will be doubling up * its transitions, which will get transfered to any final states that * follow it in the final state set. This will be determined by the order * of items in the final state set. To prevent this we just merge with the * start on a second pass. */ for ( StateSet::Iter st = finStateSet; st.lte(); st++ ) { if ( *st != startState ) mergeStatesLeaving( md, *st, startState ); } /* Now it is safe to merge the start state with itself (provided it * is set final). */ if ( startState->isFinState() ) mergeStatesLeaving( md, startState, startState ); /* Now ensure the new start state is a final state. */ setFinState( startState ); /* Fill in any states that were newed up as combinations of others. */ fillInStates( md ); /* Remove the misfits and turn off misfit accounting. */ removeMisfits(); setMisfitAccounting( false ); } void FsmAp::repeatOp( int times ) { /* Must be 1 and up. 0 produces null machine and requires deleting this. */ assert( times > 0 ); /* A repeat of one does absolutely nothing. */ if ( times == 1 ) return; /* Make a machine to make copies from. */ FsmAp *copyFrom = new FsmAp( *this ); /* Concatentate duplicates onto the end up until before the last. */ for ( int i = 1; i < times-1; i++ ) { FsmAp *dup = new FsmAp( *copyFrom ); doConcat( dup, 0, false ); } /* Now use the copyFrom on the end. */ doConcat( copyFrom, 0, false ); } void FsmAp::optionalRepeatOp( int times ) { /* Must be 1 and up. 0 produces null machine and requires deleting this. */ assert( times > 0 ); /* A repeat of one optional merely allows zero string. */ if ( times == 1 ) { setFinState( startState ); return; } /* Make a machine to make copies from. */ FsmAp *copyFrom = new FsmAp( *this ); /* The state set used in the from end of the concatentation. Starts with * the initial final state set, then after each concatenation, gets set to * the the final states that come from the the duplicate. */ StateSet lastFinSet( finStateSet ); /* Set the initial state to zero to allow zero copies. */ setFinState( startState ); /* Concatentate duplicates onto the end up until before the last. */ for ( int i = 1; i < times-1; i++ ) { /* Make a duplicate for concating and set the fin bits to graph 2 so we * can pick out it's final states after the optional style concat. */ FsmAp *dup = new FsmAp( *copyFrom ); dup->setFinBits( STB_GRAPH2 ); doConcat( dup, &lastFinSet, true ); /* Clear the last final state set and make the new one by taking only * the final states that come from graph 2.*/ lastFinSet.empty(); for ( int i = 0; i < finStateSet.length(); i++ ) { /* If the state came from graph 2, add it to the last set and clear * the bits. */ StateAp *fs = finStateSet[i]; if ( fs->stateBits & STB_GRAPH2 ) { lastFinSet.insert( fs ); fs->stateBits &= ~STB_GRAPH2; } } } /* Now use the copyFrom on the end, no bits set, no bits to clear. */ doConcat( copyFrom, &lastFinSet, true ); } /* Fsm concatentation worker. Supports treating the concatentation as optional, * which essentially leaves the final states of machine one as final. */ void FsmAp::doConcat( FsmAp *other, StateSet *fromStates, bool optional ) { /* For the merging process. */ StateSet finStateSetCopy, startStateSet; MergeData md; /* Turn on misfit accounting for both graphs. */ setMisfitAccounting( true ); other->setMisfitAccounting( true ); /* Get the other's start state. */ StateAp *otherStartState = other->startState; /* Unset other's start state before bringing in the entry points. */ other->unsetStartState(); /* Bring in the rest of other's entry points. */ copyInEntryPoints( other ); other->entryPoints.empty(); /* Bring in other's states into our state lists. */ stateList.append( other->stateList ); misfitList.append( other->misfitList ); /* If from states is not set, then get a copy of our final state set before * we clobber it and use it instead. */ if ( fromStates == 0 ) { finStateSetCopy = finStateSet; fromStates = &finStateSetCopy; } /* Unset all of our final states and get the final states from other. */ if ( !optional ) unsetAllFinStates(); finStateSet.insert( other->finStateSet ); /* Since other's lists are empty, we can delete the fsm without * affecting any states. */ delete other; /* Merge our former final states with the start state of other. */ for ( int i = 0; i < fromStates->length(); i++ ) { StateAp *state = fromStates->data[i]; /* Merge the former final state with other's start state. */ mergeStatesLeaving( md, state, otherStartState ); /* If the former final state was not reset final then we must clear * the state's out trans data. If it got reset final then it gets to * keep its out trans data. This must be done before fillInStates gets * called to prevent the data from being sourced. */ if ( ! state->isFinState() ) clearOutData( state ); } /* Fill in any new states made from merging. */ fillInStates( md ); /* Remove the misfits and turn off misfit accounting. */ removeMisfits(); setMisfitAccounting( false ); } /* Concatenates other to the end of this machine. Other is deleted. Any * transitions made leaving this machine and entering into other are notified * that they are leaving transitions by having the leavingFromState callback * invoked. */ void FsmAp::concatOp( FsmAp *other ) { /* Assert same signedness and return graph concatenation op. */ doConcat( other, 0, false ); } void FsmAp::doOr( FsmAp *other ) { /* For the merging process. */ MergeData md; /* Build a state set consisting of both start states */ StateSet startStateSet; startStateSet.insert( startState ); startStateSet.insert( other->startState ); /* Both of the original start states loose their start state status. */ unsetStartState(); other->unsetStartState(); /* Bring in the rest of other's entry points. */ copyInEntryPoints( other ); other->entryPoints.empty(); /* Merge the lists. This will move all the states from other * into this. No states will be deleted. */ stateList.append( other->stateList ); misfitList.append( other->misfitList ); /* Move the final set data from other into this. */ finStateSet.insert(other->finStateSet); other->finStateSet.empty(); /* Since other's list is empty, we can delete the fsm without * affecting any states. */ delete other; /* Create a new start state. */ setStartState( addState() ); /* Merge the start states. */ mergeStates( md, startState, startStateSet.data, startStateSet.length() ); /* Fill in any new states made from merging. */ fillInStates( md ); } /* Unions other with this machine. Other is deleted. */ void FsmAp::unionOp( FsmAp *other ) { /* Turn on misfit accounting for both graphs. */ setMisfitAccounting( true ); other->setMisfitAccounting( true ); /* Call Worker routine. */ doOr( other ); /* Remove the misfits and turn off misfit accounting. */ removeMisfits(); setMisfitAccounting( false ); } /* Intersects other with this machine. Other is deleted. */ void FsmAp::intersectOp( FsmAp *other ) { /* Turn on misfit accounting for both graphs. */ setMisfitAccounting( true ); other->setMisfitAccounting( true ); /* Set the fin bits on this and other to want each other. */ setFinBits( STB_GRAPH1 ); other->setFinBits( STB_GRAPH2 ); /* Call worker Or routine. */ doOr( other ); /* Unset any final states that are no longer to * be final due to final bits. */ unsetIncompleteFinals(); /* Remove the misfits and turn off misfit accounting. */ removeMisfits(); setMisfitAccounting( false ); /* Remove states that have no path to a final state. */ removeDeadEndStates(); } /* Set subtracts other machine from this machine. Other is deleted. */ void FsmAp::subtractOp( FsmAp *other ) { /* Turn on misfit accounting for both graphs. */ setMisfitAccounting( true ); other->setMisfitAccounting( true ); /* Set the fin bits of other to be killers. */ other->setFinBits( STB_GRAPH1 ); /* Call worker Or routine. */ doOr( other ); /* Unset any final states that are no longer to * be final due to final bits. */ unsetKilledFinals(); /* Remove the misfits and turn off misfit accounting. */ removeMisfits(); setMisfitAccounting( false ); /* Remove states that have no path to a final state. */ removeDeadEndStates(); } bool FsmAp::inEptVect( EptVect *eptVect, StateAp *state ) { if ( eptVect != 0 ) { /* Vect is there, walk it looking for state. */ for ( int i = 0; i < eptVect->length(); i++ ) { if ( eptVect->data[i].targ == state ) return true; } } return false; } /* Fill epsilon vectors in a root state from a given starting point. Epmploys * a depth first search through the graph of epsilon transitions. */ void FsmAp::epsilonFillEptVectFrom( StateAp *root, StateAp *from, bool parentLeaving ) { /* Walk the epsilon transitions out of the state. */ for ( EpsilonTrans::Iter ep = from->epsilonTrans; ep.lte(); ep++ ) { /* Find the entry point, if the it does not resove, ignore it. */ EntryMapEl *enLow, *enHigh; if ( entryPoints.findMulti( *ep, enLow, enHigh ) ) { /* Loop the targets. */ for ( EntryMapEl *en = enLow; en <= enHigh; en++ ) { /* Do not add the root or states already in eptVect. */ StateAp *targ = en->value; if ( targ != from && !inEptVect(root->eptVect, targ) ) { /* Maybe need to create the eptVect. */ if ( root->eptVect == 0 ) root->eptVect = new EptVect(); /* If moving to a different graph or if any parent is * leaving then we are leaving. */ bool leaving = parentLeaving || root->owningGraph != targ->owningGraph; /* All ok, add the target epsilon and recurse. */ root->eptVect->append( EptVectEl(targ, leaving) ); epsilonFillEptVectFrom( root, targ, leaving ); } } } } } void FsmAp::shadowReadWriteStates( MergeData &md ) { /* Init isolatedShadow algorithm data. */ for ( StateList::Iter st = stateList; st.lte(); st++ ) st->isolatedShadow = 0; /* Any states that may be both read from and written to must * be shadowed. */ for ( StateList::Iter st = stateList; st.lte(); st++ ) { /* Find such states by looping through stateVect lists, which give us * the states that will be read from. May cause us to visit the states * that we are interested in more than once. */ if ( st->eptVect != 0 ) { /* For all states that will be read from. */ for ( EptVect::Iter ept = *st->eptVect; ept.lte(); ept++ ) { /* Check for read and write to the same state. */ StateAp *targ = ept->targ; if ( targ->eptVect != 0 ) { /* State is to be written to, if the shadow is not already * there, create it. */ if ( targ->isolatedShadow == 0 ) { StateAp *shadow = addState(); mergeStates( md, shadow, targ ); targ->isolatedShadow = shadow; } /* Write shadow into the state vector so that it is the * state that the epsilon transition will read from. */ ept->targ = targ->isolatedShadow; } } } } } void FsmAp::resolveEpsilonTrans( MergeData &md ) { /* Walk the state list and invoke recursive worker on each state. */ for ( StateList::Iter st = stateList; st.lte(); st++ ) epsilonFillEptVectFrom( st, st, false ); /* Prevent reading from and writing to of the same state. */ shadowReadWriteStates( md ); /* For all states that have epsilon transitions out, draw the transitions, * clear the epsilon transitions. */ for ( StateList::Iter st = stateList; st.lte(); st++ ) { /* If there is a state vector, then create the pre-merge state. */ if ( st->eptVect != 0 ) { /* Merge all the epsilon targets into the state. */ for ( EptVect::Iter ept = *st->eptVect; ept.lte(); ept++ ) { if ( ept->leaving ) mergeStatesLeaving( md, st, ept->targ ); else mergeStates( md, st, ept->targ ); } /* Clean up the target list. */ delete st->eptVect; st->eptVect = 0; } /* Clear the epsilon transitions vector. */ st->epsilonTrans.empty(); } } void FsmAp::epsilonOp() { /* For merging process. */ MergeData md; setMisfitAccounting( true ); for ( StateList::Iter st = stateList; st.lte(); st++ ) st->owningGraph = 0; /* Perform merges. */ resolveEpsilonTrans( md ); /* Epsilons can caused merges which leave behind unreachable states. */ fillInStates( md ); /* Remove the misfits and turn off misfit accounting. */ removeMisfits(); setMisfitAccounting( false ); } /* Make a new maching by joining together a bunch of machines without making * any transitions between them. A negative finalId results in there being no * final id. */ void FsmAp::joinOp( int startId, int finalId, FsmAp **others, int numOthers ) { /* For the merging process. */ MergeData md; /* Set the owning machines. Start at one. Zero is reserved for the start * and final states. */ for ( StateList::Iter st = stateList; st.lte(); st++ ) st->owningGraph = 1; for ( int m = 0; m < numOthers; m++ ) { for ( StateList::Iter st = others[m]->stateList; st.lte(); st++ ) st->owningGraph = 2+m; } /* All machines loose start state status. */ unsetStartState(); for ( int m = 0; m < numOthers; m++ ) others[m]->unsetStartState(); /* Bring the other machines into this. */ for ( int m = 0; m < numOthers; m++ ) { /* Bring in the rest of other's entry points. */ copyInEntryPoints( others[m] ); others[m]->entryPoints.empty(); /* Merge the lists. This will move all the states from other into * this. No states will be deleted. */ stateList.append( others[m]->stateList ); assert( others[m]->misfitList.length() == 0 ); /* Move the final set data from other into this. */ finStateSet.insert( others[m]->finStateSet ); others[m]->finStateSet.empty(); /* Since other's list is empty, we can delete the fsm without * affecting any states. */ delete others[m]; } /* Look up the start entry point. */ EntryMapEl *enLow = 0, *enHigh = 0; bool findRes = entryPoints.findMulti( startId, enLow, enHigh ); if ( ! findRes ) { /* No start state. Set a default one and proceed with the join. Note * that the result of the join will be a very uninteresting machine. */ setStartState( addState() ); } else { /* There is at least one start state, create a state that will become * the new start state. */ StateAp *newStart = addState(); setStartState( newStart ); /* The start state is in an owning machine class all it's own. */ newStart->owningGraph = 0; /* Create the set of states to merge from. */ StateSet stateSet; for ( EntryMapEl *en = enLow; en <= enHigh; en++ ) stateSet.insert( en->value ); /* Merge in the set of start states into the new start state. */ mergeStates( md, newStart, stateSet.data, stateSet.length() ); } /* Take a copy of the final state set, before unsetting them all. This * will allow us to call clearOutData on the states that don't get * final state status back back. */ StateSet finStateSetCopy = finStateSet; /* Now all final states are unset. */ unsetAllFinStates(); if ( finalId >= 0 ) { /* Create the implicit final state. */ StateAp *finState = addState(); setFinState( finState ); /* Assign an entry into the final state on the final state entry id. Note * that there may already be an entry on this id. That's ok. Also set the * final state owning machine id. It's in a class all it's own. */ setEntry( finalId, finState ); finState->owningGraph = 0; } /* Hand over to workers for resolving epsilon trans. This will merge states * with the targets of their epsilon transitions. */ resolveEpsilonTrans( md ); /* Invoke the relinquish final callback on any states that did not get * final state status back. */ for ( StateSet::Iter st = finStateSetCopy; st.lte(); st++ ) { if ( !((*st)->stateBits & STB_ISFINAL) ) clearOutData( *st ); } /* Fill in any new states made from merging. */ fillInStates( md ); /* Joining can be messy. Instead of having misfit accounting on (which is * tricky here) do a full cleaning. */ removeUnreachableStates(); } void FsmAp::globOp( FsmAp **others, int numOthers ) { /* All other machines loose start states status. */ for ( int m = 0; m < numOthers; m++ ) others[m]->unsetStartState(); /* Bring the other machines into this. */ for ( int m = 0; m < numOthers; m++ ) { /* Bring in the rest of other's entry points. */ copyInEntryPoints( others[m] ); others[m]->entryPoints.empty(); /* Merge the lists. This will move all the states from other into * this. No states will be deleted. */ stateList.append( others[m]->stateList ); assert( others[m]->misfitList.length() == 0 ); /* Move the final set data from other into this. */ finStateSet.insert( others[m]->finStateSet ); others[m]->finStateSet.empty(); /* Since other's list is empty, we can delete the fsm without * affecting any states. */ delete others[m]; } } void FsmAp::deterministicEntry() { /* For the merging process. */ MergeData md; /* States may loose their entry points, turn on misfit accounting. */ setMisfitAccounting( true ); /* Get a copy of the entry map then clear all the entry points. As we * iterate the old entry map finding duplicates we will add the entry * points for the new states that we create. */ EntryMap prevEntry = entryPoints; unsetAllEntryPoints(); for ( int enId = 0; enId < prevEntry.length(); ) { /* Count the number of states on this entry key. */ int highId = enId; while ( highId < prevEntry.length() && prevEntry[enId].key == prevEntry[highId].key ) highId += 1; int numIds = highId - enId; if ( numIds == 1 ) { /* Only a single entry point, just set the entry. */ setEntry( prevEntry[enId].key, prevEntry[enId].value ); } else { /* Multiple entry points, need to create a new state and merge in * all the targets of entry points. */ StateAp *newEntry = addState(); for ( int en = enId; en < highId; en++ ) mergeStates( md, newEntry, prevEntry[en].value ); /* Add the new state as the single entry point. */ setEntry( prevEntry[enId].key, newEntry ); } enId += numIds; } /* The old start state may be unreachable. Remove the misfits and turn off * misfit accounting. */ removeMisfits(); setMisfitAccounting( false ); } /* Unset any final states that are no longer to be final due to final bits. */ void FsmAp::unsetKilledFinals() { /* Duplicate the final state set before we begin modifying it. */ StateSet fin( finStateSet ); for ( int s = 0; s < fin.length(); s++ ) { /* Check for killing bit. */ StateAp *state = fin.data[s]; if ( state->stateBits & STB_GRAPH1 ) { /* One final state is a killer, set to non-final. */ unsetFinState( state ); } /* Clear all killing bits. Non final states should never have had those * state bits set in the first place. */ state->stateBits &= ~STB_GRAPH1; } } /* Unset any final states that are no longer to be final due to final bits. */ void FsmAp::unsetIncompleteFinals() { /* Duplicate the final state set before we begin modifying it. */ StateSet fin( finStateSet ); for ( int s = 0; s < fin.length(); s++ ) { /* Check for one set but not the other. */ StateAp *state = fin.data[s]; if ( state->stateBits & STB_BOTH && (state->stateBits & STB_BOTH) != STB_BOTH ) { /* One state wants the other but it is not there. */ unsetFinState( state ); } /* Clear wanting bits. Non final states should never have had those * state bits set in the first place. */ state->stateBits &= ~STB_BOTH; } } /* Ensure that the start state is free of entry points (aside from the fact * that it is the start state). If the start state has entry points then Make a * new start state by merging with the old one. Useful before modifying start * transitions. If the existing start state has any entry points other than the * start state entry then modifying its transitions changes more than the start * transitions. So isolate the start state by separating it out such that it * only has start stateness as it's entry point. */ void FsmAp::isolateStartState( ) { /* For the merging process. */ MergeData md; /* Bail out if the start state is already isolated. */ if ( isStartStateIsolated() ) return; /* Turn on misfit accounting to possibly catch the old start state. */ setMisfitAccounting( true ); /* This will be the new start state. The existing start * state is merged with it. */ StateAp *prevStartState = startState; unsetStartState(); setStartState( addState() ); /* Merge the new start state with the old one to isolate it. */ mergeStates( md, startState, prevStartState ); /* Stfil and stateDict will be empty because the merging of the old start * state into the new one will not have any conflicting transitions. */ assert( md.stateDict.treeSize == 0 ); assert( md.stfillHead == 0 ); /* The old start state may be unreachable. Remove the misfits and turn off * misfit accounting. */ removeMisfits(); setMisfitAccounting( false ); } #ifdef LOG_CONDS void logCondSpace( CondSpace *condSpace ) { if ( condSpace == 0 ) cerr << ""; else { for ( CondSet::Iter csi = condSpace->condSet.last(); csi.gtb(); csi-- ) { if ( ! csi.last() ) cerr << ','; (*csi)->actionName( cerr ); } } } void logNewExpansion( Expansion *exp ) { cerr << "created expansion:" << endl; cerr << " range: " << exp->lowKey.getVal() << " .. " << exp->highKey.getVal() << endl; cerr << " fromCondSpace: "; logCondSpace( exp->fromCondSpace ); cerr << endl; cerr << " fromVals: " << exp->fromVals << endl; cerr << " toCondSpace: "; logCondSpace( exp->toCondSpace ); cerr << endl; cerr << " toValsList: "; for ( LongVect::Iter to = exp->toValsList; to.lte(); to++ ) cerr << " " << *to; cerr << endl; } #endif void FsmAp::findTransExpansions( ExpansionList &expansionList, StateAp *destState, StateAp *srcState ) { PairIter transCond( destState->outList.head, srcState->stateCondList.head ); for ( ; !transCond.end(); transCond++ ) { if ( transCond.userState == RangeOverlap ) { Expansion *expansion = new Expansion( transCond.s1Tel.lowKey, transCond.s1Tel.highKey ); expansion->fromTrans = new TransAp(*transCond.s1Tel.trans); expansion->fromTrans->fromState = 0; expansion->fromTrans->toState = transCond.s1Tel.trans->toState; expansion->fromCondSpace = 0; expansion->fromVals = 0; CondSpace *srcCS = transCond.s2Tel.trans->condSpace; expansion->toCondSpace = srcCS; long numTargVals = (1 << srcCS->condSet.length()); for ( long targVals = 0; targVals < numTargVals; targVals++ ) expansion->toValsList.append( targVals ); #ifdef LOG_CONDS logNewExpansion( expansion ); #endif expansionList.append( expansion ); } } } void FsmAp::findCondExpInTrans( ExpansionList &expansionList, StateAp *state, Key lowKey, Key highKey, CondSpace *fromCondSpace, CondSpace *toCondSpace, long fromVals, LongVect &toValsList ) { /* Make condition-space low and high keys for searching. */ TransAp searchTrans; searchTrans.lowKey = fromCondSpace->baseKey + fromVals * keyOps->alphSize() + (lowKey - keyOps->minKey); searchTrans.highKey = fromCondSpace->baseKey + fromVals * keyOps->alphSize() + (highKey - keyOps->minKey); searchTrans.prev = searchTrans.next = 0; PairIter pairIter( state->outList.head, &searchTrans ); for ( ; !pairIter.end(); pairIter++ ) { if ( pairIter.userState == RangeOverlap ) { /* Need to make character-space low and high keys from the range * overlap for the expansion object. */ Key expLowKey = pairIter.s1Tel.lowKey - fromCondSpace->baseKey - fromVals * keyOps->alphSize() + keyOps->minKey; Key expHighKey = pairIter.s1Tel.highKey - fromCondSpace->baseKey - fromVals * keyOps->alphSize() + keyOps->minKey; Expansion *expansion = new Expansion( expLowKey, expHighKey ); expansion->fromTrans = new TransAp(*pairIter.s1Tel.trans); expansion->fromTrans->fromState = 0; expansion->fromTrans->toState = pairIter.s1Tel.trans->toState; expansion->fromCondSpace = fromCondSpace; expansion->fromVals = fromVals; expansion->toCondSpace = toCondSpace; expansion->toValsList = toValsList; expansionList.append( expansion ); #ifdef LOG_CONDS logNewExpansion( expansion ); #endif } } } void FsmAp::findCondExpansions( ExpansionList &expansionList, StateAp *destState, StateAp *srcState ) { PairIter condCond( destState->stateCondList.head, srcState->stateCondList.head ); for ( ; !condCond.end(); condCond++ ) { if ( condCond.userState == RangeOverlap ) { /* Loop over all existing condVals . */ CondSet &destCS = condCond.s1Tel.trans->condSpace->condSet; long destLen = destCS.length(); /* Find the items in src cond set that are not in dest * cond set. These are the items that we must expand. */ CondSet srcOnlyCS = condCond.s2Tel.trans->condSpace->condSet; for ( CondSet::Iter dcsi = destCS; dcsi.lte(); dcsi++ ) srcOnlyCS.remove( *dcsi ); long srcOnlyLen = srcOnlyCS.length(); if ( srcOnlyCS.length() > 0 ) { #ifdef LOG_CONDS cerr << "there are " << srcOnlyCS.length() << " item(s) that are " "only in the srcCS" << endl; #endif CondSet mergedCS = destCS; mergedCS.insert( condCond.s2Tel.trans->condSpace->condSet ); CondSpace *fromCondSpace = addCondSpace( destCS ); CondSpace *toCondSpace = addCondSpace( mergedCS ); /* Loop all values in the dest space. */ for ( long destVals = 0; destVals < (1 << destLen); destVals++ ) { long basicVals = 0; for ( CondSet::Iter csi = destCS; csi.lte(); csi++ ) { if ( destVals & (1 << csi.pos()) ) { Action **cim = mergedCS.find( *csi ); long bitPos = (cim - mergedCS.data); basicVals |= 1 << bitPos; } } /* Loop all new values. */ LongVect expandToVals; for ( long soVals = 0; soVals < (1 << srcOnlyLen); soVals++ ) { long targVals = basicVals; for ( CondSet::Iter csi = srcOnlyCS; csi.lte(); csi++ ) { if ( soVals & (1 << csi.pos()) ) { Action **cim = mergedCS.find( *csi ); long bitPos = (cim - mergedCS.data); targVals |= 1 << bitPos; } } expandToVals.append( targVals ); } findCondExpInTrans( expansionList, destState, condCond.s1Tel.lowKey, condCond.s1Tel.highKey, fromCondSpace, toCondSpace, destVals, expandToVals ); } } } } } void FsmAp::doExpand( MergeData &md, StateAp *destState, ExpansionList &expList1 ) { for ( ExpansionList::Iter exp = expList1; exp.lte(); exp++ ) { for ( LongVect::Iter to = exp->toValsList; to.lte(); to++ ) { long targVals = *to; /* We will use the copy of the transition that was made when the * expansion was created. It will get used multiple times. Each * time we must set up the keys, everything else is constant and * and already prepared. */ TransAp *srcTrans = exp->fromTrans; srcTrans->lowKey = exp->toCondSpace->baseKey + targVals * keyOps->alphSize() + (exp->lowKey - keyOps->minKey); srcTrans->highKey = exp->toCondSpace->baseKey + targVals * keyOps->alphSize() + (exp->highKey - keyOps->minKey); TransList srcList; srcList.append( srcTrans ); outTransCopy( md, destState, srcList.head ); srcList.abandon(); } } } void FsmAp::doRemove( MergeData &md, StateAp *destState, ExpansionList &expList1 ) { for ( ExpansionList::Iter exp = expList1; exp.lte(); exp++ ) { Removal removal; if ( exp->fromCondSpace == 0 ) { removal.lowKey = exp->lowKey; removal.highKey = exp->highKey; } else { removal.lowKey = exp->fromCondSpace->baseKey + exp->fromVals * keyOps->alphSize() + (exp->lowKey - keyOps->minKey); removal.highKey = exp->fromCondSpace->baseKey + exp->fromVals * keyOps->alphSize() + (exp->highKey - keyOps->minKey); } removal.next = 0; TransList destList; PairIter pairIter( destState->outList.head, &removal ); for ( ; !pairIter.end(); pairIter++ ) { switch ( pairIter.userState ) { case RangeInS1: { TransAp *destTrans = pairIter.s1Tel.trans; destTrans->lowKey = pairIter.s1Tel.lowKey; destTrans->highKey = pairIter.s1Tel.highKey; destList.append( destTrans ); break; } case RangeInS2: break; case RangeOverlap: { TransAp *trans = pairIter.s1Tel.trans; detachTrans( trans->fromState, trans->toState, trans ); delete trans; break; } case BreakS1: { pairIter.s1Tel.trans = dupTrans( destState, pairIter.s1Tel.trans ); break; } case BreakS2: break; } } destState->outList.transfer( destList ); } } void FsmAp::mergeStateConds( StateAp *destState, StateAp *srcState ) { StateCondList destList; PairIter pairIter( destState->stateCondList.head, srcState->stateCondList.head ); for ( ; !pairIter.end(); pairIter++ ) { switch ( pairIter.userState ) { case RangeInS1: { StateCond *destCond = pairIter.s1Tel.trans; destCond->lowKey = pairIter.s1Tel.lowKey; destCond->highKey = pairIter.s1Tel.highKey; destList.append( destCond ); break; } case RangeInS2: { StateCond *newCond = new StateCond( *pairIter.s2Tel.trans ); newCond->lowKey = pairIter.s2Tel.lowKey; newCond->highKey = pairIter.s2Tel.highKey; destList.append( newCond ); break; } case RangeOverlap: { StateCond *destCond = pairIter.s1Tel.trans; StateCond *srcCond = pairIter.s2Tel.trans; CondSet mergedCondSet; mergedCondSet.insert( destCond->condSpace->condSet ); mergedCondSet.insert( srcCond->condSpace->condSet ); destCond->condSpace = addCondSpace( mergedCondSet ); destCond->lowKey = pairIter.s1Tel.lowKey; destCond->highKey = pairIter.s1Tel.highKey; destList.append( destCond ); break; } case BreakS1: pairIter.s1Tel.trans = new StateCond( *pairIter.s1Tel.trans ); break; case BreakS2: break; } } destState->stateCondList.transfer( destList ); } /* A state merge which represents the drawing in of leaving transitions. If * there is any out data then we duplicate the source state, transfer the out * data, then merge in the state. The new state will be reaped because it will * not be given any in transitions. */ void FsmAp::mergeStatesLeaving( MergeData &md, StateAp *destState, StateAp *srcState ) { if ( !hasOutData( destState ) ) mergeStates( md, destState, srcState ); else { StateAp *ssMutable = addState(); mergeStates( md, ssMutable, srcState ); transferOutData( ssMutable, destState ); for ( OutCondSet::Iter cond = destState->outCondSet; cond.lte(); cond++ ) embedCondition( md, ssMutable, cond->action, cond->sense ); mergeStates( md, destState, ssMutable ); } } void FsmAp::mergeStates( MergeData &md, StateAp *destState, StateAp **srcStates, int numSrc ) { for ( int s = 0; s < numSrc; s++ ) mergeStates( md, destState, srcStates[s] ); } void FsmAp::mergeStates( MergeData &md, StateAp *destState, StateAp *srcState ) { ExpansionList expList1; ExpansionList expList2; findTransExpansions( expList1, destState, srcState ); findCondExpansions( expList1, destState, srcState ); findTransExpansions( expList2, srcState, destState ); findCondExpansions( expList2, srcState, destState ); mergeStateConds( destState, srcState ); outTransCopy( md, destState, srcState->outList.head ); doExpand( md, destState, expList1 ); doExpand( md, destState, expList2 ); doRemove( md, destState, expList1 ); doRemove( md, destState, expList2 ); expList1.empty(); expList2.empty(); /* Get its bits and final state status. */ destState->stateBits |= ( srcState->stateBits & ~STB_ISFINAL ); if ( srcState->isFinState() ) setFinState( destState ); /* Draw in any properties of srcState into destState. */ if ( srcState == destState ) { /* Duplicate the list to protect against write to source. The * priorities sets are not copied in because that would have no * effect. */ destState->epsilonTrans.append( EpsilonTrans( srcState->epsilonTrans ) ); /* Get all actions, duplicating to protect against write to source. */ destState->toStateActionTable.setActions( ActionTable( srcState->toStateActionTable ) ); destState->fromStateActionTable.setActions( ActionTable( srcState->fromStateActionTable ) ); destState->outActionTable.setActions( ActionTable( srcState->outActionTable ) ); destState->outCondSet.insert( OutCondSet( srcState->outCondSet ) ); destState->errActionTable.setActions( ErrActionTable( srcState->errActionTable ) ); destState->eofActionTable.setActions( ActionTable( srcState->eofActionTable ) ); } else { /* Get the epsilons, out priorities. */ destState->epsilonTrans.append( srcState->epsilonTrans ); destState->outPriorTable.setPriors( srcState->outPriorTable ); /* Get all actions. */ destState->toStateActionTable.setActions( srcState->toStateActionTable ); destState->fromStateActionTable.setActions( srcState->fromStateActionTable ); destState->outActionTable.setActions( srcState->outActionTable ); destState->outCondSet.insert( srcState->outCondSet ); destState->errActionTable.setActions( srcState->errActionTable ); destState->eofActionTable.setActions( srcState->eofActionTable ); } } void FsmAp::fillInStates( MergeData &md ) { /* Merge any states that are awaiting merging. This will likey cause * other states to be added to the stfil list. */ StateAp *state = md.stfillHead; while ( state != 0 ) { StateSet *stateSet = &state->stateDictEl->stateSet; mergeStates( md, state, stateSet->data, stateSet->length() ); state = state->alg.next; } /* Delete the state sets of all states that are on the fill list. */ state = md.stfillHead; while ( state != 0 ) { /* Delete and reset the state set. */ delete state->stateDictEl; state->stateDictEl = 0; /* Next state in the stfill list. */ state = state->alg.next; } /* StateDict will still have its ptrs/size set but all of it's element * will be deleted so we don't need to clean it up. */ } void FsmAp::findEmbedExpansions( ExpansionList &expansionList, StateAp *destState, Action *condAction, bool sense ) { StateCondList destList; PairIter transCond( destState->outList.head, destState->stateCondList.head ); for ( ; !transCond.end(); transCond++ ) { switch ( transCond.userState ) { case RangeInS1: { if ( transCond.s1Tel.lowKey <= keyOps->maxKey ) { assert( transCond.s1Tel.highKey <= keyOps->maxKey ); /* Make a new state cond. */ StateCond *newStateCond = new StateCond( transCond.s1Tel.lowKey, transCond.s1Tel.highKey ); newStateCond->condSpace = addCondSpace( CondSet( condAction ) ); destList.append( newStateCond ); /* Create the expansion. */ Expansion *expansion = new Expansion( transCond.s1Tel.lowKey, transCond.s1Tel.highKey ); expansion->fromTrans = new TransAp(*transCond.s1Tel.trans); expansion->fromTrans->fromState = 0; expansion->fromTrans->toState = transCond.s1Tel.trans->toState; expansion->fromCondSpace = 0; expansion->fromVals = 0; expansion->toCondSpace = newStateCond->condSpace; expansion->toValsList.append( sense?1:0 ); #ifdef LOG_CONDS logNewExpansion( expansion ); #endif expansionList.append( expansion ); } break; } case RangeInS2: { /* Enhance state cond and find the expansion. */ StateCond *stateCond = transCond.s2Tel.trans; stateCond->lowKey = transCond.s2Tel.lowKey; stateCond->highKey = transCond.s2Tel.highKey; CondSet &destCS = stateCond->condSpace->condSet; long destLen = destCS.length(); CondSpace *fromCondSpace = stateCond->condSpace; CondSet mergedCS = destCS; mergedCS.insert( condAction ); CondSpace *toCondSpace = addCondSpace( mergedCS ); stateCond->condSpace = toCondSpace; destList.append( stateCond ); /* Loop all values in the dest space. */ for ( long destVals = 0; destVals < (1 << destLen); destVals++ ) { long basicVals = 0; for ( CondSet::Iter csi = destCS; csi.lte(); csi++ ) { if ( destVals & (1 << csi.pos()) ) { Action **cim = mergedCS.find( *csi ); long bitPos = (cim - mergedCS.data); basicVals |= 1 << bitPos; } } long targVals = basicVals; Action **cim = mergedCS.find( condAction ); long bitPos = (cim - mergedCS.data); targVals |= (sense?1:0) << bitPos; LongVect expandToVals( targVals ); findCondExpInTrans( expansionList, destState, transCond.s2Tel.lowKey, transCond.s2Tel.highKey, fromCondSpace, toCondSpace, destVals, expandToVals ); } break; } case RangeOverlap: case BreakS1: case BreakS2: assert( false ); break; } } destState->stateCondList.transfer( destList ); } void FsmAp::embedCondition( StateAp *state, Action *condAction, bool sense ) { MergeData md; ExpansionList expList; /* Turn on misfit accounting to possibly catch the old start state. */ setMisfitAccounting( true ); /* Worker. */ embedCondition( md, state, condAction, sense ); /* Fill in any states that were newed up as combinations of others. */ fillInStates( md ); /* Remove the misfits and turn off misfit accounting. */ removeMisfits(); setMisfitAccounting( false ); } void FsmAp::embedCondition( MergeData &md, StateAp *state, Action *condAction, bool sense ) { ExpansionList expList; findEmbedExpansions( expList, state, condAction, sense ); doExpand( md, state, expList ); doRemove( md, state, expList ); expList.empty(); } /* Check if a machine defines a single character. This is useful in validating * ranges and machines to export. */ bool FsmAp::checkSingleCharMachine() { /* Must have two states. */ if ( stateList.length() != 2 ) return false; /* The start state cannot be final. */ if ( startState->isFinState() ) return false; /* There should be only one final state. */ if ( finStateSet.length() != 1 ) return false; /* The final state cannot have any transitions out. */ if ( finStateSet[0]->outList.length() != 0 ) return false; /* The start state should have only one transition out. */ if ( startState->outList.length() != 1 ) return false; /* The singe transition out of the start state should not be a range. */ TransAp *startTrans = startState->outList.head; if ( startTrans->lowKey != startTrans->highKey ) return false; return true; }