diff options
Diffstat (limited to 'src/libfsm/fsmgraph.cc')
-rw-r--r-- | src/libfsm/fsmgraph.cc | 1948 |
1 files changed, 1948 insertions, 0 deletions
diff --git a/src/libfsm/fsmgraph.cc b/src/libfsm/fsmgraph.cc new file mode 100644 index 00000000..819bfa96 --- /dev/null +++ b/src/libfsm/fsmgraph.cc @@ -0,0 +1,1948 @@ +/* + * Copyright 2001-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <assert.h> +#include <iostream> + +#include "fsmgraph.h" +#include "mergesort.h" +#include "action.h" + +using std::endl; + +Action::~Action() +{ + /* If we were created by substitution of another action then we don't own the inline list. */ + if ( substOf == 0 && inlineList != 0 ) { + inlineList->empty(); + delete inlineList; + inlineList = 0; + } +} + +InlineItem::~InlineItem() +{ + if ( children != 0 ) { + children->empty(); + delete children; + } +} + +/* Make a new state. The new state will be put on the graph's + * list of state. The new state can be created final or non final. */ +StateAp *FsmAp::addState() +{ + /* Make the new state to return. */ + StateAp *state = new StateAp(); + + if ( misfitAccounting ) { + /* Create the new state on the misfit list. All states are created + * with no foreign in transitions. */ + misfitList.append( state ); + } + else { + /* Create the new state. */ + stateList.append( state ); + } + + return state; +} + +/* Construct an FSM that is the concatenation of an array of characters. A new + * machine will be made that has len+1 states with one transition between each + * state for each integer in str. IsSigned determines if the integers are to + * be considered as signed or unsigned ints. */ +FsmAp *FsmAp::concatFsm( FsmCtx *ctx, Key *str, int len ) +{ + FsmAp *fsm = new FsmAp( ctx ); + + /* Make the first state and set it as the start state. */ + StateAp *last = fsm->addState(); + fsm->setStartState( last ); + + /* Attach subsequent states. */ + for ( int i = 0; i < len; i++ ) { + StateAp *newState = fsm->addState(); + fsm->attachNewTrans( last, newState, str[i], str[i] ); + last = newState; + } + + /* Make the last state the final state. */ + fsm->setFinState( last ); + + return fsm; +} + +/* Case insensitive version of concatFsm. */ +FsmAp *FsmAp::concatFsmCI( FsmCtx *ctx, Key *str, int len ) +{ + FsmAp *fsm = new FsmAp( ctx ); + + /* Make the first state and set it as the start state. */ + StateAp *last = fsm->addState(); + fsm->setStartState( last ); + + /* Attach subsequent states. */ + for ( int i = 0; i < len; i++ ) { + StateAp *newState = fsm->addState(); + + KeySet keySet( ctx->keyOps ); + if ( str[i].isLower() ) + keySet.insert( str[i].toUpper() ); + if ( str[i].isUpper() ) + keySet.insert( str[i].toLower() ); + keySet.insert( str[i] ); + + for ( int i = 0; i < keySet.length(); i++ ) + fsm->attachNewTrans( last, newState, keySet[i], keySet[i] ); + + last = newState; + } + + /* Make the last state the final state. */ + fsm->setFinState( last ); + + return fsm; +} + + +/* Construct a machine that matches one character. A new machine will be made + * that has two states with a single transition between the states. */ +FsmAp *FsmAp::concatFsm( FsmCtx *ctx, Key chr ) +{ + FsmAp *fsm = new FsmAp( ctx ); + + /* Two states first start, second final. */ + fsm->setStartState( fsm->addState() ); + + StateAp *end = fsm->addState(); + fsm->setFinState( end ); + + /* Attach on the character. */ + fsm->attachNewTrans( fsm->startState, end, chr, chr ); + + return fsm; +} + +/* Case insensitive version of single-char concat FSM. */ +FsmAp *FsmAp::concatFsmCI( FsmCtx *ctx, Key chr ) +{ + return concatFsmCI( ctx, &chr, 1 ); +} + + +/* Construct a machine that matches any character in set. A new machine will + * be made that has two states and len transitions between the them. The set + * should be ordered correctly accroding to KeyOps and should not contain + * any duplicates. */ +FsmAp *FsmAp::orFsm( FsmCtx *ctx, Key *set, int len ) +{ + FsmAp *fsm = new FsmAp( ctx ); + + /* Two states first start, second final. */ + fsm->setStartState( fsm->addState() ); + + StateAp *end = fsm->addState(); + fsm->setFinState( end ); + + for ( int i = 1; i < len; i++ ) + assert( ctx->keyOps->lt( set[i-1], set[i] ) ); + + /* Attach on all the integers in the given string of ints. */ + for ( int i = 0; i < len; i++ ) + fsm->attachNewTrans( fsm->startState, end, set[i], set[i] ); + + return fsm; +} + +FsmAp *FsmAp::dotFsm( FsmCtx *ctx ) +{ + FsmAp *retFsm = FsmAp::rangeFsm( ctx, + ctx->keyOps->minKey, ctx->keyOps->maxKey ); + return retFsm; +} + +FsmAp *FsmAp::dotStarFsm( FsmCtx *ctx ) +{ + FsmAp *retFsm = FsmAp::rangeStarFsm( ctx, + ctx->keyOps->minKey, ctx->keyOps->maxKey ); + return retFsm; +} + +/* Construct a machine that matches a range of characters. A new machine will + * be made with two states and a range transition between them. The range will + * match any characters from low to high inclusive. Low should be less than or + * equal to high otherwise undefined behaviour results. IsSigned determines + * if the integers are to be considered as signed or unsigned ints. */ +FsmAp *FsmAp::rangeFsm( FsmCtx *ctx, Key low, Key high ) +{ + FsmAp *fsm = new FsmAp( ctx ); + + /* Two states first start, second final. */ + fsm->setStartState( fsm->addState() ); + + StateAp *end = fsm->addState(); + fsm->setFinState( end ); + + /* Attach using the range of characters. */ + fsm->attachNewTrans( fsm->startState, end, low, high ); + + return fsm; +} + +FsmAp *FsmAp::notRangeFsm( FsmCtx *ctx, Key low, Key high ) +{ + FsmAp *fsm = new FsmAp( ctx ); + + /* Two states first start, second final. */ + fsm->setStartState( fsm->addState() ); + + StateAp *end = fsm->addState(); + fsm->setFinState( end ); + + /* Attach using the range of characters. */ + if ( ctx->keyOps->lt( ctx->keyOps->minKey, low ) ) { + ctx->keyOps->decrement( low ); + fsm->attachNewTrans( fsm->startState, end, ctx->keyOps->minKey, low ); + } + + if ( ctx->keyOps->lt( high, ctx->keyOps->maxKey ) ) { + ctx->keyOps->increment( high ); + fsm->attachNewTrans( fsm->startState, end, high, ctx->keyOps->maxKey ); + } + + return fsm; +} + + +FsmAp *FsmAp::rangeFsmCI( FsmCtx *ctx, Key lowKey, Key highKey ) +{ + FsmAp *retFsm = rangeFsm( ctx, lowKey, highKey ); + + /* Union the portion that covers alphas. */ + if ( lowKey.getVal() <= 'z' ) { + int low, high; + if ( lowKey.getVal() <= 'a' ) + low = 'a'; + else + low = lowKey.getVal(); + + if ( highKey.getVal() >= 'a' ) { + if ( highKey.getVal() >= 'z' ) + high = 'z'; + else + high = highKey.getVal(); + + /* Add in upper(low) .. upper(high) */ + + FsmAp *addFsm = FsmAp::rangeFsm( ctx, toupper(low), toupper(high) ); + FsmRes res = FsmAp::unionOp( retFsm, addFsm ); + retFsm = res.fsm; + } + } + + if ( lowKey.getVal() <= 'Z' ) { + int low, high; + if ( lowKey.getVal() <= 'A' ) + low = 'A'; + else + low = lowKey.getVal(); + + if ( highKey.getVal() >= 'A' ) { + if ( highKey.getVal() >= 'Z' ) + high = 'Z'; + else + high = highKey.getVal(); + + /* Add in lower(low) .. lower(high) */ + FsmAp *addFsm = FsmAp::rangeFsm( ctx, tolower(low), tolower(high) ); + FsmRes res = FsmAp::unionOp( retFsm, addFsm ); + retFsm = res.fsm; + } + } + + return retFsm; +} + +/* Construct a machine that a repeated range of characters. */ +FsmAp *FsmAp::rangeStarFsm( FsmCtx *ctx, Key low, Key high ) +{ + FsmAp *fsm = new FsmAp( ctx ); + + /* One state which is final and is the start state. */ + fsm->setStartState( fsm->addState() ); + fsm->setFinState( fsm->startState ); + + /* Attach start to start using range of characters. */ + fsm->attachNewTrans( fsm->startState, fsm->startState, low, high ); + + return fsm; +} + +/* Construct a machine that matches the empty string. A new machine will be + * made with only one state. The new state will be both a start and final + * state. IsSigned determines if the machine has a signed or unsigned + * alphabet. Fsm operations must be done on machines with the same alphabet + * signedness. */ +FsmAp *FsmAp::lambdaFsm( FsmCtx *ctx ) +{ + FsmAp *fsm = new FsmAp( ctx ); + + /* Give it one state with no transitions making it + * the start state and final state. */ + fsm->setStartState( fsm->addState() ); + fsm->setFinState( fsm->startState ); + + return fsm; +} + +/* Construct a machine that matches nothing at all. A new machine will be + * made with only one state. It will not be final. */ +FsmAp *FsmAp::emptyFsm( FsmCtx *ctx ) +{ + FsmAp *fsm = new FsmAp( ctx ); + + /* Give it one state with no transitions making it + * the start state and final state. */ + fsm->setStartState( fsm->addState() ); + + return fsm; +} + +void FsmAp::transferOutData( StateAp *destState, StateAp *srcState ) +{ + for ( TransList::Iter trans = destState->outList; trans.lte(); trans++ ) { + if ( trans->plain() ) { + if ( trans->tdap()->toState != 0 ) { + /* Get the actions data from the outActionTable. */ + trans->tdap()->actionTable.setActions( srcState->outActionTable ); + + /* Get the priorities from the outPriorTable. */ + trans->tdap()->priorTable.setPriors( srcState->outPriorTable ); + } + } + else { + for ( CondList::Iter cond = trans->tcap()->condList; cond.lte(); cond++ ) { + if ( cond->toState != 0 ) { + /* Get the actions data from the outActionTable. */ + cond->actionTable.setActions( srcState->outActionTable ); + + /* Get the priorities from the outPriorTable. */ + cond->priorTable.setPriors( srcState->outPriorTable ); + } + } + } + } + + if ( destState->nfaOut != 0 ) { + for ( NfaTransList::Iter na = *destState->nfaOut; na.lte(); na++ ) + transferOutToNfaTrans( na, srcState ); + } +} + +/* Union worker used by union, set diff (subtract) and intersection. */ +FsmRes FsmAp::doUnion( FsmAp *fsm, FsmAp *other ) +{ + /* Build a state set consisting of both start states */ + StateSet startStateSet; + startStateSet.insert( fsm->startState ); + startStateSet.insert( other->startState ); + + /* Both of the original start states loose their start state status. */ + fsm->unsetStartState(); + other->unsetStartState(); + + /* Bring in the rest of other's entry points. */ + fsm->copyInEntryPoints( other ); + other->entryPoints.empty(); + + /* Merge the lists. This will move all the states from other + * into this. No states will be deleted. */ + fsm->stateList.append( other->stateList ); + fsm->misfitList.append( other->misfitList ); + + /* Move the final set data from other into this. */ + fsm->finStateSet.insert(other->finStateSet); + other->finStateSet.empty(); + + /* Since other's list is empty, we can delete the fsm without + * affecting any states. */ + delete other; + + /* Create a new start state. */ + fsm->setStartState( fsm->addState() ); + + /* Merge the start states. */ + fsm->mergeStateList( fsm->startState, startStateSet.data, startStateSet.length() ); + + /* Fill in any new states made from merging. */ + return fillInStates( fsm ); +} + +bool FsmAp::inEptVect( EptVect *eptVect, StateAp *state ) +{ + if ( eptVect != 0 ) { + /* Vect is there, walk it looking for state. */ + for ( int i = 0; i < eptVect->length(); i++ ) { + if ( eptVect->data[i].targ == state ) + return true; + } + } + return false; +} + +/* Fill epsilon vectors in a root state from a given starting point. Epmploys + * a depth first search through the graph of epsilon transitions. */ +void FsmAp::epsilonFillEptVectFrom( StateAp *root, StateAp *from, bool parentLeaving ) +{ + /* Walk the epsilon transitions out of the state. */ + for ( EpsilonTrans::Iter ep = from->epsilonTrans; ep.lte(); ep++ ) { + /* Find the entry point, if the it does not resove, ignore it. */ + EntryMapEl *enLow, *enHigh; + if ( entryPoints.findMulti( *ep, enLow, enHigh ) ) { + /* Loop the targets. */ + for ( EntryMapEl *en = enLow; en <= enHigh; en++ ) { + /* Do not add the root or states already in eptVect. */ + StateAp *targ = en->value; + if ( targ != from && !inEptVect(root->eptVect, targ) ) { + /* Maybe need to create the eptVect. */ + if ( root->eptVect == 0 ) + root->eptVect = new EptVect(); + + /* If moving to a different graph or if any parent is + * leaving then we are leaving. */ + bool leaving = parentLeaving || + root->owningGraph != targ->owningGraph; + + /* All ok, add the target epsilon and recurse. */ + root->eptVect->append( EptVectEl(targ, leaving) ); + epsilonFillEptVectFrom( root, targ, leaving ); + } + } + } + } +} + +void FsmAp::shadowReadWriteStates() +{ + /* Init isolatedShadow algorithm data. */ + for ( StateList::Iter st = stateList; st.lte(); st++ ) + st->isolatedShadow = 0; + + /* Any states that may be both read from and written to must + * be shadowed. */ + for ( StateList::Iter st = stateList; st.lte(); st++ ) { + /* Find such states by looping through stateVect lists, which give us + * the states that will be read from. May cause us to visit the states + * that we are interested in more than once. */ + if ( st->eptVect != 0 ) { + /* For all states that will be read from. */ + for ( EptVect::Iter ept = *st->eptVect; ept.lte(); ept++ ) { + /* Check for read and write to the same state. */ + StateAp *targ = ept->targ; + if ( targ->eptVect != 0 ) { + /* State is to be written to, if the shadow is not already + * there, create it. */ + if ( targ->isolatedShadow == 0 ) { + StateAp *shadow = addState(); + mergeStates( shadow, targ ); + targ->isolatedShadow = shadow; + } + + /* Write shadow into the state vector so that it is the + * state that the epsilon transition will read from. */ + ept->targ = targ->isolatedShadow; + } + } + } + } +} + +void FsmAp::resolveEpsilonTrans() +{ + /* Walk the state list and invoke recursive worker on each state. */ + for ( StateList::Iter st = stateList; st.lte(); st++ ) + epsilonFillEptVectFrom( st, st, false ); + + /* Prevent reading from and writing to of the same state. */ + shadowReadWriteStates(); + + /* For all states that have epsilon transitions out, draw the transitions, + * clear the epsilon transitions. */ + for ( StateList::Iter st = stateList; st.lte(); st++ ) { + /* If there is a state vector, then create the pre-merge state. */ + if ( st->eptVect != 0 ) { + /* Merge all the epsilon targets into the state. */ + for ( EptVect::Iter ept = *st->eptVect; ept.lte(); ept++ ) { + if ( ept->leaving ) + mergeStatesLeaving( st, ept->targ ); + else + mergeStates( st, ept->targ ); + } + + /* Clean up the target list. */ + delete st->eptVect; + st->eptVect = 0; + } + + /* Clear the epsilon transitions vector. */ + st->epsilonTrans.empty(); + } +} + +FsmRes FsmAp::applyNfaTrans( FsmAp *fsm, StateAp *fromState, StateAp *toState, NfaTrans *nfaTrans ) +{ + fsm->setMisfitAccounting( true ); + + fsm->mergeStates( fromState, toState, false ); + + /* Epsilons can caused merges which leave behind unreachable states. */ + FsmRes res = FsmAp::fillInStates( fsm ); + if ( !res.success() ) + return res; + + /* Can nuke the epsilon transition that we will never + * follow. */ + fsm->detachFromNfa( fromState, toState, nfaTrans ); + fromState->nfaOut->detach( nfaTrans ); + delete nfaTrans; + + if ( fromState->nfaOut->length() == 0 ) { + delete fromState->nfaOut; + fromState->nfaOut = 0; + } + + /* Remove the misfits and turn off misfit accounting. */ + fsm->removeMisfits(); + fsm->setMisfitAccounting( false ); + + return FsmRes( FsmRes::Fsm(), fsm ); +} + +void FsmAp::globOp( FsmAp **others, int numOthers ) +{ + for ( int m = 0; m < numOthers; m++ ) { + assert( ctx == others[m]->ctx ); + } + + /* All other machines loose start states status. */ + for ( int m = 0; m < numOthers; m++ ) + others[m]->unsetStartState(); + + /* Bring the other machines into this. */ + for ( int m = 0; m < numOthers; m++ ) { + /* Bring in the rest of other's entry points. */ + copyInEntryPoints( others[m] ); + others[m]->entryPoints.empty(); + + /* Merge the lists. This will move all the states from other into + * this. No states will be deleted. */ + stateList.append( others[m]->stateList ); + assert( others[m]->misfitList.length() == 0 ); + + /* Move the final set data from other into this. */ + finStateSet.insert( others[m]->finStateSet ); + others[m]->finStateSet.empty(); + + /* Since other's list is empty, we can delete the fsm without + * affecting any states. */ + delete others[m]; + } +} + +/* Used near the end of an fsm construction. Any labels that are still around + * are referenced only by gotos and calls and they need to be made into + * deterministic entry points. */ +void FsmAp::deterministicEntry() +{ + /* States may loose their entry points, turn on misfit accounting. */ + setMisfitAccounting( true ); + + /* Get a copy of the entry map then clear all the entry points. As we + * iterate the old entry map finding duplicates we will add the entry + * points for the new states that we create. */ + EntryMap prevEntry = entryPoints; + unsetAllEntryPoints(); + + for ( int enId = 0; enId < prevEntry.length(); ) { + /* Count the number of states on this entry key. */ + int highId = enId; + while ( highId < prevEntry.length() && prevEntry[enId].key == prevEntry[highId].key ) + highId += 1; + + int numIds = highId - enId; + if ( numIds == 1 ) { + /* Only a single entry point, just set the entry. */ + setEntry( prevEntry[enId].key, prevEntry[enId].value ); + } + else { + /* Multiple entry points, need to create a new state and merge in + * all the targets of entry points. */ + StateAp *newEntry = addState(); + for ( int en = enId; en < highId; en++ ) + mergeStates( newEntry, prevEntry[en].value ); + + /* Add the new state as the single entry point. */ + setEntry( prevEntry[enId].key, newEntry ); + } + + enId += numIds; + } + + /* The old start state may be unreachable. Remove the misfits and turn off + * misfit accounting. */ + removeMisfits(); + setMisfitAccounting( false ); +} + +/* Unset any final states that are no longer to be final due to final bits. */ +void FsmAp::unsetKilledFinals() +{ + /* Duplicate the final state set before we begin modifying it. */ + StateSet fin( finStateSet ); + + for ( int s = 0; s < fin.length(); s++ ) { + /* Check for killing bit. */ + StateAp *state = fin.data[s]; + if ( state->stateBits & STB_GRAPH1 ) { + /* One final state is a killer, set to non-final. */ + unsetFinState( state ); + } + + /* Clear all killing bits. Non final states should never have had those + * state bits set in the first place. */ + state->stateBits &= ~STB_GRAPH1; + } +} + +/* Unset any final states that are no longer to be final due to final bits. */ +void FsmAp::unsetIncompleteFinals() +{ + /* Duplicate the final state set before we begin modifying it. */ + StateSet fin( finStateSet ); + + for ( int s = 0; s < fin.length(); s++ ) { + /* Check for one set but not the other. */ + StateAp *state = fin.data[s]; + if ( state->stateBits & STB_BOTH && + (state->stateBits & STB_BOTH) != STB_BOTH ) + { + /* One state wants the other but it is not there. */ + unsetFinState( state ); + } + + /* Clear wanting bits. Non final states should never have had those + * state bits set in the first place. */ + state->stateBits &= ~STB_BOTH; + } +} + +/* Kleene star operator. Makes this machine the kleene star of itself. Any + * transitions made going out of the machine and back into itself will be + * notified that they are leaving transitions by having the leavingFromState + * callback invoked. */ +FsmRes FsmAp::starOp( FsmAp *fsm ) +{ + /* The start func orders need to be shifted before doing the star. */ + fsm->ctx->curActionOrd += fsm->shiftStartActionOrder( fsm->ctx->curActionOrd ); + + /* Turn on misfit accounting to possibly catch the old start state. */ + fsm->setMisfitAccounting( true ); + + /* Create the new new start state. It will be set final after the merging + * of the final states with the start state is complete. */ + StateAp *prevStartState = fsm->startState; + fsm->unsetStartState(); + fsm->setStartState( fsm->addState() ); + + /* Merge the new start state with the old one to isolate it. */ + fsm->mergeStates( fsm->startState, prevStartState ); + + if ( !fsm->startState->isFinState() ) { + /* Common case, safe to merge. */ + for ( StateSet::Iter st = fsm->finStateSet; st.lte(); st++ ) + fsm->mergeStatesLeaving( *st, fsm->startState ); + } + else { + /* Merge the start state into all final states. Except the start state on + * the first pass. If the start state is set final we will be doubling up + * its transitions, which will get transfered to any final states that + * follow it in the final state set. This will be determined by the order + * of items in the final state set. To prevent this we just merge with the + * start on a second pass. */ + StateSet origFin = fsm->finStateSet; + for ( StateSet::Iter st = origFin; st.lte(); st++ ) { + if ( *st != fsm->startState ) + fsm->mergeStatesLeaving( *st, fsm->startState ); + } + + /* Now it is safe to merge the start state with itself (provided it + * is set final). */ + if ( fsm->startState->isFinState() ) + fsm->mergeStatesLeaving( fsm->startState, fsm->startState ); + } + + /* Now ensure the new start state is a final state. */ + fsm->setFinState( fsm->startState ); + + /* Fill in any states that were newed up as combinations of others. */ + FsmRes res = FsmAp::fillInStates( fsm ); + if ( !res.success() ) + return res; + + /* Remove the misfits and turn off misfit accounting. */ + fsm->removeMisfits(); + fsm->setMisfitAccounting( false ); + + fsm->afterOpMinimize(); + + return res; +} + +FsmRes FsmAp::plusOp( FsmAp *fsm ) +{ + /* Need a duplicate for the star end. */ + FsmAp *factorDup = new FsmAp( *fsm ); + + /* Star the duplicate. */ + FsmRes res1 = FsmAp::starOp( factorDup ); + if ( !res1.success() ) + return res1; + + FsmRes res2 = FsmAp::concatOp( fsm, res1.fsm ); + if ( !res2.success() ) + return res2; + + return res2; +} + +FsmRes FsmAp::questionOp( FsmAp *fsm ) +{ + /* Make the null fsm. */ + FsmAp *nu = FsmAp::lambdaFsm( fsm->ctx ); + + /* Perform the question operator. */ + FsmRes res = FsmAp::unionOp( fsm, nu ); + if ( !res.success() ) + return res; + + return res; +} + +FsmRes FsmAp::exactRepeatOp( FsmAp *fsm, int times ) +{ + /* Zero repetitions produces lambda machine. */ + if ( times == 0 ) { + FsmCtx *fsmCtx = fsm->ctx; + delete fsm; + return FsmRes( FsmRes::Fsm(), FsmAp::lambdaFsm( fsmCtx ) ); + } + + /* The start func orders need to be shifted before doing the + * repetition. */ + fsm->ctx->curActionOrd += fsm->shiftStartActionOrder( fsm->ctx->curActionOrd ); + + /* A repeat of one does absolutely nothing. */ + if ( times == 1 ) + return FsmRes( FsmRes::Fsm(), fsm ); + + /* Make a machine to make copies from. */ + FsmAp *copyFrom = new FsmAp( *fsm ); + + /* Concatentate duplicates onto the end up until before the last. */ + for ( int i = 1; i < times-1; i++ ) { + FsmAp *dup = new FsmAp( *copyFrom ); + FsmRes res = concatOp( fsm, dup ); + if ( !res.success() ) { + delete copyFrom; + return res; + } + } + + /* Now use the copyFrom on the end. */ + FsmRes res = concatOp( fsm, copyFrom ); + if ( !res.success()) + return res; + + res.fsm->afterOpMinimize(); + + return res; +} + +FsmRes FsmAp::maxRepeatOp( FsmAp *fsm, int times ) +{ + /* Zero repetitions produces lambda machine. */ + if ( times == 0 ) { + FsmCtx *fsmCtx = fsm->ctx; + delete fsm; + return FsmRes( FsmRes::Fsm(), FsmAp::lambdaFsm( fsmCtx ) ); + } + + fsm->ctx->curActionOrd += fsm->shiftStartActionOrder( fsm->ctx->curActionOrd ); + + /* A repeat of one optional merely allows zero string. */ + if ( times == 1 ) { + isolateStartState( fsm ); + fsm->setFinState( fsm->startState ); + return FsmRes( FsmRes::Fsm(), fsm ); + } + + /* Make a machine to make copies from. */ + FsmAp *copyFrom = new FsmAp( *fsm ); + + /* The state set used in the from end of the concatentation. Starts with + * the initial final state set, then after each concatenation, gets set to + * the the final states that come from the the duplicate. */ + StateSet lastFinSet( fsm->finStateSet ); + + /* Set the initial state to zero to allow zero copies. */ + isolateStartState( fsm ); + fsm->setFinState( fsm->startState ); + + /* Concatentate duplicates onto the end up until before the last. */ + for ( int i = 1; i < times-1; i++ ) { + /* Make a duplicate for concating and set the fin bits to graph 2 so we + * can pick out it's final states after the optional style concat. */ + FsmAp *dup = new FsmAp( *copyFrom ); + dup->setFinBits( STB_GRAPH2 ); + FsmRes res = concatOp( fsm, dup, false, &lastFinSet, true ); + if ( !res.success() ) { + delete copyFrom; + return res; + } + + /* Clear the last final state set and make the new one by taking only + * the final states that come from graph 2.*/ + lastFinSet.empty(); + for ( int i = 0; i < fsm->finStateSet.length(); i++ ) { + /* If the state came from graph 2, add it to the last set and clear + * the bits. */ + StateAp *fs = fsm->finStateSet[i]; + if ( fs->stateBits & STB_GRAPH2 ) { + lastFinSet.insert( fs ); + fs->stateBits &= ~STB_GRAPH2; + } + } + } + + /* Now use the copyFrom on the end, no bits set, no bits to clear. */ + FsmRes res = concatOp( fsm, copyFrom, false, &lastFinSet, true ); + if ( !res.success() ) + return res; + + res.fsm->afterOpMinimize(); + + return res; +} + +FsmRes FsmAp::minRepeatOp( FsmAp *fsm, int times ) +{ + if ( times == 0 ) { + /* Acts just like a star op on the machine to return. */ + return FsmAp::starOp( fsm ); + } + else { + /* Take a duplicate for the star below. */ + FsmAp *dup = new FsmAp( *fsm ); + + /* Do repetition on the first half. */ + FsmRes exact = FsmAp::exactRepeatOp( fsm, times ); + if ( !exact.success() ) { + delete dup; + return exact; + } + + /* Star the duplicate. */ + FsmRes star = FsmAp::starOp( dup ); + if ( !star.success() ) { + delete exact.fsm; + return star; + } + + /* Tack on the kleene star. */ + return FsmAp::concatOp( exact.fsm, star.fsm ); + } +} + +FsmRes FsmAp::rangeRepeatOp( FsmAp *fsm, int lowerRep, int upperRep ) +{ + if ( lowerRep == 0 && upperRep == 0 ) { + FsmCtx *fsmCtx = fsm->ctx; + delete fsm; + return FsmRes( FsmRes::Fsm(), FsmAp::lambdaFsm( fsmCtx ) ); + } + else if ( lowerRep == 0 ) { + /* Just doing max repetition. Already guarded against n == 0. */ + return FsmAp::maxRepeatOp( fsm, upperRep ); + } + else if ( lowerRep == upperRep ) { + /* Just doing exact repetition. Already guarded against n == 0. */ + return FsmAp::exactRepeatOp( fsm, lowerRep ); + } + else { + /* This is the case that 0 < lowerRep < upperRep. Take a + * duplicate for the optional repeat. */ + FsmAp *dup = new FsmAp( *fsm ); + + /* Do repetition on the first half. */ + FsmRes exact = FsmAp::exactRepeatOp( fsm, lowerRep ); + if ( !exact.success() ) { + delete dup; + return exact; + } + + /* Do optional repetition on the second half. */ + FsmRes optional = FsmAp::maxRepeatOp( dup, upperRep - lowerRep ); + if ( !optional.success() ) { + delete exact.fsm; + return optional; + } + + /* Concat two halves. */ + return FsmAp::concatOp( exact.fsm, optional.fsm ); + } +} + +/* Concatenates other to the end of this machine. Other is deleted. Any + * transitions made leaving this machine and entering into other are notified + * that they are leaving transitions by having the leavingFromState callback + * invoked. Supports specifying the fromStates (istead of first final state + * set). This is useful for a max-repeat schenario, where from states are not + * all of first's final states. Also supports treating the concatentation as + * optional, which leaves the final states of the first machine as final. */ +FsmRes FsmAp::concatOp( FsmAp *fsm, FsmAp *other, bool lastInSeq, StateSet *fromStates, bool optional ) +{ + for ( PriorTable::Iter g = other->startState->guardedInTable; g.lte(); g++ ) { + fsm->allTransPrior( 0, g->desc ); + other->allTransPrior( 0, g->desc->other ); + } + + /* Assert same signedness and return graph concatenation op. */ + assert( fsm->ctx == other->ctx ); + + /* For the merging process. */ + StateSet finStateSetCopy, startStateSet; + + /* Turn on misfit accounting for both graphs. */ + fsm->setMisfitAccounting( true ); + other->setMisfitAccounting( true ); + + /* Get the other's start state. */ + StateAp *otherStartState = other->startState; + + /* Unset other's start state before bringing in the entry points. */ + other->unsetStartState(); + + /* Bring in the rest of other's entry points. */ + fsm->copyInEntryPoints( other ); + other->entryPoints.empty(); + + /* Bring in other's states into our state lists. */ + fsm->stateList.append( other->stateList ); + fsm->misfitList.append( other->misfitList ); + + /* If from states is not set, then get a copy of our final state set before + * we clobber it and use it instead. */ + if ( fromStates == 0 ) { + finStateSetCopy = fsm->finStateSet; + fromStates = &finStateSetCopy; + } + + /* Unset all of our final states and get the final states from other. */ + if ( !optional ) + fsm->unsetAllFinStates(); + fsm->finStateSet.insert( other->finStateSet ); + + /* Since other's lists are empty, we can delete the fsm without + * affecting any states. */ + delete other; + + /* Merge our former final states with the start state of other. */ + for ( int i = 0; i < fromStates->length(); i++ ) { + StateAp *state = fromStates->data[i]; + + /* Merge the former final state with other's start state. */ + fsm->mergeStatesLeaving( state, otherStartState ); + + /* If the former final state was not reset final then we must clear + * the state's out trans data. If it got reset final then it gets to + * keep its out trans data. This must be done before fillInStates gets + * called to prevent the data from being sourced. */ + if ( ! state->isFinState() ) + fsm->clearOutData( state ); + } + + /* Fill in any new states made from merging. */ + FsmRes res = fillInStates( fsm ); + if ( !res.success() ) + return res; + + /* Remove the misfits and turn off misfit accounting. */ + fsm->removeMisfits(); + fsm->setMisfitAccounting( false ); + + res.fsm->afterOpMinimize( lastInSeq ); + + return res; +} + +FsmRes FsmAp::rightStartConcatOp( FsmAp *fsm, FsmAp *other, bool lastInSeq ) +{ + PriorDesc *priorDesc0 = fsm->ctx->allocPriorDesc(); + PriorDesc *priorDesc1 = fsm->ctx->allocPriorDesc(); + + /* Set up the priority descriptors. The left machine gets the + * lower priority where as the right get the higher start priority. */ + priorDesc0->key = fsm->ctx->nextPriorKey++; + priorDesc0->priority = 0; + fsm->allTransPrior( fsm->ctx->curPriorOrd++, priorDesc0 ); + + /* The start transitions of the right machine gets the higher + * priority. Use the same unique key. */ + priorDesc1->key = priorDesc0->key; + priorDesc1->priority = 1; + other->startFsmPrior( fsm->ctx->curPriorOrd++, priorDesc1 ); + + return concatOp( fsm, other, lastInSeq ); +} + +/* Returns union of fsm and other. Other is deleted. */ +FsmRes FsmAp::unionOp( FsmAp *fsm, FsmAp *other, bool lastInSeq ) +{ + assert( fsm->ctx == other->ctx ); + + fsm->ctx->unionOp = true; + + fsm->setFinBits( STB_GRAPH1 ); + other->setFinBits( STB_GRAPH2 ); + + /* Turn on misfit accounting for both graphs. */ + fsm->setMisfitAccounting( true ); + other->setMisfitAccounting( true ); + + /* Call Worker routine. */ + FsmRes res = doUnion( fsm, other ); + if ( !res.success() ) + return res; + + /* Remove the misfits and turn off misfit accounting. */ + fsm->removeMisfits(); + fsm->setMisfitAccounting( false ); + + fsm->ctx->unionOp = false; + fsm->unsetFinBits( STB_BOTH ); + + fsm->afterOpMinimize( lastInSeq ); + + return res; +} + +/* Intersects other with this machine. Other is deleted. */ +FsmRes FsmAp::intersectOp( FsmAp *fsm, FsmAp *other, bool lastInSeq ) +{ + assert( fsm->ctx == other->ctx ); + + /* Turn on misfit accounting for both graphs. */ + fsm->setMisfitAccounting( true ); + other->setMisfitAccounting( true ); + + /* Set the fin bits on this and other to want each other. */ + fsm->setFinBits( STB_GRAPH1 ); + other->setFinBits( STB_GRAPH2 ); + + /* Call worker Or routine. */ + FsmRes res = doUnion( fsm, other ); + if ( !res.success() ) + return res; + + /* Unset any final states that are no longer to + * be final due to final bits. */ + fsm->unsetIncompleteFinals(); + + /* Remove the misfits and turn off misfit accounting. */ + fsm->removeMisfits(); + fsm->setMisfitAccounting( false ); + + /* Remove states that have no path to a final state. */ + fsm->removeDeadEndStates(); + + fsm->afterOpMinimize( lastInSeq ); + + return res; +} + +/* Set subtracts other machine from this machine. Other is deleted. */ +FsmRes FsmAp::subtractOp( FsmAp *fsm, FsmAp *other, bool lastInSeq ) +{ + assert( fsm->ctx == other->ctx ); + + /* Turn on misfit accounting for both graphs. */ + fsm->setMisfitAccounting( true ); + other->setMisfitAccounting( true ); + + /* Set the fin bits of other to be killers. */ + other->setFinBits( STB_GRAPH1 ); + + /* Call worker Or routine. */ + FsmRes res = doUnion( fsm, other ); + if ( !res.success() ) + return res; + + /* Unset any final states that are no longer to + * be final due to final bits. */ + fsm->unsetKilledFinals(); + + /* Remove the misfits and turn off misfit accounting. */ + fsm->removeMisfits(); + fsm->setMisfitAccounting( false ); + + /* Remove states that have no path to a final state. */ + fsm->removeDeadEndStates(); + + fsm->afterOpMinimize( lastInSeq ); + + return res; +} + +FsmRes FsmAp::epsilonOp( FsmAp *fsm ) +{ + fsm->setMisfitAccounting( true ); + + for ( StateList::Iter st = fsm->stateList; st.lte(); st++ ) + st->owningGraph = 0; + + /* Perform merges. */ + fsm->resolveEpsilonTrans(); + + /* Epsilons can caused merges which leave behind unreachable states. */ + FsmRes res = FsmAp::fillInStates( fsm ); + if ( !res.success() ) + return res; + + /* Remove the misfits and turn off misfit accounting. */ + fsm->removeMisfits(); + fsm->setMisfitAccounting( false ); + + return res; +} + +/* Make a new maching by joining together a bunch of machines without making + * any transitions between them. A negative finalId results in there being no + * final id. */ +FsmRes FsmAp::joinOp( FsmAp *fsm, int startId, int finalId, FsmAp **others, int numOthers ) +{ + for ( int m = 0; m < numOthers; m++ ) { + assert( fsm->ctx == others[m]->ctx ); + } + + /* Set the owning machines. Start at one. Zero is reserved for the start + * and final states. */ + for ( StateList::Iter st = fsm->stateList; st.lte(); st++ ) + st->owningGraph = 1; + for ( int m = 0; m < numOthers; m++ ) { + for ( StateList::Iter st = others[m]->stateList; st.lte(); st++ ) + st->owningGraph = 2+m; + } + + /* All machines loose start state status. */ + fsm->unsetStartState(); + for ( int m = 0; m < numOthers; m++ ) + others[m]->unsetStartState(); + + /* Bring the other machines into this. */ + for ( int m = 0; m < numOthers; m++ ) { + /* Bring in the rest of other's entry points. */ + fsm->copyInEntryPoints( others[m] ); + others[m]->entryPoints.empty(); + + /* Merge the lists. This will move all the states from other into + * this. No states will be deleted. */ + fsm->stateList.append( others[m]->stateList ); + assert( others[m]->misfitList.length() == 0 ); + + /* Move the final set data from other into this. */ + fsm->finStateSet.insert( others[m]->finStateSet ); + others[m]->finStateSet.empty(); + + /* Since other's list is empty, we can delete the fsm without + * affecting any states. */ + delete others[m]; + } + + /* Look up the start entry point. */ + EntryMapEl *enLow = 0, *enHigh = 0; + bool findRes = fsm->entryPoints.findMulti( startId, enLow, enHigh ); + if ( ! findRes ) { + /* No start state. Set a default one and proceed with the join. Note + * that the result of the join will be a very uninteresting machine. */ + fsm->setStartState( fsm->addState() ); + } + else { + /* There is at least one start state, create a state that will become + * the new start state. */ + StateAp *newStart = fsm->addState(); + fsm->setStartState( newStart ); + + /* The start state is in an owning machine class all it's own. */ + newStart->owningGraph = 0; + + /* Create the set of states to merge from. */ + StateSet stateSet; + for ( EntryMapEl *en = enLow; en <= enHigh; en++ ) + stateSet.insert( en->value ); + + /* Merge in the set of start states into the new start state. */ + fsm->mergeStateList( newStart, stateSet.data, stateSet.length() ); + } + + /* Take a copy of the final state set, before unsetting them all. This + * will allow us to call clearOutData on the states that don't get + * final state status back back. */ + StateSet finStateSetCopy = fsm->finStateSet; + + /* Now all final states are unset. */ + fsm->unsetAllFinStates(); + + if ( finalId >= 0 ) { + /* Create the implicit final state. */ + StateAp *finState = fsm->addState(); + fsm->setFinState( finState ); + + /* Assign an entry into the final state on the final state entry id. Note + * that there may already be an entry on this id. That's ok. Also set the + * final state owning machine id. It's in a class all it's own. */ + fsm->setEntry( finalId, finState ); + finState->owningGraph = 0; + } + + /* Hand over to workers for resolving epsilon trans. This will merge states + * with the targets of their epsilon transitions. */ + fsm->resolveEpsilonTrans(); + + /* Invoke the relinquish final callback on any states that did not get + * final state status back. */ + for ( StateSet::Iter st = finStateSetCopy; st.lte(); st++ ) { + if ( !((*st)->stateBits & STB_ISFINAL) ) + fsm->clearOutData( *st ); + } + + /* Fill in any new states made from merging. */ + FsmRes res = FsmAp::fillInStates( fsm ); + if ( !res.success() ) + return res; + + /* Joining can be messy. Instead of having misfit accounting on (which is + * tricky here) do a full cleaning. */ + fsm->removeUnreachableStates(); + + return res; +} + +/* Ensure that the start state is free of entry points (aside from the fact + * that it is the start state). If the start state has entry points then Make a + * new start state by merging with the old one. Useful before modifying start + * transitions. If the existing start state has any entry points other than the + * start state entry then modifying its transitions changes more than the start + * transitions. So isolate the start state by separating it out such that it + * only has start stateness as it's entry point. */ +FsmRes FsmAp::isolateStartState( FsmAp *fsm ) +{ + /* Do nothing if the start state is already isolated. */ + if ( fsm->isStartStateIsolated() ) + return FsmRes( FsmRes::Fsm(), fsm ); + + /* Turn on misfit accounting to possibly catch the old start state. */ + fsm->setMisfitAccounting( true ); + + /* This will be the new start state. The existing start + * state is merged with it. */ + StateAp *prevStartState = fsm->startState; + fsm->unsetStartState(); + fsm->setStartState( fsm->addState() ); + + /* Merge the new start state with the old one to isolate it. */ + fsm->mergeStates( fsm->startState, prevStartState ); + + /* Stfil and stateDict will be empty because the merging of the old start + * state into the new one will not have any conflicting transitions. */ + assert( fsm->stateDict.treeSize == 0 ); + assert( fsm->nfaList.length() == 0 ); + + /* The old start state may be unreachable. Remove the misfits and turn off + * misfit accounting. */ + fsm->removeMisfits(); + fsm->setMisfitAccounting( false ); + + return FsmRes( FsmRes::Fsm(), fsm ); +} + +StateAp *FsmAp::dupStartState() +{ + StateAp *dup = addState(); + mergeStates( dup, startState ); + return dup; +} + +/* A state merge which represents the drawing in of leaving transitions. If + * there is any out data then we duplicate the source state, transfer the out + * data, then merge in the state. The new state will be reaped because it will + * not be given any in transitions. */ +void FsmAp::mergeStatesLeaving( StateAp *destState, StateAp *srcState ) +{ + if ( !hasOutData( destState ) ) { + /* Perform the merge, indicating we are leaving, which will affect how + * out conds are merged. */ + mergeStates( destState, srcState, true ); + } + else { + /* Dup the source state. */ + StateAp *ssMutable = addState(); + mergeStates( ssMutable, srcState ); + + /* Do out data transfer (and out condition embedding). */ + transferOutData( ssMutable, destState ); + + if ( destState->outCondSpace != 0 ) { + + doEmbedCondition( ssMutable, destState->outCondSpace->condSet, + destState->outCondKeys ); + } + + /* Now we merge with dest, setting leaving = true. This dictates how + * out conditions should be merged. */ + mergeStates( destState, ssMutable, true ); + } +} + +void FsmAp::checkEpsilonRegularInteraction( const PriorTable &t1, const PriorTable &t2 ) +{ + for ( PriorTable::Iter pd1 = t1; pd1.lte(); pd1++ ) { + for ( PriorTable::Iter pd2 = t2; pd2.lte(); pd2++ ) { + /* Looking for unequal guarded priorities with the same key. */ + if ( pd1->desc->key == pd2->desc->key ) { + if ( pd1->desc->priority < pd2->desc->priority || + pd1->desc->priority > pd2->desc->priority ) + { + if ( ctx->checkPriorInteraction && pd1->desc->guarded ) { + if ( ! priorInteraction ) { + priorInteraction = true; + guardId = pd1->desc->guardId; + } + } + } + } + } + } +} + +void FsmAp::mergeStateProperties( StateAp *destState, StateAp *srcState ) +{ + /* Draw in any properties of srcState into destState. */ + if ( srcState == destState ) { + /* Duplicate the list to protect against write to source. The + * priorities sets are not copied in because that would have no + * effect. */ + destState->epsilonTrans.append( EpsilonTrans( srcState->epsilonTrans ) ); + + /* Get all actions, duplicating to protect against write to source. */ + destState->toStateActionTable.setActions( + ActionTable( srcState->toStateActionTable ) ); + destState->fromStateActionTable.setActions( + ActionTable( srcState->fromStateActionTable ) ); + destState->outActionTable.setActions( ActionTable( srcState->outActionTable ) ); + destState->errActionTable.setActions( ErrActionTable( srcState->errActionTable ) ); + destState->eofActionTable.setActions( ActionTable( srcState->eofActionTable ) ); + + /* Not touching guarded-in table or out conditions. Probably should + * leave some of the above alone as well. */ + } + else { + /* Get the epsilons, out priorities. */ + destState->epsilonTrans.append( srcState->epsilonTrans ); + destState->outPriorTable.setPriors( srcState->outPriorTable ); + + /* Get all actions. */ + destState->toStateActionTable.setActions( srcState->toStateActionTable ); + destState->fromStateActionTable.setActions( srcState->fromStateActionTable ); + destState->outActionTable.setActions( srcState->outActionTable ); + destState->errActionTable.setActions( srcState->errActionTable ); + destState->eofActionTable.setActions( srcState->eofActionTable ); + destState->lmNfaParts.insert( srcState->lmNfaParts ); + destState->guardedInTable.setPriors( srcState->guardedInTable ); + } +} + +void FsmAp::mergeStateBits( StateAp *destState, StateAp *srcState ) +{ + /* Get bits and final state status. Note in the above code we depend on the + * original final state status being present. */ + destState->stateBits |= ( srcState->stateBits & ~STB_ISFINAL ); + if ( srcState->isFinState() ) + setFinState( destState ); +} + +void FsmAp::mergeNfaTransitions( StateAp *destState, StateAp *srcState ) +{ + /* Copy in any NFA transitions. */ + if ( srcState->nfaOut != 0 ) { + if ( destState->nfaOut == 0 ) + destState->nfaOut = new NfaTransList; + + for ( NfaTransList::Iter nt = *srcState->nfaOut; nt.lte(); nt++ ) { + NfaTrans *trans = new NfaTrans( + nt->pushTable, nt->restoreTable, + nt->popFrom, nt->popCondSpace, nt->popCondKeys, + nt->popAction, nt->popTest, nt->order ); + + destState->nfaOut->append( trans ); + attachToNfa( destState, nt->toState, trans ); + } + } +} + +void FsmAp::checkPriorInteractions( StateAp *destState, StateAp *srcState ) +{ + /* Run a check on priority interactions between epsilon transitions and + * regular transitions. This can't be used to affect machine construction, + * only to check for priority guards. */ + if ( destState->nfaOut != 0 ) { + for ( NfaTransList::Iter nt = *destState->nfaOut; nt.lte(); nt++ ) { + for ( TransList::Iter trans = destState->outList; trans.lte(); trans++ ) { + if ( trans->plain() ) { + checkEpsilonRegularInteraction( + trans->tdap()->priorTable, nt->priorTable ); + } + else { + for ( CondList::Iter cond = trans->tcap()->condList; + cond.lte(); cond++ ) + { + checkEpsilonRegularInteraction( + cond->priorTable, nt->priorTable ); + + } + } + } + } + } +} + +void FsmAp::mergeStates( StateAp *destState, StateAp *srcState, bool leaving ) +{ + /* Transitions. */ + outTransCopy( destState, srcState->outList.head ); + + /* Properties such as out data, to/from actions. */ + mergeStateProperties( destState, srcState ); + + /* Merge out conditions, depends on the operation (leaving or not). */ + mergeOutConds( destState, srcState, leaving ); + + /* State bits, including final state stats. Out conds depnds on this + * happening after. */ + mergeStateBits( destState, srcState ); + + /* Draw in the NFA transitions. */ + mergeNfaTransitions( destState, srcState ); + + /* Hacked in check for priority interactions, allowing detection of some + * bad situations. */ + checkPriorInteractions( destState, srcState ); +} + +void FsmAp::mergeStateList( StateAp *destState, + StateAp **srcStates, int numSrc ) +{ + for ( int s = 0; s < numSrc; s++ ) + mergeStates( destState, srcStates[s] ); +} + +void FsmAp::cleanAbortedFill( StateAp *state ) +{ + /* Iterate the out transitions, deleting them. */ + for ( TransList::Iter n, t = state->outList; t.lte(); ) { + n = t.next(); + if ( t->plain() ) + delete t->tdap(); + else + delete t->tcap(); + t = n; + } + + state->outList.abandon(); + + if ( state->nfaIn != 0 ) { + delete state->nfaIn; + state->nfaIn = 0; + } + + if ( state->nfaOut != 0 ) { + state->nfaOut->empty(); + delete state->nfaOut; + state->nfaOut = 0; + } +} + +void FsmAp::cleanAbortedFill() +{ + while ( nfaList.length() > 0 ) { + StateAp *state = nfaList.head; + + StateSet *stateSet = &state->stateDictEl->stateSet; + //mergeStateList( state, stateSet->data, stateSet->length() ); + + for ( StateSet::Iter s = *stateSet; s.lte(); s++ ) + detachStateDict( state, *s ); + + nfaList.detach( state ); + } + + /* Disassociated state dict elements from states. */ + for ( StateDict::Iter sdi = stateDict; sdi.lte(); sdi++ ) + sdi->targState->stateDictEl = 0; + + /* Delete all the state dict elements. */ + stateDict.empty(); + + /* Delete all the transitions. */ + for ( StateList::Iter state = stateList; state.lte(); state++ ) + cleanAbortedFill( state ); + + /* Delete all the states. */ + stateList.empty(); + + /* Delete all the transitions. */ + for ( StateList::Iter state = misfitList; state.lte(); state++ ) + cleanAbortedFill( state ); + + /* Delete all the states. */ + misfitList.empty(); +} + +bool FsmAp::overStateLimit() +{ + if ( ctx->stateLimit > FsmCtx::STATE_UNLIMITED ) { + long states = misfitList.length() + stateList.length(); + if ( states > ctx->stateLimit ) + return true; + } + return false; +} + +bool FsmAp::fillAbort( FsmRes &res, FsmAp *fsm ) +{ + if ( fsm->priorInteraction ) { + fsm->cleanAbortedFill(); + int guardId = fsm->guardId; + delete fsm; + res = FsmRes( FsmRes::PriorInteraction(), guardId ); + return true; + } + + if ( fsm->overStateLimit() ) { + fsm->cleanAbortedFill(); + delete fsm; + res = FsmRes( FsmRes::TooManyStates() ); + return true; + } + + return false; +} + +FsmRes FsmAp::fillInStates( FsmAp *fsm ) +{ + /* Used as return value on success. Filled in with error on abort. */ + FsmRes res( FsmRes::Fsm(), fsm ); + + /* Merge any states that are awaiting merging. This will likey cause other + * states to be added to the NFA list. */ + while ( true ) { + if ( fillAbort( res, fsm ) ) + return res; + + if ( fsm->nfaList.length() == 0 ) + break; + + StateAp *state = fsm->nfaList.head; + + StateSet *stateSet = &state->stateDictEl->stateSet; + fsm->mergeStateList( state, stateSet->data, stateSet->length() ); + + for ( StateSet::Iter s = *stateSet; s.lte(); s++ ) + fsm->detachStateDict( state, *s ); + + fsm->nfaList.detach( state ); + } + + /* The NFA list is empty at this point. There are no state sets we need to + * preserve. */ + + /* Disassociated state dict elements from states. */ + for ( StateDict::Iter sdi = fsm->stateDict; sdi.lte(); sdi++ ) + sdi->targState->stateDictEl = 0; + + /* Delete all the state dict elements. */ + fsm->stateDict.empty(); + + return res; +} + +/* Check if a machine defines a single character. This is useful in validating + * ranges and machines to export. */ +bool FsmAp::checkSingleCharMachine() +{ + /* Must have two states. */ + if ( stateList.length() != 2 ) + return false; + /* The start state cannot be final. */ + if ( startState->isFinState() ) + return false; + /* There should be only one final state. */ + if ( finStateSet.length() != 1 ) + return false; + /* The final state cannot have any transitions out. */ + if ( finStateSet[0]->outList.length() != 0 ) + return false; + /* The start state should have only one transition out. */ + if ( startState->outList.length() != 1 ) + return false; + /* The singe transition out of the start state should not be a range. */ + TransAp *startTrans = startState->outList.head; + if ( ctx->keyOps->ne( startTrans->lowKey, startTrans->highKey ) ) + return false; + return true; +} + +FsmRes FsmAp::condCostFromState( FsmAp *fsm, StateAp *state, long depth ) +{ + /* Nothing to do if the state is already on the list. */ + if ( state->stateBits & STB_ONLIST ) + return FsmRes( FsmRes::Fsm(), fsm ); + + if ( depth > fsm->ctx->condsCheckDepth ) + return FsmRes( FsmRes::Fsm(), fsm ); + + /* Doing depth first, put state on the list. */ + state->stateBits |= STB_ONLIST; + + /* Recurse on everything ranges. */ + for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) { + if ( trans->plain() ) { + if ( trans->tdap()->toState != 0 ) { + FsmRes res = condCostFromState( fsm, trans->tdap()->toState, depth + 1 ); + if ( !res.success() ) + return res; + } + } + else { + for ( CondSet::Iter csi = trans->condSpace->condSet; csi.lte(); csi++ ) { + if ( (*csi)->costMark ) + return FsmRes( FsmRes::CondCostTooHigh(), (*csi)->costId ); + } + + for ( CondList::Iter cond = trans->tcap()->condList; cond.lte(); cond++ ) { + if ( cond->toState != 0 ) { + FsmRes res = condCostFromState( fsm, cond->toState, depth + 1 ); + if ( !res.success() ) + return res; + } + } + } + } + + if ( state->nfaOut != 0 ) { + for ( NfaTransList::Iter n = *state->nfaOut; n.lte(); n++ ) { + /* We do not increment depth here since this is an epsilon transition. */ + FsmRes res = condCostFromState( fsm, n->toState, depth ); + if ( !res.success() ) + return res; + } + } + + for ( ActionTable::Iter a = state->fromStateActionTable; a.lte(); a++ ) { + if ( a->value->costMark ) + return FsmRes( FsmRes::CondCostTooHigh(), a->value->costId ); + } + + return FsmRes( FsmRes::Fsm(), fsm ); +} + + +/* Returns either success (using supplied fsm), or some error condition. */ +FsmRes FsmAp::condCostSearch( FsmAp *fsm ) +{ + /* Init on state list flags. */ + for ( StateList::Iter st = fsm->stateList; st.lte(); st++ ) + st->stateBits &= ~STB_ONLIST; + + FsmRes res = condCostFromState( fsm, fsm->startState, 1 ); + if ( !res.success() ) + delete fsm; + return res; +} + +void FsmAp::condCost( Action *action, long repId ) +{ + action->costMark = true; + action->costId = repId; +} + +/* + * This algorithm assigns a price to each state visit, then adds that to a + * running total. Note that we do not guard against multiple visits to a state, + * since we are estimating runtime cost. + * + * We rely on a character histogram and are looking for a probability of being + * in any given state, given that histogram, simple and very effective. + */ +void FsmAp::breadthFromState( double &total, int &minDepth, double *histogram, + FsmAp *fsm, StateAp *state, long depth, int maxDepth, double stateScore ) +{ + if ( depth > maxDepth ) + return; + + /* Recurse on everything ranges. */ + for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) { + + /* Compute target state score. */ + double span = 0; + for ( int i = trans->lowKey.getVal(); i <= trans->highKey.getVal(); i++ ) + span += histogram[i]; + + double targetStateScore = stateScore * ( span ); + + /* Add to the level. */ + total += targetStateScore; + + if ( trans->plain() ) { + if ( trans->tdap()->toState != 0 ) { + if ( trans->tdap()->toState->isFinState() && ( minDepth < 0 || depth < minDepth ) ) + minDepth = depth; + + breadthFromState( total, minDepth, histogram, fsm, trans->tdap()->toState, + depth + 1, maxDepth, targetStateScore ); + } + } + else { + for ( CondList::Iter cond = trans->tcap()->condList; cond.lte(); cond++ ) { + if ( cond->toState != 0 ) { + if ( cond->toState->isFinState() && ( minDepth < 0 || depth < minDepth ) ) + minDepth = depth; + + breadthFromState( total, minDepth, histogram, fsm, cond->toState, + depth + 1, maxDepth, targetStateScore ); + } + } + } + } + + if ( state->nfaOut != 0 ) { + for ( NfaTransList::Iter n = *state->nfaOut; n.lte(); n++ ) { + if ( n->toState->isFinState() && ( minDepth < 0 || depth < minDepth ) ) + minDepth = depth; + + /* We do not increment depth here since this is an epsilon transition. */ + breadthFromState( total, minDepth, histogram, fsm, n->toState, depth, maxDepth, stateScore ); + } + } +} + +void FsmAp::breadthFromEntry( double &total, int &minDepth, double *histogram, FsmAp *fsm, StateAp *state ) +{ + long depth = 1; + int maxDepth = 5; + double stateScore = 1.0; + + FsmAp::breadthFromState( total, minDepth, histogram, fsm, state, depth, maxDepth, stateScore ); +} + + +void FsmAp::applyEntryPriorGuard( FsmAp *fsm, long repId ) +{ + PriorDesc *priorDesc0 = fsm->ctx->allocPriorDesc(); + PriorDesc *priorDesc1 = fsm->ctx->allocPriorDesc(); + + priorDesc0->key = fsm->ctx->nextPriorKey; + priorDesc0->priority = 0; + priorDesc0->guarded = true; + priorDesc0->guardId = repId; + priorDesc0->other = priorDesc1; + + priorDesc1->key = fsm->ctx->nextPriorKey; + priorDesc1->priority = 1; + priorDesc1->guarded = true; + priorDesc1->guardId = repId; + priorDesc1->other = priorDesc0; + + /* Roll over for next allocation. */ + fsm->ctx->nextPriorKey += 1; + + /* Only need to set the first. Second is referenced using 'other' field. */ + fsm->startState->guardedInTable.setPrior( 0, priorDesc0 ); +} + +void FsmAp::applyRepeatPriorGuard( FsmAp *fsm, long repId ) +{ + PriorDesc *priorDesc2 = fsm->ctx->allocPriorDesc(); + PriorDesc *priorDesc3 = fsm->ctx->allocPriorDesc(); + + priorDesc2->key = fsm->ctx->nextPriorKey; + priorDesc2->priority = 0; + priorDesc2->guarded = true; + priorDesc2->guardId = repId; + priorDesc2->other = priorDesc3; + + priorDesc3->key = fsm->ctx->nextPriorKey; + priorDesc3->guarded = true; + priorDesc3->priority = 1; + priorDesc3->guardId = repId; + priorDesc3->other = priorDesc2; + + /* Roll over for next allocation. */ + fsm->ctx->nextPriorKey += 1; + + /* Only need to set the first. Second is referenced using 'other' field. */ + fsm->startState->guardedInTable.setPrior( 0, priorDesc2 ); + + fsm->allTransPrior( fsm->ctx->curPriorOrd++, priorDesc3 ); + fsm->leaveFsmPrior( fsm->ctx->curPriorOrd++, priorDesc2 ); +} + +FsmRes FsmAp::condPlus( FsmAp *fsm, long repId, Action *ini, Action *inc, Action *min, Action *max ) +{ + condCost( ini, repId ); + condCost( inc, repId ); + condCost( min, repId ); + if ( max != 0 ) + condCost( max, repId ); + + fsm->startFsmAction( 0, inc ); + + if ( max != 0 ) { + FsmRes res = fsm->startFsmCondition( max, true ); + if ( !res.success() ) + return res; + } + + /* Need a duplicated for the star end. */ + FsmAp *dup = new FsmAp( *fsm ); + + applyRepeatPriorGuard( dup, repId ); + + /* Star the duplicate. */ + FsmRes dupStar = FsmAp::starOp( dup ); + if ( !dupStar.success() ) { + delete fsm; + return dupStar; + } + + FsmRes res = FsmAp::concatOp( fsm, dupStar.fsm ); + if ( !res.success() ) + return res; + + /* End plus operation. */ + + res.fsm->leaveFsmCondition( min, true ); + + /* Init action. */ + res.fsm->startFromStateAction( 0, ini ); + + /* Leading priority guard. */ + applyEntryPriorGuard( res.fsm, repId ); + + return res; +} + +FsmRes FsmAp::condStar( FsmAp *fsm, long repId, Action *ini, Action *inc, Action *min, Action *max ) +{ + condCost( ini, repId ); + condCost( inc, repId ); + condCost( min, repId ); + if ( max != 0 ) + condCost( max, repId ); + + /* Increment. */ + fsm->startFsmAction( 0, inc ); + + /* Max (optional). */ + if ( max != 0 ) { + FsmRes res = fsm->startFsmCondition( max, true ); + if ( !res.success() ) + return res; + } + + applyRepeatPriorGuard( fsm, repId ); + + /* Star. */ + FsmRes res = FsmAp::starOp( fsm ); + if ( !res.success() ) + return res; + + /* Restrict leaving. */ + res.fsm->leaveFsmCondition( min, true ); + + /* Init action. */ + res.fsm->startFromStateAction( 0, ini ); + + /* Leading priority guard. */ + applyEntryPriorGuard( res.fsm, repId ); + + return res; +} + +/* Remove duplicates of unique actions from an action table. */ +void FsmAp::removeDups( ActionTable &table ) +{ + /* Scan through the table looking for unique actions to + * remove duplicates of. */ + for ( int i = 0; i < table.length(); i++ ) { + /* Remove any duplicates ahead of i. */ + for ( int r = i+1; r < table.length(); ) { + if ( table[r].value == table[i].value ) + table.vremove(r); + else + r += 1; + } + } +} + +/* Remove duplicates from action lists. This operates only on transition and + * eof action lists and so should be called once all actions have been + * transfered to their final resting place. */ +void FsmAp::removeActionDups() +{ + /* Loop all states. */ + for ( StateList::Iter state = stateList; state.lte(); state++ ) { + /* Loop all transitions. */ + for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) { + if ( trans->plain() ) + removeDups( trans->tdap()->actionTable ); + else { + for ( CondList::Iter cond = trans->tcap()->condList; cond.lte(); cond++ ) + removeDups( cond->actionTable ); + } + } + removeDups( state->toStateActionTable ); + removeDups( state->fromStateActionTable ); + removeDups( state->eofActionTable ); + } +} + |