diff options
author | Adrian Thurston <thurston@colm.net> | 2020-10-18 11:44:43 -0700 |
---|---|---|
committer | Adrian Thurston <thurston@colm.net> | 2020-10-18 11:44:43 -0700 |
commit | 85b76476de71f43d3eb25d6bef4ee6d84cb71f6c (patch) | |
tree | 65c127fbcf70e62d8a4848be2c9c4ff7d74d86a1 /src/libfsm/idbase.cc | |
parent | 86bb5882a70224a29650ccfa1e46c9b023c2a3ef (diff) | |
download | colm-85b76476de71f43d3eb25d6bef4ee6d84cb71f6c.tar.gz |
lift all source code into src/ dirinto-src
Diffstat (limited to 'src/libfsm/idbase.cc')
-rw-r--r-- | src/libfsm/idbase.cc | 421 |
1 files changed, 421 insertions, 0 deletions
diff --git a/src/libfsm/idbase.cc b/src/libfsm/idbase.cc new file mode 100644 index 00000000..38850b56 --- /dev/null +++ b/src/libfsm/idbase.cc @@ -0,0 +1,421 @@ +/* + * Copyright 2008-2018 Adrian Thurston <thurston@colm.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "ragel.h" +#include "fsmgraph.h" +#include "parsedata.h" +#include "action.h" + +/* Error reporting format. */ +ErrorFormat errorFormat = ErrorFormatGNU; + +void FsmCtx::finalizeInstance( FsmAp *graph ) +{ + /* Resolve any labels that point to multiple states. Any labels that are + * still around are referenced only by gotos and calls and they need to be + * made into deterministic entry points. */ + graph->deterministicEntry(); + + /* + * All state construction is now complete. + */ + + /* Transfer actions from the out action tables to eof action tables. */ + for ( StateSet::Iter state = graph->finStateSet; state.lte(); state++ ) + graph->transferOutActions( *state ); + + /* Transfer global error actions. */ + for ( StateList::Iter state = graph->stateList; state.lte(); state++ ) + graph->transferErrorActions( state, 0 ); + + if ( fsmGbl->wantDupsRemoved ) + graph->removeActionDups(); + + /* Remove unreachable states. There should be no dead end states. The + * subtract and intersection operators are the only places where they may + * be created and those operators clean them up. */ + graph->removeUnreachableStates(); + + /* No more fsm operations are to be done. Action ordering numbers are + * no longer of use and will just hinder minimization. Clear them. */ + graph->nullActionKeys(); + + /* Transition priorities are no longer of use. We can clear them + * because they will just hinder minimization as well. Clear them. */ + graph->clearAllPriorities(); + + if ( graph->ctx->minimizeOpt != MinimizeNone ) { + /* Minimize here even if we minimized at every op. Now that function + * keys have been cleared we may get a more minimal fsm. */ + switch ( graph->ctx->minimizeLevel ) { + #ifdef TO_UPGRADE_CONDS + case MinimizeApprox: + graph->minimizeApproximate(); + break; + #endif + #ifdef TO_UPGRADE_CONDS + case MinimizeStable: + graph->minimizeStable(); + break; + #endif + case MinimizePartition1: + graph->minimizePartition1(); + break; + case MinimizePartition2: + graph->minimizePartition2(); + break; + } + } + + graph->compressTransitions(); + + createNfaActions( graph ); +} + +void FsmCtx::analyzeAction( Action *action, InlineList *inlineList ) +{ + /* FIXME: Actions used as conditions should be very constrained. */ + for ( InlineList::Iter item = *inlineList; item.lte(); item++ ) { + if ( item->type == InlineItem::Call || item->type == InlineItem::CallExpr || + item->type == InlineItem::Ncall || item->type == InlineItem::NcallExpr ) + { + action->anyCall = true; + } + + /* Need to recurse into longest match items. */ + if ( item->type == InlineItem::LmSwitch ) { + FsmLongestMatch *lm = item->longestMatch; + for ( FsmLmPartList::Iter lmi = *lm->longestMatchList; lmi.lte(); lmi++ ) { + if ( lmi->action != 0 ) + analyzeAction( action, lmi->action->inlineList ); + } + } + + if ( item->type == InlineItem::LmOnLast || + item->type == InlineItem::LmOnNext || + item->type == InlineItem::LmOnLagBehind ) + { + FsmLongestMatchPart *lmi = item->longestMatchPart; + if ( lmi->action != 0 ) + analyzeAction( action, lmi->action->inlineList ); + } + + if ( item->children != 0 ) + analyzeAction( action, item->children ); + } +} + +/* Check actions for bad uses of fsm directives. We don't go inside longest + * match items in actions created by ragel, since we just want the user + * actions. */ +void FsmCtx::checkInlineList( Action *act, InlineList *inlineList ) +{ + for ( InlineList::Iter item = *inlineList; item.lte(); item++ ) { + /* EOF checks. */ + if ( act->numEofRefs > 0 ) { + switch ( item->type ) { + /* Currently no checks. */ + default: + break; + } + } + + /* Recurse. */ + if ( item->children != 0 ) + checkInlineList( act, item->children ); + } +} + +void FsmCtx::checkAction( Action *action ) +{ + /* Check for actions with calls that are embedded within a longest match + * machine. */ + if ( !action->isLmAction && action->numRefs() > 0 && action->anyCall ) { + for ( NameInstVect::Iter ar = action->embedRoots; ar.lte(); ar++ ) { + NameInst *check = *ar; + while ( check != 0 ) { + if ( check->isLongestMatch ) { + fsmGbl->error(action->loc) << "within a scanner, fcall and fncall are permitted" + " only in pattern actions" << endl; + break; + } + check = check->parent; + } + } + } + + checkInlineList( action, action->inlineList ); +} + +void FsmCtx::analyzeGraph( FsmAp *graph ) +{ + for ( ActionList::Iter act = actionList; act.lte(); act++ ) + analyzeAction( act, act->inlineList ); + + for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) { + /* The transition list. */ + for ( TransList::Iter trans = st->outList; trans.lte(); trans++ ) { + //if ( trans->condSpace != 0 ) { + // for ( CondSet::Iter sci = trans->condSpace->condSet; sci.lte(); sci++ ) + // (*sci)->numCondRefs += 1; + //} + + if ( trans->plain() ) { + for ( ActionTable::Iter at = trans->tdap()->actionTable; at.lte(); at++ ) + at->value->numTransRefs += 1; + } + else { + for ( CondList::Iter cond = trans->tcap()->condList; cond.lte(); cond++ ) { + for ( ActionTable::Iter at = cond->actionTable; at.lte(); at++ ) + at->value->numTransRefs += 1; + } + } + } + + for ( ActionTable::Iter at = st->toStateActionTable; at.lte(); at++ ) + at->value->numToStateRefs += 1; + + for ( ActionTable::Iter at = st->fromStateActionTable; at.lte(); at++ ) + at->value->numFromStateRefs += 1; + + for ( ActionTable::Iter at = st->eofActionTable; at.lte(); at++ ) + at->value->numEofRefs += 1; + + //for ( OutCondSet::Iter oci = st->outCondSet; oci.lte(); oci++ ) + // oci->action->numCondRefs += 1; + + if ( st->nfaOut != 0 ) { + for ( NfaTransList::Iter n = *st->nfaOut; n.lte(); n++ ) { + for ( ActionTable::Iter ati = n->pushTable; ati.lte(); ati++ ) + ati->value->numNfaRefs += 1; + + for ( ActionTable::Iter ati = n->restoreTable; ati.lte(); ati++ ) + ati->value->numNfaRefs += 1; + + for ( ActionTable::Iter ati = n->popAction; ati.lte(); ati++ ) + ati->value->numNfaRefs += 1; + + for ( ActionTable::Iter ati = n->popTest; ati.lte(); ati++ ) + ati->value->numNfaRefs += 1; + } + } + } + + /* Can't count on cond references in transitions, since we don't refcount + * the spaces. FIXME: That would be the proper solution. */ + for ( CondSpaceMap::Iter cs = condData->condSpaceMap; cs.lte(); cs++ ) { + for ( CondSet::Iter csi = cs->condSet; csi.lte(); csi++ ) + (*csi)->numCondRefs += 1; + } + + /* Checks for bad usage of directives in action code. */ + for ( ActionList::Iter act = actionList; act.lte(); act++ ) + checkAction( act ); +} + +/* This create an action that refs the original embed roots, if the optWrap arg + * is supplied. */ +Action *FsmCtx::newNfaWrapAction( const char *name, InlineList *inlineList, Action *optWrap ) +{ + InputLoc loc; + loc.line = 1; + loc.col = 1; + loc.fileName = "NONE"; + + Action *action = new Action( loc, name, inlineList, nextCondId++ ); + + if ( optWrap != 0 ) + action->embedRoots.append( optWrap->embedRoots ); + + actionList.append( action ); + return action; +} + +void FsmCtx::createNfaActions( FsmAp *fsm ) +{ + for ( StateList::Iter st = fsm->stateList; st.lte(); st++ ) { + if ( st->nfaOut != 0 ) { + for ( NfaTransList::Iter n = *st->nfaOut; n.lte(); n++ ) { + /* Move pop restore actions into poptest. Wrap to override the + * condition-like testing. */ + for ( ActionTable::Iter ati = n->restoreTable; ati.lte(); ati++ ) { + n->popTest.setAction( ati->key, ati->value ); + } + + /* Move pop actions into pop test. Wrap to override the + * condition-like testing. */ + for ( ActionTable::Iter ati = n->popFrom; ati.lte(); ati++ ) { + + InlineList *il1 = new InlineList; + il1->append( new InlineItem( InputLoc(), + ati->value, InlineItem::NfaWrapAction ) ); + Action *wrap = newNfaWrapAction( "action_wrap", il1, ati->value ); + n->popTest.setAction( ORD_COND2, wrap ); + } + + /* Move condition evaluation into pop test. Wrap with condition + * execution. */ + if ( n->popCondSpace != 0 ) { + InlineList *il1 = new InlineList; + il1->append( new InlineItem( InputLoc(), + n->popCondSpace, n->popCondKeys, + InlineItem::NfaWrapConds ) ); + Action *wrap = newNfaWrapAction( "cond_wrap", il1, 0 ); + n->popTest.setAction( ORD_COND, wrap ); + } + + /* Move pop actions into pop test. Wrap to override the + * condition-like testing. */ + for ( ActionTable::Iter ati = n->popAction; ati.lte(); ati++ ) { + + InlineList *il1 = new InlineList; + il1->append( new InlineItem( InputLoc(), + ati->value, InlineItem::NfaWrapAction ) ); + Action *wrap = newNfaWrapAction( "action_wrap", il1, ati->value ); + n->popTest.setAction( ati->key, wrap ); + } + } + } + } +} + +void FsmCtx::prepareReduction( FsmAp *sectionGraph ) +{ + /* Decide if an error state is necessary. + * 1. There is an error transition + * 2. There is a gap in the transitions + * 3. The longest match operator requires it. */ + if ( lmRequiresErrorState || sectionGraph->hasErrorTrans() ) + sectionGraph->errState = sectionGraph->addState(); + + /* State numbers need to be assigned such that all final states have a + * larger state id number than all non-final states. This enables the + * first_final mechanism to function correctly. We also want states to be + * ordered in a predictable fashion. So we first apply a depth-first + * search, then do a stable sort by final state status, then assign + * numbers. */ + + sectionGraph->depthFirstOrdering(); + sectionGraph->sortStatesByFinal(); + sectionGraph->setStateNumbers( 0 ); +} + + +void translatedHostData( ostream &out, const std::string &data ) +{ + const char *p = data.c_str(); + for ( const char *c = p; *c != 0; ) { + if ( c[0] == '}' && ( c[1] == '@' || c[1] == '$' || c[1] == '=' ) ) { + out << "@}@" << c[1]; + c += 2; + } + else if ( c[0] == '@' ) { + out << "@@"; + c += 1; + } + // Have some escaping issues that these fix, but they lead to other problems. + // Can be reproduced by passing "={}" through ragel and adding --colm-backend + // else if ( c[0] == '=' ) { + // out << "@="; + // c += 1; + //} + // else if ( c[0] == '$' ) { + // out << "@$"; + // c += 1; + //} + else { + out << c[0]; + c += 1; + } + } +} + + +void FsmGbl::abortCompile( int code ) +{ + throw AbortCompile( code ); +} + +/* Print the opening to a warning in the input, then return the error ostream. */ +ostream &FsmGbl::warning( const InputLoc &loc ) +{ + ostream &err = std::cerr; + err << loc << ": warning: "; + return err; +} + +/* Print the opening to a program error, then return the error stream. */ +ostream &FsmGbl::error() +{ + errorCount += 1; + ostream &err = std::cerr; + err << PROGNAME ": "; + return err; +} + +ostream &FsmGbl::error( const InputLoc &loc ) +{ + errorCount += 1; + ostream &err = std::cerr; + err << loc << ": "; + return err; +} + +ostream &FsmGbl::error_plain() +{ + errorCount += 1; + ostream &err = std::cerr; + return err; +} + + +std::ostream &FsmGbl::stats() +{ + return std::cout; +} + +/* Requested info. */ +std::ostream &FsmGbl::info() +{ + return std::cout; +} + +ostream &operator<<( ostream &out, const InputLoc &loc ) +{ + assert( loc.fileName != 0 ); + switch ( errorFormat ) { + case ErrorFormatMSVC: + out << loc.fileName << "(" << loc.line; + if ( loc.col ) + out << "," << loc.col; + out << ")"; + break; + + default: + out << loc.fileName << ":" << loc.line; + if ( loc.col ) + out << ":" << loc.col; + break; + } + return out; +} + |