diff options
Diffstat (limited to 'ragel/parsetree.cc')
-rw-r--r-- | ragel/parsetree.cc | 2199 |
1 files changed, 0 insertions, 2199 deletions
diff --git a/ragel/parsetree.cc b/ragel/parsetree.cc deleted file mode 100644 index 38646cf6..00000000 --- a/ragel/parsetree.cc +++ /dev/null @@ -1,2199 +0,0 @@ -/* - * Copyright 2001-2018 Adrian Thurston <thurston@colm.net> - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include <iostream> -#include <iomanip> -#include <sstream> -#include <errno.h> -#include <limits.h> -#include <stdlib.h> -#include <inputdata.h> - -/* Parsing. */ -#include "ragel.h" -#include "parsetree.h" -#include "parsedata.h" - -using namespace std; -ostream &operator<<( ostream &out, const NameRef &nameRef ); -ostream &operator<<( ostream &out, const NameInst &nameInst ); - -/* Read string literal (and regex) options and return the true end. */ -const char *checkLitOptions( InputData *id, const InputLoc &loc, - const char *data, int length, bool &caseInsensitive ) -{ - const char *end = data + length - 1; - while ( *end != '\'' && *end != '\"' && *end != '/' ) { - if ( *end == 'i' ) - caseInsensitive = true; - else { - id->error( loc ) << "literal string '" << *end << - "' option not supported" << endl; - } - end -= 1; - } - return end; -} - -/* Convert the literal string which comes in from the scanner into an array of - * characters with escapes and options interpreted. Also null terminates the - * string. Though this null termination should not be relied on for - * interpreting literals in the parser because the string may contain \0 */ -char *prepareLitString( InputData *id, const InputLoc &loc, const char *data, long length, - long &resLen, bool &caseInsensitive ) -{ - char *resData = new char[length+1]; - caseInsensitive = false; - - const char *src = data + 1; - const char *end = checkLitOptions( id, loc, data, length, caseInsensitive ); - - char *dest = resData; - long dlen = 0; - while ( src != end ) { - if ( *src == '\\' ) { - switch ( src[1] ) { - case '0': dest[dlen++] = '\0'; break; - case 'a': dest[dlen++] = '\a'; break; - case 'b': dest[dlen++] = '\b'; break; - case 't': dest[dlen++] = '\t'; break; - case 'n': dest[dlen++] = '\n'; break; - case 'v': dest[dlen++] = '\v'; break; - case 'f': dest[dlen++] = '\f'; break; - case 'r': dest[dlen++] = '\r'; break; - case '\n': break; - default: dest[dlen++] = src[1]; break; - } - src += 2; - } - else { - dest[dlen++] = *src++; - } - } - - resLen = dlen; - resData[resLen] = 0; - return resData; -} - -Key *prepareHexString( ParseData *pd, const InputLoc &loc, - const char *data, long length, long &resLen ) -{ - Key *dest = new Key[( length - 2 ) >> 1]; - const char *src = data; - const char *end = data + length; - long dlen = 0; - char s[3]; - - /* Scan forward over 0x. */ - src += 2; - - s[2] = 0; - while ( src < end ) { - s[0] = src[0]; - s[1] = src[1]; - - dest[dlen++] = makeFsmKeyHex( s, loc, pd ); - - /* Scan forward over the hex chars, then any whitespace or . characters. */ - src += 2; - while ( *src == ' ' || *src == '\t' || *src == '\n' || *src == '.' ) - src += 1; - - /* Scan forward over 0x. */ - src += 2; - } - - resLen = dlen; - return dest; -} - -FsmRes VarDef::walk( ParseData *pd ) -{ - /* We enter into a new name scope. */ - NameFrame nameFrame = pd->enterNameScope( true, 1 ); - - /* Recurse on the expression. */ - FsmRes rtnVal = machineDef->walk( pd ); - if ( !rtnVal.success() ) - return rtnVal; - - /* Do the tranfer of local error actions. */ - LocalErrDictEl *localErrDictEl = pd->localErrDict.find( name ); - if ( localErrDictEl != 0 ) { - for ( StateList::Iter state = rtnVal.fsm->stateList; state.lte(); state++ ) - rtnVal.fsm->transferErrorActions( state, localErrDictEl->value ); - } - - /* If the expression below is a join operation with multiple expressions - * then it just had epsilon transisions resolved. If it is a join - * with only a single expression then run the epsilon op now. */ - if ( machineDef->type == MachineDef::JoinType && - machineDef->join->exprList.length() == 1 ) - { - rtnVal = FsmAp::epsilonOp( rtnVal.fsm ); - if ( !rtnVal.success() ) - return rtnVal; - } - - /* We can now unset entry points that are not longer used. */ - pd->unsetObsoleteEntries( rtnVal.fsm ); - - /* If the name of the variable is referenced then add the entry point to - * the graph. */ - if ( pd->curNameInst->numRefs > 0 ) - rtnVal.fsm->setEntry( pd->curNameInst->id, rtnVal.fsm->startState ); - - /* Pop the name scope. */ - pd->popNameScope( nameFrame ); - return rtnVal; -} - -void VarDef::makeNameTree( const InputLoc &loc, ParseData *pd ) -{ - /* The variable definition enters a new scope. */ - NameInst *prevNameInst = pd->curNameInst; - pd->curNameInst = pd->addNameInst( loc, name, false ); - - if ( machineDef->type == MachineDef::LongestMatchType ) - pd->curNameInst->isLongestMatch = true; - - /* Recurse. */ - machineDef->makeNameTree( pd ); - - /* The name scope ends, pop the name instantiation. */ - pd->curNameInst = prevNameInst; -} - -void VarDef::resolveNameRefs( ParseData *pd ) -{ - /* Entering into a new scope. */ - NameFrame nameFrame = pd->enterNameScope( true, 1 ); - - /* Recurse. */ - machineDef->resolveNameRefs( pd ); - - /* The name scope ends, pop the name instantiation. */ - pd->popNameScope( nameFrame ); -} - -VarDef::~VarDef() -{ - delete machineDef; -} - -InputLoc LongestMatchPart::getLoc() -{ - return action != 0 ? action->loc : semiLoc; -} - -/* - * If there are any LMs then all of the following entry points must reset - * tokstart: - * - * 1. fentry(StateRef) - * 2. ftoto(StateRef), fcall(StateRef), fnext(StateRef) - * 3. targt of any transition that has an fcall (the return loc). - * 4. start state of all longest match routines. - */ - -Action *LongestMatch::newLmAction( ParseData *pd, const InputLoc &loc, - const char *name, InlineList *inlineList ) -{ - Action *action = new Action( loc, name, inlineList, pd->fsmCtx->nextCondId++ ); - action->embedRoots.append( pd->curNameInst ); - pd->fsmCtx->actionList.append( action ); - action->isLmAction = true; - return action; -} - -void LongestMatch::makeActions( ParseData *pd ) -{ - /* Make actions that set the action id. */ - for ( LmPartList::Iter lmi = *longestMatchList; lmi.lte(); lmi++ ) { - /* For each part create actions for setting the match type. We need - * to do this so that the actions will go into the actionIndex. */ - InlineList *inlineList = new InlineList; - inlineList->append( new InlineItem( InputLoc(), InlineItem::Stmt ) ); - inlineList->head->children = new InlineList; - inlineList->head->children->append( new InlineItem( lmi->getLoc(), this, lmi, - InlineItem::LmSetActId ) ); - char *actName = new char[50]; - sprintf( actName, "store%i", lmi->longestMatchId ); - lmi->setActId = newLmAction( pd, lmi->getLoc(), actName, inlineList ); - } - - /* Make actions that execute the user action and restart on the last - * character. */ - for ( LmPartList::Iter lmi = *longestMatchList; lmi.lte(); lmi++ ) { - /* For each part create actions for setting the match type. We need - * to do this so that the actions will go into the actionIndex. */ - InlineList *inlineList = new InlineList; - inlineList->append( new InlineItem( InputLoc(), InlineItem::Stmt ) ); - inlineList->head->children = new InlineList; - inlineList->head->children->append( new InlineItem( lmi->getLoc(), this, lmi, - InlineItem::LmOnLast ) ); - char *actName = new char[50]; - sprintf( actName, "last%i", lmi->longestMatchId ); - lmi->actOnLast = newLmAction( pd, lmi->getLoc(), actName, inlineList ); - } - - /* Make actions that execute the user action and restart on the next - * character. These actions will set tokend themselves (it is the current - * char). */ - for ( LmPartList::Iter lmi = *longestMatchList; lmi.lte(); lmi++ ) { - /* For each part create actions for setting the match type. We need - * to do this so that the actions will go into the actionIndex. */ - InlineList *inlineList = new InlineList; - inlineList->append( new InlineItem( InputLoc(), InlineItem::Stmt ) ); - inlineList->head->children = new InlineList; - inlineList->head->children->append( new InlineItem( lmi->getLoc(), this, lmi, - InlineItem::LmOnNext ) ); - char *actName = new char[50]; - sprintf( actName, "next%i", lmi->longestMatchId ); - lmi->actOnNext = newLmAction( pd, lmi->getLoc(), actName, inlineList ); - } - - /* Make actions that execute the user action and restart at tokend. These - * actions execute some time after matching the last char. */ - for ( LmPartList::Iter lmi = *longestMatchList; lmi.lte(); lmi++ ) { - /* For each part create actions for setting the match type. We need - * to do this so that the actions will go into the actionIndex. */ - InlineList *inlineList = new InlineList; - inlineList->append( new InlineItem( InputLoc(), InlineItem::Stmt ) ); - inlineList->head->children = new InlineList; - inlineList->head->children->append( new InlineItem( lmi->getLoc(), this, lmi, - InlineItem::LmOnLagBehind ) ); - char *actName = new char[50]; - sprintf( actName, "lag%i", lmi->longestMatchId ); - lmi->actLagBehind = newLmAction( pd, lmi->getLoc(), actName, inlineList ); - } - - /* - * NFA actions - * - * Actions that execute the user action and restart on the next character. - * These actions will set tokend themselves (it is the current char). They - * also reset the nfa machinery used to choose between tokens. - */ - for ( LmPartList::Iter lmi = *longestMatchList; lmi.lte(); lmi++ ) { - /* For each part create actions for setting the match type. We need - * to do this so that the actions will go into the actionIndex. */ - InlineList *inlineList = new InlineList; - inlineList->append( new InlineItem( InputLoc(), InlineItem::Stmt ) ); - inlineList->head->children = new InlineList; - inlineList->head->children->append( new InlineItem( lmi->getLoc(), this, lmi, - InlineItem::LmNfaOnLast ) ); - char *actName = new char[50]; - sprintf( actName, "nlast%i", lmi->longestMatchId ); - lmi->actNfaOnLast = newLmAction( pd, lmi->getLoc(), actName, inlineList ); - } - - for ( LmPartList::Iter lmi = *longestMatchList; lmi.lte(); lmi++ ) { - /* For each part create actions for setting the match type. We need - * to do this so that the actions will go into the actionIndex. */ - InlineList *inlineList = new InlineList; - inlineList->append( new InlineItem( InputLoc(), InlineItem::Stmt ) ); - inlineList->head->children = new InlineList; - inlineList->head->children->append( new InlineItem( lmi->getLoc(), this, lmi, - InlineItem::LmNfaOnNext ) ); - char *actName = new char[50]; - sprintf( actName, "nnext%i", lmi->longestMatchId ); - lmi->actNfaOnNext = newLmAction( pd, lmi->getLoc(), actName, inlineList ); - } - - for ( LmPartList::Iter lmi = *longestMatchList; lmi.lte(); lmi++ ) { - /* For each part create actions for setting the match type. We need - * to do this so that the actions will go into the actionIndex. */ - InlineList *inlineList = new InlineList; - inlineList->append( new InlineItem( InputLoc(), InlineItem::Stmt ) ); - inlineList->head->children = new InlineList; - inlineList->head->children->append( new InlineItem( lmi->getLoc(), this, lmi, - InlineItem::LmNfaOnEof ) ); - char *actName = new char[50]; - sprintf( actName, "neof%i", lmi->longestMatchId ); - lmi->actNfaOnEof = newLmAction( pd, lmi->getLoc(), actName, inlineList ); - } - - InputLoc loc; - loc.line = 1; - loc.col = 1; - loc.fileName = "NONE"; - - /* Create the error action. */ - InlineList *il6 = new InlineList; - il6->append( new InlineItem( loc, this, 0, InlineItem::LmSwitch ) ); - lmActSelect = newLmAction( pd, loc, "switch", il6 ); -} - -void LongestMatch::findName( ParseData *pd ) -{ - NameInst *nameInst = pd->curNameInst; - while ( nameInst->name.empty() ) { - nameInst = nameInst->parent; - /* Since every machine must must have a name, we should always find a - * name for the longest match. */ - assert( nameInst != 0 ); - } - name = nameInst->name; -} - -void LongestMatch::makeNameTree( ParseData *pd ) -{ - /* Create an anonymous scope for the longest match. Will be used for - * restarting machine after matching a token. */ - NameInst *prevNameInst = pd->curNameInst; - pd->curNameInst = pd->addNameInst( loc, std::string(), false ); - - /* Recurse into all parts of the longest match operator. */ - for ( LmPartList::Iter lmi = *longestMatchList; lmi.lte(); lmi++ ) - lmi->join->makeNameTree( pd ); - - /* Traverse the name tree upwards to find a name for this lm. */ - findName( pd ); - - /* Also make the longest match's actions at this point. */ - makeActions( pd ); - - /* The name scope ends, pop the name instantiation. */ - pd->curNameInst = prevNameInst; -} - -void LongestMatch::resolveNameRefs( ParseData *pd ) -{ - /* The longest match gets its own name scope. */ - NameFrame nameFrame = pd->enterNameScope( true, 1 ); - - /* Take an action reference for each longest match item and recurse. */ - for ( LmPartList::Iter lmi = *longestMatchList; lmi.lte(); lmi++ ) { - /* Record the reference if the item has an action. */ - if ( lmi->action != 0 ) - lmi->action->embedRoots.append( pd->localNameScope ); - - /* Recurse down the join. */ - lmi->join->resolveNameRefs( pd ); - } - - /* The name scope ends, pop the name instantiation. */ - pd->popNameScope( nameFrame ); -} - -void LongestMatch::restart( FsmAp *graph, TransAp *trans ) -{ - StateAp *fromState = trans->tdap()->fromState; - graph->detachTrans( fromState, trans->tdap()->toState, trans->tdap() ); - graph->attachTrans( fromState, graph->startState, trans->tdap() ); -} - -void LongestMatch::restart( FsmAp *graph, CondAp *cti ) -{ - StateAp *fromState = cti->fromState; - graph->detachTrans( fromState, cti->toState, cti ); - graph->attachTrans( fromState, graph->startState, cti ); -} - -void LongestMatch::transferScannerLeavingActions( FsmAp *graph ) -{ - for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) { - if ( st->outActionTable.length() > 0 ) - graph->setErrorActions( st, st->outActionTable ); - } -} - -FsmRes LongestMatch::walkClassic( ParseData *pd ) -{ - /* The longest match has it's own name scope. */ - NameFrame nameFrame = pd->enterNameScope( true, 1 ); - - /* Make each part of the longest match. */ - FsmAp **parts = new FsmAp*[longestMatchList->length()]; - LmPartList::Iter lmi = *longestMatchList; - for ( int i = 0; lmi.lte(); lmi++, i++ ) { - /* Create the machine and embed the setting of the longest match id. */ - FsmRes res = lmi->join->walk( pd ); - if ( !res.success() ) - return res; - - parts[i] = res.fsm; - parts[i]->longMatchAction( pd->fsmCtx->curActionOrd++, lmi ); - } - - /* Before we union the patterns we need to deal with leaving actions. They - * are transfered to error transitions out of the final states (like local - * error actions) and to eof actions. In the scanner we need to forbid - * on_last for any final state that has an leaving action. */ - for ( int i = 0; i < longestMatchList->length(); i++ ) - transferScannerLeavingActions( parts[i] ); - - /* Union machines one and up with machine zero. The grammar dictates that - * there will always be at least one part. */ - FsmRes res( FsmRes::Fsm(), parts[0] ); - for ( int i = 1; i < longestMatchList->length(); i++ ) { - res = FsmAp::unionOp( res.fsm, parts[i] ); - if ( !res.success() ) - return res; - } - - runLongestMatch( pd, res.fsm ); - - /* Pop the name scope. */ - pd->popNameScope( nameFrame ); - - delete[] parts; - return res; -} - - -FsmRes LongestMatch::walk( ParseData *pd ) -{ - if ( nfaConstruction ) - return walkNfa( pd ); - else - return walkClassic( pd ); -} - -NfaUnion::~NfaUnion() -{ - for ( TermVect::Iter term = terms; term.lte(); term++ ) - delete *term; - if ( roundsList != 0 ) - delete roundsList; -} - -FsmRes NfaUnion::walk( ParseData *pd ) -{ - if ( pd->id->printStatistics ) - pd->id->stats() << "nfa union terms\t" << terms.length() << endl; - - /* Compute the individual expressions. */ - long numMachines = 0; - FsmAp **machines = new FsmAp*[terms.length()]; - for ( TermVect::Iter term = terms; term.lte(); term++ ) { - FsmRes res = (*term)->walk( pd ); - if ( !res.success() ) { - /* Delete previos. */ - for ( int m = 0; m < numMachines; ++m) - delete machines[m]; - delete[] machines; - return res; - } - - machines[numMachines++] = res.fsm; - } - - std::ostream &stats = pd->id->stats(); - bool printStatistics = pd->id->printStatistics; - - return FsmAp::nfaUnion( *roundsList, machines, numMachines, stats, printStatistics ); -} - -void NfaUnion::makeNameTree( ParseData *pd ) -{ - for ( TermVect::Iter term = terms; term.lte(); term++ ) - (*term)->makeNameTree( pd ); -} - -void NfaUnion::resolveNameRefs( ParseData *pd ) -{ - for ( TermVect::Iter term = terms; term.lte(); term++ ) - (*term)->resolveNameRefs( pd ); -} - -FsmRes MachineDef::walk( ParseData *pd ) -{ - switch ( type ) { - case JoinType: - return join->walk( pd ); - case LongestMatchType: - return longestMatch->walk( pd ); - case LengthDefType: - /* Towards lengths. */ - return FsmRes( FsmRes::Fsm(), FsmAp::lambdaFsm( pd->fsmCtx ) ); - case NfaUnionType: - return nfaUnion->walk( pd ); - } - return FsmRes( FsmRes::InternalError() ); -} - -void MachineDef::makeNameTree( ParseData *pd ) -{ - switch ( type ) { - case JoinType: - join->makeNameTree( pd ); - break; - case LongestMatchType: - longestMatch->makeNameTree( pd ); - break; - case LengthDefType: - break; - case NfaUnionType: - nfaUnion->makeNameTree( pd ); - break; - } -} - -void MachineDef::resolveNameRefs( ParseData *pd ) -{ - switch ( type ) { - case JoinType: - join->resolveNameRefs( pd ); - break; - case LongestMatchType: - longestMatch->resolveNameRefs( pd ); - break; - case LengthDefType: - break; - case NfaUnionType: - nfaUnion->resolveNameRefs( pd ); - break; - } -} - -MachineDef::~MachineDef() -{ - if ( join != 0 ) - delete join; - if ( longestMatch != 0 ) - delete longestMatch; - if ( lengthDef != 0 ) - delete lengthDef; - if ( nfaUnion != 0 ) - delete nfaUnion; -} - -/* Construct with a location and the first expression. */ -Join::Join( const InputLoc &loc, Expression *expr ) -: - loc(loc) -{ - exprList.append( expr ); -} - -/* Construct with a location and the first expression. */ -Join::Join( Expression *expr ) -{ - exprList.append( expr ); -} - -/* Walk an expression node. */ -FsmRes Join::walk( ParseData *pd ) -{ - if ( exprList.length() == 1 ) - return exprList.head->walk( pd ); - - return walkJoin( pd ); -} - -/* There is a list of expressions to join. */ -FsmRes Join::walkJoin( ParseData *pd ) -{ - /* We enter into a new name scope. */ - NameFrame nameFrame = pd->enterNameScope( true, 1 ); - - /* Evaluate the machines. */ - FsmAp **fsms = new FsmAp*[exprList.length()]; - ExprList::Iter expr = exprList; - for ( int e = 0; e < exprList.length(); e++, expr++ ) { - FsmRes res = expr->walk( pd ); - if ( !res.success() ) - return res; - fsms[e] = res.fsm; - } - - /* Get the start and final names. Final is - * guaranteed to exist, start is not. */ - NameInst *startName = pd->curNameInst->start; - NameInst *finalName = pd->curNameInst->final; - - int startId = -1; - if ( startName != 0 ) { - /* Take note that there was an implicit link to the start machine. */ - pd->localNameScope->referencedNames.append( startName ); - startId = startName->id; - } - - /* A final id of -1 indicates there is no epsilon that references the - * final state, therefor do not create one or set an entry point to it. */ - int finalId = -1; - if ( finalName->numRefs > 0 ) - finalId = finalName->id; - - /* Join machines 1 and up onto machine 0. */ - FsmRes res = FsmAp::joinOp( fsms[0], startId, finalId, fsms+1, exprList.length()-1 ); - if ( !res.success() ) - return res; - - /* We can now unset entry points that are not longer used. */ - pd->unsetObsoleteEntries( res.fsm ); - - /* Pop the name scope. */ - pd->popNameScope( nameFrame ); - - delete[] fsms; - return res; -} - -void Join::makeNameTree( ParseData *pd ) -{ - if ( exprList.length() > 1 ) { - /* Create the new anonymous scope. */ - NameInst *prevNameInst = pd->curNameInst; - pd->curNameInst = pd->addNameInst( loc, std::string(), false ); - - /* Join scopes need an implicit "final" target. */ - pd->curNameInst->final = new NameInst( InputLoc(), pd->curNameInst, "final", - pd->nextNameId++, false ); - - /* Recurse into all expressions in the list. */ - for ( ExprList::Iter expr = exprList; expr.lte(); expr++ ) - expr->makeNameTree( pd ); - - /* The name scope ends, pop the name instantiation. */ - pd->curNameInst = prevNameInst; - } - else { - /* Recurse into the single expression. */ - exprList.head->makeNameTree( pd ); - } -} - - -void Join::resolveNameRefs( ParseData *pd ) -{ - /* Branch on whether or not there is to be a join. */ - if ( exprList.length() > 1 ) { - /* The variable definition enters a new scope. */ - NameFrame nameFrame = pd->enterNameScope( true, 1 ); - - /* The join scope must contain a start label. */ - NameSet resolved = pd->resolvePart( pd->localNameScope, "start", true ); - if ( resolved.length() > 0 ) { - /* Take the first. */ - pd->curNameInst->start = resolved[0]; - if ( resolved.length() > 1 ) { - /* Complain about the multiple references. */ - pd->id->error(loc) << "join operation has multiple start labels" << endl; - pd->errorStateLabels( resolved ); - } - } - - /* Make sure there is a start label. */ - if ( pd->curNameInst->start != 0 ) { - /* There is an implicit reference to start name. */ - pd->curNameInst->start->numRefs += 1; - } - else { - /* No start label. */ - pd->id->error(loc) << "join operation has no start label" << endl; - } - - /* Recurse into all expressions in the list. */ - for ( ExprList::Iter expr = exprList; expr.lte(); expr++ ) - expr->resolveNameRefs( pd ); - - /* The name scope ends, pop the name instantiation. */ - pd->popNameScope( nameFrame ); - } - else { - /* Recurse into the single expression. */ - exprList.head->resolveNameRefs( pd ); - } -} - -/* Clean up after an expression node. */ -Expression::~Expression() -{ - if ( expression ) - delete expression; - if ( term ) - delete term; -} - -/* Evaluate a single expression node. */ -FsmRes Expression::walk( ParseData *pd, bool lastInSeq ) -{ - switch ( type ) { - case OrType: { - /* Evaluate the expression. */ - FsmRes exprFsm = expression->walk( pd, false ); - if ( !exprFsm.success() ) - return exprFsm; - - /* Evaluate the term. */ - FsmRes rhs = term->walk( pd ); - if ( !rhs.success() ) - return rhs; - - /* Perform union. */ - FsmRes res = FsmAp::unionOp( exprFsm.fsm, rhs.fsm, lastInSeq ); - if ( !res.success() ) - return res; - - return res; - } - case IntersectType: { - /* Evaluate the expression. */ - FsmRes exprFsm = expression->walk( pd ); - if ( !exprFsm.success() ) - return exprFsm; - - /* Evaluate the term. */ - FsmRes rhs = term->walk( pd ); - if ( !rhs.success() ) - return rhs; - - /* Perform intersection. */ - FsmRes res = FsmAp::intersectOp( exprFsm.fsm, rhs.fsm, lastInSeq ); - if ( !res.success() ) - return res; - - return res; - } - case SubtractType: { - /* Evaluate the expression. */ - FsmRes exprFsm = expression->walk( pd ); - if ( !exprFsm.success() ) - return exprFsm; - - /* Evaluate the term. */ - FsmRes rhs = term->walk( pd ); - if ( !rhs.success() ) - return rhs; - - /* Perform subtraction. */ - FsmRes res = FsmAp::subtractOp( exprFsm.fsm, rhs.fsm, lastInSeq ); - if ( !res.success() ) - return res; - - return res; - } - case StrongSubtractType: { - /* Evaluate the expression. */ - FsmRes exprFsm = expression->walk( pd ); - if ( !exprFsm.success() ) - return exprFsm; - - FsmAp *leadAnyStar = FsmAp::dotStarFsm( pd->fsmCtx ); - FsmAp *trailAnyStar = FsmAp::dotStarFsm( pd->fsmCtx ); - - /* Evaluate the term and pad it with any* machines. */ - FsmRes termFsm = term->walk( pd ); - if ( !termFsm.success() ) - return termFsm; - - FsmRes res1 = FsmAp::concatOp( leadAnyStar, termFsm.fsm ); - if ( !res1.success() ) - return res1; - - FsmRes res2 = FsmAp::concatOp( res1.fsm, trailAnyStar ); - if ( !res2.success() ) - return res2; - - /* Perform subtraction. */ - FsmRes res3 = FsmAp::subtractOp( exprFsm.fsm, res2.fsm, lastInSeq ); - if ( !res3.success() ) - return res3; - - return res3; - } - case TermType: { - /* Return result of the term. */ - return term->walk( pd ); - } - case BuiltinType: { - /* Construct the builtin. */ - return FsmRes( FsmRes::Fsm(), makeBuiltin( builtin, pd ) ); - } - } - - return FsmRes( FsmRes::InternalError() ); -} - -void Expression::makeNameTree( ParseData *pd ) -{ - switch ( type ) { - case OrType: - case IntersectType: - case SubtractType: - case StrongSubtractType: - expression->makeNameTree( pd ); - term->makeNameTree( pd ); - break; - case TermType: - term->makeNameTree( pd ); - break; - case BuiltinType: - break; - } -} - -void Expression::resolveNameRefs( ParseData *pd ) -{ - switch ( type ) { - case OrType: - case IntersectType: - case SubtractType: - case StrongSubtractType: - expression->resolveNameRefs( pd ); - term->resolveNameRefs( pd ); - break; - case TermType: - term->resolveNameRefs( pd ); - break; - case BuiltinType: - break; - } -} - -/* Clean up after a term node. */ -Term::~Term() -{ - if ( term ) - delete term; - if ( factorWithAug ) - delete factorWithAug; -} - -/* Evaluate a term node. */ -FsmRes Term::walk( ParseData *pd, bool lastInSeq ) -{ - switch ( type ) { - case ConcatType: { - /* Evaluate the Term. */ - FsmRes termFsm = term->walk( pd, false ); - if ( !termFsm.success() ) - return termFsm; - - /* Evaluate the FactorWithRep. */ - FsmRes rhs = factorWithAug->walk( pd ); - if ( !rhs.success() ) { - delete termFsm.fsm; - return rhs; - } - - /* Perform concatenation. */ - FsmRes res = FsmAp::concatOp( termFsm.fsm, rhs.fsm, lastInSeq ); - if ( !res.success() ) - return res; - - return res; - } - case RightStartType: { - /* Evaluate the Term. */ - FsmRes termFsm = term->walk( pd ); - if ( !termFsm.success() ) - return termFsm; - - /* Evaluate the FactorWithRep. */ - FsmRes rhs = factorWithAug->walk( pd ); - if ( !rhs.success() ) { - delete termFsm.fsm; - return rhs; - } - - /* Perform concatenation. */ - FsmRes res = FsmAp::rightStartConcatOp( termFsm.fsm, rhs.fsm, lastInSeq ); - if ( !res.success() ) - return res; - - return res; - } - case RightFinishType: { - /* Evaluate the Term. */ - FsmRes termFsm = term->walk( pd ); - if ( !termFsm.success() ) - return termFsm; - - /* Evaluate the FactorWithRep. */ - FsmRes rhs = factorWithAug->walk( pd ); - if ( !rhs.success() ) { - delete termFsm.fsm; - return rhs; - } - - /* Set up the priority descriptors. The left machine gets the - * lower priority where as the finishing transitions to the right - * get the higher priority. */ - priorDescs[0].key = pd->fsmCtx->nextPriorKey++; - priorDescs[0].priority = 0; - termFsm.fsm->allTransPrior( pd->fsmCtx->curPriorOrd++, &priorDescs[0] ); - - /* The finishing transitions of the right machine get the higher - * priority. Use the same unique key. */ - priorDescs[1].key = priorDescs[0].key; - priorDescs[1].priority = 1; - rhs.fsm->finishFsmPrior( pd->fsmCtx->curPriorOrd++, &priorDescs[1] ); - - /* If the right machine's start state is final we need to guard - * against the left machine persisting by moving through the empty - * string. */ - if ( rhs.fsm->startState->isFinState() ) { - rhs.fsm->startState->outPriorTable.setPrior( - pd->fsmCtx->curPriorOrd++, &priorDescs[1] ); - } - - /* Perform concatenation. */ - FsmRes res = FsmAp::concatOp( termFsm.fsm, rhs.fsm, lastInSeq ); - if ( !res.success() ) - return res; - - return res; - } - case LeftType: { - /* Evaluate the Term. */ - FsmRes termFsm = term->walk( pd ); - if ( !termFsm.success() ) - return termFsm; - - /* Evaluate the FactorWithRep. */ - FsmRes rhs = factorWithAug->walk( pd ); - if ( !rhs.success() ) { - delete termFsm.fsm; - return rhs; - } - - /* Set up the priority descriptors. The left machine gets the - * higher priority. */ - priorDescs[0].key = pd->fsmCtx->nextPriorKey++; - priorDescs[0].priority = 1; - termFsm.fsm->allTransPrior( pd->fsmCtx->curPriorOrd++, &priorDescs[0] ); - - /* The right machine gets the lower priority. We cannot use - * allTransPrior here in case the start state of the right machine - * is final. It would allow the right machine thread to run along - * with the left if just passing through the start state. Using - * startFsmPrior prevents this. */ - priorDescs[1].key = priorDescs[0].key; - priorDescs[1].priority = 0; - rhs.fsm->startFsmPrior( pd->fsmCtx->curPriorOrd++, &priorDescs[1] ); - - /* Perform concatenation. */ - FsmRes res = FsmAp::concatOp( termFsm.fsm, rhs.fsm, lastInSeq ); - if ( !res.success() ) - return res; - - return res; - } - case FactorWithAugType: { - return factorWithAug->walk( pd ); - } - } - return FsmRes( FsmRes::InternalError() ); -} - -void Term::makeNameTree( ParseData *pd ) -{ - switch ( type ) { - case ConcatType: - case RightStartType: - case RightFinishType: - case LeftType: - term->makeNameTree( pd ); - factorWithAug->makeNameTree( pd ); - break; - case FactorWithAugType: - factorWithAug->makeNameTree( pd ); - break; - } -} - -void Term::resolveNameRefs( ParseData *pd ) -{ - switch ( type ) { - case ConcatType: - case RightStartType: - case RightFinishType: - case LeftType: - term->resolveNameRefs( pd ); - factorWithAug->resolveNameRefs( pd ); - break; - case FactorWithAugType: - factorWithAug->resolveNameRefs( pd ); - break; - } -} - -/* Clean up after a factor with augmentation node. */ -FactorWithAug::~FactorWithAug() -{ - delete factorWithRep; - - /* Walk the vector of parser actions, deleting function names. */ - - /* Clean up priority descriptors. */ - if ( priorDescs != 0 ) - delete[] priorDescs; -} - -void FactorWithAug::assignActions( ParseData *pd, FsmAp *graph, int *actionOrd ) -{ - /* Assign actions. */ - for ( int i = 0; i < actions.length(); i++ ) { - switch ( actions[i].type ) { - /* Transition actions. */ - case at_start: - graph->startFsmAction( actionOrd[i], actions[i].action ); - break; - case at_all: - graph->allTransAction( actionOrd[i], actions[i].action ); - break; - case at_finish: - graph->finishFsmAction( actionOrd[i], actions[i].action ); - break; - case at_leave: - graph->leaveFsmAction( actionOrd[i], actions[i].action ); - break; - - /* Global error actions. */ - case at_start_gbl_error: - graph->startErrorAction( actionOrd[i], actions[i].action, 0 ); - break; - case at_all_gbl_error: - graph->allErrorAction( actionOrd[i], actions[i].action, 0 ); - break; - case at_final_gbl_error: - graph->finalErrorAction( actionOrd[i], actions[i].action, 0 ); - break; - case at_not_start_gbl_error: - graph->notStartErrorAction( actionOrd[i], actions[i].action, 0 ); - break; - case at_not_final_gbl_error: - graph->notFinalErrorAction( actionOrd[i], actions[i].action, 0 ); - break; - case at_middle_gbl_error: - graph->middleErrorAction( actionOrd[i], actions[i].action, 0 ); - break; - - /* Local error actions. */ - case at_start_local_error: - graph->startErrorAction( actionOrd[i], actions[i].action, - actions[i].localErrKey ); - break; - case at_all_local_error: - graph->allErrorAction( actionOrd[i], actions[i].action, - actions[i].localErrKey ); - break; - case at_final_local_error: - graph->finalErrorAction( actionOrd[i], actions[i].action, - actions[i].localErrKey ); - break; - case at_not_start_local_error: - graph->notStartErrorAction( actionOrd[i], actions[i].action, - actions[i].localErrKey ); - break; - case at_not_final_local_error: - graph->notFinalErrorAction( actionOrd[i], actions[i].action, - actions[i].localErrKey ); - break; - case at_middle_local_error: - graph->middleErrorAction( actionOrd[i], actions[i].action, - actions[i].localErrKey ); - break; - - /* EOF actions. */ - case at_start_eof: - graph->startEOFAction( actionOrd[i], actions[i].action ); - break; - case at_all_eof: - graph->allEOFAction( actionOrd[i], actions[i].action ); - break; - case at_final_eof: - graph->finalEOFAction( actionOrd[i], actions[i].action ); - break; - case at_not_start_eof: - graph->notStartEOFAction( actionOrd[i], actions[i].action ); - break; - case at_not_final_eof: - graph->notFinalEOFAction( actionOrd[i], actions[i].action ); - break; - case at_middle_eof: - graph->middleEOFAction( actionOrd[i], actions[i].action ); - break; - - /* To State Actions. */ - case at_start_to_state: - graph->startToStateAction( actionOrd[i], actions[i].action ); - break; - case at_all_to_state: - graph->allToStateAction( actionOrd[i], actions[i].action ); - break; - case at_final_to_state: - graph->finalToStateAction( actionOrd[i], actions[i].action ); - break; - case at_not_start_to_state: - graph->notStartToStateAction( actionOrd[i], actions[i].action ); - break; - case at_not_final_to_state: - graph->notFinalToStateAction( actionOrd[i], actions[i].action ); - break; - case at_middle_to_state: - graph->middleToStateAction( actionOrd[i], actions[i].action ); - break; - - /* From State Actions. */ - case at_start_from_state: - graph->startFromStateAction( actionOrd[i], actions[i].action ); - break; - case at_all_from_state: - graph->allFromStateAction( actionOrd[i], actions[i].action ); - break; - case at_final_from_state: - graph->finalFromStateAction( actionOrd[i], actions[i].action ); - break; - case at_not_start_from_state: - graph->notStartFromStateAction( actionOrd[i], actions[i].action ); - break; - case at_not_final_from_state: - graph->notFinalFromStateAction( actionOrd[i], actions[i].action ); - break; - case at_middle_from_state: - graph->middleFromStateAction( actionOrd[i], actions[i].action ); - break; - - /* Remaining cases, prevented by the parser. */ - default: - assert( false ); - break; - } - } -} - -void FactorWithAug::assignPriorities( FsmAp *graph, int *priorOrd ) -{ - /* Assign priorities. */ - for ( int i = 0; i < priorityAugs.length(); i++ ) { - switch ( priorityAugs[i].type ) { - case at_start: - graph->startFsmPrior( priorOrd[i], &priorDescs[i]); - break; - case at_all: - graph->allTransPrior( priorOrd[i], &priorDescs[i] ); - break; - case at_finish: - graph->finishFsmPrior( priorOrd[i], &priorDescs[i] ); - break; - case at_leave: - graph->leaveFsmPrior( priorOrd[i], &priorDescs[i] ); - break; - - default: - /* Parser Prevents this case. */ - break; - } - } -} - -void FactorWithAug::assignConditions( FsmAp *graph ) -{ - for ( int i = 0; i < conditions.length(); i++ ) { - switch ( conditions[i].type ) { - /* Transition actions. */ - case at_start: - graph->startFsmCondition( conditions[i].action, conditions[i].sense ); - break; - case at_all: - graph->allTransCondition( conditions[i].action, conditions[i].sense ); - break; - case at_leave: - graph->leaveFsmCondition( conditions[i].action, conditions[i].sense ); - break; - default: - break; - } - } -} - -/* Evaluate a factor with augmentation node. */ -FsmRes FactorWithAug::walk( ParseData *pd ) -{ - /* Enter into the scopes created for the labels. */ - NameFrame nameFrame = pd->enterNameScope( false, labels.size() ); - - /* Make the array of function orderings. */ - int *actionOrd = 0; - if ( actions.length() > 0 ) - actionOrd = new int[actions.length()]; - - /* First walk the list of actions, assigning order to all starting - * actions. */ - for ( int i = 0; i < actions.length(); i++ ) { - if ( actions[i].type == at_start || - actions[i].type == at_start_gbl_error || - actions[i].type == at_start_local_error || - actions[i].type == at_start_to_state || - actions[i].type == at_start_from_state || - actions[i].type == at_start_eof ) - actionOrd[i] = pd->fsmCtx->curActionOrd++; - } - - /* Evaluate the factor with repetition. */ - FsmRes factorTree = factorWithRep->walk( pd ); - if ( !factorTree.success() ) { - delete [] actionOrd; - return factorTree; - } - - FsmAp *rtnVal = factorTree.fsm; - - /* Compute the remaining action orderings. */ - for ( int i = 0; i < actions.length(); i++ ) { - if ( actions[i].type != at_start && - actions[i].type != at_start_gbl_error && - actions[i].type != at_start_local_error && - actions[i].type != at_start_to_state && - actions[i].type != at_start_from_state && - actions[i].type != at_start_eof ) - actionOrd[i] = pd->fsmCtx->curActionOrd++; - } - - /* Embed conditions. */ - assignConditions( rtnVal ); - - /* Embed actions. */ - assignActions( pd, rtnVal , actionOrd ); - - /* Make the array of priority orderings. Orderings are local to this walk - * of the factor with augmentation. */ - int *priorOrd = 0; - if ( priorityAugs.length() > 0 ) - priorOrd = new int[priorityAugs.length()]; - - /* Walk all priorities, assigning the priority ordering. */ - for ( int i = 0; i < priorityAugs.length(); i++ ) - priorOrd[i] = pd->fsmCtx->curPriorOrd++; - - /* If the priority descriptors have not been made, make them now. Make - * priority descriptors for each priority asignment that will be passed to - * the fsm. Used to keep track of the key, value and used bit. */ - if ( priorDescs == 0 && priorityAugs.length() > 0 ) { - priorDescs = new PriorDesc[priorityAugs.length()]; - for ( int i = 0; i < priorityAugs.length(); i++ ) { - /* Init the prior descriptor for the priority setting. */ - priorDescs[i].key = priorityAugs[i].priorKey; - priorDescs[i].priority = priorityAugs[i].priorValue; - priorDescs[i].guarded = false; - priorDescs[i].guardId = 0; - } - } - - /* Assign priorities into the machine. */ - assignPriorities( rtnVal, priorOrd ); - - /* Assign epsilon transitions. */ - for ( int e = 0; e < epsilonLinks.length(); e++ ) { - /* Get the name, which may not exist. If it doesn't then silently - * ignore it because an error has already been reported. */ - NameInst *epTarg = pd->epsilonResolvedLinks[pd->nextEpsilonResolvedLink++]; - if ( epTarg != 0 ) { - /* Make the epsilon transitions. */ - rtnVal->epsilonTrans( epTarg->id ); - - /* Note that we have made a link to the name. */ - pd->localNameScope->referencedNames.append( epTarg ); - } - } - - /* Set entry points for labels. */ - if ( labels.size() > 0 ) { - /* Pop the names. */ - pd->resetNameScope( nameFrame ); - - /* Make labels that are referenced into entry points. */ - for ( size_t i = 0; i < labels.size(); i++ ) { - pd->enterNameScope( false, 1 ); - - /* Will always be found. */ - NameInst *name = pd->curNameInst; - - /* If the name is referenced then set the entry point. */ - if ( name->numRefs > 0 ) - rtnVal->setEntry( name->id, rtnVal->startState ); - - if ( labels[i].cut ) - pd->cuts.append( ParseData::Cut( labels[i].data, name->id ) ); - } - - pd->popNameScope( nameFrame ); - } - - if ( priorOrd != 0 ) - delete[] priorOrd; - if ( actionOrd != 0 ) - delete[] actionOrd; - return FsmRes( FsmRes::Fsm(), rtnVal ); -} - -void FactorWithAug::makeNameTree( ParseData *pd ) -{ - /* Add the labels to the tree of instantiated names. Each label - * makes a new scope. */ - NameInst *prevNameInst = pd->curNameInst; - for ( size_t i = 0; i < labels.size(); i++ ) { - pd->curNameInst = pd->addNameInst( labels[i].loc, labels[i].data, true ); - - if ( labels[i].cut ) - pd->curNameInst->numRefs += 1; - } - - /* Recurse, then pop the names. */ - factorWithRep->makeNameTree( pd ); - pd->curNameInst = prevNameInst; -} - - -void FactorWithAug::resolveNameRefs( ParseData *pd ) -{ - /* Enter into the name scope created by any labels. */ - NameFrame nameFrame = pd->enterNameScope( false, labels.size() ); - - /* Note action references. */ - for ( int i = 0; i < actions.length(); i++ ) - actions[i].action->embedRoots.append( pd->localNameScope ); - - /* Recurse first. IMPORTANT: we must do the exact same traversal as when - * the tree is constructed. */ - factorWithRep->resolveNameRefs( pd ); - - /* Resolve epsilon transitions. */ - for ( int ep = 0; ep < epsilonLinks.length(); ep++ ) { - /* Get the link. */ - EpsilonLink &link = epsilonLinks[ep]; - NameInst *resolvedName = 0; - - if ( link.target->length() == 1 && link.target->data[0] == "final" ) { - /* Epsilon drawn to an implicit final state. An implicit final is - * only available in join operations. */ - resolvedName = pd->localNameScope->final; - } - else { - /* Do an search for the name. */ - NameSet resolved; - pd->resolveFrom( resolved, pd->localNameScope, link.target, 0 ); - if ( resolved.length() > 0 ) { - /* Take the first one. */ - resolvedName = resolved[0]; - if ( resolved.length() > 1 ) { - /* Complain about the multiple references. */ - pd->id->error(link.loc) << "state reference " << link.target << - " resolves to multiple entry points" << endl; - pd->errorStateLabels( resolved ); - } - } - } - - /* This is tricky, we stuff resolved epsilon transitions into one long - * vector in the parse data structure. Since the name resolution and - * graph generation both do identical walks of the parse tree we - * should always find the link resolutions in the right place. */ - pd->epsilonResolvedLinks.append( resolvedName ); - - if ( resolvedName != 0 ) { - /* Found the name, bump of the reference count on it. */ - resolvedName->numRefs += 1; - } - else { - /* Complain, no recovery action, the epsilon op will ignore any - * epsilon transitions whose names did not resolve. */ - pd->id->error(link.loc) << "could not resolve label " << link.target << endl; - } - } - - if ( labels.size() > 0 ) - pd->popNameScope( nameFrame ); -} - - -/* Clean up after a factor with repetition node. */ -FactorWithRep::~FactorWithRep() -{ - switch ( type ) { - case StarType: case StarStarType: case OptionalType: case PlusType: - case ExactType: case MaxType: case MinType: case RangeType: - delete factorWithRep; - case FactorWithNegType: - delete factorWithNeg; - break; - } -} - - -/* Evaluate a factor with repetition node. */ -FsmRes FactorWithRep::walk( ParseData *pd ) -{ - switch ( type ) { - case StarType: { - /* Evaluate the FactorWithRep. */ - FsmRes factorTree = factorWithRep->walk( pd ); - if ( !factorTree.success() ) - return factorTree; - - if ( factorTree.fsm->startState->isFinState() ) { - pd->id->warning(loc) << "applying kleene star to a machine that " - "accepts zero length word" << endl; - factorTree.fsm->unsetFinState( factorTree.fsm->startState ); - } - - return FsmAp::starOp( factorTree.fsm ); - } - case StarStarType: { - /* Evaluate the FactorWithRep. */ - FsmRes factorTree = factorWithRep->walk( pd ); - if ( !factorTree.success() ) - return factorTree; - - if ( factorTree.fsm->startState->isFinState() ) { - pd->id->warning(loc) << "applying kleene star to a machine that " - "accepts zero length word" << endl; - } - - /* Set up the prior descs. All gets priority one, whereas leaving gets - * priority zero. Make a unique key so that these priorities don't - * interfere with any priorities set by the user. */ - priorDescs[0].key = pd->fsmCtx->nextPriorKey++; - priorDescs[0].priority = 1; - factorTree.fsm->allTransPrior( pd->fsmCtx->curPriorOrd++, &priorDescs[0] ); - - /* Leaveing gets priority 0. Use same unique key. */ - priorDescs[1].key = priorDescs[0].key; - priorDescs[1].priority = 0; - factorTree.fsm->leaveFsmPrior( pd->fsmCtx->curPriorOrd++, &priorDescs[1] ); - - return FsmAp::starOp( factorTree.fsm ); - } - case OptionalType: { - /* Evaluate the FactorWithRep. */ - FsmRes factorTree = factorWithRep->walk( pd ); - if ( !factorTree.success() ) - return factorTree; - - return FsmAp::questionOp( factorTree.fsm ); - } - case PlusType: { - /* Evaluate the FactorWithRep. */ - FsmRes factorTree = factorWithRep->walk( pd ); - if ( !factorTree.success() ) - return factorTree; - - if ( factorTree.fsm->startState->isFinState() ) { - pd->id->warning(loc) << "applying plus operator to a machine that " - "accepts zero length word" << endl; - } - - return FsmAp::plusOp( factorTree.fsm ); - } - case ExactType: { - /* Evaluate the first FactorWithRep. */ - FsmRes factorTree = factorWithRep->walk( pd ); - if ( !factorTree.success() ) - return factorTree; - - /* Get an int from the repetition amount. */ - if ( lowerRep == 0 ) { - /* No copies. Don't need to evaluate the factorWithRep. - * This Defeats the purpose so give a warning. */ - pd->id->warning(loc) << "exactly zero repetitions results " - "in the null machine" << endl; - } - else { - if ( factorTree.fsm->startState->isFinState() ) { - pd->id->warning(loc) << "applying repetition to a machine that " - "accepts zero length word" << endl; - } - } - - /* Handles the n == 0 case. */ - return FsmAp::exactRepeatOp( factorTree.fsm, lowerRep ); - } - case MaxType: { - /* Evaluate the first FactorWithRep. */ - FsmRes factorTree = factorWithRep->walk( pd ); - if ( !factorTree.success() ) - return factorTree; - - /* Get an int from the repetition amount. */ - if ( upperRep == 0 ) { - /* No copies. Don't need to evaluate the factorWithRep. - * This Defeats the purpose so give a warning. */ - pd->id->warning(loc) << "max zero repetitions results " - "in the null machine" << endl; - - return FsmRes( FsmRes::Fsm(), FsmAp::lambdaFsm( pd->fsmCtx ) ); - } - else { - - if ( factorTree.fsm->startState->isFinState() ) { - pd->id->warning(loc) << "applying max repetition to a machine that " - "accepts zero length word" << endl; - } - } - - /* Do the repetition on the machine. Handles the n == 0 case. */ - return FsmAp::maxRepeatOp( factorTree.fsm, upperRep ); - } - case MinType: { - /* Evaluate the repeated machine. */ - FsmRes factorTree = factorWithRep->walk( pd ); - if ( !factorTree.success() ) - return factorTree; - - if ( factorTree.fsm->startState->isFinState() ) { - pd->id->warning(loc) << "applying min repetition to a machine that " - "accepts zero length word" << endl; - } - - return FsmAp::minRepeatOp( factorTree.fsm, lowerRep ); - } - case RangeType: { - /* Check for bogus range. */ - if ( upperRep - lowerRep < 0 ) { - pd->id->error(loc) << "invalid range repetition" << endl; - - /* Return null machine as recovery. */ - return FsmRes( FsmRes::Fsm(), FsmAp::lambdaFsm( pd->fsmCtx ) ); - } - - /* Now need to evaluate the repeated machine. */ - FsmRes factorTree = factorWithRep->walk( pd ); - if ( !factorTree.success() ) - return factorTree; - - if ( lowerRep == 0 && upperRep == 0 ) { - /* No copies. Don't need to evaluate the factorWithRep. This - * defeats the purpose so give a warning. */ - pd->id->warning(loc) << "zero to zero repetitions results " - "in the null machine" << endl; - } - else { - - if ( factorTree.fsm->startState->isFinState() ) { - pd->id->warning(loc) << "applying range repetition to a machine that " - "accepts zero length word" << endl; - } - - } - return FsmAp::rangeRepeatOp( factorTree.fsm, lowerRep, upperRep ); - } - case FactorWithNegType: { - /* Evaluate the Factor. Pass it up. */ - return factorWithNeg->walk( pd ); - }} - return FsmRes( FsmRes::InternalError() ); -} - -void FactorWithRep::makeNameTree( ParseData *pd ) -{ - switch ( type ) { - case StarType: - case StarStarType: - case OptionalType: - case PlusType: - case ExactType: - case MaxType: - case MinType: - case RangeType: - factorWithRep->makeNameTree( pd ); - break; - case FactorWithNegType: - factorWithNeg->makeNameTree( pd ); - break; - } -} - -void FactorWithRep::resolveNameRefs( ParseData *pd ) -{ - switch ( type ) { - case StarType: - case StarStarType: - case OptionalType: - case PlusType: - case ExactType: - case MaxType: - case MinType: - case RangeType: - factorWithRep->resolveNameRefs( pd ); - break; - case FactorWithNegType: - factorWithNeg->resolveNameRefs( pd ); - break; - } -} - -/* Clean up after a factor with negation node. */ -FactorWithNeg::~FactorWithNeg() -{ - switch ( type ) { - case NegateType: - case CharNegateType: - delete factorWithNeg; - break; - case FactorType: - delete factor; - break; - } -} - -/* Evaluate a factor with negation node. */ -FsmRes FactorWithNeg::walk( ParseData *pd ) -{ - switch ( type ) { - case NegateType: { - /* Evaluate the factorWithNeg. */ - FsmRes toNegate = factorWithNeg->walk( pd ); - - /* Negation is subtract from dot-star. */ - FsmAp *ds = FsmAp::dotStarFsm( pd->fsmCtx ); - FsmRes res = FsmAp::subtractOp( ds, toNegate.fsm ); - - return res; - } - case CharNegateType: { - /* Evaluate the factorWithNeg. */ - FsmRes toNegate = factorWithNeg->walk( pd ); - - /* CharNegation is subtract from dot. */ - FsmAp *ds = FsmAp::dotFsm( pd->fsmCtx ); - FsmRes res = FsmAp::subtractOp( ds, toNegate.fsm ); - - return res; - } - case FactorType: { - /* Evaluate the Factor. Pass it up. */ - return factor->walk( pd ); - }} - return FsmRes( FsmRes::InternalError() ); -} - -void FactorWithNeg::makeNameTree( ParseData *pd ) -{ - switch ( type ) { - case NegateType: - case CharNegateType: - factorWithNeg->makeNameTree( pd ); - break; - case FactorType: - factor->makeNameTree( pd ); - break; - } -} - -void FactorWithNeg::resolveNameRefs( ParseData *pd ) -{ - switch ( type ) { - case NegateType: - case CharNegateType: - factorWithNeg->resolveNameRefs( pd ); - break; - case FactorType: - factor->resolveNameRefs( pd ); - break; - } -} - -/* Clean up after a factor node. */ -Factor::~Factor() -{ - switch ( type ) { - case LiteralType: - delete literal; - break; - case RangeType: - delete range; - break; - case OrExprType: - delete reItem; - break; - case RegExprType: - delete regExpr; - break; - case ReferenceType: - break; - case ParenType: - delete join; - break; - case LongestMatchType: - delete longestMatch; - break; - case NfaWrap: case NfaRep: - case CondStar: case CondPlus: - delete expression; - break; - } -} - - -/* Evaluate a factor node. */ -FsmRes Factor::walk( ParseData *pd ) -{ - switch ( type ) { - case LiteralType: - return FsmRes( FsmRes::Fsm(), literal->walk( pd ) ); - case RangeType: - return FsmRes( FsmRes::Fsm(), range->walk( pd ) ); - case OrExprType: - return reItem->walk( pd, 0 ); - case RegExprType: - return FsmRes( FsmRes::Fsm(), regExpr->walk( pd, 0 ) ); - case ReferenceType: - return varDef->walk( pd ); - case ParenType: - return join->walk( pd ); - case LongestMatchType: - return longestMatch->walk( pd ); - case NfaRep: { - FsmRes exprTree = expression->walk( pd ); - - if ( mode == Factor::NfaLegacy ) { - FsmRes res = FsmAp::nfaRepeatOp( exprTree.fsm, action1, action2, action3, - action4, action5, action6 ); - - res.fsm->verifyIntegrity(); - return res; - } - else if ( mode == Factor::NfaLazy ) { - FsmRes res = FsmAp::nfaRepeatOp2( exprTree.fsm, action1, action2, action3, - action4, action5, action6, FsmAp::NfaLazy ); - - res.fsm->verifyIntegrity(); - return res; - } - else { - FsmRes res = FsmAp::nfaRepeatOp2( exprTree.fsm, action1, action2, action3, - action4, action5, action6, FsmAp::NfaGreedy ); - - res.fsm->verifyIntegrity(); - return res; - } - } - case NfaWrap: { - FsmRes exprTree = expression->walk( pd ); - if ( mode == Factor::NfaLazy ) { - FsmRes res = FsmAp::nfaWrap( exprTree.fsm, action1, action2, action3, - action4, /* action5, */ action6, FsmAp::NfaLazy ); - - res.fsm->verifyIntegrity(); - return res; - } - else { - FsmRes res = FsmAp::nfaWrap( exprTree.fsm, action1, action2, action3, - action4, /* action5, */ action6, FsmAp::NfaGreedy ); - - res.fsm->verifyIntegrity(); - return res; - } - } - case CondStar: { - FsmRes exprTree = expression->walk( pd ); - if ( !exprTree.success() ) - return exprTree; - - if ( exprTree.fsm->startState->isFinState() ) { - pd->id->warning(loc) << "applying plus operator to a machine that " - "accepts zero length word" << endl; - } - - return FsmAp::condStar( exprTree.fsm, repId, action1, action2, action3, action4 ); - } - case CondPlus: { - FsmRes exprTree = expression->walk( pd ); - if ( !exprTree.success() ) - return exprTree; - - if ( exprTree.fsm->startState->isFinState() ) { - pd->id->warning(loc) << "applying plus operator to a machine that " - "accepts zero length word" << endl; - } - - return FsmAp::condPlus( exprTree.fsm, repId, action1, action2, action3, action4 ); - }} - - return FsmRes( FsmRes::InternalError() ); -} - -void Factor::makeNameTree( ParseData *pd ) -{ - switch ( type ) { - case LiteralType: - case RangeType: - case OrExprType: - case RegExprType: - break; - case ReferenceType: - varDef->makeNameTree( loc, pd ); - break; - case ParenType: - join->makeNameTree( pd ); - break; - case LongestMatchType: - longestMatch->makeNameTree( pd ); - break; - case NfaWrap: - case NfaRep: - case CondStar: - case CondPlus: - expression->makeNameTree( pd ); - break; - } -} - -void Factor::resolveNameRefs( ParseData *pd ) -{ - switch ( type ) { - case LiteralType: - case RangeType: - case OrExprType: - case RegExprType: - break; - case ReferenceType: - varDef->resolveNameRefs( pd ); - break; - case ParenType: - join->resolveNameRefs( pd ); - break; - case LongestMatchType: - longestMatch->resolveNameRefs( pd ); - break; - case NfaRep: - case NfaWrap: - case CondStar: - case CondPlus: - expression->resolveNameRefs( pd ); - break; - } -} - -/* Clean up a range object. Must delete the two literals. */ -Range::~Range() -{ - delete lowerLit; - delete upperLit; -} - -/* Evaluate a range. Gets the lower an upper key and makes an fsm range. */ -FsmAp *Range::walk( ParseData *pd ) -{ - /* Construct and verify the suitability of the lower end of the range. */ - FsmAp *lowerFsm = lowerLit->walk( pd ); - if ( !lowerFsm->checkSingleCharMachine() ) { - pd->id->error(lowerLit->loc) << - "bad range lower end, must be a single character" << endl; - } - - /* Construct and verify the upper end. */ - FsmAp *upperFsm = upperLit->walk( pd ); - if ( !upperFsm->checkSingleCharMachine() ) { - pd->id->error(upperLit->loc) << - "bad range upper end, must be a single character" << endl; - } - - /* Grab the keys from the machines, then delete them. */ - Key lowKey = lowerFsm->startState->outList.head->lowKey; - Key highKey = upperFsm->startState->outList.head->lowKey; - delete lowerFsm; - delete upperFsm; - - /* Validate the range. */ - if ( pd->fsmCtx->keyOps->gt( lowKey, highKey ) ) { - /* Recover by setting upper to lower; */ - pd->id->error(lowerLit->loc) << "lower end of range is greater then upper end" << endl; - highKey = lowKey; - } - - /* Return the range now that it is validated. */ - FsmAp *retFsm; - if ( caseIndep ) - retFsm = FsmAp::rangeFsmCI( pd->fsmCtx, lowKey, highKey ); - else - retFsm = FsmAp::rangeFsm( pd->fsmCtx, lowKey, highKey ); - - return retFsm; -} - -/* Evaluate a literal object. */ -FsmAp *Literal::walk( ParseData *pd ) -{ - /* FsmAp to return, is the alphabet signed. */ - FsmAp *rtnVal = 0; - - switch ( type ) { - case Number: { - /* Make a C string. Maybe put - up front. */ - Vector<char> num = data; - if ( neg ) - num.insert( 0, '-' ); - num.append( 0 ); - - /* Make the fsm key in int format. */ - Key fsmKey = makeFsmKeyNum( num.data, loc, pd ); - - /* Make the new machine. */ - rtnVal = FsmAp::concatFsm( pd->fsmCtx, fsmKey ); - break; - } - case LitString: { - /* Make the array of keys in int format. */ - long length; - bool caseInsensitive; - char *litstr = prepareLitString( pd->id, loc, data.data, data.length(), - length, caseInsensitive ); - Key *arr = new Key[length]; - makeFsmKeyArray( arr, litstr, length, pd ); - - /* Make the new machine. */ - if ( caseInsensitive ) - rtnVal = FsmAp::concatFsmCI( pd->fsmCtx, arr, length ); - else - rtnVal = FsmAp::concatFsm( pd->fsmCtx, arr, length ); - delete[] litstr; - delete[] arr; - break; - } - case HexString: { - long length; - Key *arr = prepareHexString( pd, loc, data.data, data.length(), length ); - rtnVal = FsmAp::concatFsm( pd->fsmCtx, arr, length ); - delete[] arr; - break; - }} - return rtnVal; -} - -/* Clean up after a regular expression object. */ -RegExpr::~RegExpr() -{ - switch ( type ) { - case RecurseItem: - delete regExpr; - delete item; - break; - case Empty: - break; - } -} - -/* Evaluate a regular expression object. */ -FsmAp *RegExpr::walk( ParseData *pd, RegExpr *rootRegex ) -{ - /* This is the root regex, pass down a pointer to this. */ - if ( rootRegex == 0 ) - rootRegex = this; - - FsmAp *rtnVal = 0; - switch ( type ) { - case RecurseItem: { - /* Walk both items. */ - rtnVal = regExpr->walk( pd, rootRegex ); - FsmRes fsm2 = item->walk( pd, rootRegex ); - FsmRes res = FsmAp::concatOp( rtnVal, fsm2.fsm ); - rtnVal = res.fsm; - break; - } - case Empty: { - rtnVal = FsmAp::lambdaFsm( pd->fsmCtx ); - break; - } - } - return rtnVal; -} - -/* Clean up after an item in a regular expression. */ -ReItem::~ReItem() -{ - switch ( type ) { - case Data: - case Dot: - break; - case OrBlock: - case NegOrBlock: - delete orBlock; - break; - } -} - -/* Evaluate a regular expression object. */ -FsmRes ReItem::walk( ParseData *pd, RegExpr *rootRegex ) -{ - /* The fsm to return, is the alphabet signed? */ - FsmAp *rtnVal = 0; - - switch ( type ) { - case Data: { - /* Move the data into an integer array and make a concat fsm. */ - Key *arr = new Key[data.length()]; - makeFsmKeyArray( arr, data.data, data.length(), pd ); - - /* Make the concat fsm. */ - if ( rootRegex != 0 && rootRegex->caseInsensitive ) - rtnVal = FsmAp::concatFsmCI( pd->fsmCtx, arr, data.length() ); - else - rtnVal = FsmAp::concatFsm( pd->fsmCtx, arr, data.length() ); - delete[] arr; - break; - } - case Dot: { - /* Make the dot fsm. */ - rtnVal = FsmAp::dotFsm( pd->fsmCtx ); - break; - } - case OrBlock: { - /* Get the or block and minmize it. */ - rtnVal = orBlock->walk( pd, rootRegex ); - if ( rtnVal == 0 ) - rtnVal = FsmAp::lambdaFsm( pd->fsmCtx ); - rtnVal->minimizePartition2(); - break; - } - case NegOrBlock: { - /* Get the or block and minimize it. */ - FsmAp *fsm = orBlock->walk( pd, rootRegex ); - fsm->minimizePartition2(); - - /* Make a dot fsm and subtract from it. */ - rtnVal = FsmAp::dotFsm( pd->fsmCtx ); - FsmRes res = FsmAp::subtractOp( rtnVal, fsm ); - rtnVal = res.fsm; - rtnVal->minimizePartition2(); - break; - } - } - - /* If the item is followed by a star, then apply the star op. */ - if ( star ) { - if ( rtnVal->startState->isFinState() ) { - pd->id->warning(loc) << "applying kleene star to a machine that " - "accepts zero length word" << endl; - } - - FsmRes res = FsmAp::starOp( rtnVal ); - rtnVal = res.fsm; - rtnVal->minimizePartition2(); - } - - return FsmRes( FsmRes::Fsm(), rtnVal ); -} - -/* Clean up after an or block of a regular expression. */ -ReOrBlock::~ReOrBlock() -{ - switch ( type ) { - case RecurseItem: - delete orBlock; - delete item; - break; - case Empty: - break; - } -} - - -/* Evaluate an or block of a regular expression. */ -FsmAp *ReOrBlock::walk( ParseData *pd, RegExpr *rootRegex ) -{ - FsmAp *rtnVal = 0; - switch ( type ) { - case RecurseItem: { - /* Evaluate the two fsm. */ - FsmAp *fsm1 = orBlock->walk( pd, rootRegex ); - FsmAp *fsm2 = item->walk( pd, rootRegex ); - if ( fsm1 == 0 ) - rtnVal = fsm2; - else { - FsmRes res = FsmAp::unionOp( fsm1, fsm2 ); - fsm1 = res.fsm; - rtnVal = fsm1; - } - break; - } - case Empty: { - rtnVal = 0; - break; - } - } - return rtnVal;; -} - -/* Evaluate an or block item of a regular expression. */ -FsmAp *ReOrItem::walk( ParseData *pd, RegExpr *rootRegex ) -{ - KeyOps *keyOps = pd->fsmCtx->keyOps; - - /* The return value, is the alphabet signed? */ - FsmAp *rtnVal = 0; - switch ( type ) { - case Data: { - /* Put the or data into an array of ints. Note that we find unique - * keys. Duplicates are silently ignored. The alternative would be to - * issue warning or an error but since we can't with [a0-9a] or 'a' | - * 'a' don't bother here. */ - KeySet keySet( keyOps ); - makeFsmUniqueKeyArray( keySet, data.data, data.length(), - rootRegex != 0 ? rootRegex->caseInsensitive : false, pd ); - - /* Run the or operator. */ - rtnVal = FsmAp::orFsm( pd->fsmCtx, keySet.data, keySet.length() ); - break; - } - case Range: { - /* Make the upper and lower keys. */ - Key lowKey = makeFsmKeyChar( lower, pd ); - Key highKey = makeFsmKeyChar( upper, pd ); - - /* Validate the range. */ - if ( keyOps->gt( lowKey, highKey ) ) { - /* Recover by setting upper to lower; */ - pd->id->error(loc) << "lower end of range is greater then upper end" << endl; - highKey = lowKey; - } - - /* Make the range machine. */ - rtnVal = FsmAp::rangeFsm( pd->fsmCtx, lowKey, highKey ); - - if ( rootRegex != 0 && rootRegex->caseInsensitive ) { - if ( keyOps->le( lowKey, 'Z' ) && pd->fsmCtx->keyOps->le( 'A', highKey ) ) { - Key otherLow = keyOps->lt( lowKey, 'A' ) ? Key('A') : lowKey; - Key otherHigh = keyOps->lt( 'Z', highKey ) ? Key('Z') : highKey; - - otherLow = keyOps->add( 'a', ( keyOps->sub( otherLow, 'A' ) ) ); - otherHigh = keyOps->add( 'a', ( keyOps->sub( otherHigh, 'A' ) ) ); - - FsmAp *otherRange = FsmAp::rangeFsm( pd->fsmCtx, otherLow, otherHigh ); - FsmRes res = FsmAp::unionOp( rtnVal, otherRange ); - rtnVal = res.fsm; - rtnVal->minimizePartition2(); - } - else if ( keyOps->le( lowKey, 'z' ) && keyOps->le( 'a', highKey ) ) { - Key otherLow = keyOps->lt( lowKey, 'a' ) ? Key('a') : lowKey; - Key otherHigh = keyOps->lt( 'z', highKey ) ? Key('z') : highKey; - - otherLow = keyOps->add('A' , ( keyOps->sub( otherLow , 'a' ) )); - otherHigh = keyOps->add('A' , ( keyOps->sub( otherHigh , 'a' ) )); - - FsmAp *otherRange = FsmAp::rangeFsm( pd->fsmCtx, otherLow, otherHigh ); - FsmRes res = FsmAp::unionOp( rtnVal, otherRange ); - rtnVal = res.fsm; - rtnVal->minimizePartition2(); - } - } - - break; - }} - return rtnVal; -} |