diff options
author | Adrian Thurston <thurston@colm.net> | 2019-09-08 21:11:17 -0600 |
---|---|---|
committer | Adrian Thurston <thurston@colm.net> | 2019-09-08 21:11:17 -0600 |
commit | c860c61607117582abd8f23881eed87957197484 (patch) | |
tree | 4d4e65dddc710e15f008189a9308d95924350c3f /src/compiler.cc | |
parent | f37c916aed2600951b8966a86020406b0b0542cf (diff) | |
download | colm-c860c61607117582abd8f23881eed87957197484.tar.gz |
moved the original colm src dir to /colm
Diffstat (limited to 'src/compiler.cc')
-rw-r--r-- | src/compiler.cc | 1247 |
1 files changed, 0 insertions, 1247 deletions
diff --git a/src/compiler.cc b/src/compiler.cc deleted file mode 100644 index 72cf99fa..00000000 --- a/src/compiler.cc +++ /dev/null @@ -1,1247 +0,0 @@ -/* - * Copyright 2006-2018 Adrian Thurston <thurston@colm.net> - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "compiler.h" - -#include <errno.h> -#include <stdlib.h> -#include <string.h> -#include <stdbool.h> -#include <unistd.h> -#include <assert.h> -#include <iostream> - -#include "redbuild.h" -#include "pdacodegen.h" -#include "fsmcodegen.h" -#include "colm.h" - -using std::ostringstream; -using std::cout; -using std::cerr; -using std::endl; - -char machineMain[] = "main"; -exit_object endp; -void operator<<( ostream &out, exit_object & ) -{ - out << endl; - exit(1); -} - -/* Perform minimization after an operation according - * to the command line args. */ -void afterOpMinimize( FsmGraph *fsm, bool lastInSeq ) -{ - /* Switch on the prefered minimization algorithm. */ - if ( lastInSeq ) { - /* First clean up the graph. FsmGraph operations may leave these - * lying around. There should be no dead end states. The subtract - * intersection operators are the only places where they may be - * created and those operators clean them up. */ - fsm->removeUnreachableStates(); - fsm->minimizePartition2(); - } -} - -/* Count the transitions in the fsm by walking the state list. */ -int countTransitions( FsmGraph *fsm ) -{ - int numTrans = 0; - FsmState *state = fsm->stateList.head; - while ( state != 0 ) { - numTrans += state->outList.length(); - state = state->next; - } - return numTrans; -} - -Key makeFsmKeyHex( char *str, const InputLoc &loc, Compiler *pd ) -{ - /* Reset errno so we can check for overflow or underflow. In the event of - * an error, sets the return val to the upper or lower bound being tested - * against. */ - errno = 0; - unsigned int size = keyOps->alphType->size; - bool unusedBits = size < sizeof(unsigned long); - - unsigned long ul = strtoul( str, 0, 16 ); - - if ( errno == ERANGE || (unusedBits && ul >> (size * 8)) ) { - error(loc) << "literal " << str << " overflows the alphabet type" << endl; - ul = 1 << (size * 8); - } - - if ( unusedBits && ul >> (size * 8 - 1) ) - ul |= (ULONG_MAX >> (size*8 ) ) << (size*8); - - return Key( (long)ul ); -} - -Key makeFsmKeyDec( char *str, const InputLoc &loc, Compiler *pd ) -{ - /* Convert the number to a decimal. First reset errno so we can check - * for overflow or underflow. */ - errno = 0; - long long minVal = keyOps->alphType->minVal; - long long maxVal = keyOps->alphType->maxVal; - - long long ll = strtoll( str, 0, 10 ); - - /* Check for underflow. */ - if ( (errno == ERANGE && ll < 0) || ll < minVal) { - error(loc) << "literal " << str << " underflows the alphabet type" << endl; - ll = minVal; - } - /* Check for overflow. */ - else if ( (errno == ERANGE && ll > 0) || ll > maxVal ) { - error(loc) << "literal " << str << " overflows the alphabet type" << endl; - ll = maxVal; - } - - return Key( (long)ll ); -} - -/* Make an fsm key in int format (what the fsm graph uses) from an alphabet - * number returned by the parser. Validates that the number doesn't overflow - * the alphabet type. */ -Key makeFsmKeyNum( char *str, const InputLoc &loc, Compiler *pd ) -{ - /* Switch on hex/decimal format. */ - if ( str[0] == '0' && str[1] == 'x' ) - return makeFsmKeyHex( str, loc, pd ); - else - return makeFsmKeyDec( str, loc, pd ); -} - -/* Make an fsm int format (what the fsm graph uses) from a single character. - * Performs proper conversion depending on signed/unsigned property of the - * alphabet. */ -Key makeFsmKeyChar( char c, Compiler *pd ) -{ - /* Copy from a char type. */ - return Key( c ); -} - -/* Make an fsm key array in int format (what the fsm graph uses) from a string - * of characters. Performs proper conversion depending on signed/unsigned - * property of the alphabet. */ -void makeFsmKeyArray( Key *result, char *data, int len, Compiler *pd ) -{ - /* Copy from a char star type. */ - char *src = data; - for ( int i = 0; i < len; i++ ) - result[i] = Key(src[i]); -} - -/* Like makeFsmKeyArray except the result has only unique keys. They ordering - * will be changed. */ -void makeFsmUniqueKeyArray( KeySet &result, char *data, int len, - bool caseInsensitive, Compiler *pd ) -{ - /* Copy from a char star type. */ - char *src = data; - for ( int si = 0; si < len; si++ ) { - Key key( src[si] ); - result.insert( key ); - if ( caseInsensitive ) { - if ( key.isLower() ) - result.insert( key.toUpper() ); - else if ( key.isUpper() ) - result.insert( key.toLower() ); - } - } -} - -FsmGraph *dotFsm( Compiler *pd ) -{ - FsmGraph *retFsm = new FsmGraph(); - retFsm->rangeFsm( keyOps->minKey, keyOps->maxKey ); - return retFsm; -} - -FsmGraph *dotStarFsm( Compiler *pd ) -{ - FsmGraph *retFsm = new FsmGraph(); - retFsm->rangeStarFsm( keyOps->minKey, keyOps->maxKey ); - return retFsm; -} - -/* Make a builtin type. Depends on the signed nature of the alphabet type. */ -FsmGraph *makeBuiltin( BuiltinMachine builtin, Compiler *pd ) -{ - /* FsmGraph created to return. */ - FsmGraph *retFsm = 0; - - switch ( builtin ) { - case BT_Any: { - /* All characters. */ - retFsm = dotFsm( pd ); - break; - } - case BT_Ascii: { - /* Ascii characters 0 to 127. */ - retFsm = new FsmGraph(); - retFsm->rangeFsm( 0, 127 ); - break; - } - case BT_Extend: { - /* Ascii extended characters. This is the full byte range. Dependent - * on signed, vs no signed. If the alphabet is one byte then just use - * dot fsm. */ - retFsm = new FsmGraph(); - retFsm->rangeFsm( -128, 127 ); - break; - } - case BT_Alpha: { - /* Alpha [A-Za-z]. */ - FsmGraph *upper = new FsmGraph(), *lower = new FsmGraph(); - upper->rangeFsm( 'A', 'Z' ); - lower->rangeFsm( 'a', 'z' ); - upper->unionOp( lower ); - upper->minimizePartition2(); - retFsm = upper; - break; - } - case BT_Digit: { - /* Digits [0-9]. */ - retFsm = new FsmGraph(); - retFsm->rangeFsm( '0', '9' ); - break; - } - case BT_Alnum: { - /* Alpha numerics [0-9A-Za-z]. */ - FsmGraph *digit = new FsmGraph(), *lower = new FsmGraph(); - FsmGraph *upper = new FsmGraph(); - digit->rangeFsm( '0', '9' ); - upper->rangeFsm( 'A', 'Z' ); - lower->rangeFsm( 'a', 'z' ); - digit->unionOp( upper ); - digit->unionOp( lower ); - digit->minimizePartition2(); - retFsm = digit; - break; - } - case BT_Lower: { - /* Lower case characters. */ - retFsm = new FsmGraph(); - retFsm->rangeFsm( 'a', 'z' ); - break; - } - case BT_Upper: { - /* Upper case characters. */ - retFsm = new FsmGraph(); - retFsm->rangeFsm( 'A', 'Z' ); - break; - } - case BT_Cntrl: { - /* Control characters. */ - FsmGraph *cntrl = new FsmGraph(); - FsmGraph *highChar = new FsmGraph(); - cntrl->rangeFsm( 0, 31 ); - highChar->concatFsm( 127 ); - cntrl->unionOp( highChar ); - cntrl->minimizePartition2(); - retFsm = cntrl; - break; - } - case BT_Graph: { - /* Graphical ascii characters [!-~]. */ - retFsm = new FsmGraph(); - retFsm->rangeFsm( '!', '~' ); - break; - } - case BT_Print: { - /* Printable characters. Same as graph except includes space. */ - retFsm = new FsmGraph(); - retFsm->rangeFsm( ' ', '~' ); - break; - } - case BT_Punct: { - /* Punctuation. */ - FsmGraph *range1 = new FsmGraph(); - FsmGraph *range2 = new FsmGraph(); - FsmGraph *range3 = new FsmGraph(); - FsmGraph *range4 = new FsmGraph(); - range1->rangeFsm( '!', '/' ); - range2->rangeFsm( ':', '@' ); - range3->rangeFsm( '[', '`' ); - range4->rangeFsm( '{', '~' ); - range1->unionOp( range2 ); - range1->unionOp( range3 ); - range1->unionOp( range4 ); - range1->minimizePartition2(); - retFsm = range1; - break; - } - case BT_Space: { - /* Whitespace: [\t\v\f\n\r ]. */ - FsmGraph *cntrl = new FsmGraph(); - FsmGraph *space = new FsmGraph(); - cntrl->rangeFsm( '\t', '\r' ); - space->concatFsm( ' ' ); - cntrl->unionOp( space ); - cntrl->minimizePartition2(); - retFsm = cntrl; - break; - } - case BT_Xdigit: { - /* Hex digits [0-9A-Fa-f]. */ - FsmGraph *digit = new FsmGraph(); - FsmGraph *upper = new FsmGraph(); - FsmGraph *lower = new FsmGraph(); - digit->rangeFsm( '0', '9' ); - upper->rangeFsm( 'A', 'F' ); - lower->rangeFsm( 'a', 'f' ); - digit->unionOp( upper ); - digit->unionOp( lower ); - digit->minimizePartition2(); - retFsm = digit; - break; - } - case BT_Lambda: { - retFsm = new FsmGraph(); - retFsm->lambdaFsm(); - break; - } - case BT_Empty: { - retFsm = new FsmGraph(); - retFsm->emptyFsm(); - break; - }} - - return retFsm; -} - -/* - * Compiler - */ - -/* Initialize the structure that will collect info during the parse of a - * machine. */ -Compiler::Compiler( ) -: - nextPriorKey(0), - nextNameId(0), - alphTypeSet(false), - getKeyExpr(0), - accessExpr(0), - curStateExpr(0), - lowerNum(0), - upperNum(0), - errorCount(0), - curActionOrd(0), - curPriorOrd(0), - nextEpsilonResolvedLink(0), - nextTokenId(1), - rootCodeBlock(0), - mainReturnUT(0), - //access(0), - //tokenStruct(0), - - ptrLangEl(0), - strLangEl(0), - anyLangEl(0), - rootLangEl(0), - noTokenLangEl(0), - eofLangEl(0), - errorLangEl(0), - ignoreLangEl(0), - - firstNonTermId(0), - prodIdIndex(0), - - global(0), - globalSel(0), - globalObjectDef(0), - arg0(0), - argv(0), - - stream(0), - inputSel(0), - streamSel(0), - - uniqueTypeNil(0), - uniqueTypePtr(0), - uniqueTypeBool(0), - uniqueTypeInt(0), - uniqueTypeStr(0), - uniqueTypeIgnore(0), - uniqueTypeAny(0), - uniqueTypeInput(0), - uniqueTypeStream(0), - nextPatConsId(0), - nextGenericId(1), - nextFuncId(0), - nextHostId(0), - nextObjectId(1), /* 0 is reserved for no object. */ - nextFrameId(0), - nextParserId(0), - revertOn(true), - predValue(0), - nextMatchEndNum(0), - argvTypeRef(0), - inContiguous(false), - contiguousOffset(0), - contiguousStretch(0) -{ -} - -/* Clean up the data collected during a parse. */ -Compiler::~Compiler() -{ - /* Delete all the nodes in the action list. Will cause all the - * string data that represents the actions to be deallocated. */ - actionList.empty(); - - for ( CharVectVect::Iter fns = streamFileNames; fns.lte(); fns++ ) { - const char **ptr = *fns; - while ( *ptr != 0 ) { - ::free( (void*)*ptr ); - ptr += 1; - } - free( (void*) *fns ); - } -} - -ostream &operator<<( ostream &out, const Token &token ) -{ - out << token.data; - return out; -} - -/* Write out a name reference. */ -ostream &operator<<( ostream &out, const NameRef &nameRef ) -{ - int pos = 0; - if ( nameRef[pos] == 0 ) { - out << "::"; - pos += 1; - } - out << nameRef[pos++]; - for ( ; pos < nameRef.length(); pos++ ) - out << "::" << nameRef[pos]; - return out; -} - -NameInst **Compiler::makeNameIndex() -{ - /* The number of nodes in the tree can now be given by nextNameId. Put a - * null pointer on the end of the list to terminate it. */ - NameInst **nameIndex = new NameInst*[nextNameId+1]; - memset( nameIndex, 0, sizeof(NameInst*)*(nextNameId+1) ); - - for ( NameInstList::Iter ni = nameInstList; ni.lte(); ni++ ) - nameIndex[ni->id] = ni; - - return nameIndex; -} - -void Compiler::createBuiltin( const char *name, BuiltinMachine builtin ) -{ - LexExpression *expression = LexExpression::cons( builtin ); - LexJoin *join = LexJoin::cons( expression ); - LexDefinition *varDef = new LexDefinition( name, join ); - GraphDictEl *graphDictEl = new GraphDictEl( name, varDef ); - rootNamespace->rlMap.insert( graphDictEl ); -} - -/* Initialize the graph dict with builtin types. */ -void Compiler::initGraphDict( ) -{ - createBuiltin( "any", BT_Any ); - createBuiltin( "ascii", BT_Ascii ); - createBuiltin( "extend", BT_Extend ); - createBuiltin( "alpha", BT_Alpha ); - createBuiltin( "digit", BT_Digit ); - createBuiltin( "alnum", BT_Alnum ); - createBuiltin( "lower", BT_Lower ); - createBuiltin( "upper", BT_Upper ); - createBuiltin( "cntrl", BT_Cntrl ); - createBuiltin( "graph", BT_Graph ); - createBuiltin( "print", BT_Print ); - createBuiltin( "punct", BT_Punct ); - createBuiltin( "space", BT_Space ); - createBuiltin( "xdigit", BT_Xdigit ); - createBuiltin( "null", BT_Lambda ); - createBuiltin( "zlen", BT_Lambda ); - createBuiltin( "empty", BT_Empty ); -} - -/* Initialize the key operators object that will be referenced by all fsms - * created. */ -void Compiler::initKeyOps( ) -{ - /* Signedness and bounds. */ - HostType *alphType = alphTypeSet ? userAlphType : hostLang->defaultAlphType; - thisKeyOps.setAlphType( alphType ); - - if ( lowerNum != 0 ) { - /* If ranges are given then interpret the alphabet type. */ - thisKeyOps.minKey = makeFsmKeyNum( lowerNum, rangeLowLoc, this ); - thisKeyOps.maxKey = makeFsmKeyNum( upperNum, rangeHighLoc, this ); - } -} - -/* Remove duplicates of unique actions from an action table. */ -void Compiler::removeDups( ActionTable &table ) -{ - /* Scan through the table looking for unique actions to - * remove duplicates of. */ - for ( int i = 0; i < table.length(); i++ ) { - /* Remove any duplicates ahead of i. */ - for ( int r = i+1; r < table.length(); ) { - if ( table[r].value == table[i].value ) - table.vremove(r); - else - r += 1; - } - } -} - -/* Remove duplicates from action lists. This operates only on transition and - * eof action lists and so should be called once all actions have been - * transfered to their final resting place. */ -void Compiler::removeActionDups( FsmGraph *graph ) -{ - /* Loop all states. */ - for ( StateList::Iter state = graph->stateList; state.lte(); state++ ) { - /* Loop all transitions. */ - for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) - removeDups( trans->actionTable ); - removeDups( state->toStateActionTable ); - removeDups( state->fromStateActionTable ); - removeDups( state->eofActionTable ); - } -} - -Action *Compiler::newAction( const String &name, InlineList *inlineList ) -{ - InputLoc loc; - loc.line = 1; - loc.col = 1; - loc.fileName = 0; - - Action *action = Action::cons( loc, name, inlineList ); - actionList.append( action ); - return action; -} - -void Compiler::initLongestMatchData() -{ - if ( regionSetList.length() > 0 ) { - /* The initActId action gives act a default value. */ - InlineList *il4 = InlineList::cons(); - il4->append( InlineItem::cons( InputLoc(), InlineItem::LmInitAct ) ); - initActId = newAction( "initact", il4 ); - initActId->isLmAction = true; - - /* The setTokStart action sets tokstart. */ - InlineList *il5 = InlineList::cons(); - il5->append( InlineItem::cons( InputLoc(), InlineItem::LmSetTokStart ) ); - setTokStart = newAction( "tokstart", il5 ); - setTokStart->isLmAction = true; - - /* The setTokEnd action sets tokend. */ - InlineList *il3 = InlineList::cons(); - il3->append( InlineItem::cons( InputLoc(), InlineItem::LmSetTokEnd ) ); - setTokEnd = newAction( "tokend", il3 ); - setTokEnd->isLmAction = true; - - /* The action will also need an ordering: ahead of all user action - * embeddings. */ - initActIdOrd = curActionOrd++; - setTokStartOrd = curActionOrd++; - setTokEndOrd = curActionOrd++; - } -} - -void Compiler::finishGraphBuild( FsmGraph *graph ) -{ - /* Resolve any labels that point to multiple states. Any labels that are - * still around are referenced only by gotos and calls and they need to be - * made into deterministic entry points. */ - graph->deterministicEntry(); - - /* - * All state construction is now complete. - */ - - /* Transfer global error actions. */ - for ( StateList::Iter state = graph->stateList; state.lte(); state++ ) - graph->transferErrorActions( state, 0 ); - - removeActionDups( graph ); - - /* Remove unreachable states. There should be no dead end states. The - * subtract and intersection operators are the only places where they may - * be created and those operators clean them up. */ - graph->removeUnreachableStates(); - - /* No more fsm operations are to be done. Action ordering numbers are - * no longer of use and will just hinder minimization. Clear them. */ - graph->nullActionKeys(); - - /* Transition priorities are no longer of use. We can clear them - * because they will just hinder minimization as well. Clear them. */ - graph->clearAllPriorities(); - - /* Minimize here even if we minimized at every op. Now that function - * keys have been cleared we may get a more minimal fsm. */ - graph->minimizePartition2(); - graph->compressTransitions(); -} - -/* Build the name tree and supporting data structures. */ -NameInst *Compiler::makeNameTree() -{ - /* Create the root name. */ - nextNameId = 1; - - /* First make the name tree. */ - for ( RegionImplList::Iter rel = regionImplList; rel.lte(); rel++ ) { - /* Recurse on the instance. */ - rel->makeNameTree( rel->loc, this ); - } - - return 0; -} - -FsmGraph *Compiler::makeAllRegions() -{ - /* Build the name tree and supporting data structures. */ - makeNameTree(); - NameInst **nameIndex = makeNameIndex(); - - int numGraphs = 0; - FsmGraph **graphs = new FsmGraph*[regionImplList.length()]; - - /* Make all the instantiations, we know that main exists in this list. */ - for ( RegionImplList::Iter rel = regionImplList; rel.lte(); rel++ ) { - /* Build the graph from a walk of the parse tree. */ - FsmGraph *newGraph = rel->walk( this ); - - /* Wrap up the construction. */ - finishGraphBuild( newGraph ); - - /* Save off the new graph. */ - graphs[numGraphs++] = newGraph; - } - - /* NOTE: If putting in minimization here we need to include eofTarget - * into the minimization algorithm. It is currently set by the longest - * match operator and not considered anywhere else. */ - - FsmGraph *all; - if ( numGraphs == 0 ) { - all = new FsmGraph; - all->lambdaFsm(); - } - else { - /* Add all the other graphs into the first. */ - all = graphs[0]; - all->globOp( graphs+1, numGraphs-1 ); - delete[] graphs; - } - - /* Go through all the token regions and check for lmRequiresErrorState. */ - for ( RegionImplList::Iter reg = regionImplList; reg.lte(); reg++ ) { - if ( reg->lmSwitchHandlesError ) - all->lmRequiresErrorState = true; - } - - all->nameIndex = nameIndex; - - return all; -} - -void Compiler::analyzeAction( Action *action, InlineList *inlineList ) -{ - /* FIXME: Actions used as conditions should be very constrained. */ - for ( InlineList::Iter item = *inlineList; item.lte(); item++ ) { - //if ( item->type == InlineItem::Call || item->type == InlineItem::CallExpr ) - // action->anyCall = true; - - /* Need to recurse into longest match items. */ - if ( item->type == InlineItem::LmSwitch ) { - RegionImpl *lm = item->tokenRegion; - for ( TokenInstanceListReg::Iter lmi = lm->tokenInstanceList; lmi.lte(); lmi++ ) { - if ( lmi->action != 0 ) - analyzeAction( action, lmi->action->inlineList ); - } - } - - if ( item->type == InlineItem::LmOnLast || - item->type == InlineItem::LmOnNext || - item->type == InlineItem::LmOnLagBehind ) - { - TokenInstance *lmi = item->longestMatchPart; - if ( lmi->action != 0 ) - analyzeAction( action, lmi->action->inlineList ); - } - - if ( item->children != 0 ) - analyzeAction( action, item->children ); - } -} - -void Compiler::analyzeGraph( FsmGraph *graph ) -{ - for ( ActionList::Iter act = actionList; act.lte(); act++ ) - analyzeAction( act, act->inlineList ); - - for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) { - /* The transition list. */ - for ( TransList::Iter trans = st->outList; trans.lte(); trans++ ) { - for ( ActionTable::Iter at = trans->actionTable; at.lte(); at++ ) - at->value->numTransRefs += 1; - } - - for ( ActionTable::Iter at = st->toStateActionTable; at.lte(); at++ ) - at->value->numToStateRefs += 1; - - for ( ActionTable::Iter at = st->fromStateActionTable; at.lte(); at++ ) - at->value->numFromStateRefs += 1; - - for ( ActionTable::Iter at = st->eofActionTable; at.lte(); at++ ) - at->value->numEofRefs += 1; - } -} - -FsmGraph *Compiler::makeScanner() -{ - /* Make the graph, do minimization. */ - FsmGraph *fsmGraph = makeAllRegions(); - - /* If any errors have occured in the input file then don't write anything. */ - if ( gblErrorCount > 0 ) - return 0; - - analyzeGraph( fsmGraph ); - - /* Decide if an error state is necessary. - * 1. There is an error transition - * 2. There is a gap in the transitions - * 3. The longest match operator requires it. */ - if ( fsmGraph->lmRequiresErrorState || fsmGraph->hasErrorTrans() ) - fsmGraph->errState = fsmGraph->addState(); - - /* State numbers need to be assigned such that all final states have a - * larger state id number than all non-final states. This enables the - * first_final mechanism to function correctly. We also want states to be - * ordered in a predictable fashion. So we first apply a depth-first - * search, then do a stable sort by final state status, then assign - * numbers. */ - - fsmGraph->depthFirstOrdering(); - fsmGraph->sortStatesByFinal(); - fsmGraph->setStateNumbers( 0 ); - - return fsmGraph; -} - -LangEl *Compiler::makeRepeatProd( const InputLoc &loc, Namespace *nspace, - const String &repeatName, UniqueType *ut ) -{ - LangEl *prodName = addLangEl( this, nspace, repeatName, LangEl::NonTerm ); - prodName->isRepeat = true; - - ProdElList *prodElList1 = new ProdElList; - - /* Build the first production of the repeat. */ - TypeRef *typeRef1 = TypeRef::cons( loc, ut ); - ProdEl *factor1 = new ProdEl( ProdEl::ReferenceType, - InputLoc(), 0, false, typeRef1, 0 ); - - UniqueType *prodNameUT = findUniqueType( TYPE_TREE, prodName ); - TypeRef *typeRef2 = TypeRef::cons( loc, prodNameUT ); - ProdEl *factor2 = new ProdEl( ProdEl::ReferenceType, - InputLoc(), 0, false, typeRef2, 0 ); - - prodElList1->append( factor1 ); - prodElList1->append( factor2 ); - - Production *newDef1 = Production::cons( InputLoc(), - prodName, prodElList1, String(), false, 0, - prodList.length(), prodName->defList.length() ); - - prodName->defList.append( newDef1 ); - prodList.append( newDef1 ); - - /* Build the second production of the repeat. */ - ProdElList *prodElList2 = new ProdElList; - - Production *newDef2 = Production::cons( InputLoc(), - prodName, prodElList2, String(), false, 0, - prodList.length(), prodName->defList.length() ); - - prodName->defList.append( newDef2 ); - prodList.append( newDef2 ); - - return prodName; -} - -LangEl *Compiler::makeListProd( const InputLoc &loc, Namespace *nspace, - const String &listName, UniqueType *ut ) -{ - LangEl *prodName = addLangEl( this, nspace, listName, LangEl::NonTerm ); - prodName->isList = true; - - /* Build the first production of the list. */ - TypeRef *typeRef1 = TypeRef::cons( loc, ut ); - ProdEl *factor1 = new ProdEl( ProdEl::ReferenceType, InputLoc(), 0, false, typeRef1, 0 ); - - UniqueType *prodNameUT = findUniqueType( TYPE_TREE, prodName ); - TypeRef *typeRef2 = TypeRef::cons( loc, prodNameUT ); - ProdEl *factor2 = new ProdEl( ProdEl::ReferenceType, loc, 0, false, typeRef2, 0 ); - - ProdElList *prodElList1 = new ProdElList; - prodElList1->append( factor1 ); - prodElList1->append( factor2 ); - - Production *newDef1 = Production::cons( loc, - prodName, prodElList1, String(), false, 0, - prodList.length(), prodName->defList.length() ); - - prodName->defList.append( newDef1 ); - prodList.append( newDef1 ); - - /* Build the second production of the list. */ - TypeRef *typeRef3 = TypeRef::cons( loc, ut ); - ProdEl *factor3 = new ProdEl( ProdEl::ReferenceType, loc, 0, false, typeRef3, 0 ); - - ProdElList *prodElList2 = new ProdElList; - prodElList2->append( factor3 ); - - Production *newDef2 = Production::cons( loc, - prodName, prodElList2, String(), false, 0, - prodList.length(), prodName->defList.length() ); - - prodName->defList.append( newDef2 ); - prodList.append( newDef2 ); - - return prodName; -} - -LangEl *Compiler::makeOptProd( const InputLoc &loc, Namespace *nspace, - const String &optName, UniqueType *ut ) -{ - LangEl *prodName = addLangEl( this, nspace, optName, LangEl::NonTerm ); - prodName->isOpt = true; - - ProdElList *prodElList1 = new ProdElList; - - /* Build the first production of the repeat. */ - TypeRef *typeRef1 = TypeRef::cons( loc, ut ); - ProdEl *factor1 = new ProdEl( ProdEl::ReferenceType, loc, 0, false, typeRef1, 0 ); - prodElList1->append( factor1 ); - - Production *newDef1 = Production::cons( loc, - prodName, prodElList1, String(), false, 0, - prodList.length(), prodName->defList.length() ); - - prodName->defList.append( newDef1 ); - prodList.append( newDef1 ); - - /* Build the second production of the repeat. */ - ProdElList *prodElList2 = new ProdElList; - - Production *newDef2 = Production::cons( loc, - prodName, prodElList2, String(), false, 0, - prodList.length(), prodName->defList.length() ); - - prodName->defList.append( newDef2 ); - prodList.append( newDef2 ); - - return prodName; -} - -Namespace *Namespace::findNamespace( const String &name ) -{ - for ( NamespaceVect::Iter c = childNamespaces; c.lte(); c++ ) { - if ( strcmp( name, (*c)->name ) == 0 ) - return *c; - } - return 0; -} - -Reduction *Namespace::findReduction( const String &name ) -{ - for ( ReductionVect::Iter r = reductions; r.lte(); r++ ) { - if ( strcmp( name, (*r)->name ) == 0 ) - return *r; - } - return 0; -} - -/* Search from a previously resolved qualification. (name 1+ in a qual list). */ -Namespace *NamespaceQual::searchFrom( Namespace *from, StringVect::Iter &qualPart ) -{ - /* While there are still parts in the qualification. */ - while ( qualPart.lte() ) { - Namespace *child = from->findNamespace( *qualPart ); - if ( child == 0 ) - return 0; - - from = child; - qualPart.increment(); - } - - return from; -} - -Namespace *NamespaceQual::getQual( Compiler *pd ) -{ - /* Do the search only once. */ - if ( cachedNspaceQual != 0 ) - return cachedNspaceQual; - - if ( qualNames.length() == 0 ) { - /* No qualification, use the region the qualification was - * declared in. */ - cachedNspaceQual = declInNspace; - } - else if ( strcmp( qualNames[0], "root" ) == 0 ) { - /* First item is "root." Start the downward search from there. */ - StringVect::Iter qualPart = qualNames; - qualPart.increment(); - cachedNspaceQual = searchFrom( pd->rootNamespace, qualPart ); - return cachedNspaceQual; - } - else { - /* Have a qualification. Move upwards through the declared - * regions looking for the first part. */ - StringVect::Iter qualPart = qualNames; - Namespace *parentNamespace = declInNspace; - while ( parentNamespace != 0 ) { - /* Search for the first part underneath the current parent. */ - Namespace *child = parentNamespace->findNamespace( *qualPart ); - - if ( child != 0 ) { - /* Found the first part. Start going below the result. */ - qualPart.increment(); - cachedNspaceQual = searchFrom( child, qualPart ); - return cachedNspaceQual; - } - - /* Not found, move up to the parent. */ - parentNamespace = parentNamespace->parentNamespace; - } - - /* Failed to find the place to start from. */ - cachedNspaceQual = 0; - } - - return cachedNspaceQual; -} - -void Compiler::initEmptyScanner( RegionSet *regionSet, TokenRegion *reg ) -{ - if ( reg != 0 && reg->impl->tokenInstanceList.length() == 0 ) { - reg->impl->wasEmpty = true; - - static int def = 1; - String name( 64, "__%p_DEF_PAT_%d", reg, def++ ); - - LexJoin *join = LexJoin::cons( LexExpression::cons( BT_Any ) ); - - TokenDef *tokenDef = TokenDef::cons( name, String(), false, false, - join, 0, internal, nextTokenId++, rootNamespace, - regionSet, 0, 0 ); - - TokenInstance *tokenInstance = TokenInstance::cons( tokenDef, - join, internal, nextTokenId++, - rootNamespace, reg ); - - reg->impl->tokenInstanceList.append( tokenInstance ); - - /* These do not go in the namespace so so they cannot get declared - * in the declare pass. */ - LangEl *lel = addLangEl( this, rootNamespace, name, LangEl::Term ); - - tokenInstance->tokenDef->tdLangEl = lel; - lel->tokenDef = tokenDef; - } -} - -void Compiler::initEmptyScanners() -{ - for ( RegionSetList::Iter regionSet = regionSetList; regionSet.lte(); regionSet++ ) { - initEmptyScanner( regionSet, regionSet->tokenIgnore ); - initEmptyScanner( regionSet, regionSet->tokenOnly ); - initEmptyScanner( regionSet, regionSet->ignoreOnly ); - initEmptyScanner( regionSet, regionSet->collectIgnore ); - } -} - -pda_run *Compiler::parsePattern( program_t *prg, tree_t **sp, const InputLoc &loc, - int parserId, struct input_impl *sourceStream ) -{ - struct pda_run *pdaRun = new pda_run; - colm_pda_init( prg, pdaRun, pdaTables, parserId, 0, false, 0, false ); - - long pcr = colm_parse_loop( prg, sp, pdaRun, sourceStream, PCR_START ); - assert( pcr == PCR_DONE ); - if ( pdaRun->parse_error ) { - cerr << ( loc.fileName != 0 ? loc.fileName : "<input>" ) << - ":" << loc.line << ":" << loc.col; - - if ( pdaRun->parse_error_text != 0 ) { - colm_data *tokdata = pdaRun->parse_error_text->tokdata; - cerr << ": relative error: "; - cerr.write( tokdata->data, tokdata->length ); - } - else { - cerr << ": parse error"; - } - - cerr << endl; - gblErrorCount += 1; - } - - return pdaRun; -} - -void Compiler::parsePatterns() -{ - program_t *prg = colm_new_program( runtimeData ); - - colm_set_debug( prg, gblActiveRealm ); - - /* Turn off context-dependent parsing. */ - prg->ctx_dep_parsing = 0; - - tree_t **sp = prg->stack_root; - - for ( ConsList::Iter cons = replList; cons.lte(); cons++ ) { - if ( cons->langEl != 0 ) { - struct input_impl *in = colm_impl_new_cons( strdup("<internal>"), cons ); - cons->pdaRun = parsePattern( prg, sp, cons->loc, cons->langEl->parserId, in ); - } - } - - for ( PatList::Iter pat = patternList; pat.lte(); pat++ ) { - struct input_impl *in = colm_impl_new_pat( strdup("<internal>"), pat ); - pat->pdaRun = parsePattern( prg, sp, pat->loc, pat->langEl->parserId, in ); - } - - /* Bail on above errors. */ - if ( gblErrorCount > 0 ) - exit(1); - - fillInPatterns( prg ); -} - -void Compiler::collectParserEls( BstSet<LangEl*> &parserEls ) -{ - for ( PatList::Iter pat = patternList; pat.lte(); pat++ ) { - /* We assume the reduction action compilation phase was run before - * pattern parsing and it decorated the pattern with the target type. */ - assert( pat->langEl != 0 ); - if ( pat->langEl->type != LangEl::NonTerm ) - error(pat->loc) << "pattern type is not a non-terminal" << endp; - - if ( pat->langEl->parserId < 0 ) { - /* Make a parser for the language element. */ - parserEls.insert( pat->langEl ); - pat->langEl->parserId = nextParserId++; - } - } - - for ( ConsList::Iter repl = replList; repl.lte(); repl++ ) { - /* We need the the language element from the compilation process. */ - assert( repl->langEl != 0 ); - - if ( repl->langEl->parserId < 0 ) { - /* Make a parser for the language element. */ - parserEls.insert( repl->langEl ); - repl->langEl->parserId = nextParserId++; - } - } - - /* Make parsers that we need. */ - for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) { - if ( lel->parserId >= 0 ) - parserEls.insert( lel ); - } -} - -void Compiler::writeHostCall() -{ - /* - * Host Call - */ - for ( FunctionList::Iter hc = inHostList; hc.lte(); hc++ ) { - *outStream << - "value_t " << hc->hostCall << "( program_t *prg, tree_t **sp"; - for ( ParameterList::Iter p = *hc->paramList; p.lte(); p++ ) { - *outStream << - ", value_t"; - } - *outStream << " );\n"; - } - - *outStream << - "tree_t **" << objectName << "_host_call( program_t *prg, long code, tree_t **sp )\n" - "{\n" - " value_t rtn = 0;\n" - " switch ( code ) {\n"; - - for ( FunctionList::Iter hc = inHostList; hc.lte(); hc++ ) { - *outStream << - " case " << hc->funcId << ": {\n"; - - int pos = hc->paramList->length() - 1; - for ( ParameterList::Iter p = *hc->paramList; p.lte(); p++, pos-- ) { - *outStream << - " value_t p" << pos << " = vm_pop_value();\n"; - } - - *outStream << - " rtn = " << hc->hostCall << "( prg, sp"; - - pos = 0; - for ( ParameterList::Iter p = *hc->paramList; p.lte(); p++, pos++ ) { - *outStream << - ", p" << pos; - } - *outStream << " );\n" - " break;\n" - " }\n"; - } - - *outStream << - " }\n" - " vm_push_value( rtn );\n" - " return sp;\n" - "}\n"; - -} - -void Compiler::generateOutput( long activeRealm, bool includeCommit ) -{ - FsmCodeGen *fsmGen = new FsmCodeGen( *outStream, redFsm, fsmTables ); - - PdaCodeGen *pdaGen = new PdaCodeGen( *outStream ); - - fsmGen->writeIncludes(); - pdaGen->defineRuntime(); - fsmGen->writeCode(); - - /* Make parsers that we need. */ - pdaGen->writeParserData( 0, pdaTables ); - - /* Write the runtime data. */ - pdaGen->writeRuntimeData( runtimeData, pdaTables ); - - writeHostCall(); - - if ( includeCommit ) - writeCommitStub(); - - if ( !gblLibrary ) - fsmGen->writeMain( activeRealm ); - - outStream->flush(); -} - - -void Compiler::prepGrammar() -{ - /* This will create language elements. */ - wrapNonTerminals(); - - makeLangElIds(); - makeStructElIds(); - makeLangElNames(); - makeDefinitionNames(); - noUndefindLangEls(); - - /* Put the language elements in an index by language element id. */ - langElIndex = new LangEl*[nextLelId+1]; - memset( langElIndex, 0, sizeof(LangEl*)*(nextLelId+1) ); - for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) - langElIndex[lel->id] = lel; - - makeProdFsms(); - - /* Allocate the Runtime data now. Every PdaTable that we make - * will reference it, but it will be filled in after all the tables are - * built. */ - runtimeData = new colm_sections; -} - -void Compiler::compile() -{ - beginProcessing(); - initKeyOps(); - - /* Declare types. */ - declarePass(); - - /* Resolve type references. */ - resolvePass(); - - makeTerminalWrappers(); - makeEofElements(); - - /* - * Parsers - */ - - /* Init the longest match data */ - initLongestMatchData(); - FsmGraph *fsmGraph = makeScanner(); - - prepGrammar(); - - placeAllLanguageObjects(); - placeAllStructObjects(); - placeAllFrameObjects(); - placeAllFunctions(); - - /* Compile bytecode. */ - compileByteCode(); - - /* Make the reduced scanner. */ - RedFsmBuild reduce( this, fsmGraph ); - redFsm = reduce.reduceMachine(); - - BstSet<LangEl*> parserEls; - collectParserEls( parserEls ); - - makeParser( parserEls ); - - /* Make the scanner tables. */ - fsmTables = redFsm->makeFsmTables(); - - /* Now that all parsers are built, make the global runtimeData. */ - makeRuntimeData(); - - /* - * All compilation is now complete. - */ - - /* Parse constructors and patterns. */ - parsePatterns(); -} - |