moved the original colm src dir to /colm

author: Adrian Thurston <thurston@colm.net> 2019-09-08 21:11:17 -0600
committer: Adrian Thurston <thurston@colm.net> 2019-09-08 21:11:17 -0600
commit: c860c61607117582abd8f23881eed87957197484 (patch)
tree: 4d4e65dddc710e15f008189a9308d95924350c3f /src/compiler.cc
parent: f37c916aed2600951b8966a86020406b0b0542cf (diff)
download: colm-c860c61607117582abd8f23881eed87957197484.tar.gz
1 files changed, 0 insertions, 1247 deletions
diff --git a/src/compiler.cc b/src/compiler.cc
deleted file mode 100644
index 72cf99fa..00000000
--- a/src/compiler.cc
+++ /dev/null
@@ -1,1247 +0,0 @@
-/*
- * Copyright 2006-2018 Adrian Thurston <thurston@colm.net>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "compiler.h"
-
-#include <errno.h>
-#include <stdlib.h>
-#include <string.h>
-#include <stdbool.h>
-#include <unistd.h>
-#include <assert.h>
-#include <iostream>
-
-#include "redbuild.h"
-#include "pdacodegen.h"
-#include "fsmcodegen.h"
-#include "colm.h"
-
-using std::ostringstream;
-using std::cout;
-using std::cerr;
-using std::endl;
-
-char machineMain[] = "main";
-exit_object endp;
-void operator<<( ostream &out, exit_object & )
-{
-	out << endl;
-	exit(1);
-}
-
-/* Perform minimization after an operation according 
- * to the command line args. */
-void afterOpMinimize( FsmGraph *fsm, bool lastInSeq )
-{
-	/* Switch on the prefered minimization algorithm. */
-	if ( lastInSeq ) {
-		/* First clean up the graph. FsmGraph operations may leave these
-		 * lying around. There should be no dead end states. The subtract
-		 * intersection operators are the only places where they may be
-		 * created and those operators clean them up. */
-		fsm->removeUnreachableStates();
-		fsm->minimizePartition2();
-	}
-}
-
-/* Count the transitions in the fsm by walking the state list. */
-int countTransitions( FsmGraph *fsm )
-{
-	int numTrans = 0;
-	FsmState *state = fsm->stateList.head;
-	while ( state != 0 ) {
-		numTrans += state->outList.length();
-		state = state->next;
-	}
-	return numTrans;
-}
-
-Key makeFsmKeyHex( char *str, const InputLoc &loc, Compiler *pd )
-{
-	/* Reset errno so we can check for overflow or underflow. In the event of
-	 * an error, sets the return val to the upper or lower bound being tested
-	 * against. */
-	errno = 0;
-	unsigned int size = keyOps->alphType->size;
-	bool unusedBits = size < sizeof(unsigned long);
-
-	unsigned long ul = strtoul( str, 0, 16 );
-
-	if ( errno == ERANGE || (unusedBits && ul >> (size * 8)) ) {
-		error(loc) << "literal " << str << " overflows the alphabet type" << endl;
-		ul = 1 << (size * 8);
-	}
-
-	if ( unusedBits && ul >> (size * 8 - 1) )
-		ul |= (ULONG_MAX >> (size*8 ) ) << (size*8);
-
-	return Key( (long)ul );
-}
-
-Key makeFsmKeyDec( char *str, const InputLoc &loc, Compiler *pd )
-{
-	/* Convert the number to a decimal. First reset errno so we can check
-	 * for overflow or underflow. */
-	errno = 0;
-	long long minVal = keyOps->alphType->minVal;
-	long long maxVal = keyOps->alphType->maxVal;
-
-	long long ll = strtoll( str, 0, 10 );
-
-	/* Check for underflow. */
-	if ( (errno == ERANGE && ll < 0) || ll < minVal) {
-		error(loc) << "literal " << str << " underflows the alphabet type" << endl;
-		ll = minVal;
-	}
-	/* Check for overflow. */
-	else if ( (errno == ERANGE && ll > 0) || ll > maxVal ) {
-		error(loc) << "literal " << str << " overflows the alphabet type" << endl;
-		ll = maxVal;
-	}
-
-	return Key( (long)ll );
-}
-
-/* Make an fsm key in int format (what the fsm graph uses) from an alphabet
- * number returned by the parser. Validates that the number doesn't overflow
- * the alphabet type. */
-Key makeFsmKeyNum( char *str, const InputLoc &loc, Compiler *pd )
-{
-	/* Switch on hex/decimal format. */
-	if ( str[0] == '0' && str[1] == 'x' )
-		return makeFsmKeyHex( str, loc, pd );
-	else
-		return makeFsmKeyDec( str, loc, pd );
-}
-
-/* Make an fsm int format (what the fsm graph uses) from a single character.
- * Performs proper conversion depending on signed/unsigned property of the
- * alphabet. */
-Key makeFsmKeyChar( char c, Compiler *pd )
-{
-	/* Copy from a char type. */
-	return Key( c );
-}
-
-/* Make an fsm key array in int format (what the fsm graph uses) from a string
- * of characters. Performs proper conversion depending on signed/unsigned
- * property of the alphabet. */
-void makeFsmKeyArray( Key *result, char *data, int len, Compiler *pd )
-{
-	/* Copy from a char star type. */
-	char *src = data;
-	for ( int i = 0; i < len; i++ )
-		result[i] = Key(src[i]);
-}
-
-/* Like makeFsmKeyArray except the result has only unique keys. They ordering
- * will be changed. */
-void makeFsmUniqueKeyArray( KeySet &result, char *data, int len, 
-		bool caseInsensitive, Compiler *pd )
-{
-	/* Copy from a char star type. */
-	char *src = data;
-	for ( int si = 0; si < len; si++ ) {
-		Key key( src[si] );
-		result.insert( key );
-		if ( caseInsensitive ) {
-			if ( key.isLower() )
-				result.insert( key.toUpper() );
-			else if ( key.isUpper() )
-				result.insert( key.toLower() );
-		}
-	}
-}
-
-FsmGraph *dotFsm( Compiler *pd )
-{
-	FsmGraph *retFsm = new FsmGraph();
-	retFsm->rangeFsm( keyOps->minKey, keyOps->maxKey );
-	return retFsm;
-}
-
-FsmGraph *dotStarFsm( Compiler *pd )
-{
-	FsmGraph *retFsm = new FsmGraph();
-	retFsm->rangeStarFsm( keyOps->minKey, keyOps->maxKey );
-	return retFsm;
-}
-
-/* Make a builtin type. Depends on the signed nature of the alphabet type. */
-FsmGraph *makeBuiltin( BuiltinMachine builtin, Compiler *pd )
-{
-	/* FsmGraph created to return. */
-	FsmGraph *retFsm = 0;
-
-	switch ( builtin ) {
-	case BT_Any: {
-		/* All characters. */
-		retFsm = dotFsm( pd );
-		break;
-	}
-	case BT_Ascii: {
-		/* Ascii characters 0 to 127. */
-		retFsm = new FsmGraph();
-		retFsm->rangeFsm( 0, 127 );
-		break;
-	}
-	case BT_Extend: {
-		/* Ascii extended characters. This is the full byte range. Dependent
-		 * on signed, vs no signed. If the alphabet is one byte then just use
-		 * dot fsm. */
-		retFsm = new FsmGraph();
-		retFsm->rangeFsm( -128, 127 );
-		break;
-	}
-	case BT_Alpha: {
-		/* Alpha [A-Za-z]. */
-		FsmGraph *upper = new FsmGraph(), *lower = new FsmGraph();
-		upper->rangeFsm( 'A', 'Z' );
-		lower->rangeFsm( 'a', 'z' );
-		upper->unionOp( lower );
-		upper->minimizePartition2();
-		retFsm = upper;
-		break;
-	}
-	case BT_Digit: {
-		/* Digits [0-9]. */
-		retFsm = new FsmGraph();
-		retFsm->rangeFsm( '0', '9' );
-		break;
-	}
-	case BT_Alnum: {
-		/* Alpha numerics [0-9A-Za-z]. */
-		FsmGraph *digit = new FsmGraph(), *lower = new FsmGraph();
-		FsmGraph *upper = new FsmGraph();
-		digit->rangeFsm( '0', '9' );
-		upper->rangeFsm( 'A', 'Z' );
-		lower->rangeFsm( 'a', 'z' );
-		digit->unionOp( upper );
-		digit->unionOp( lower );
-		digit->minimizePartition2();
-		retFsm = digit;
-		break;
-	}
-	case BT_Lower: {
-		/* Lower case characters. */
-		retFsm = new FsmGraph();
-		retFsm->rangeFsm( 'a', 'z' );
-		break;
-	}
-	case BT_Upper: {
-		/* Upper case characters. */
-		retFsm = new FsmGraph();
-		retFsm->rangeFsm( 'A', 'Z' );
-		break;
-	}
-	case BT_Cntrl: {
-		/* Control characters. */
-		FsmGraph *cntrl = new FsmGraph();
-		FsmGraph *highChar = new FsmGraph();
-		cntrl->rangeFsm( 0, 31 );
-		highChar->concatFsm( 127 );
-		cntrl->unionOp( highChar );
-		cntrl->minimizePartition2();
-		retFsm = cntrl;
-		break;
-	}
-	case BT_Graph: {
-		/* Graphical ascii characters [!-~]. */
-		retFsm = new FsmGraph();
-		retFsm->rangeFsm( '!', '~' );
-		break;
-	}
-	case BT_Print: {
-		/* Printable characters. Same as graph except includes space. */
-		retFsm = new FsmGraph();
-		retFsm->rangeFsm( ' ', '~' );
-		break;
-	}
-	case BT_Punct: {
-		/* Punctuation. */
-		FsmGraph *range1 = new FsmGraph();
-		FsmGraph *range2 = new FsmGraph();
-		FsmGraph *range3 = new FsmGraph(); 
-		FsmGraph *range4 = new FsmGraph();
-		range1->rangeFsm( '!', '/' );
-		range2->rangeFsm( ':', '@' );
-		range3->rangeFsm( '[', '`' );
-		range4->rangeFsm( '{', '~' );
-		range1->unionOp( range2 );
-		range1->unionOp( range3 );
-		range1->unionOp( range4 );
-		range1->minimizePartition2();
-		retFsm = range1;
-		break;
-	}
-	case BT_Space: {
-		/* Whitespace: [\t\v\f\n\r ]. */
-		FsmGraph *cntrl = new FsmGraph();
-		FsmGraph *space = new FsmGraph();
-		cntrl->rangeFsm( '\t', '\r' );
-		space->concatFsm( ' ' );
-		cntrl->unionOp( space );
-		cntrl->minimizePartition2();
-		retFsm = cntrl;
-		break;
-	}
-	case BT_Xdigit: {
-		/* Hex digits [0-9A-Fa-f]. */
-		FsmGraph *digit = new FsmGraph();
-		FsmGraph *upper = new FsmGraph();
-		FsmGraph *lower = new FsmGraph();
-		digit->rangeFsm( '0', '9' );
-		upper->rangeFsm( 'A', 'F' );
-		lower->rangeFsm( 'a', 'f' );
-		digit->unionOp( upper );
-		digit->unionOp( lower );
-		digit->minimizePartition2();
-		retFsm = digit;
-		break;
-	}
-	case BT_Lambda: {
-		retFsm = new FsmGraph();
-		retFsm->lambdaFsm();
-		break;
-	}
-	case BT_Empty: {
-		retFsm = new FsmGraph();
-		retFsm->emptyFsm();
-		break;
-	}}
-
-	return retFsm;
-}
-
-/*
- * Compiler
- */
-
-/* Initialize the structure that will collect info during the parse of a
- * machine. */
-Compiler::Compiler( )
-:	
-	nextPriorKey(0),
-	nextNameId(0),
-	alphTypeSet(false),
-	getKeyExpr(0),
-	accessExpr(0),
-	curStateExpr(0),
-	lowerNum(0),
-	upperNum(0),
-	errorCount(0),
-	curActionOrd(0),
-	curPriorOrd(0),
-	nextEpsilonResolvedLink(0),
-	nextTokenId(1),
-	rootCodeBlock(0),
-	mainReturnUT(0),
-	//access(0),
-	//tokenStruct(0),
-
-	ptrLangEl(0),
-	strLangEl(0),
-	anyLangEl(0),
-	rootLangEl(0),
-	noTokenLangEl(0),
-	eofLangEl(0),
-	errorLangEl(0),
-	ignoreLangEl(0),
-
-	firstNonTermId(0),
-	prodIdIndex(0),
-
-	global(0),
-	globalSel(0),
-	globalObjectDef(0),
-	arg0(0),
-	argv(0),
-
-	stream(0),
-	inputSel(0),
-	streamSel(0),
-
-	uniqueTypeNil(0),
-	uniqueTypePtr(0),
-	uniqueTypeBool(0),
-	uniqueTypeInt(0),
-	uniqueTypeStr(0),
-	uniqueTypeIgnore(0),
-	uniqueTypeAny(0),
-	uniqueTypeInput(0),
-	uniqueTypeStream(0),
-	nextPatConsId(0),
-	nextGenericId(1),
-	nextFuncId(0),
-	nextHostId(0),
-	nextObjectId(1),     /* 0 is  reserved for no object. */
-	nextFrameId(0),
-	nextParserId(0),
-	revertOn(true),
-	predValue(0),
-	nextMatchEndNum(0),
-	argvTypeRef(0),
-	inContiguous(false),
-	contiguousOffset(0),
-	contiguousStretch(0)
-{
-}
-
-/* Clean up the data collected during a parse. */
-Compiler::~Compiler()
-{
-	/* Delete all the nodes in the action list. Will cause all the
-	 * string data that represents the actions to be deallocated. */
-	actionList.empty();
-
-	for ( CharVectVect::Iter fns = streamFileNames; fns.lte(); fns++ ) {
-		const char **ptr = *fns;
-		while ( *ptr != 0 ) {
-			::free( (void*)*ptr );
-			ptr += 1;
-		}
-		free( (void*) *fns );
-	}
-}
-
-ostream &operator<<( ostream &out, const Token &token )
-{
-	out << token.data;
-	return out;
-}
-
-/* Write out a name reference. */
-ostream &operator<<( ostream &out, const NameRef &nameRef )
-{
-	int pos = 0;
-	if ( nameRef[pos] == 0 ) {
-		out << "::";
-		pos += 1;
-	}
-	out << nameRef[pos++];
-	for ( ; pos < nameRef.length(); pos++ )
-		out << "::" << nameRef[pos];
-	return out;
-}
-
-NameInst **Compiler::makeNameIndex()
-{
-	/* The number of nodes in the tree can now be given by nextNameId. Put a
-	 * null pointer on the end of the list to terminate it. */
-	NameInst **nameIndex = new NameInst*[nextNameId+1];
-	memset( nameIndex, 0, sizeof(NameInst*)*(nextNameId+1) );
-
-	for ( NameInstList::Iter ni = nameInstList; ni.lte(); ni++ )
-		nameIndex[ni->id] = ni;
-
-	return nameIndex;
-}
-
-void Compiler::createBuiltin( const char *name, BuiltinMachine builtin )
-{
-	LexExpression *expression = LexExpression::cons( builtin );
-	LexJoin *join = LexJoin::cons( expression );
-	LexDefinition *varDef = new LexDefinition( name, join );
-	GraphDictEl *graphDictEl = new GraphDictEl( name, varDef );
-	rootNamespace->rlMap.insert( graphDictEl );
-}
-
-/* Initialize the graph dict with builtin types. */
-void Compiler::initGraphDict( )
-{
-	createBuiltin( "any", BT_Any );
-	createBuiltin( "ascii", BT_Ascii );
-	createBuiltin( "extend", BT_Extend );
-	createBuiltin( "alpha", BT_Alpha );
-	createBuiltin( "digit", BT_Digit );
-	createBuiltin( "alnum", BT_Alnum );
-	createBuiltin( "lower", BT_Lower );
-	createBuiltin( "upper", BT_Upper );
-	createBuiltin( "cntrl", BT_Cntrl );
-	createBuiltin( "graph", BT_Graph );
-	createBuiltin( "print", BT_Print );
-	createBuiltin( "punct", BT_Punct );
-	createBuiltin( "space", BT_Space );
-	createBuiltin( "xdigit", BT_Xdigit );
-	createBuiltin( "null", BT_Lambda );
-	createBuiltin( "zlen", BT_Lambda );
-	createBuiltin( "empty", BT_Empty );
-}
-
-/* Initialize the key operators object that will be referenced by all fsms
- * created. */
-void Compiler::initKeyOps( )
-{
-	/* Signedness and bounds. */
-	HostType *alphType = alphTypeSet ? userAlphType : hostLang->defaultAlphType;
-	thisKeyOps.setAlphType( alphType );
-
-	if ( lowerNum != 0 ) {
-		/* If ranges are given then interpret the alphabet type. */
-		thisKeyOps.minKey = makeFsmKeyNum( lowerNum, rangeLowLoc, this );
-		thisKeyOps.maxKey = makeFsmKeyNum( upperNum, rangeHighLoc, this );
-	}
-}
-
-/* Remove duplicates of unique actions from an action table. */
-void Compiler::removeDups( ActionTable &table )
-{
-	/* Scan through the table looking for unique actions to 
-	 * remove duplicates of. */
-	for ( int i = 0; i < table.length(); i++ ) {
-		/* Remove any duplicates ahead of i. */
-		for ( int r = i+1; r < table.length(); ) {
-			if ( table[r].value == table[i].value )
-				table.vremove(r);
-			else
-				r += 1;
-		}
-	}
-}
-
-/* Remove duplicates from action lists. This operates only on transition and
- * eof action lists and so should be called once all actions have been
- * transfered to their final resting place. */
-void Compiler::removeActionDups( FsmGraph *graph )
-{
-	/* Loop all states. */
-	for ( StateList::Iter state = graph->stateList; state.lte(); state++ ) {
-		/* Loop all transitions. */
-		for ( TransList::Iter trans = state->outList; trans.lte(); trans++ )
-			removeDups( trans->actionTable );
-		removeDups( state->toStateActionTable );
-		removeDups( state->fromStateActionTable );
-		removeDups( state->eofActionTable );
-	}
-}
-
-Action *Compiler::newAction( const String &name, InlineList *inlineList )
-{
-	InputLoc loc;
-	loc.line = 1;
-	loc.col = 1;
-	loc.fileName = 0;
-
-	Action *action = Action::cons( loc, name, inlineList );
-	actionList.append( action );
-	return action;
-}
-
-void Compiler::initLongestMatchData()
-{
-	if ( regionSetList.length() > 0 ) {
-		/* The initActId action gives act a default value. */
-		InlineList *il4 = InlineList::cons();
-		il4->append( InlineItem::cons( InputLoc(), InlineItem::LmInitAct ) );
-		initActId = newAction( "initact", il4 );
-		initActId->isLmAction = true;
-
-		/* The setTokStart action sets tokstart. */
-		InlineList *il5 = InlineList::cons();
-		il5->append( InlineItem::cons( InputLoc(), InlineItem::LmSetTokStart ) );
-		setTokStart = newAction( "tokstart", il5 );
-		setTokStart->isLmAction = true;
-
-		/* The setTokEnd action sets tokend. */
-		InlineList *il3 = InlineList::cons();
-		il3->append( InlineItem::cons( InputLoc(), InlineItem::LmSetTokEnd ) );
-		setTokEnd = newAction( "tokend", il3 );
-		setTokEnd->isLmAction = true;
-
-		/* The action will also need an ordering: ahead of all user action
-		 * embeddings. */
-		initActIdOrd = curActionOrd++;
-		setTokStartOrd = curActionOrd++;
-		setTokEndOrd = curActionOrd++;
-	}
-}
-
-void Compiler::finishGraphBuild( FsmGraph *graph )
-{
-	/* Resolve any labels that point to multiple states. Any labels that are
-	 * still around are referenced only by gotos and calls and they need to be
-	 * made into deterministic entry points. */
-	graph->deterministicEntry();
-
-	/*
-	 * All state construction is now complete.
-	 */
-
-	/* Transfer global error actions. */
-	for ( StateList::Iter state = graph->stateList; state.lte(); state++ )
-		graph->transferErrorActions( state, 0 );
-	
-	removeActionDups( graph );
-
-	/* Remove unreachable states. There should be no dead end states. The
-	 * subtract and intersection operators are the only places where they may
-	 * be created and those operators clean them up. */
-	graph->removeUnreachableStates();
-
-	/* No more fsm operations are to be done. Action ordering numbers are
-	 * no longer of use and will just hinder minimization. Clear them. */
-	graph->nullActionKeys();
-
-	/* Transition priorities are no longer of use. We can clear them
-	 * because they will just hinder minimization as well. Clear them. */
-	graph->clearAllPriorities();
-
-	/* Minimize here even if we minimized at every op. Now that function
-	 * keys have been cleared we may get a more minimal fsm. */
-	graph->minimizePartition2();
-	graph->compressTransitions();
-}
-
-/* Build the name tree and supporting data structures. */
-NameInst *Compiler::makeNameTree()
-{
-	/* Create the root name. */
-	nextNameId = 1;
-
-	/* First make the name tree. */
-	for ( RegionImplList::Iter rel = regionImplList; rel.lte(); rel++ ) {
-		/* Recurse on the instance. */
-		rel->makeNameTree( rel->loc, this );
-	}
-
-	return 0;
-}
-
-FsmGraph *Compiler::makeAllRegions()
-{
-	/* Build the name tree and supporting data structures. */
-	makeNameTree();
-	NameInst **nameIndex = makeNameIndex();
-
-	int numGraphs = 0;
-	FsmGraph **graphs = new FsmGraph*[regionImplList.length()];
-
-	/* Make all the instantiations, we know that main exists in this list. */
-	for ( RegionImplList::Iter rel = regionImplList; rel.lte(); rel++ ) {
-		/* Build the graph from a walk of the parse tree. */
-		FsmGraph *newGraph = rel->walk( this );
-
-		/* Wrap up the construction. */
-		finishGraphBuild( newGraph );
-
-		/* Save off the new graph. */
-		graphs[numGraphs++] = newGraph;
-	}
-
-	/* NOTE: If putting in minimization here we need to include eofTarget
-	 * into the minimization algorithm. It is currently set by the longest
-	 * match operator and not considered anywhere else. */
-
-	FsmGraph *all;
-	if ( numGraphs == 0 ) {
-		all = new FsmGraph;
-		all->lambdaFsm();
-	}
-	else {
-		/* Add all the other graphs into the first. */
-		all = graphs[0];
-		all->globOp( graphs+1, numGraphs-1 );
-		delete[] graphs;
-	}
-
-	/* Go through all the token regions and check for lmRequiresErrorState. */
-	for ( RegionImplList::Iter reg = regionImplList; reg.lte(); reg++ ) {
-		if ( reg->lmSwitchHandlesError )
-			all->lmRequiresErrorState = true;
-	}
-
-	all->nameIndex = nameIndex;
-
-	return all;
-}
-
-void Compiler::analyzeAction( Action *action, InlineList *inlineList )
-{
-	/* FIXME: Actions used as conditions should be very constrained. */
-	for ( InlineList::Iter item = *inlineList; item.lte(); item++ ) {
-		//if ( item->type == InlineItem::Call || item->type == InlineItem::CallExpr )
-		//	action->anyCall = true;
-
-		/* Need to recurse into longest match items. */
-		if ( item->type == InlineItem::LmSwitch ) {
-			RegionImpl *lm = item->tokenRegion;
-			for ( TokenInstanceListReg::Iter lmi = lm->tokenInstanceList; lmi.lte(); lmi++ ) {
-				if ( lmi->action != 0 )
-					analyzeAction( action, lmi->action->inlineList );
-			}
-		}
-
-		if ( item->type == InlineItem::LmOnLast || 
-				item->type == InlineItem::LmOnNext ||
-				item->type == InlineItem::LmOnLagBehind )
-		{
-			TokenInstance *lmi = item->longestMatchPart;
-			if ( lmi->action != 0 )
-				analyzeAction( action, lmi->action->inlineList );
-		}
-
-		if ( item->children != 0 )
-			analyzeAction( action, item->children );
-	}
-}
-
-void Compiler::analyzeGraph( FsmGraph *graph )
-{
-	for ( ActionList::Iter act = actionList; act.lte(); act++ )
-		analyzeAction( act, act->inlineList );
-
-	for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) {
-		/* The transition list. */
-		for ( TransList::Iter trans = st->outList; trans.lte(); trans++ ) {
-			for ( ActionTable::Iter at = trans->actionTable; at.lte(); at++ )
-				at->value->numTransRefs += 1;
-		}
-
-		for ( ActionTable::Iter at = st->toStateActionTable; at.lte(); at++ )
-			at->value->numToStateRefs += 1;
-
-		for ( ActionTable::Iter at = st->fromStateActionTable; at.lte(); at++ )
-			at->value->numFromStateRefs += 1;
-
-		for ( ActionTable::Iter at = st->eofActionTable; at.lte(); at++ )
-			at->value->numEofRefs += 1;
-	}
-}
-
-FsmGraph *Compiler::makeScanner()
-{
-	/* Make the graph, do minimization. */
-	FsmGraph *fsmGraph = makeAllRegions();
-
-	/* If any errors have occured in the input file then don't write anything. */
-	if ( gblErrorCount > 0 )
-		return 0;
-
-	analyzeGraph( fsmGraph );
-
-	/* Decide if an error state is necessary.
-	 *  1. There is an error transition
-	 *  2. There is a gap in the transitions
-	 *  3. The longest match operator requires it. */
-	if ( fsmGraph->lmRequiresErrorState || fsmGraph->hasErrorTrans() )
-		fsmGraph->errState = fsmGraph->addState();
-
-	/* State numbers need to be assigned such that all final states have a
-	 * larger state id number than all non-final states. This enables the
-	 * first_final mechanism to function correctly. We also want states to be
-	 * ordered in a predictable fashion. So we first apply a depth-first
-	 * search, then do a stable sort by final state status, then assign
-	 * numbers. */
-
-	fsmGraph->depthFirstOrdering();
-	fsmGraph->sortStatesByFinal();
-	fsmGraph->setStateNumbers( 0 );
-
-	return fsmGraph;
-}
-
-LangEl *Compiler::makeRepeatProd( const InputLoc &loc, Namespace *nspace,
-		const String &repeatName, UniqueType *ut )
-{
-	LangEl *prodName = addLangEl( this, nspace, repeatName, LangEl::NonTerm );
-	prodName->isRepeat = true;
-
-	ProdElList *prodElList1 = new ProdElList;
-
-	/* Build the first production of the repeat. */
-	TypeRef *typeRef1 = TypeRef::cons( loc, ut );
-	ProdEl *factor1 = new ProdEl( ProdEl::ReferenceType,
-			InputLoc(), 0, false, typeRef1, 0 );
-
-	UniqueType *prodNameUT = findUniqueType( TYPE_TREE, prodName );
-	TypeRef *typeRef2 = TypeRef::cons( loc, prodNameUT );
-	ProdEl *factor2 = new ProdEl( ProdEl::ReferenceType,
-			InputLoc(), 0, false, typeRef2, 0 );
-
-	prodElList1->append( factor1 );
-	prodElList1->append( factor2 );
-
-	Production *newDef1 = Production::cons( InputLoc(),
-			prodName, prodElList1, String(), false, 0,
-			prodList.length(), prodName->defList.length() );
-
-	prodName->defList.append( newDef1 );
-	prodList.append( newDef1 );
-
-	/* Build the second production of the repeat. */
-	ProdElList *prodElList2 = new ProdElList;
-
-	Production *newDef2 = Production::cons( InputLoc(),
-			prodName, prodElList2, String(), false, 0,
-			prodList.length(), prodName->defList.length() );
-
-	prodName->defList.append( newDef2 );
-	prodList.append( newDef2 );
-
-	return prodName;
-}
-
-LangEl *Compiler::makeListProd( const InputLoc &loc, Namespace *nspace,
-		const String &listName, UniqueType *ut )
-{
-	LangEl *prodName = addLangEl( this, nspace, listName, LangEl::NonTerm );
-	prodName->isList = true;
-
-	/* Build the first production of the list. */
-	TypeRef *typeRef1 = TypeRef::cons( loc, ut );
-	ProdEl *factor1 = new ProdEl( ProdEl::ReferenceType, InputLoc(), 0, false, typeRef1, 0 );
-
-	UniqueType *prodNameUT = findUniqueType( TYPE_TREE, prodName );
-	TypeRef *typeRef2 = TypeRef::cons( loc, prodNameUT );
-	ProdEl *factor2 = new ProdEl( ProdEl::ReferenceType, loc, 0, false, typeRef2, 0 );
-
-	ProdElList *prodElList1 = new ProdElList;
-	prodElList1->append( factor1 );
-	prodElList1->append( factor2 );
-
-	Production *newDef1 = Production::cons( loc,
-			prodName, prodElList1, String(), false, 0,
-			prodList.length(), prodName->defList.length() );
-
-	prodName->defList.append( newDef1 );
-	prodList.append( newDef1 );
-
-	/* Build the second production of the list. */
-	TypeRef *typeRef3 = TypeRef::cons( loc, ut );
-	ProdEl *factor3 = new ProdEl( ProdEl::ReferenceType, loc, 0, false, typeRef3, 0 );
-
-	ProdElList *prodElList2 = new ProdElList;
-	prodElList2->append( factor3 );
-
-	Production *newDef2 = Production::cons( loc,
-			prodName, prodElList2, String(), false, 0,
-			prodList.length(), prodName->defList.length() );
-
-	prodName->defList.append( newDef2 );
-	prodList.append( newDef2 );
-
-	return prodName;
-}
-
-LangEl *Compiler::makeOptProd( const InputLoc &loc, Namespace *nspace,
-		const String &optName, UniqueType *ut )
-{
-	LangEl *prodName = addLangEl( this, nspace, optName, LangEl::NonTerm );
-	prodName->isOpt = true;
-
-	ProdElList *prodElList1 = new ProdElList;
-
-	/* Build the first production of the repeat. */
-	TypeRef *typeRef1 = TypeRef::cons( loc, ut );
-	ProdEl *factor1 = new ProdEl( ProdEl::ReferenceType, loc, 0, false, typeRef1, 0 );
-	prodElList1->append( factor1 );
-
-	Production *newDef1 = Production::cons( loc,
-			prodName, prodElList1, String(), false, 0,
-			prodList.length(), prodName->defList.length() );
-
-	prodName->defList.append( newDef1 );
-	prodList.append( newDef1 );
-
-	/* Build the second production of the repeat. */
-	ProdElList *prodElList2 = new ProdElList;
-
-	Production *newDef2 = Production::cons( loc,
-			prodName, prodElList2, String(), false, 0,
-			prodList.length(), prodName->defList.length() );
-
-	prodName->defList.append( newDef2 );
-	prodList.append( newDef2 );
-
-	return prodName;
-}
-
-Namespace *Namespace::findNamespace( const String &name )
-{
-	for ( NamespaceVect::Iter c = childNamespaces; c.lte(); c++ ) {
-		if ( strcmp( name, (*c)->name ) == 0 )
-			return *c;
-	}
-	return 0;
-}
-
-Reduction *Namespace::findReduction( const String &name )
-{
-	for ( ReductionVect::Iter r = reductions; r.lte(); r++ ) {
-		if ( strcmp( name, (*r)->name ) == 0 )
-			return *r;
-	}
-	return 0;
-}
-
-/* Search from a previously resolved qualification. (name 1+ in a qual list). */
-Namespace *NamespaceQual::searchFrom( Namespace *from, StringVect::Iter &qualPart )
-{
-	/* While there are still parts in the qualification.  */
-	while ( qualPart.lte() ) {
-		Namespace *child = from->findNamespace( *qualPart );
-		if ( child == 0 )
-			return 0;
-
-		from = child;
-		qualPart.increment();
-	}
-
-	return from;
-}
-
-Namespace *NamespaceQual::getQual( Compiler *pd )
-{
-	/* Do the search only once. */
-	if ( cachedNspaceQual != 0 )
-		return cachedNspaceQual;
-	
-	if ( qualNames.length() == 0 ) {
-		/* No qualification, use the region the qualification was 
-		 * declared in. */
-		cachedNspaceQual = declInNspace;
-	}
-	else if ( strcmp( qualNames[0], "root" ) == 0 ) {
-		/* First item is "root." Start the downward search from there. */
-		StringVect::Iter qualPart = qualNames;
-		qualPart.increment();
-		cachedNspaceQual = searchFrom( pd->rootNamespace, qualPart );
-		return cachedNspaceQual;
-	}
-	else {
-		/* Have a qualification. Move upwards through the declared
-		 * regions looking for the first part. */
-		StringVect::Iter qualPart = qualNames;
-		Namespace *parentNamespace = declInNspace;
-		while ( parentNamespace != 0 ) {
-			/* Search for the first part underneath the current parent. */
-			Namespace *child = parentNamespace->findNamespace( *qualPart );
-
-			if ( child != 0 ) {
-				/* Found the first part. Start going below the result.  */
-				qualPart.increment();
-				cachedNspaceQual = searchFrom( child, qualPart );
-				return cachedNspaceQual;
-			}
-
-			/* Not found, move up to the parent. */
-			parentNamespace = parentNamespace->parentNamespace;
-		}
-
-		/* Failed to find the place to start from. */
-		cachedNspaceQual = 0;
-	}
-
-	return cachedNspaceQual;
-}
-
-void Compiler::initEmptyScanner( RegionSet *regionSet, TokenRegion *reg )
-{
-	if ( reg != 0 && reg->impl->tokenInstanceList.length() == 0 ) {
-		reg->impl->wasEmpty = true;
-
-		static int def = 1;
-		String name( 64, "__%p_DEF_PAT_%d", reg, def++ );
-
-		LexJoin *join = LexJoin::cons( LexExpression::cons( BT_Any ) );
-
-		TokenDef *tokenDef = TokenDef::cons( name, String(), false, false,
-				join, 0, internal, nextTokenId++, rootNamespace, 
-				regionSet, 0, 0 );
-			
-		TokenInstance *tokenInstance = TokenInstance::cons( tokenDef,
-				join, internal, nextTokenId++,
-				rootNamespace, reg );
-
-		reg->impl->tokenInstanceList.append( tokenInstance );
-
-		/* These do not go in the namespace so so they cannot get declared
-		 * in the declare pass. */
-		LangEl *lel = addLangEl( this, rootNamespace, name, LangEl::Term );
-
-		tokenInstance->tokenDef->tdLangEl = lel;
-		lel->tokenDef = tokenDef;
-	}
-}
-
-void Compiler::initEmptyScanners()
-{
-	for ( RegionSetList::Iter regionSet = regionSetList; regionSet.lte(); regionSet++ ) {
-		initEmptyScanner( regionSet, regionSet->tokenIgnore );
-		initEmptyScanner( regionSet, regionSet->tokenOnly );
-		initEmptyScanner( regionSet, regionSet->ignoreOnly );
-		initEmptyScanner( regionSet, regionSet->collectIgnore );
-	}
-}
-
-pda_run *Compiler::parsePattern( program_t *prg, tree_t **sp, const InputLoc &loc, 
-		int parserId, struct input_impl *sourceStream )
-{
-	struct pda_run *pdaRun = new pda_run;
-	colm_pda_init( prg, pdaRun, pdaTables, parserId, 0, false, 0, false );
-
-	long pcr = colm_parse_loop( prg, sp, pdaRun, sourceStream, PCR_START );
-	assert( pcr == PCR_DONE );
-	if ( pdaRun->parse_error ) {
-		cerr << ( loc.fileName != 0 ? loc.fileName : "<input>" ) <<
-				":" << loc.line << ":" << loc.col;
-
-		if ( pdaRun->parse_error_text != 0 ) {
-			colm_data *tokdata = pdaRun->parse_error_text->tokdata;
-			cerr << ": relative error: ";
-			cerr.write( tokdata->data, tokdata->length );
-		}
-		else {
-			cerr << ": parse error";
-		}
-
-		cerr << endl;
-		gblErrorCount += 1;
-	}
-
-	return pdaRun;
-}
-
-void Compiler::parsePatterns()
-{
-	program_t *prg = colm_new_program( runtimeData );
-
-	colm_set_debug( prg, gblActiveRealm );
-
-	/* Turn off context-dependent parsing. */
-	prg->ctx_dep_parsing = 0;
-
-	tree_t **sp = prg->stack_root;
-
-	for ( ConsList::Iter cons = replList; cons.lte(); cons++ ) {
-		if ( cons->langEl != 0 ) {
-			struct input_impl *in = colm_impl_new_cons( strdup("<internal>"), cons );
-			cons->pdaRun = parsePattern( prg, sp, cons->loc, cons->langEl->parserId, in );
-		}
-	}
-
-	for ( PatList::Iter pat = patternList; pat.lte(); pat++ ) {
-		struct input_impl *in = colm_impl_new_pat( strdup("<internal>"), pat );
-		pat->pdaRun = parsePattern( prg, sp, pat->loc, pat->langEl->parserId, in );
-	}
-
-	/* Bail on above errors. */
-	if ( gblErrorCount > 0 )
-		exit(1);
-
-	fillInPatterns( prg );
-}
-
-void Compiler::collectParserEls( BstSet<LangEl*> &parserEls )
-{
-	for ( PatList::Iter pat = patternList; pat.lte(); pat++ ) {
-		/* We assume the reduction action compilation phase was run before
-		 * pattern parsing and it decorated the pattern with the target type. */
-		assert( pat->langEl != 0 );
-		if ( pat->langEl->type != LangEl::NonTerm )
-			error(pat->loc) << "pattern type is not a non-terminal" << endp;
-
-		if ( pat->langEl->parserId < 0 ) {
-			/* Make a parser for the language element. */
-			parserEls.insert( pat->langEl );
-			pat->langEl->parserId = nextParserId++;
-		}
-	}
-
-	for ( ConsList::Iter repl = replList; repl.lte(); repl++ ) {
-		/* We need the the language element from the compilation process. */
-		assert( repl->langEl != 0 );
-
-		if ( repl->langEl->parserId < 0 ) {
-			/* Make a parser for the language element. */
-			parserEls.insert( repl->langEl );
-			repl->langEl->parserId = nextParserId++;
-		}
-	}
-
-	/* Make parsers that we need. */
-	for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) {
-		if ( lel->parserId >= 0 )
-			parserEls.insert( lel );
-	}
-}
-
-void Compiler::writeHostCall()
-{
-	/*
-	 * Host Call
-	 */
-	for ( FunctionList::Iter hc = inHostList; hc.lte(); hc++ ) {
-		*outStream <<
-			"value_t " << hc->hostCall << "( program_t *prg, tree_t **sp";
-		for ( ParameterList::Iter p = *hc->paramList; p.lte(); p++ ) {
-			*outStream <<
-				", value_t";
-		}
-		*outStream << " );\n";
-	}
-
-	*outStream <<
-		"tree_t **" << objectName << "_host_call( program_t *prg, long code, tree_t **sp )\n"
-		"{\n"
-		"	value_t rtn = 0;\n"
-		"	switch ( code ) {\n";
-	
-	for ( FunctionList::Iter hc = inHostList; hc.lte(); hc++ ) {
-		*outStream <<
-			"		case " << hc->funcId << ": {\n";
-
-		int pos = hc->paramList->length() - 1;
-		for ( ParameterList::Iter p = *hc->paramList; p.lte(); p++, pos-- ) {
-			*outStream <<
-				"			value_t p" << pos << " = vm_pop_value();\n";
-		}
-		
-		*outStream <<
-			"			rtn = " << hc->hostCall << "( prg, sp";
-
-		pos = 0;
-		for ( ParameterList::Iter p = *hc->paramList; p.lte(); p++, pos++ ) {
-			*outStream <<
-				", p" << pos;
-		}
-		*outStream << " );\n"
-			"			break;\n"
-			"		}\n";
-	}
-		
-	*outStream <<
-		"	}\n"
-		"	vm_push_value( rtn );\n"
-		"	return sp;\n"
-		"}\n";
-
-}
-
-void Compiler::generateOutput( long activeRealm, bool includeCommit )
-{
-	FsmCodeGen *fsmGen = new FsmCodeGen( *outStream, redFsm, fsmTables );
-
-	PdaCodeGen *pdaGen = new PdaCodeGen( *outStream );
-
-	fsmGen->writeIncludes();
-	pdaGen->defineRuntime();
-	fsmGen->writeCode();
-
-	/* Make parsers that we need. */
-	pdaGen->writeParserData( 0, pdaTables );
-
-	/* Write the runtime data. */
-	pdaGen->writeRuntimeData( runtimeData, pdaTables );
-
-	writeHostCall();
-
-	if ( includeCommit )
-		writeCommitStub();
-
-	if ( !gblLibrary ) 
-		fsmGen->writeMain( activeRealm );
-
-	outStream->flush();
-}
-
-
-void Compiler::prepGrammar()
-{
-	/* This will create language elements. */
-	wrapNonTerminals();
-
-	makeLangElIds();
-	makeStructElIds();
-	makeLangElNames();
-	makeDefinitionNames();
-	noUndefindLangEls();
-
-	/* Put the language elements in an index by language element id. */
-	langElIndex = new LangEl*[nextLelId+1];
-	memset( langElIndex, 0, sizeof(LangEl*)*(nextLelId+1) );
-	for ( LelList::Iter lel = langEls; lel.lte(); lel++ )
-		langElIndex[lel->id] = lel;
-
-	makeProdFsms();
-
-	/* Allocate the Runtime data now. Every PdaTable that we make 
-	 * will reference it, but it will be filled in after all the tables are
-	 * built. */
-	runtimeData = new colm_sections;
-}
-
-void Compiler::compile()
-{
-	beginProcessing();
-	initKeyOps();
-
-	/* Declare types. */
-	declarePass();
-
-	/* Resolve type references. */
-	resolvePass();
-
-	makeTerminalWrappers();
-	makeEofElements();
-
-	/*
-	 * Parsers
-	 */
-
-	/* Init the longest match data */
-	initLongestMatchData();
-	FsmGraph *fsmGraph = makeScanner();
-
-	prepGrammar();
-
-	placeAllLanguageObjects();
-	placeAllStructObjects();
-	placeAllFrameObjects();
-	placeAllFunctions();
-
-	/* Compile bytecode. */
-	compileByteCode();
-
-	/* Make the reduced scanner. */
-	RedFsmBuild reduce( this, fsmGraph );
-	redFsm = reduce.reduceMachine();
-
-	BstSet<LangEl*> parserEls;
-	collectParserEls( parserEls );
-
-	makeParser( parserEls );
-
-	/* Make the scanner tables. */
-	fsmTables = redFsm->makeFsmTables();
-
-	/* Now that all parsers are built, make the global runtimeData. */
-	makeRuntimeData();
-
-	/* 
-	 * All compilation is now complete.
-	 */
-	
-	/* Parse constructors and patterns. */
-	parsePatterns();
-}
-
author	Adrian Thurston <thurston@colm.net>	2019-09-08 21:11:17 -0600
committer	Adrian Thurston <thurston@colm.net>	2019-09-08 21:11:17 -0600
commit	c860c61607117582abd8f23881eed87957197484 (patch)
tree	4d4e65dddc710e15f008189a9308d95924350c3f /src/compiler.cc
parent	f37c916aed2600951b8966a86020406b0b0542cf (diff)
download	colm-c860c61607117582abd8f23881eed87957197484.tar.gz