summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAdrian Thurston <thurston@complang.org>2011-07-28 02:19:59 +0000
committerAdrian Thurston <thurston@complang.org>2011-07-28 02:19:59 +0000
commit6a78200a3d72a12bd0e16a9350fd0eaafa15b548 (patch)
tree44391faa9656f4eec62757576f84eb654d5dab20
parentf8bd8a207022503a6628a2cdd2c8b7a9a64792fb (diff)
downloadcolm-6a78200a3d72a12bd0e16a9350fd0eaafa15b548.tar.gz
Pointer and Ref type references needed work following the recent improvements
to the type system. Rather than use a bool in typeref, wrap them in a whole typeref. refs #308. Added the analysis.cc file and moved the top-level compilation calls there. Moved a few function calls around to fix some crashes observed during testing. Have to be concerned about type resolution in productions that are created during type resolution (repeats, etc).
-rw-r--r--colm/Makefile.am2
-rw-r--r--colm/analysis.cc121
-rw-r--r--colm/compile.cc4
-rw-r--r--colm/declare.cc42
-rw-r--r--colm/lmparse.kl5
-rw-r--r--colm/parsedata.cc51
-rw-r--r--colm/parsetree.h29
-rw-r--r--colm/pdabuild.cc59
-rw-r--r--colm/resolve.cc87
9 files changed, 238 insertions, 162 deletions
diff --git a/colm/Makefile.am b/colm/Makefile.am
index 6b0f6dbf..f29d4f66 100644
--- a/colm/Makefile.am
+++ b/colm/Makefile.am
@@ -165,7 +165,7 @@ colm_SOURCES = \
parsedata.cc fsmstate.cc fsmbase.cc fsmattach.cc fsmmin.cc \
fsmgraph.cc pdagraph.cc pdabuild.cc pdacodegen.cc fsmcodegen.cc \
redfsm.cc fsmexec.cc main.cc redbuild.cc closure.cc fsmap.cc \
- dotgen.cc pcheck.cc ctinput.cc declare.cc
+ dotgen.cc pcheck.cc ctinput.cc declare.cc analysis.cc
colmincdir = $(includedir)/colm
diff --git a/colm/analysis.cc b/colm/analysis.cc
new file mode 100644
index 00000000..14035ec0
--- /dev/null
+++ b/colm/analysis.cc
@@ -0,0 +1,121 @@
+/*
+ * Copyright 2011 Adrian Thurston <thurston@complang.org>
+ */
+
+/* This file is part of Colm.
+ *
+ * Colm is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Colm is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Colm; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <iostream>
+#include <iomanip>
+#include <errno.h>
+#include <stdlib.h>
+#include <limits.h>
+#include <sstream>
+
+#include "colm.h"
+#include "lmparse.h"
+#include "parsedata.h"
+#include "parsetree.h"
+#include "mergesort.h"
+#include "redbuild.h"
+#include "pdacodegen.h"
+#include "fsmcodegen.h"
+#include "fsmrun.h"
+#include "pdarun.h"
+
+using namespace std;
+
+
+void ParseData::prepGrammar()
+{
+ /* This will create language elements. */
+ wrapNonTerminals();
+
+ makeKlangElIds();
+ makeKlangElNames();
+ makeDefinitionNames();
+ noUndefindKlangEls();
+
+ /* Put the language elements in an index by language element id. */
+ langElIndex = new LangEl*[nextSymbolId+1];
+ memset( langElIndex, 0, sizeof(LangEl*)*(nextSymbolId+1) );
+ for ( LelList::Iter lel = langEls; lel.lte(); lel++ )
+ langElIndex[lel->id] = lel;
+
+ makeProdFsms();
+
+ /* Allocate the Runtime data now. Every PdaTable that we make
+ * will reference it, but it will be filled in after all the tables are
+ * built. */
+ runtimeData = new RuntimeData;
+}
+
+void ParseData::semanticAnalysis()
+{
+ beginProcessing();
+ initKeyOps();
+
+ /* Type declaration. */
+ typeDeclaration();
+
+ /* Type resolving. */
+ typeResolve();
+
+ makeTerminalWrappers();
+ makeEofElements();
+
+ /*
+ * Parsers
+ */
+
+ /* Init the longest match data */
+ initLongestMatchData();
+ FsmGraph *fsmGraph = makeScanner();
+
+ if ( colm_log_compile ) {
+ printNameTree( fsmGraph->rootName );
+ printNameIndex( fsmGraph->nameIndex );
+ }
+
+ prepGrammar();
+
+ /* Compile bytecode. */
+ compileByteCode();
+
+ /* Make the reduced fsm. */
+ RedFsmBuild reduce( sectionName, this, fsmGraph );
+ redFsm = reduce.reduceMachine();
+
+ BstSet<LangEl*> parserEls;
+ collectParserEls( parserEls );
+
+ makeParser( parserEls );
+
+ /* Make the scanner tables. */
+ fsmTables = redFsm->makeFsmTables();
+
+ /* Now that all parsers are built, make the global runtimeData. */
+ makeRuntimeData();
+
+ /*
+ * All compilation is now complete.
+ */
+
+ /* Parse patterns and replacements. */
+ parsePatterns();
+}
+
diff --git a/colm/compile.cc b/colm/compile.cc
index 371185e2..8b4178fc 100644
--- a/colm/compile.cc
+++ b/colm/compile.cc
@@ -437,7 +437,7 @@ long LangVarRef::loadQualificationRefs( ParseData *pd, CodeVect &code ) const
code.append( el->typeRef->iterDef->inRefFromCur );
code.appendHalf( el->offset );
}
- else if ( el->typeRef->isRef ) {
+ else if ( el->typeRef->type == TypeRef::Ref ) {
code.append( IN_REF_FROM_REF );
code.appendHalf( el->offset );
}
@@ -898,7 +898,7 @@ ObjField *LangVarRef::evaluateRef( ParseData *pd, CodeVect &code, long pushCount
code.append( lookup.objField->typeRef->iterDef->inRefFromCur );
code.appendHalf( lookup.objField->offset );
}
- else if ( lookup.objField->typeRef->isRef ) {
+ else if ( lookup.objField->typeRef->type == TypeRef::Ref ) {
code.append( IN_REF_FROM_REF );
code.appendHalf( lookup.objField->offset );
}
diff --git a/colm/declare.cc b/colm/declare.cc
index be75de66..6281e793 100644
--- a/colm/declare.cc
+++ b/colm/declare.cc
@@ -104,48 +104,6 @@ void ParseData::declareBaseKlangEls()
anyKlangEl = declareLangEl( this, rootNamespace, "any", LangEl::NonTerm );
}
-void ParseData::makeTerminalWrappers()
-{
- /* Make terminal language elements corresponding to each nonterminal in
- * the grammar. */
- for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) {
- if ( lel->type == LangEl::NonTerm ) {
- String name( lel->name.length() + 5, "_T_%s", lel->name.data );
- LangEl *termDup = new LangEl( lel->nspace, name, LangEl::Term );
-
- /* Give the dup the attributes of the nonterminal. This ensures
- * that the attributes are allocated when patterns and
- * constructors are parsed. */
- termDup->objectDef = lel->objectDef;
-
- langEls.append( termDup );
- lel->termDup = termDup;
- termDup->termDup = lel;
- }
- }
-}
-
-void ParseData::makeEofElements()
-{
- /* Make eof language elements for each user terminal. This is a bit excessive and
- * need to be reduced to the ones that we need parsers for, but we don't know that yet.
- * Another pass before this one is needed. */
- for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) {
- if ( lel->eofLel == 0 &&
- lel != eofKlangEl &&
- lel != errorKlangEl &&
- lel != noTokenKlangEl )
- {
- String name( lel->name.length() + 5, "_eof_%s", lel->name.data );
- LangEl *eofLel = new LangEl( lel->nspace, name, LangEl::Term );
-
- langEls.append( eofLel );
- lel->eofLel = eofLel;
- eofLel->eofLel = lel;
- eofLel->isEOF = true;
- }
- }
-}
void ParseData::addProdRedObjectVar( ObjectDef *localFrame, LangEl *nonTerm )
{
diff --git a/colm/lmparse.kl b/colm/lmparse.kl
index 80f8acf1..cacfea02 100644
--- a/colm/lmparse.kl
+++ b/colm/lmparse.kl
@@ -186,8 +186,7 @@ nonterm reference_type_ref uses type_ref;
reference_type_ref: KW_Ref type_ref
final {
- $$->typeRef = $2->typeRef;
- $$->typeRef->isRef = true;
+ $$->typeRef = new TypeRef( TypeRef::Ref, $1->loc, $2->typeRef );
};
nonterm global_def uses statement;
@@ -428,7 +427,7 @@ basic_type_ref: KW_Ptr region_qual TK_Word opt_repeat
final {
$$->typeRef = new TypeRef( $1->loc, $2->nspaceQual, $3->data );
$$->typeRef->repeatType = $4->repeatType;
- $$->typeRef->isPtr = true;
+ $$->typeRef = new TypeRef( TypeRef::Ptr, $1->loc, $$->typeRef );
};
diff --git a/colm/parsedata.cc b/colm/parsedata.cc
index 14c2743e..3e9d2bd6 100644
--- a/colm/parsedata.cc
+++ b/colm/parsedata.cc
@@ -1403,57 +1403,6 @@ void ParseData::collectParserEls( BstSet<LangEl*> &parserEls )
}
}
-void ParseData::semanticAnalysis()
-{
- beginProcessing();
- initKeyOps();
-
- /* Type declaration. */
- typeDeclaration();
-
- /* Type resolving. */
- typeResolve();
-
- /*
- * Parsers
- */
-
- /* Init the longest match data */
- initLongestMatchData();
- FsmGraph *fsmGraph = makeScanner();
-
- if ( colm_log_compile ) {
- printNameTree( fsmGraph->rootName );
- printNameIndex( fsmGraph->nameIndex );
- }
-
- prepGrammar();
-
- /* Compile bytecode. */
- compileByteCode();
-
- /* Make the reduced fsm. */
- RedFsmBuild reduce( sectionName, this, fsmGraph );
- redFsm = reduce.reduceMachine();
-
- BstSet<LangEl*> parserEls;
- collectParserEls( parserEls );
-
- makeParser( parserEls );
-
- /* Make the scanner tables. */
- fsmTables = redFsm->makeFsmTables();
-
- /* Now that all parsers are built, make the global runtimeData. */
- makeRuntimeData();
-
- /*
- * All compilation is now complete.
- */
-
- /* Parse patterns and replacements. */
- parsePatterns();
-}
void ParseData::generateOutput()
{
diff --git a/colm/parsetree.h b/colm/parsetree.h
index 51769ca5..afc62e68 100644
--- a/colm/parsetree.h
+++ b/colm/parsetree.h
@@ -1507,29 +1507,38 @@ struct TypeRef
Map,
List,
Vector,
- Accum
+ Accum,
+ Ref,
+ Ptr,
};
/* Qualification and a type name. These require lookup. */
TypeRef( const InputLoc &loc, NamespaceQual *nspaceQual, String typeName ) :
type(Name), loc(loc), nspaceQual(nspaceQual), typeName(typeName), pdaLiteral(0), iterDef(0),
typeRef1(0), typeRef2(0),
- isPtr(false), isRef(false), repeatType(RepeatNone),
+ repeatType(RepeatNone),
nspace(0), uniqueType(0), searchUniqueType(0), generic(0) {}
/* Qualification and a type name. These require lookup. */
TypeRef( const InputLoc &loc, NamespaceQual *nspaceQual, PdaLiteral *pdaLiteral ) :
type(Literal), loc(loc), nspaceQual(nspaceQual), pdaLiteral(pdaLiteral), iterDef(0),
typeRef1(0), typeRef2(0),
- isPtr(false), isRef(false), repeatType(RepeatNone),
+ repeatType(RepeatNone),
nspace(0), uniqueType(0), searchUniqueType(0), generic(0) {}
- /* Unique type is given directly. */
+ /* Generics. */
TypeRef( Type type, const InputLoc &loc, NamespaceQual *nspaceQual, TypeRef *typeRef1, TypeRef *typeRef2 ) :
type(type), loc(loc), nspaceQual(nspaceQual), pdaLiteral(0), iterDef(0),
typeRef1(typeRef1), typeRef2(typeRef2),
- isPtr(false), isRef(false), repeatType(RepeatNone),
- nspace(0), uniqueType(uniqueType), searchUniqueType(0), generic(0) {}
+ repeatType(RepeatNone),
+ nspace(0), uniqueType(0), searchUniqueType(0), generic(0) {}
+
+ /* Pointers and Refs. */
+ TypeRef( Type type, const InputLoc &loc, TypeRef *typeRef1 ) :
+ type(type), loc(loc), nspaceQual(0), pdaLiteral(0), iterDef(0),
+ typeRef1(typeRef1), typeRef2(0),
+ repeatType(RepeatNone),
+ nspace(0), uniqueType(0), searchUniqueType(0), generic(0) {}
/* Resolution not needed. */
@@ -1538,14 +1547,14 @@ struct TypeRef
UniqueType *searchUniqueType ) :
type(Iterator), loc(loc), nspaceQual(0), pdaLiteral(0), iterDef(iterDef),
typeRef1(0), typeRef2(0),
- isPtr(false), isRef(false), repeatType(RepeatNone),
+ repeatType(RepeatNone),
nspace(0), uniqueType(uniqueType), searchUniqueType(searchUniqueType), generic(0) {}
/* Unique type is given directly. */
TypeRef( const InputLoc &loc, UniqueType *uniqueType ) :
type(Unspecified), loc(loc), nspaceQual(0), pdaLiteral(0), iterDef(0),
typeRef1(0), typeRef2(0),
- isPtr(false), isRef(false), repeatType(RepeatNone),
+ repeatType(RepeatNone),
nspace(0), uniqueType(uniqueType), searchUniqueType(0), generic(0) {}
void resolveRepeat( ParseData *pd );
@@ -1557,6 +1566,8 @@ struct TypeRef
UniqueType *lookupTypeVector( ParseData *pd );
UniqueType *lookupTypeAccum( ParseData *pd );
UniqueType *lookupType( ParseData *pd );
+ UniqueType *lookupTypePtr( ParseData *pd );
+ UniqueType *lookupTypeRef( ParseData *pd );
Type type;
InputLoc loc;
@@ -1566,8 +1577,6 @@ struct TypeRef
IterDef *iterDef;
TypeRef *typeRef1;
TypeRef *typeRef2;
- bool isPtr;
- bool isRef;
RepeatType repeatType;
/* Resolved. */
diff --git a/colm/pdabuild.cc b/colm/pdabuild.cc
index 98f9954d..884dd197 100644
--- a/colm/pdabuild.cc
+++ b/colm/pdabuild.cc
@@ -972,6 +972,10 @@ void ParseData::wrapNonTerminals()
prodList.length(), Definition::Production );
prodList.append( lel->rootDef );
rootKlangEl->defList.append( lel->rootDef );
+
+ /* First resolve. */
+ for ( ProdElList::Iter fact = *prodElList; fact.lte(); fact++ )
+ resolveFactor( fact );
}
}
@@ -1650,6 +1654,21 @@ struct CmpSpan
}
};
+PdaGraph *ParseData::makePdaGraph( KlangElSet &parserEls )
+{
+ //for ( DefList::Iter prod = prodList; prod.lte(); prod++ )
+ // cerr << prod->prodId << " " << prod->data << endl;
+
+ PdaGraph *pdaGraph = new PdaGraph();
+ lalr1GenerateParser( pdaGraph, parserEls );
+ pdaGraph->setStateNumbers();
+ analyzeMachine( pdaGraph, parserEls );
+
+ //cerr << "NUMBER OF STATES: " << pdaGraph->stateList.length() << endl;
+
+ return pdaGraph;
+}
+
PdaTables *ParseData::makePdaTables( PdaGraph *pdaGraph )
{
int count, pos;
@@ -1832,47 +1851,9 @@ PdaTables *ParseData::makePdaTables( PdaGraph *pdaGraph )
return pdaTables;
}
-void ParseData::prepGrammar()
-{
- /* This will create language elements. */
- wrapNonTerminals();
-
- makeKlangElIds();
- makeKlangElNames();
- makeDefinitionNames();
- noUndefindKlangEls();
-
- /* Put the language elements in an index by language element id. */
- langElIndex = new LangEl*[nextSymbolId+1];
- memset( langElIndex, 0, sizeof(LangEl*)*(nextSymbolId+1) );
- for ( LelList::Iter lel = langEls; lel.lte(); lel++ )
- langElIndex[lel->id] = lel;
-
- makeProdFsms();
-
- /* Allocate the Runtime data now. Every PdaTable that we make
- * will reference it, but it will be filled in after all the tables are
- * built. */
- runtimeData = new RuntimeData;
-}
-
-PdaGraph *ParseData::makePdaGraph( KlangElSet &parserEls )
-{
- //for ( DefList::Iter prod = prodList; prod.lte(); prod++ )
- // cerr << prod->prodId << " " << prod->data << endl;
-
- PdaGraph *pdaGraph = new PdaGraph();
- lalr1GenerateParser( pdaGraph, parserEls );
- pdaGraph->setStateNumbers();
- analyzeMachine( pdaGraph, parserEls );
-
- //cerr << "NUMBER OF STATES: " << pdaGraph->stateList.length() << endl;
-
- return pdaGraph;
-}
-
void ParseData::makeParser( KlangElSet &parserEls )
{
pdaGraph = makePdaGraph( parserEls );
pdaTables = makePdaTables( pdaGraph );
}
+
diff --git a/colm/resolve.cc b/colm/resolve.cc
index 78e03d8d..aae2a8ec 100644
--- a/colm/resolve.cc
+++ b/colm/resolve.cc
@@ -47,10 +47,8 @@ UniqueType *TypeRef::lookupTypeName( ParseData *pd )
case TypeMapEl::TypeAliasType:
return inDict->typeRef->lookupType( pd );
- case TypeMapEl::LangElType: {
- long typeId = ( isPtr ? TYPE_PTR : ( isRef ? TYPE_REF : TYPE_TREE ) );
- return pd->findUniqueType( typeId, inDict->value );
- }
+ case TypeMapEl::LangElType:
+ return pd->findUniqueType( TYPE_TREE, inDict->value );
}
}
@@ -78,10 +76,8 @@ UniqueType *TypeRef::lookupTypeLiteral( ParseData *pd )
while ( nspace != 0 ) {
LiteralDictEl *ldel = nspace->literalDict.find( interp );
- if ( ldel != 0 ) {
- long typeId = ( isPtr ? TYPE_PTR : ( isRef ? TYPE_REF : TYPE_TREE ) );
- return pd->findUniqueType( typeId, ldel->value->token );
- }
+ if ( ldel != 0 )
+ return pd->findUniqueType( TYPE_TREE, ldel->value->token );
nspace = nspace->parentNamespace;
}
@@ -216,10 +212,22 @@ UniqueType *TypeRef::lookupTypeAccum( ParseData *pd )
return pd->findUniqueType( TYPE_TREE, inMap->generic->langEl );
}
+UniqueType *TypeRef::lookupTypePtr( ParseData *pd )
+{
+ typeRef1->lookupType( pd );
+ return pd->findUniqueType( TYPE_PTR, typeRef1->uniqueType->langEl );
+}
+
+UniqueType *TypeRef::lookupTypeRef( ParseData *pd )
+{
+ typeRef1->lookupType( pd );
+ return pd->findUniqueType( TYPE_REF, typeRef1->uniqueType->langEl );
+}
+
void TypeRef::resolveRepeat( ParseData *pd )
{
if ( uniqueType->typeId != TYPE_TREE )
- error() << "cannot repeat non-tree type" << endp;
+ error(loc) << "cannot repeat non-tree type" << endp;
UniqueRepeat searchKey( repeatType, uniqueType->langEl );
UniqueRepeat *uniqueRepeat = pd->uniqeRepeatMap.find( &searchKey );
@@ -288,6 +296,12 @@ UniqueType *TypeRef::lookupType( ParseData *pd )
case Accum:
uniqueType = lookupTypeAccum( pd );
break;
+ case Ptr:
+ uniqueType = lookupTypePtr( pd );
+ break;
+ case Ref:
+ uniqueType = lookupTypeRef( pd );
+ break;
case Iterator:
case Unspecified:
/* No lookup needed, unique type(s) set when constructed. */
@@ -668,6 +682,7 @@ void ParseData::resolveAccumEls()
/* Resolves production els and computes the precedence of each prod. */
void ParseData::resolveProductionEls()
{
+ /* NOTE: as we process this list it may be growing! */
for ( DefList::Iter prod = prodList; prod.lte(); prod++ ) {
/* First resolve. */
for ( ProdElList::Iter fact = *prod->prodElList; fact.lte(); fact++ )
@@ -702,6 +717,48 @@ void ParseData::resolveGenericTypes()
}
}
+void ParseData::makeTerminalWrappers()
+{
+ /* Make terminal language elements corresponding to each nonterminal in
+ * the grammar. */
+ for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) {
+ if ( lel->type == LangEl::NonTerm ) {
+ String name( lel->name.length() + 5, "_T_%s", lel->name.data );
+ LangEl *termDup = new LangEl( lel->nspace, name, LangEl::Term );
+
+ /* Give the dup the attributes of the nonterminal. This ensures
+ * that the attributes are allocated when patterns and
+ * constructors are parsed. */
+ termDup->objectDef = lel->objectDef;
+
+ langEls.append( termDup );
+ lel->termDup = termDup;
+ termDup->termDup = lel;
+ }
+ }
+}
+
+void ParseData::makeEofElements()
+{
+ /* Make eof language elements for each user terminal. This is a bit excessive and
+ * need to be reduced to the ones that we need parsers for, but we don't know that yet.
+ * Another pass before this one is needed. */
+ for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) {
+ if ( lel->eofLel == 0 &&
+ lel != eofKlangEl &&
+ lel != errorKlangEl &&
+ lel != noTokenKlangEl )
+ {
+ String name( lel->name.length() + 5, "_eof_%s", lel->name.data );
+ LangEl *eofLel = new LangEl( lel->nspace, name, LangEl::Term );
+
+ langEls.append( eofLel );
+ lel->eofLel = eofLel;
+ eofLel->eofLel = lel;
+ eofLel->isEOF = true;
+ }
+ }
+}
void ParseData::typeResolve()
{
@@ -717,14 +774,16 @@ void ParseData::typeResolve()
resolveReplacementEls();
resolveAccumEls();
- /* This needs to happen before the scanner is built. */
- resolveProductionEls();
-
resolveParseTree();
- makeTerminalWrappers();
- makeEofElements();
resolveGenericTypes();
argvTypeRef->lookupType( this );
+
+ /* We must do this as the last step in the type resolution process because
+ * all type resolves can cause new language elments with associated
+ * productions. They get tacked onto the end of the list of productions.
+ * Doing it at the end results processing a growing list. */
+ resolveProductionEls();
+
}