summaryrefslogtreecommitdiff
path: root/colm
diff options
context:
space:
mode:
authorAdrian Thurston <thurston@complang.org>2011-07-28 02:19:59 +0000
committerAdrian Thurston <thurston@complang.org>2011-07-28 02:19:59 +0000
commit6a78200a3d72a12bd0e16a9350fd0eaafa15b548 (patch)
tree44391faa9656f4eec62757576f84eb654d5dab20 /colm
parentf8bd8a207022503a6628a2cdd2c8b7a9a64792fb (diff)
downloadcolm-6a78200a3d72a12bd0e16a9350fd0eaafa15b548.tar.gz
Pointer and Ref type references needed work following the recent improvements
to the type system. Rather than use a bool in typeref, wrap them in a whole typeref. refs #308. Added the analysis.cc file and moved the top-level compilation calls there. Moved a few function calls around to fix some crashes observed during testing. Have to be concerned about type resolution in productions that are created during type resolution (repeats, etc).
Diffstat (limited to 'colm')
-rw-r--r--colm/Makefile.am2
-rw-r--r--colm/analysis.cc121
-rw-r--r--colm/compile.cc4
-rw-r--r--colm/declare.cc42
-rw-r--r--colm/lmparse.kl5
-rw-r--r--colm/parsedata.cc51
-rw-r--r--colm/parsetree.h29
-rw-r--r--colm/pdabuild.cc59
-rw-r--r--colm/resolve.cc87
9 files changed, 238 insertions, 162 deletions
diff --git a/colm/Makefile.am b/colm/Makefile.am
index 6b0f6dbf..f29d4f66 100644
--- a/colm/Makefile.am
+++ b/colm/Makefile.am
@@ -165,7 +165,7 @@ colm_SOURCES = \
parsedata.cc fsmstate.cc fsmbase.cc fsmattach.cc fsmmin.cc \
fsmgraph.cc pdagraph.cc pdabuild.cc pdacodegen.cc fsmcodegen.cc \
redfsm.cc fsmexec.cc main.cc redbuild.cc closure.cc fsmap.cc \
- dotgen.cc pcheck.cc ctinput.cc declare.cc
+ dotgen.cc pcheck.cc ctinput.cc declare.cc analysis.cc
colmincdir = $(includedir)/colm
diff --git a/colm/analysis.cc b/colm/analysis.cc
new file mode 100644
index 00000000..14035ec0
--- /dev/null
+++ b/colm/analysis.cc
@@ -0,0 +1,121 @@
+/*
+ * Copyright 2011 Adrian Thurston <thurston@complang.org>
+ */
+
+/* This file is part of Colm.
+ *
+ * Colm is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Colm is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Colm; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <iostream>
+#include <iomanip>
+#include <errno.h>
+#include <stdlib.h>
+#include <limits.h>
+#include <sstream>
+
+#include "colm.h"
+#include "lmparse.h"
+#include "parsedata.h"
+#include "parsetree.h"
+#include "mergesort.h"
+#include "redbuild.h"
+#include "pdacodegen.h"
+#include "fsmcodegen.h"
+#include "fsmrun.h"
+#include "pdarun.h"
+
+using namespace std;
+
+
+void ParseData::prepGrammar()
+{
+ /* This will create language elements. */
+ wrapNonTerminals();
+
+ makeKlangElIds();
+ makeKlangElNames();
+ makeDefinitionNames();
+ noUndefindKlangEls();
+
+ /* Put the language elements in an index by language element id. */
+ langElIndex = new LangEl*[nextSymbolId+1];
+ memset( langElIndex, 0, sizeof(LangEl*)*(nextSymbolId+1) );
+ for ( LelList::Iter lel = langEls; lel.lte(); lel++ )
+ langElIndex[lel->id] = lel;
+
+ makeProdFsms();
+
+ /* Allocate the Runtime data now. Every PdaTable that we make
+ * will reference it, but it will be filled in after all the tables are
+ * built. */
+ runtimeData = new RuntimeData;
+}
+
+void ParseData::semanticAnalysis()
+{
+ beginProcessing();
+ initKeyOps();
+
+ /* Type declaration. */
+ typeDeclaration();
+
+ /* Type resolving. */
+ typeResolve();
+
+ makeTerminalWrappers();
+ makeEofElements();
+
+ /*
+ * Parsers
+ */
+
+ /* Init the longest match data */
+ initLongestMatchData();
+ FsmGraph *fsmGraph = makeScanner();
+
+ if ( colm_log_compile ) {
+ printNameTree( fsmGraph->rootName );
+ printNameIndex( fsmGraph->nameIndex );
+ }
+
+ prepGrammar();
+
+ /* Compile bytecode. */
+ compileByteCode();
+
+ /* Make the reduced fsm. */
+ RedFsmBuild reduce( sectionName, this, fsmGraph );
+ redFsm = reduce.reduceMachine();
+
+ BstSet<LangEl*> parserEls;
+ collectParserEls( parserEls );
+
+ makeParser( parserEls );
+
+ /* Make the scanner tables. */
+ fsmTables = redFsm->makeFsmTables();
+
+ /* Now that all parsers are built, make the global runtimeData. */
+ makeRuntimeData();
+
+ /*
+ * All compilation is now complete.
+ */
+
+ /* Parse patterns and replacements. */
+ parsePatterns();
+}
+
diff --git a/colm/compile.cc b/colm/compile.cc
index 371185e2..8b4178fc 100644
--- a/colm/compile.cc
+++ b/colm/compile.cc
@@ -437,7 +437,7 @@ long LangVarRef::loadQualificationRefs( ParseData *pd, CodeVect &code ) const
code.append( el->typeRef->iterDef->inRefFromCur );
code.appendHalf( el->offset );
}
- else if ( el->typeRef->isRef ) {
+ else if ( el->typeRef->type == TypeRef::Ref ) {
code.append( IN_REF_FROM_REF );
code.appendHalf( el->offset );
}
@@ -898,7 +898,7 @@ ObjField *LangVarRef::evaluateRef( ParseData *pd, CodeVect &code, long pushCount
code.append( lookup.objField->typeRef->iterDef->inRefFromCur );
code.appendHalf( lookup.objField->offset );
}
- else if ( lookup.objField->typeRef->isRef ) {
+ else if ( lookup.objField->typeRef->type == TypeRef::Ref ) {
code.append( IN_REF_FROM_REF );
code.appendHalf( lookup.objField->offset );
}
diff --git a/colm/declare.cc b/colm/declare.cc
index be75de66..6281e793 100644
--- a/colm/declare.cc
+++ b/colm/declare.cc
@@ -104,48 +104,6 @@ void ParseData::declareBaseKlangEls()
anyKlangEl = declareLangEl( this, rootNamespace, "any", LangEl::NonTerm );
}
-void ParseData::makeTerminalWrappers()
-{
- /* Make terminal language elements corresponding to each nonterminal in
- * the grammar. */
- for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) {
- if ( lel->type == LangEl::NonTerm ) {
- String name( lel->name.length() + 5, "_T_%s", lel->name.data );
- LangEl *termDup = new LangEl( lel->nspace, name, LangEl::Term );
-
- /* Give the dup the attributes of the nonterminal. This ensures
- * that the attributes are allocated when patterns and
- * constructors are parsed. */
- termDup->objectDef = lel->objectDef;
-
- langEls.append( termDup );
- lel->termDup = termDup;
- termDup->termDup = lel;
- }
- }
-}
-
-void ParseData::makeEofElements()
-{
- /* Make eof language elements for each user terminal. This is a bit excessive and
- * need to be reduced to the ones that we need parsers for, but we don't know that yet.
- * Another pass before this one is needed. */
- for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) {
- if ( lel->eofLel == 0 &&
- lel != eofKlangEl &&
- lel != errorKlangEl &&
- lel != noTokenKlangEl )
- {
- String name( lel->name.length() + 5, "_eof_%s", lel->name.data );
- LangEl *eofLel = new LangEl( lel->nspace, name, LangEl::Term );
-
- langEls.append( eofLel );
- lel->eofLel = eofLel;
- eofLel->eofLel = lel;
- eofLel->isEOF = true;
- }
- }
-}
void ParseData::addProdRedObjectVar( ObjectDef *localFrame, LangEl *nonTerm )
{
diff --git a/colm/lmparse.kl b/colm/lmparse.kl
index 80f8acf1..cacfea02 100644
--- a/colm/lmparse.kl
+++ b/colm/lmparse.kl
@@ -186,8 +186,7 @@ nonterm reference_type_ref uses type_ref;
reference_type_ref: KW_Ref type_ref
final {
- $$->typeRef = $2->typeRef;
- $$->typeRef->isRef = true;
+ $$->typeRef = new TypeRef( TypeRef::Ref, $1->loc, $2->typeRef );
};
nonterm global_def uses statement;
@@ -428,7 +427,7 @@ basic_type_ref: KW_Ptr region_qual TK_Word opt_repeat
final {
$$->typeRef = new TypeRef( $1->loc, $2->nspaceQual, $3->data );
$$->typeRef->repeatType = $4->repeatType;
- $$->typeRef->isPtr = true;
+ $$->typeRef = new TypeRef( TypeRef::Ptr, $1->loc, $$->typeRef );
};
diff --git a/colm/parsedata.cc b/colm/parsedata.cc
index 14c2743e..3e9d2bd6 100644
--- a/colm/parsedata.cc
+++ b/colm/parsedata.cc
@@ -1403,57 +1403,6 @@ void ParseData::collectParserEls( BstSet<LangEl*> &parserEls )
}
}
-void ParseData::semanticAnalysis()
-{
- beginProcessing();
- initKeyOps();
-
- /* Type declaration. */
- typeDeclaration();
-
- /* Type resolving. */
- typeResolve();
-
- /*
- * Parsers
- */
-
- /* Init the longest match data */
- initLongestMatchData();
- FsmGraph *fsmGraph = makeScanner();
-
- if ( colm_log_compile ) {
- printNameTree( fsmGraph->rootName );
- printNameIndex( fsmGraph->nameIndex );
- }
-
- prepGrammar();
-
- /* Compile bytecode. */
- compileByteCode();
-
- /* Make the reduced fsm. */
- RedFsmBuild reduce( sectionName, this, fsmGraph );
- redFsm = reduce.reduceMachine();
-
- BstSet<LangEl*> parserEls;
- collectParserEls( parserEls );
-
- makeParser( parserEls );
-
- /* Make the scanner tables. */
- fsmTables = redFsm->makeFsmTables();
-
- /* Now that all parsers are built, make the global runtimeData. */
- makeRuntimeData();
-
- /*
- * All compilation is now complete.
- */
-
- /* Parse patterns and replacements. */
- parsePatterns();
-}
void ParseData::generateOutput()
{
diff --git a/colm/parsetree.h b/colm/parsetree.h
index 51769ca5..afc62e68 100644
--- a/colm/parsetree.h
+++ b/colm/parsetree.h
@@ -1507,29 +1507,38 @@ struct TypeRef
Map,
List,
Vector,
- Accum
+ Accum,
+ Ref,
+ Ptr,
};
/* Qualification and a type name. These require lookup. */
TypeRef( const InputLoc &loc, NamespaceQual *nspaceQual, String typeName ) :
type(Name), loc(loc), nspaceQual(nspaceQual), typeName(typeName), pdaLiteral(0), iterDef(0),
typeRef1(0), typeRef2(0),
- isPtr(false), isRef(false), repeatType(RepeatNone),
+ repeatType(RepeatNone),
nspace(0), uniqueType(0), searchUniqueType(0), generic(0) {}
/* Qualification and a type name. These require lookup. */
TypeRef( const InputLoc &loc, NamespaceQual *nspaceQual, PdaLiteral *pdaLiteral ) :
type(Literal), loc(loc), nspaceQual(nspaceQual), pdaLiteral(pdaLiteral), iterDef(0),
typeRef1(0), typeRef2(0),
- isPtr(false), isRef(false), repeatType(RepeatNone),
+ repeatType(RepeatNone),
nspace(0), uniqueType(0), searchUniqueType(0), generic(0) {}
- /* Unique type is given directly. */
+ /* Generics. */
TypeRef( Type type, const InputLoc &loc, NamespaceQual *nspaceQual, TypeRef *typeRef1, TypeRef *typeRef2 ) :
type(type), loc(loc), nspaceQual(nspaceQual), pdaLiteral(0), iterDef(0),
typeRef1(typeRef1), typeRef2(typeRef2),
- isPtr(false), isRef(false), repeatType(RepeatNone),
- nspace(0), uniqueType(uniqueType), searchUniqueType(0), generic(0) {}
+ repeatType(RepeatNone),
+ nspace(0), uniqueType(0), searchUniqueType(0), generic(0) {}
+
+ /* Pointers and Refs. */
+ TypeRef( Type type, const InputLoc &loc, TypeRef *typeRef1 ) :
+ type(type), loc(loc), nspaceQual(0), pdaLiteral(0), iterDef(0),
+ typeRef1(typeRef1), typeRef2(0),
+ repeatType(RepeatNone),
+ nspace(0), uniqueType(0), searchUniqueType(0), generic(0) {}
/* Resolution not needed. */
@@ -1538,14 +1547,14 @@ struct TypeRef
UniqueType *searchUniqueType ) :
type(Iterator), loc(loc), nspaceQual(0), pdaLiteral(0), iterDef(iterDef),
typeRef1(0), typeRef2(0),
- isPtr(false), isRef(false), repeatType(RepeatNone),
+ repeatType(RepeatNone),
nspace(0), uniqueType(uniqueType), searchUniqueType(searchUniqueType), generic(0) {}
/* Unique type is given directly. */
TypeRef( const InputLoc &loc, UniqueType *uniqueType ) :
type(Unspecified), loc(loc), nspaceQual(0), pdaLiteral(0), iterDef(0),
typeRef1(0), typeRef2(0),
- isPtr(false), isRef(false), repeatType(RepeatNone),
+ repeatType(RepeatNone),
nspace(0), uniqueType(uniqueType), searchUniqueType(0), generic(0) {}
void resolveRepeat( ParseData *pd );
@@ -1557,6 +1566,8 @@ struct TypeRef
UniqueType *lookupTypeVector( ParseData *pd );
UniqueType *lookupTypeAccum( ParseData *pd );
UniqueType *lookupType( ParseData *pd );
+ UniqueType *lookupTypePtr( ParseData *pd );
+ UniqueType *lookupTypeRef( ParseData *pd );
Type type;
InputLoc loc;
@@ -1566,8 +1577,6 @@ struct TypeRef
IterDef *iterDef;
TypeRef *typeRef1;
TypeRef *typeRef2;
- bool isPtr;
- bool isRef;
RepeatType repeatType;
/* Resolved. */
diff --git a/colm/pdabuild.cc b/colm/pdabuild.cc
index 98f9954d..884dd197 100644
--- a/colm/pdabuild.cc
+++ b/colm/pdabuild.cc
@@ -972,6 +972,10 @@ void ParseData::wrapNonTerminals()
prodList.length(), Definition::Production );
prodList.append( lel->rootDef );
rootKlangEl->defList.append( lel->rootDef );
+
+ /* First resolve. */
+ for ( ProdElList::Iter fact = *prodElList; fact.lte(); fact++ )
+ resolveFactor( fact );
}
}
@@ -1650,6 +1654,21 @@ struct CmpSpan
}
};
+PdaGraph *ParseData::makePdaGraph( KlangElSet &parserEls )
+{
+ //for ( DefList::Iter prod = prodList; prod.lte(); prod++ )
+ // cerr << prod->prodId << " " << prod->data << endl;
+
+ PdaGraph *pdaGraph = new PdaGraph();
+ lalr1GenerateParser( pdaGraph, parserEls );
+ pdaGraph->setStateNumbers();
+ analyzeMachine( pdaGraph, parserEls );
+
+ //cerr << "NUMBER OF STATES: " << pdaGraph->stateList.length() << endl;
+
+ return pdaGraph;
+}
+
PdaTables *ParseData::makePdaTables( PdaGraph *pdaGraph )
{
int count, pos;
@@ -1832,47 +1851,9 @@ PdaTables *ParseData::makePdaTables( PdaGraph *pdaGraph )
return pdaTables;
}
-void ParseData::prepGrammar()
-{
- /* This will create language elements. */
- wrapNonTerminals();
-
- makeKlangElIds();
- makeKlangElNames();
- makeDefinitionNames();
- noUndefindKlangEls();
-
- /* Put the language elements in an index by language element id. */
- langElIndex = new LangEl*[nextSymbolId+1];
- memset( langElIndex, 0, sizeof(LangEl*)*(nextSymbolId+1) );
- for ( LelList::Iter lel = langEls; lel.lte(); lel++ )
- langElIndex[lel->id] = lel;
-
- makeProdFsms();
-
- /* Allocate the Runtime data now. Every PdaTable that we make
- * will reference it, but it will be filled in after all the tables are
- * built. */
- runtimeData = new RuntimeData;
-}
-
-PdaGraph *ParseData::makePdaGraph( KlangElSet &parserEls )
-{
- //for ( DefList::Iter prod = prodList; prod.lte(); prod++ )
- // cerr << prod->prodId << " " << prod->data << endl;
-
- PdaGraph *pdaGraph = new PdaGraph();
- lalr1GenerateParser( pdaGraph, parserEls );
- pdaGraph->setStateNumbers();
- analyzeMachine( pdaGraph, parserEls );
-
- //cerr << "NUMBER OF STATES: " << pdaGraph->stateList.length() << endl;
-
- return pdaGraph;
-}
-
void ParseData::makeParser( KlangElSet &parserEls )
{
pdaGraph = makePdaGraph( parserEls );
pdaTables = makePdaTables( pdaGraph );
}
+
diff --git a/colm/resolve.cc b/colm/resolve.cc
index 78e03d8d..aae2a8ec 100644
--- a/colm/resolve.cc
+++ b/colm/resolve.cc
@@ -47,10 +47,8 @@ UniqueType *TypeRef::lookupTypeName( ParseData *pd )
case TypeMapEl::TypeAliasType:
return inDict->typeRef->lookupType( pd );
- case TypeMapEl::LangElType: {
- long typeId = ( isPtr ? TYPE_PTR : ( isRef ? TYPE_REF : TYPE_TREE ) );
- return pd->findUniqueType( typeId, inDict->value );
- }
+ case TypeMapEl::LangElType:
+ return pd->findUniqueType( TYPE_TREE, inDict->value );
}
}
@@ -78,10 +76,8 @@ UniqueType *TypeRef::lookupTypeLiteral( ParseData *pd )
while ( nspace != 0 ) {
LiteralDictEl *ldel = nspace->literalDict.find( interp );
- if ( ldel != 0 ) {
- long typeId = ( isPtr ? TYPE_PTR : ( isRef ? TYPE_REF : TYPE_TREE ) );
- return pd->findUniqueType( typeId, ldel->value->token );
- }
+ if ( ldel != 0 )
+ return pd->findUniqueType( TYPE_TREE, ldel->value->token );
nspace = nspace->parentNamespace;
}
@@ -216,10 +212,22 @@ UniqueType *TypeRef::lookupTypeAccum( ParseData *pd )
return pd->findUniqueType( TYPE_TREE, inMap->generic->langEl );
}
+UniqueType *TypeRef::lookupTypePtr( ParseData *pd )
+{
+ typeRef1->lookupType( pd );
+ return pd->findUniqueType( TYPE_PTR, typeRef1->uniqueType->langEl );
+}
+
+UniqueType *TypeRef::lookupTypeRef( ParseData *pd )
+{
+ typeRef1->lookupType( pd );
+ return pd->findUniqueType( TYPE_REF, typeRef1->uniqueType->langEl );
+}
+
void TypeRef::resolveRepeat( ParseData *pd )
{
if ( uniqueType->typeId != TYPE_TREE )
- error() << "cannot repeat non-tree type" << endp;
+ error(loc) << "cannot repeat non-tree type" << endp;
UniqueRepeat searchKey( repeatType, uniqueType->langEl );
UniqueRepeat *uniqueRepeat = pd->uniqeRepeatMap.find( &searchKey );
@@ -288,6 +296,12 @@ UniqueType *TypeRef::lookupType( ParseData *pd )
case Accum:
uniqueType = lookupTypeAccum( pd );
break;
+ case Ptr:
+ uniqueType = lookupTypePtr( pd );
+ break;
+ case Ref:
+ uniqueType = lookupTypeRef( pd );
+ break;
case Iterator:
case Unspecified:
/* No lookup needed, unique type(s) set when constructed. */
@@ -668,6 +682,7 @@ void ParseData::resolveAccumEls()
/* Resolves production els and computes the precedence of each prod. */
void ParseData::resolveProductionEls()
{
+ /* NOTE: as we process this list it may be growing! */
for ( DefList::Iter prod = prodList; prod.lte(); prod++ ) {
/* First resolve. */
for ( ProdElList::Iter fact = *prod->prodElList; fact.lte(); fact++ )
@@ -702,6 +717,48 @@ void ParseData::resolveGenericTypes()
}
}
+void ParseData::makeTerminalWrappers()
+{
+ /* Make terminal language elements corresponding to each nonterminal in
+ * the grammar. */
+ for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) {
+ if ( lel->type == LangEl::NonTerm ) {
+ String name( lel->name.length() + 5, "_T_%s", lel->name.data );
+ LangEl *termDup = new LangEl( lel->nspace, name, LangEl::Term );
+
+ /* Give the dup the attributes of the nonterminal. This ensures
+ * that the attributes are allocated when patterns and
+ * constructors are parsed. */
+ termDup->objectDef = lel->objectDef;
+
+ langEls.append( termDup );
+ lel->termDup = termDup;
+ termDup->termDup = lel;
+ }
+ }
+}
+
+void ParseData::makeEofElements()
+{
+ /* Make eof language elements for each user terminal. This is a bit excessive and
+ * need to be reduced to the ones that we need parsers for, but we don't know that yet.
+ * Another pass before this one is needed. */
+ for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) {
+ if ( lel->eofLel == 0 &&
+ lel != eofKlangEl &&
+ lel != errorKlangEl &&
+ lel != noTokenKlangEl )
+ {
+ String name( lel->name.length() + 5, "_eof_%s", lel->name.data );
+ LangEl *eofLel = new LangEl( lel->nspace, name, LangEl::Term );
+
+ langEls.append( eofLel );
+ lel->eofLel = eofLel;
+ eofLel->eofLel = lel;
+ eofLel->isEOF = true;
+ }
+ }
+}
void ParseData::typeResolve()
{
@@ -717,14 +774,16 @@ void ParseData::typeResolve()
resolveReplacementEls();
resolveAccumEls();
- /* This needs to happen before the scanner is built. */
- resolveProductionEls();
-
resolveParseTree();
- makeTerminalWrappers();
- makeEofElements();
resolveGenericTypes();
argvTypeRef->lookupType( this );
+
+ /* We must do this as the last step in the type resolution process because
+ * all type resolves can cause new language elments with associated
+ * productions. They get tacked onto the end of the list of productions.
+ * Doing it at the end results processing a growing list. */
+ resolveProductionEls();
+
}