diff options
author | Adrian Thurston <thurston@complang.org> | 2011-07-28 02:19:59 +0000 |
---|---|---|
committer | Adrian Thurston <thurston@complang.org> | 2011-07-28 02:19:59 +0000 |
commit | 6a78200a3d72a12bd0e16a9350fd0eaafa15b548 (patch) | |
tree | 44391faa9656f4eec62757576f84eb654d5dab20 | |
parent | f8bd8a207022503a6628a2cdd2c8b7a9a64792fb (diff) | |
download | colm-6a78200a3d72a12bd0e16a9350fd0eaafa15b548.tar.gz |
Pointer and Ref type references needed work following the recent improvements
to the type system. Rather than use a bool in typeref, wrap them in a whole
typeref. refs #308.
Added the analysis.cc file and moved the top-level compilation calls there.
Moved a few function calls around to fix some crashes observed during testing.
Have to be concerned about type resolution in productions that are created
during type resolution (repeats, etc).
-rw-r--r-- | colm/Makefile.am | 2 | ||||
-rw-r--r-- | colm/analysis.cc | 121 | ||||
-rw-r--r-- | colm/compile.cc | 4 | ||||
-rw-r--r-- | colm/declare.cc | 42 | ||||
-rw-r--r-- | colm/lmparse.kl | 5 | ||||
-rw-r--r-- | colm/parsedata.cc | 51 | ||||
-rw-r--r-- | colm/parsetree.h | 29 | ||||
-rw-r--r-- | colm/pdabuild.cc | 59 | ||||
-rw-r--r-- | colm/resolve.cc | 87 |
9 files changed, 238 insertions, 162 deletions
diff --git a/colm/Makefile.am b/colm/Makefile.am index 6b0f6dbf..f29d4f66 100644 --- a/colm/Makefile.am +++ b/colm/Makefile.am @@ -165,7 +165,7 @@ colm_SOURCES = \ parsedata.cc fsmstate.cc fsmbase.cc fsmattach.cc fsmmin.cc \ fsmgraph.cc pdagraph.cc pdabuild.cc pdacodegen.cc fsmcodegen.cc \ redfsm.cc fsmexec.cc main.cc redbuild.cc closure.cc fsmap.cc \ - dotgen.cc pcheck.cc ctinput.cc declare.cc + dotgen.cc pcheck.cc ctinput.cc declare.cc analysis.cc colmincdir = $(includedir)/colm diff --git a/colm/analysis.cc b/colm/analysis.cc new file mode 100644 index 00000000..14035ec0 --- /dev/null +++ b/colm/analysis.cc @@ -0,0 +1,121 @@ +/* + * Copyright 2011 Adrian Thurston <thurston@complang.org> + */ + +/* This file is part of Colm. + * + * Colm is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Colm is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Colm; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <iostream> +#include <iomanip> +#include <errno.h> +#include <stdlib.h> +#include <limits.h> +#include <sstream> + +#include "colm.h" +#include "lmparse.h" +#include "parsedata.h" +#include "parsetree.h" +#include "mergesort.h" +#include "redbuild.h" +#include "pdacodegen.h" +#include "fsmcodegen.h" +#include "fsmrun.h" +#include "pdarun.h" + +using namespace std; + + +void ParseData::prepGrammar() +{ + /* This will create language elements. */ + wrapNonTerminals(); + + makeKlangElIds(); + makeKlangElNames(); + makeDefinitionNames(); + noUndefindKlangEls(); + + /* Put the language elements in an index by language element id. */ + langElIndex = new LangEl*[nextSymbolId+1]; + memset( langElIndex, 0, sizeof(LangEl*)*(nextSymbolId+1) ); + for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) + langElIndex[lel->id] = lel; + + makeProdFsms(); + + /* Allocate the Runtime data now. Every PdaTable that we make + * will reference it, but it will be filled in after all the tables are + * built. */ + runtimeData = new RuntimeData; +} + +void ParseData::semanticAnalysis() +{ + beginProcessing(); + initKeyOps(); + + /* Type declaration. */ + typeDeclaration(); + + /* Type resolving. */ + typeResolve(); + + makeTerminalWrappers(); + makeEofElements(); + + /* + * Parsers + */ + + /* Init the longest match data */ + initLongestMatchData(); + FsmGraph *fsmGraph = makeScanner(); + + if ( colm_log_compile ) { + printNameTree( fsmGraph->rootName ); + printNameIndex( fsmGraph->nameIndex ); + } + + prepGrammar(); + + /* Compile bytecode. */ + compileByteCode(); + + /* Make the reduced fsm. */ + RedFsmBuild reduce( sectionName, this, fsmGraph ); + redFsm = reduce.reduceMachine(); + + BstSet<LangEl*> parserEls; + collectParserEls( parserEls ); + + makeParser( parserEls ); + + /* Make the scanner tables. */ + fsmTables = redFsm->makeFsmTables(); + + /* Now that all parsers are built, make the global runtimeData. */ + makeRuntimeData(); + + /* + * All compilation is now complete. + */ + + /* Parse patterns and replacements. */ + parsePatterns(); +} + diff --git a/colm/compile.cc b/colm/compile.cc index 371185e2..8b4178fc 100644 --- a/colm/compile.cc +++ b/colm/compile.cc @@ -437,7 +437,7 @@ long LangVarRef::loadQualificationRefs( ParseData *pd, CodeVect &code ) const code.append( el->typeRef->iterDef->inRefFromCur ); code.appendHalf( el->offset ); } - else if ( el->typeRef->isRef ) { + else if ( el->typeRef->type == TypeRef::Ref ) { code.append( IN_REF_FROM_REF ); code.appendHalf( el->offset ); } @@ -898,7 +898,7 @@ ObjField *LangVarRef::evaluateRef( ParseData *pd, CodeVect &code, long pushCount code.append( lookup.objField->typeRef->iterDef->inRefFromCur ); code.appendHalf( lookup.objField->offset ); } - else if ( lookup.objField->typeRef->isRef ) { + else if ( lookup.objField->typeRef->type == TypeRef::Ref ) { code.append( IN_REF_FROM_REF ); code.appendHalf( lookup.objField->offset ); } diff --git a/colm/declare.cc b/colm/declare.cc index be75de66..6281e793 100644 --- a/colm/declare.cc +++ b/colm/declare.cc @@ -104,48 +104,6 @@ void ParseData::declareBaseKlangEls() anyKlangEl = declareLangEl( this, rootNamespace, "any", LangEl::NonTerm ); } -void ParseData::makeTerminalWrappers() -{ - /* Make terminal language elements corresponding to each nonterminal in - * the grammar. */ - for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) { - if ( lel->type == LangEl::NonTerm ) { - String name( lel->name.length() + 5, "_T_%s", lel->name.data ); - LangEl *termDup = new LangEl( lel->nspace, name, LangEl::Term ); - - /* Give the dup the attributes of the nonterminal. This ensures - * that the attributes are allocated when patterns and - * constructors are parsed. */ - termDup->objectDef = lel->objectDef; - - langEls.append( termDup ); - lel->termDup = termDup; - termDup->termDup = lel; - } - } -} - -void ParseData::makeEofElements() -{ - /* Make eof language elements for each user terminal. This is a bit excessive and - * need to be reduced to the ones that we need parsers for, but we don't know that yet. - * Another pass before this one is needed. */ - for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) { - if ( lel->eofLel == 0 && - lel != eofKlangEl && - lel != errorKlangEl && - lel != noTokenKlangEl ) - { - String name( lel->name.length() + 5, "_eof_%s", lel->name.data ); - LangEl *eofLel = new LangEl( lel->nspace, name, LangEl::Term ); - - langEls.append( eofLel ); - lel->eofLel = eofLel; - eofLel->eofLel = lel; - eofLel->isEOF = true; - } - } -} void ParseData::addProdRedObjectVar( ObjectDef *localFrame, LangEl *nonTerm ) { diff --git a/colm/lmparse.kl b/colm/lmparse.kl index 80f8acf1..cacfea02 100644 --- a/colm/lmparse.kl +++ b/colm/lmparse.kl @@ -186,8 +186,7 @@ nonterm reference_type_ref uses type_ref; reference_type_ref: KW_Ref type_ref final { - $$->typeRef = $2->typeRef; - $$->typeRef->isRef = true; + $$->typeRef = new TypeRef( TypeRef::Ref, $1->loc, $2->typeRef ); }; nonterm global_def uses statement; @@ -428,7 +427,7 @@ basic_type_ref: KW_Ptr region_qual TK_Word opt_repeat final { $$->typeRef = new TypeRef( $1->loc, $2->nspaceQual, $3->data ); $$->typeRef->repeatType = $4->repeatType; - $$->typeRef->isPtr = true; + $$->typeRef = new TypeRef( TypeRef::Ptr, $1->loc, $$->typeRef ); }; diff --git a/colm/parsedata.cc b/colm/parsedata.cc index 14c2743e..3e9d2bd6 100644 --- a/colm/parsedata.cc +++ b/colm/parsedata.cc @@ -1403,57 +1403,6 @@ void ParseData::collectParserEls( BstSet<LangEl*> &parserEls ) } } -void ParseData::semanticAnalysis() -{ - beginProcessing(); - initKeyOps(); - - /* Type declaration. */ - typeDeclaration(); - - /* Type resolving. */ - typeResolve(); - - /* - * Parsers - */ - - /* Init the longest match data */ - initLongestMatchData(); - FsmGraph *fsmGraph = makeScanner(); - - if ( colm_log_compile ) { - printNameTree( fsmGraph->rootName ); - printNameIndex( fsmGraph->nameIndex ); - } - - prepGrammar(); - - /* Compile bytecode. */ - compileByteCode(); - - /* Make the reduced fsm. */ - RedFsmBuild reduce( sectionName, this, fsmGraph ); - redFsm = reduce.reduceMachine(); - - BstSet<LangEl*> parserEls; - collectParserEls( parserEls ); - - makeParser( parserEls ); - - /* Make the scanner tables. */ - fsmTables = redFsm->makeFsmTables(); - - /* Now that all parsers are built, make the global runtimeData. */ - makeRuntimeData(); - - /* - * All compilation is now complete. - */ - - /* Parse patterns and replacements. */ - parsePatterns(); -} void ParseData::generateOutput() { diff --git a/colm/parsetree.h b/colm/parsetree.h index 51769ca5..afc62e68 100644 --- a/colm/parsetree.h +++ b/colm/parsetree.h @@ -1507,29 +1507,38 @@ struct TypeRef Map, List, Vector, - Accum + Accum, + Ref, + Ptr, }; /* Qualification and a type name. These require lookup. */ TypeRef( const InputLoc &loc, NamespaceQual *nspaceQual, String typeName ) : type(Name), loc(loc), nspaceQual(nspaceQual), typeName(typeName), pdaLiteral(0), iterDef(0), typeRef1(0), typeRef2(0), - isPtr(false), isRef(false), repeatType(RepeatNone), + repeatType(RepeatNone), nspace(0), uniqueType(0), searchUniqueType(0), generic(0) {} /* Qualification and a type name. These require lookup. */ TypeRef( const InputLoc &loc, NamespaceQual *nspaceQual, PdaLiteral *pdaLiteral ) : type(Literal), loc(loc), nspaceQual(nspaceQual), pdaLiteral(pdaLiteral), iterDef(0), typeRef1(0), typeRef2(0), - isPtr(false), isRef(false), repeatType(RepeatNone), + repeatType(RepeatNone), nspace(0), uniqueType(0), searchUniqueType(0), generic(0) {} - /* Unique type is given directly. */ + /* Generics. */ TypeRef( Type type, const InputLoc &loc, NamespaceQual *nspaceQual, TypeRef *typeRef1, TypeRef *typeRef2 ) : type(type), loc(loc), nspaceQual(nspaceQual), pdaLiteral(0), iterDef(0), typeRef1(typeRef1), typeRef2(typeRef2), - isPtr(false), isRef(false), repeatType(RepeatNone), - nspace(0), uniqueType(uniqueType), searchUniqueType(0), generic(0) {} + repeatType(RepeatNone), + nspace(0), uniqueType(0), searchUniqueType(0), generic(0) {} + + /* Pointers and Refs. */ + TypeRef( Type type, const InputLoc &loc, TypeRef *typeRef1 ) : + type(type), loc(loc), nspaceQual(0), pdaLiteral(0), iterDef(0), + typeRef1(typeRef1), typeRef2(0), + repeatType(RepeatNone), + nspace(0), uniqueType(0), searchUniqueType(0), generic(0) {} /* Resolution not needed. */ @@ -1538,14 +1547,14 @@ struct TypeRef UniqueType *searchUniqueType ) : type(Iterator), loc(loc), nspaceQual(0), pdaLiteral(0), iterDef(iterDef), typeRef1(0), typeRef2(0), - isPtr(false), isRef(false), repeatType(RepeatNone), + repeatType(RepeatNone), nspace(0), uniqueType(uniqueType), searchUniqueType(searchUniqueType), generic(0) {} /* Unique type is given directly. */ TypeRef( const InputLoc &loc, UniqueType *uniqueType ) : type(Unspecified), loc(loc), nspaceQual(0), pdaLiteral(0), iterDef(0), typeRef1(0), typeRef2(0), - isPtr(false), isRef(false), repeatType(RepeatNone), + repeatType(RepeatNone), nspace(0), uniqueType(uniqueType), searchUniqueType(0), generic(0) {} void resolveRepeat( ParseData *pd ); @@ -1557,6 +1566,8 @@ struct TypeRef UniqueType *lookupTypeVector( ParseData *pd ); UniqueType *lookupTypeAccum( ParseData *pd ); UniqueType *lookupType( ParseData *pd ); + UniqueType *lookupTypePtr( ParseData *pd ); + UniqueType *lookupTypeRef( ParseData *pd ); Type type; InputLoc loc; @@ -1566,8 +1577,6 @@ struct TypeRef IterDef *iterDef; TypeRef *typeRef1; TypeRef *typeRef2; - bool isPtr; - bool isRef; RepeatType repeatType; /* Resolved. */ diff --git a/colm/pdabuild.cc b/colm/pdabuild.cc index 98f9954d..884dd197 100644 --- a/colm/pdabuild.cc +++ b/colm/pdabuild.cc @@ -972,6 +972,10 @@ void ParseData::wrapNonTerminals() prodList.length(), Definition::Production ); prodList.append( lel->rootDef ); rootKlangEl->defList.append( lel->rootDef ); + + /* First resolve. */ + for ( ProdElList::Iter fact = *prodElList; fact.lte(); fact++ ) + resolveFactor( fact ); } } @@ -1650,6 +1654,21 @@ struct CmpSpan } }; +PdaGraph *ParseData::makePdaGraph( KlangElSet &parserEls ) +{ + //for ( DefList::Iter prod = prodList; prod.lte(); prod++ ) + // cerr << prod->prodId << " " << prod->data << endl; + + PdaGraph *pdaGraph = new PdaGraph(); + lalr1GenerateParser( pdaGraph, parserEls ); + pdaGraph->setStateNumbers(); + analyzeMachine( pdaGraph, parserEls ); + + //cerr << "NUMBER OF STATES: " << pdaGraph->stateList.length() << endl; + + return pdaGraph; +} + PdaTables *ParseData::makePdaTables( PdaGraph *pdaGraph ) { int count, pos; @@ -1832,47 +1851,9 @@ PdaTables *ParseData::makePdaTables( PdaGraph *pdaGraph ) return pdaTables; } -void ParseData::prepGrammar() -{ - /* This will create language elements. */ - wrapNonTerminals(); - - makeKlangElIds(); - makeKlangElNames(); - makeDefinitionNames(); - noUndefindKlangEls(); - - /* Put the language elements in an index by language element id. */ - langElIndex = new LangEl*[nextSymbolId+1]; - memset( langElIndex, 0, sizeof(LangEl*)*(nextSymbolId+1) ); - for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) - langElIndex[lel->id] = lel; - - makeProdFsms(); - - /* Allocate the Runtime data now. Every PdaTable that we make - * will reference it, but it will be filled in after all the tables are - * built. */ - runtimeData = new RuntimeData; -} - -PdaGraph *ParseData::makePdaGraph( KlangElSet &parserEls ) -{ - //for ( DefList::Iter prod = prodList; prod.lte(); prod++ ) - // cerr << prod->prodId << " " << prod->data << endl; - - PdaGraph *pdaGraph = new PdaGraph(); - lalr1GenerateParser( pdaGraph, parserEls ); - pdaGraph->setStateNumbers(); - analyzeMachine( pdaGraph, parserEls ); - - //cerr << "NUMBER OF STATES: " << pdaGraph->stateList.length() << endl; - - return pdaGraph; -} - void ParseData::makeParser( KlangElSet &parserEls ) { pdaGraph = makePdaGraph( parserEls ); pdaTables = makePdaTables( pdaGraph ); } + diff --git a/colm/resolve.cc b/colm/resolve.cc index 78e03d8d..aae2a8ec 100644 --- a/colm/resolve.cc +++ b/colm/resolve.cc @@ -47,10 +47,8 @@ UniqueType *TypeRef::lookupTypeName( ParseData *pd ) case TypeMapEl::TypeAliasType: return inDict->typeRef->lookupType( pd ); - case TypeMapEl::LangElType: { - long typeId = ( isPtr ? TYPE_PTR : ( isRef ? TYPE_REF : TYPE_TREE ) ); - return pd->findUniqueType( typeId, inDict->value ); - } + case TypeMapEl::LangElType: + return pd->findUniqueType( TYPE_TREE, inDict->value ); } } @@ -78,10 +76,8 @@ UniqueType *TypeRef::lookupTypeLiteral( ParseData *pd ) while ( nspace != 0 ) { LiteralDictEl *ldel = nspace->literalDict.find( interp ); - if ( ldel != 0 ) { - long typeId = ( isPtr ? TYPE_PTR : ( isRef ? TYPE_REF : TYPE_TREE ) ); - return pd->findUniqueType( typeId, ldel->value->token ); - } + if ( ldel != 0 ) + return pd->findUniqueType( TYPE_TREE, ldel->value->token ); nspace = nspace->parentNamespace; } @@ -216,10 +212,22 @@ UniqueType *TypeRef::lookupTypeAccum( ParseData *pd ) return pd->findUniqueType( TYPE_TREE, inMap->generic->langEl ); } +UniqueType *TypeRef::lookupTypePtr( ParseData *pd ) +{ + typeRef1->lookupType( pd ); + return pd->findUniqueType( TYPE_PTR, typeRef1->uniqueType->langEl ); +} + +UniqueType *TypeRef::lookupTypeRef( ParseData *pd ) +{ + typeRef1->lookupType( pd ); + return pd->findUniqueType( TYPE_REF, typeRef1->uniqueType->langEl ); +} + void TypeRef::resolveRepeat( ParseData *pd ) { if ( uniqueType->typeId != TYPE_TREE ) - error() << "cannot repeat non-tree type" << endp; + error(loc) << "cannot repeat non-tree type" << endp; UniqueRepeat searchKey( repeatType, uniqueType->langEl ); UniqueRepeat *uniqueRepeat = pd->uniqeRepeatMap.find( &searchKey ); @@ -288,6 +296,12 @@ UniqueType *TypeRef::lookupType( ParseData *pd ) case Accum: uniqueType = lookupTypeAccum( pd ); break; + case Ptr: + uniqueType = lookupTypePtr( pd ); + break; + case Ref: + uniqueType = lookupTypeRef( pd ); + break; case Iterator: case Unspecified: /* No lookup needed, unique type(s) set when constructed. */ @@ -668,6 +682,7 @@ void ParseData::resolveAccumEls() /* Resolves production els and computes the precedence of each prod. */ void ParseData::resolveProductionEls() { + /* NOTE: as we process this list it may be growing! */ for ( DefList::Iter prod = prodList; prod.lte(); prod++ ) { /* First resolve. */ for ( ProdElList::Iter fact = *prod->prodElList; fact.lte(); fact++ ) @@ -702,6 +717,48 @@ void ParseData::resolveGenericTypes() } } +void ParseData::makeTerminalWrappers() +{ + /* Make terminal language elements corresponding to each nonterminal in + * the grammar. */ + for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) { + if ( lel->type == LangEl::NonTerm ) { + String name( lel->name.length() + 5, "_T_%s", lel->name.data ); + LangEl *termDup = new LangEl( lel->nspace, name, LangEl::Term ); + + /* Give the dup the attributes of the nonterminal. This ensures + * that the attributes are allocated when patterns and + * constructors are parsed. */ + termDup->objectDef = lel->objectDef; + + langEls.append( termDup ); + lel->termDup = termDup; + termDup->termDup = lel; + } + } +} + +void ParseData::makeEofElements() +{ + /* Make eof language elements for each user terminal. This is a bit excessive and + * need to be reduced to the ones that we need parsers for, but we don't know that yet. + * Another pass before this one is needed. */ + for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) { + if ( lel->eofLel == 0 && + lel != eofKlangEl && + lel != errorKlangEl && + lel != noTokenKlangEl ) + { + String name( lel->name.length() + 5, "_eof_%s", lel->name.data ); + LangEl *eofLel = new LangEl( lel->nspace, name, LangEl::Term ); + + langEls.append( eofLel ); + lel->eofLel = eofLel; + eofLel->eofLel = lel; + eofLel->isEOF = true; + } + } +} void ParseData::typeResolve() { @@ -717,14 +774,16 @@ void ParseData::typeResolve() resolveReplacementEls(); resolveAccumEls(); - /* This needs to happen before the scanner is built. */ - resolveProductionEls(); - resolveParseTree(); - makeTerminalWrappers(); - makeEofElements(); resolveGenericTypes(); argvTypeRef->lookupType( this ); + + /* We must do this as the last step in the type resolution process because + * all type resolves can cause new language elments with associated + * productions. They get tacked onto the end of the list of productions. + * Doing it at the end results processing a growing list. */ + resolveProductionEls(); + } |