diff options
author | Adrian Thurston <thurston@complang.org> | 2012-07-01 12:48:22 -0400 |
---|---|---|
committer | Adrian Thurston <thurston@complang.org> | 2012-07-01 12:48:22 -0400 |
commit | 247904a84430b8c9151fa6afb68f01b60afb92c9 (patch) | |
tree | 58d498f783a935b02255120c814c387745dc6e41 /colm | |
parent | d8cdec468bb7efad768d25872147533312cffe91 (diff) | |
download | colm-247904a84430b8c9151fa6afb68f01b60afb92c9.tar.gz |
moved 'colm' dir to 'src'
Diffstat (limited to 'colm')
68 files changed, 0 insertions, 44447 deletions
diff --git a/colm/.gitignore b/colm/.gitignore deleted file mode 100644 index fe077f2c..00000000 --- a/colm/.gitignore +++ /dev/null @@ -1,26 +0,0 @@ -# Common testing files. -/tmp.lm -/tmp.c -/tmp.bin -/input -/out - -/*.o -/Makefile.in -/Makefile -/.*.d -/colm -/lmparse.h -/lmparse.cc -/lmscan.cc -/config.h.in -/config.h -/defs.h -/version.h -/tags -/.deps -/libcolmd.a -/libcolmp.a -/.libs -/stamp-h1 -/stamp-h2 diff --git a/colm/Makefile.am b/colm/Makefile.am deleted file mode 100644 index c4a3504a..00000000 --- a/colm/Makefile.am +++ /dev/null @@ -1,183 +0,0 @@ -# -# Copyright 2007-2012 Adrian Thurston <thurston@complang.org> -# - -# This file is part of Colm. -# -# Colm is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# Colm is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with Colm; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - -INCLUDES = -I$(top_srcdir)/aapl - -bin_PROGRAMS = colm - -RUNTIME_SRC = \ - map.c pdarun.c list.c input.c debug.c \ - codevect.c pool.c string.c tree.c bytecode.c program.c - -RUNTIME_HDR = \ - bytecode.h config.h defs.h debug.h pool.h input.h \ - fsmrun.h pdarun.h map.h tree.h program.h colm.h - -lib_LIBRARIES = libcolmp.a libcolmd.a - -libcolmp_a_SOURCES = $(RUNTIME_SRC) -libcolmp_a_CFLAGS = -I.. - -libcolmd_a_SOURCES = $(RUNTIME_SRC) -libcolmd_a_CFLAGS = -I.. - -colm_CXXFLAGS = \ - -Wall \ - -DCOLM_LOG \ - -DPREFIX='"$(prefix)"' \ - -I.. 
- -colm_LDADD = libcolmp.a - -# ADT DEFS_COLM += -DCOLM_LOG -DPREFIX='"$(prefix)"' - -colm_SOURCES = \ - buffer.h bytecode.h colm.h debug.h dotgen.h fsmcodegen.h fsmgraph.h \ - fsmrun.h input.h keyops.h lmparse.h lmscan.h map.h parsedata.h \ - parsetree.h pcheck.h pdacodegen.h pdagraph.h pdarun.h pool.h redbuild.h \ - redfsm.h rtvector.h tree.h version.h global.h colm.h \ - \ - resolve.cc synthesis.cc lmparse.cc lmscan.cc parsetree.cc \ - fsmstate.cc fsmbase.cc fsmattach.cc fsmmin.cc \ - fsmgraph.cc pdagraph.cc pdabuild.cc pdacodegen.cc fsmcodegen.cc \ - redfsm.cc fsmexec.cc main.cc redbuild.cc closure.cc fsmap.cc \ - dotgen.cc pcheck.cc ctinput.cc declare.cc codegen.cc \ - exports.cc compiler.cc - -colmincdir = $(includedir)/colm - -colminc_HEADERS = $(RUNTIME_HDR) - -BUILT_SOURCES = \ - version.h lmscan.cc lmparse.h lmparse.cc - -version.h: Makefile - echo '#define VERSION "$(PACKAGE_VERSION)"' > version.h - echo '#define PUBDATE "$(PUBDATE)"' >> version.h - -if BUILD_PARSERS - -lmparse.h: lmparse.kh - $(KELBT) -o $@ $< - -lmparse.cc: lmparse.kl lmparse.kh - $(KELBT) -o $@ $< - -lmscan.cc: lmparse.h - -lmscan.cc: lmscan.rl - $(RAGEL) -G2 -o $@ $< - -endif - -# ADT -# ADT # Logging: -# ADT # colm: rt on/off -# ADT # rt_prd: off -# ADT # rt_db: on -# ADT # rt_clm: rt on/off -# ADT -# ADT INCS += -I../aapl -# ADT -# ADT DEFS_COLM += -DCOLM_LOG -DPREFIX='"$(prefix)"' -# ADT DEFS_RT_P += -# ADT DEFS_RT_D += -DCOLM_LOG -# ADT -# ADT CFLAGS += -g -Wall -Wwrite-strings -# ADT LDFLAGS += -# ADT -# ADT # Files in ALL_SRC that are generated. -# ADT GEN_SRC = version.h lmscan.cc lmparse.h lmparse.cc -# ADT -# ADT RUNTIME_P = libcolmp.a -# ADT RUNTIME_D = libcolmd.a -# ADT -# ADT LIBS = -# ADT -# ADT #************************************* -# ADT -# ADT # Get the version info. 
-# ADT include ../version.mk -# ADT -# ADT prefix = @prefix@ -# ADT -# ADT BUILD_PARSERS = @BUILD_PARSERS@ -# ADT -# ADT # Programs -# ADT CXX = @CXX@ -# ADT CC = @CC@ -# ADT -# ADT # Get objects and dependencies from sources. -# ADT COLM_OBJ = $(COLM_SRC:%.cc=%.o) -# ADT RUNTIME_OBJ_P = $(RUNTIME_SRC:%.c=%_p.o) -# ADT RUNTIME_OBJ_D = $(RUNTIME_SRC:%.c=%_d.o) -# ADT -# ADT DEPS = $(COLM_SRC:%.cc=.%.d) $(RUNTIME_SRC:%.c=.%_p.d) $(RUNTIME_SRC:%.c=.%_d.d) -# ADT -# ADT # Rules. -# ADT all: colm $(RUNTIME_P) $(RUNTIME_D) -# ADT -# ADT colm: $(GEN_SRC) $(COLM_OBJ) $(RUNTIME_OBJ_D) $(RUNTIME_OBJ_C_D) $(LIBS) -# ADT $(CXX) $(LDFLAGS) -o $@ $(COLM_OBJ) $(RUNTIME_OBJ_D) $(RUNTIME_OBJ_C_D) $(LIBS) -# ADT -# ADT $(RUNTIME_P): $(RUNTIME_OBJ_P) $(RUNTIME_OBJ_C_P) -# ADT ar -cr $@ $^ -# ADT -# ADT $(RUNTIME_D): $(RUNTIME_OBJ_D) $(RUNTIME_OBJ_C_D) -# ADT ar -cr $@ $^ -# ADT -# ADT version.h: ../version.mk -# ADT echo '#define VERSION "$(VERSION)"' > version.h -# ADT echo '#define PUBDATE "$(PUBDATE)"' >> version.h -# ADT -# ADT -# ADT $(COLM_OBJ): %.o: %.cc -# ADT @$(CXX) -M $(DEFS_COLM) $(INCS) $< > .$*.d -# ADT $(CXX) -c $(CFLAGS) $(DEFS_COLM) $(INCS) -o $@ $< -# ADT -# ADT $(RUNTIME_OBJ_P): %_p.o: %.c -# ADT @$(CC) -M -MT $@ $(DEFS_RT_P) $< > .$*_p.d -# ADT $(CC) -c $(CFLAGS) $(DEFS_RT_P) -o $@ $< -# ADT -# ADT $(RUNTIME_OBJ_D): %_d.o: %.c -# ADT @$(CC) -M -MT $@ $(DEFS_RT_D) $< > .$*_d.d -# ADT $(CC) -c $(CFLAGS) $(DEFS_RT_D) -o $@ $< -# ADT -# ADT distclean: clean -# ADT rm -f Makefile config.h -# ADT -# ADT ifeq ($(BUILD_PARSERS),true) -# ADT EXTRA_CLEAN = $(GEN_SRC) -# ADT endif -# ADT -# ADT clean: -# ADT rm -f tags .*.d *.o colm $(EXTRA_CLEAN) $(RUNTIME_P) $(RUNTIME_D) -# ADT -# ADT install: all -# ADT install -d $(prefix)/bin -# ADT install -d $(prefix)/include -# ADT install -d $(prefix)/include/colm -# ADT install -d $(prefix)/lib -# ADT install -s colm $(prefix)/bin/colm -# ADT install libcolmp.a libcolmd.a $(prefix)/lib -# ADT install $(RUNTIME_HDR) 
$(prefix)/include/colm -# ADT -# ADT -include $(DEPS) - diff --git a/colm/buffer.h b/colm/buffer.h deleted file mode 100644 index 9039ad4b..00000000 --- a/colm/buffer.h +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Copyright 2003 Adrian Thurston <thurston@complang.org> - */ - -/* This file is part of Colm. - * - * Colm is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * Colm is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Colm; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#ifndef _BUFFER_H -#define _BUFFER_H - -#define BUFFER_INITIAL_SIZE 4096 - -/* An automatically grown buffer for collecting tokens. Always reuses space; - * never down resizes. */ -struct Buffer -{ - Buffer() - { - data = (char*) malloc( BUFFER_INITIAL_SIZE ); - allocated = BUFFER_INITIAL_SIZE; - length = 0; - } - ~Buffer() { free(data); } - - void append( char p ) - { - if ( length == allocated ) { - allocated *= 2; - data = (char*) realloc( data, allocated ); - } - data[length++] = p; - } - - void clear() { length = 0; } - - char *data; - int allocated; - int length; -}; - -#endif /* _BUFFER_H */ diff --git a/colm/bytecode.c b/colm/bytecode.c deleted file mode 100644 index 57d31c78..00000000 --- a/colm/bytecode.c +++ /dev/null @@ -1,3579 +0,0 @@ -/* - * Copyright 2006-2012 Adrian Thurston <thurston@complang.org> - */ - -/* This file is part of Colm. 
- * - * Colm is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * Colm is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Colm; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -//#define COLM_LOG - -#include <colm/pdarun.h> -#include <colm/fsmrun.h> -#include <colm/tree.h> -#include <colm/bytecode.h> -#include <colm/pool.h> -#include <colm/debug.h> -#include <colm/config.h> - -#include <alloca.h> -#include <sys/mman.h> -#include <string.h> -#include <assert.h> -#include <stdlib.h> - -#if SIZEOF_LONG != 4 && SIZEOF_LONG != 8 - #error "SIZEOF_LONG contained an unexpected value" -#endif - -#define true 1 -#define false 0 - -/* More common macros are in bytecode.h. */ -#define vm_top_off(n) (sp[n]) -#define vm_popn(n) (sp += (n)) -#define vm_pushn(n) (sp -= (n)) -#define vm_local(o) (exec->framePtr[o]) -#define vm_plocal(o) (&exec->framePtr[o]) -#define vm_local_iframe(o) (exec->iframePtr[o]) -#define vm_plocal_iframe(o) (&exec->iframePtr[o]) - -#define read_byte( i ) do { \ - i = ((uchar) *instr++); \ -} while(0) - -#define consume_byte( ) do { \ - instr += 1; \ -} while(0) - - -#define read_word_p( i, p ) do { \ - i = ((Word) p[0]); \ - i |= ((Word) p[1]) << 8; \ - i |= ((Word) p[2]) << 16; \ - i |= ((Word) p[3]) << 24; \ -} while(0) - -/* There are better ways. 
*/ -#if SIZEOF_LONG == 4 - #define read_word( i ) do { \ - i = ((Word) *instr++); \ - i |= ((Word) *instr++) << 8; \ - i |= ((Word) *instr++) << 16; \ - i |= ((Word) *instr++) << 24; \ - } while(0) -#else - #define read_word( i ) do { \ - i = ((Word) *instr++); \ - i |= ((Word) *instr++) << 8; \ - i |= ((Word) *instr++) << 16; \ - i |= ((Word) *instr++) << 24; \ - i |= ((Word) *instr++) << 32; \ - i |= ((Word) *instr++) << 40; \ - i |= ((Word) *instr++) << 48; \ - i |= ((Word) *instr++) << 56; \ - } while(0) -#endif - -/* There are better ways. */ -#if SIZEOF_LONG == 4 - #define read_tree( i ) do { \ - Word w; \ - w = ((Word) *instr++); \ - w |= ((Word) *instr++) << 8; \ - w |= ((Word) *instr++) << 16; \ - w |= ((Word) *instr++) << 24; \ - i = (Tree*) w; \ - } while(0) - - #define read_word_type( Type, i ) do { \ - Word w; \ - w = ((Word) *instr++); \ - w |= ((Word) *instr++) << 8; \ - w |= ((Word) *instr++) << 16; \ - w |= ((Word) *instr++) << 24; \ - i = (Type) w; \ - } while(0) - - #define consume_word( ) do { \ - instr += 4; \ - } while(0) -#else - #define read_tree( i ) do { \ - Word w; \ - w = ((Word) *instr++); \ - w |= ((Word) *instr++) << 8; \ - w |= ((Word) *instr++) << 16; \ - w |= ((Word) *instr++) << 24; \ - w |= ((Word) *instr++) << 32; \ - w |= ((Word) *instr++) << 40; \ - w |= ((Word) *instr++) << 48; \ - w |= ((Word) *instr++) << 56; \ - i = (Tree*) w; \ - } while(0) - - #define read_word_type( Type, i ) do { \ - Word w; \ - w = ((Word) *instr++); \ - w |= ((Word) *instr++) << 8; \ - w |= ((Word) *instr++) << 16; \ - w |= ((Word) *instr++) << 24; \ - w |= ((Word) *instr++) << 32; \ - w |= ((Word) *instr++) << 40; \ - w |= ((Word) *instr++) << 48; \ - w |= ((Word) *instr++) << 56; \ - i = (Type) w; \ - } while(0) - - #define consume_word( ) do { \ - instr += 8; \ - } while(0) -#endif - -#define read_half( i ) do { \ - i = ((Word) *instr++); \ - i |= ((Word) *instr++) << 8; \ -} while(0) - -int colm_log_bytecode = 0; -int colm_log_parse = 0; -int 
colm_log_match = 0; -int colm_log_compile = 0; -int colm_log_conds = 0; - -void vm_grow( Program *prg ) -{ - debug( REALM_BYTECODE, "growing stack\n" ); -} - -void parserSetContext( Program *prg, Tree **sp, Parser *parser, Tree *val ) -{ - parser->pdaRun->context = splitTree( prg, val ); -} - -Head *treeToStr( Program *prg, Tree **sp, Tree *tree, int trim ) -{ - /* Collect the tree data. */ - StrCollect collect; - initStrCollect( &collect ); - - printTreeCollect( prg, sp, &collect, tree, trim ); - - /* Set up the input stream. */ - Head *ret = stringAllocFull( prg, collect.data, collect.length ); - - strCollectDestroy( &collect ); - - return ret; -} - -Word streamAppend( Program *prg, Tree **sp, Tree *input, InputStream *inputStream ) -{ - long length = 0; - - if ( input->id == LEL_ID_STR ) { - //assert(false); - /* Collect the tree data. */ - StrCollect collect; - initStrCollect( &collect ); - printTreeCollect( prg, sp, &collect, input, true ); - - /* Load it into the input. */ - appendData( inputStream, collect.data, collect.length ); - length = collect.length; - strCollectDestroy( &collect ); - } - else if ( input->id == LEL_ID_STREAM ) { - treeUpref( input ); - appendStream( inputStream, input ); - } - else { - treeUpref( input ); - appendTree( inputStream, input ); - } - - return length; -} - -long parseFrag( Program *prg, Tree **sp, Parser *parser, long stopId, long entry ) -{ -switch ( entry ) { -case PcrStart: - - if ( ! 
parser->pdaRun->parseError ) { - parser->pdaRun->stopTarget = stopId; - - long pcr = parseLoop( prg, sp, parser->pdaRun, parser->fsmRun, parser->input->in, entry ); - - while ( pcr != PcrDone ) { - -return pcr; -case PcrReduction: -case PcrGeneration: -case PcrPreEof: -case PcrReverse: - - pcr = parseLoop( prg, sp, parser->pdaRun, parser->fsmRun, parser->input->in, entry ); - } - } - -case PcrDone: -break; } - - return PcrDone; -} - -long parseFinish( Tree **result, Program *prg, Tree **sp, - Parser *parser, int revertOn, long entry ) -{ -switch ( entry ) { -case PcrStart: - - if ( parser->pdaRun->stopTarget <= 0 ) { - setEof( parser->input->in ); - - if ( ! parser->pdaRun->parseError ) { - long pcr = parseLoop( prg, sp, parser->pdaRun, parser->fsmRun, parser->input->in, entry ); - - while ( pcr != PcrDone ) { - -return pcr; -case PcrReduction: -case PcrGeneration: -case PcrPreEof: -case PcrReverse: - - pcr = parseLoop( prg, sp, parser->pdaRun, parser->fsmRun, parser->input->in, entry ); - } - } - } - - /* FIXME: need something here to check that we aren' stopped waiting for - * more data when we are actually expected to finish. This check doesn't - * work (at time of writing). 
*/ - //assert( (parser->pdaRun->stopTarget > 0 && parser->pdaRun->stopParsing) || parser->input->in->eofSent ); - - if ( !revertOn ) - commitFull( prg, sp, parser->pdaRun, 0 ); - - Tree *tree = getParsedRoot( parser->pdaRun, parser->pdaRun->stopTarget > 0 ); - treeUpref( tree ); - - *result = tree; - -case PcrDone: -break; } - - return PcrDone; -} - -long undoParseFrag( Program *prg, Tree **sp, Parser *parser, long steps, long entry ) -{ - InputStream *inputStream = parser->input->in; - FsmRun *fsmRun = parser->fsmRun; - PdaRun *pdaRun = parser->pdaRun; - - debug( REALM_PARSE, "undo parse frag, target steps: %ld, pdarun steps: %ld\n", steps, pdaRun->steps ); - - resetToken( fsmRun ); - -switch ( entry ) { -case PcrStart: - - if ( steps < pdaRun->steps ) { - /* Setup environment for going backwards until we reduced steps to - * what we want. */ - pdaRun->numRetry += 1; - pdaRun->targetSteps = steps; - pdaRun->triggerUndo = 1; - - /* The parse loop will recognise the situation. */ - long pcr = parseLoop( prg, sp, pdaRun, fsmRun, inputStream, entry ); - while ( pcr != PcrDone ) { - -return pcr; -case PcrReduction: -case PcrGeneration: -case PcrPreEof: -case PcrReverse: - - pcr = parseLoop( prg, sp, pdaRun, fsmRun, inputStream, entry ); - } - - /* Reset environment. 
*/ - pdaRun->triggerUndo = 0; - pdaRun->targetSteps = -1; - pdaRun->numRetry -= 1; - } - -case PcrDone: -break; } - - return PcrDone; -} - -Tree *streamPullBc( Program *prg, FsmRun *fsmRun, InputStream *in, Tree *length ) -{ - long len = ((Int*)length)->value; - Head *tokdata = streamPull( prg, fsmRun, in, len ); - return constructString( prg, tokdata ); -} - -void undoPull( Program *prg, FsmRun *fsmRun, InputStream *in, Tree *str ) -{ - const char *data = stringData( ( (Str*)str )->value ); - long length = stringLength( ( (Str*)str )->value ); - undoStreamPull( fsmRun, in, data, length ); -} - -long streamPush( Program *prg, Tree **sp, FsmRun *fsmRun, InputStream *in, Tree *tree, int ignore ) -{ - if ( tree->id == LEL_ID_STR ) { - /* This should become a compile error. If it's text, it's up to the - * scanner to decide. Want to force it then send a token. */ - assert( !ignore ); - - /* Collect the tree data. */ - StrCollect collect; - initStrCollect( &collect ); - printTreeCollect( prg, sp, &collect, tree, true ); - - streamPushText( fsmRun, in, collect.data, collect.length ); - long length = collect.length; - strCollectDestroy( &collect ); - - return length; - } - else { - treeUpref( tree ); - streamPushTree( fsmRun, in, tree, ignore ); - return -1; - } -} - -void setLocal( Tree **frame, long field, Tree *tree ) -{ - if ( tree != 0 ) - assert( tree->refs >= 1 ); - frame[field] = tree; -} - -Tree *getLocalSplit( Program *prg, Tree **frame, long field ) -{ - Tree *val = frame[field]; - Tree *split = splitTree( prg, val ); - frame[field] = split; - return split; -} - -void downrefLocalTrees( Program *prg, Tree **sp, Tree **frame, char *trees, long treesLen ) -{ - long i; - for ( i = 0; i < treesLen; i++ ) { - debug( REALM_BYTECODE, "local tree downref: %ld\n", (long)trees[i] ); - - treeDownref( prg, sp, frame[((long)trees[i])] ); - } -} - -UserIter *uiterCreate( Program *prg, Tree ***psp, FunctionInfo *fi, long searchId ) -{ - Tree **sp = *psp; - vm_pushn( 
sizeof(UserIter) / sizeof(Word) ); - void *mem = vm_ptop(); - - UserIter *uiter = mem; - initUserIter( uiter, vm_ptop(), fi->argSize, searchId ); - *psp = sp; - return uiter; -} - -void uiterInit( Program *prg, Tree **sp, UserIter *uiter, - FunctionInfo *fi, int revertOn ) -{ - /* Set up the first yeild so when we resume it starts at the beginning. */ - uiter->ref.kid = 0; - uiter->stackSize = uiter->stackRoot - vm_ptop(); - uiter->frame = &uiter->stackRoot[-IFR_AA]; - - if ( revertOn ) - uiter->resume = prg->rtd->frameInfo[fi->frameId].codeWV; - else - uiter->resume = prg->rtd->frameInfo[fi->frameId].codeWC; -} - -void treeIterDestroy( Tree ***psp, TreeIter *iter ) -{ - Tree **sp = *psp; - long curStackSize = iter->stackRoot - vm_ptop(); - assert( iter->stackSize == curStackSize ); - vm_popn( iter->stackSize ); - *psp = sp; -} - -void userIterDestroy( Tree ***psp, UserIter *uiter ) -{ - Tree **sp = *psp; - - /* We should always be coming from a yield. The current stack size will be - * nonzero and the stack size in the iterator will be correct. 
*/ - long curStackSize = uiter->stackRoot - vm_ptop(); - assert( uiter->stackSize == curStackSize ); - - long argSize = uiter->argSize; - - vm_popn( uiter->stackRoot - vm_ptop() ); - vm_popn( sizeof(UserIter) / sizeof(Word) ); - vm_popn( argSize ); - - *psp = sp; -} - -Tree *constructArgv( Program *prg, int argc, const char **argv ) -{ - Tree *list = createGeneric( prg, prg->rtd->argvGenericId ); - treeUpref( list ); - int i; - for ( i = 0; i < argc; i++ ) { - Head *head = stringAllocPointer( prg, argv[i], strlen(argv[i]) ); - Tree *arg = constructString( prg, head ); - treeUpref( arg ); - listAppend2( prg, (List*)list, arg ); - } - return list; -} - -/* - * Execution environment - */ - -void initExecution( Execution *exec, Parser *parser, PdaRun *pdaRun, FsmRun *fsmRun, InputStream *inputStream, int frameId ) -{ - exec->parser = parser; - exec->pdaRun = pdaRun; - exec->fsmRun = fsmRun; - exec->inputStream = inputStream; - exec->framePtr = 0; - exec->iframePtr = 0; - exec->frameId = frameId; - exec->rcodeUnitLen = 0; -} - -void rcodeDownrefAll( Program *prg, Tree **sp, RtCodeVect *rev ) -{ - while ( rev->tabLen > 0 ) { - /* Read the length */ - Code *prcode = rev->data + rev->tabLen - SIZEOF_WORD; - Word len; - read_word_p( len, prcode ); - - /* Find the start of block. */ - long start = rev->tabLen - len - SIZEOF_WORD; - prcode = rev->data + start; - - /* Execute it. */ - rcodeDownref( prg, sp, prcode ); - - /* Backup over it. 
*/ - rev->tabLen -= len + SIZEOF_WORD; - } -} - -void rcodeDownref( Program *prg, Tree **sp, Code *instr ) -{ -again: - switch ( *instr++ ) { - case IN_PARSE_LOAD_START: { - debug( REALM_BYTECODE, "IN_PARSE_LOAD_START\n" ); - break; - } - case IN_PARSE_SAVE_STEPS: { - debug( REALM_BYTECODE, "IN_PARSE_SAVE_STEPS\n" ); - break; - } - case IN_LOAD_TREE: { - Word w; - read_word( w ); - debug( REALM_BYTECODE, "IN_LOAD_TREE %p\n", (Tree*)w ); - treeDownref( prg, sp, (Tree*)w ); - break; - } - case IN_LOAD_WORD: { - Word w; - read_word( w ); - debug( REALM_BYTECODE, "IN_LOAD_WORD\n" ); - break; - } - case IN_RESTORE_LHS: { - Tree *restore; - read_tree( restore ); - debug( REALM_BYTECODE, "IN_RESTORE_LHS\n" ); - treeDownref( prg, sp, restore ); - break; - } - - case IN_PARSE_FRAG_BKT: { - Half stopId; - read_half( stopId ); - debug( REALM_BYTECODE, "IN_PARSE_FRAG_BKT\n" ); - break; - } - case IN_PARSE_FRAG_BKT3: { - debug( REALM_BYTECODE, "IN_PARSE_FRAG_BKT3\n" ); - break; - } - case IN_PARSE_FINISH_BKT: { - Half stopId; - read_half( stopId ); - debug( REALM_BYTECODE, "IN_PARSE_FINISH_BKT\n" ); - break; - } - case IN_PARSE_FINISH_BKT3: { - debug( REALM_BYTECODE, "IN_PARSE_FINISH_BKT3\n" ); - break; - } - case IN_PCR_CALL: { - debug( REALM_BYTECODE, "IN_PCR_CALL\n" ); - break; - } - case IN_PCR_RET: { - debug( REALM_BYTECODE, "IN_PCR_RET\n" ); - return; - } - case IN_PCR_END_DECK: { - debug( REALM_BYTECODE, "IN_PCR_END_DECK\n" ); - return; - } - case IN_INPUT_APPEND_BKT: { - Tree *parser; - Tree *input; - Word len; - read_tree( parser ); - read_tree( input ); - read_word( len ); - - debug( REALM_BYTECODE, "IN_INPUT_APPEND_BKT\n" ); - - treeDownref( prg, sp, parser ); - treeDownref( prg, sp, input ); - break; - } - case IN_INPUT_PULL_BKT: { - Word f; - Tree *string; - read_tree( string ); - read_word( f ); - - debug( REALM_BYTECODE, "IN_INPUT_PULL_BKT\n" ); - - treeDownref( prg, sp, string ); - break; - } - case IN_INPUT_PUSH_BKT: { - Word len; - read_word( len ); - - debug( 
REALM_BYTECODE, "IN_INPUT_PUSH_BKT\n" ); - break; - } - case IN_LOAD_GLOBAL_BKT: { - debug( REALM_BYTECODE, "IN_LOAD_GLOBAL_BKT\n" ); - break; - } - case IN_LOAD_CONTEXT_BKT: { - debug( REALM_BYTECODE, "IN_LOAD_CONTEXT_BKT\n" ); - break; - } - case IN_LOAD_ACCUM_BKT: { - /* Tree *parser; */ - consume_word(); - debug( REALM_BYTECODE, "IN_LOAD_ACCUM_BKT\n" ); - break; - } - case IN_LOAD_INPUT_BKT: { - /* Tree *input; */ - consume_word(); - debug( REALM_BYTECODE, "IN_LOAD_INPUT_BKT\n" ); - break; - } - case IN_GET_FIELD_BKT: { - short field; - read_half( field ); - - debug( REALM_BYTECODE, "IN_GET_FIELD_BKT %hd\n", field ); - break; - } - case IN_SET_FIELD_BKT: { - short field; - Tree *val; - read_half( field ); - read_tree( val ); - - debug( REALM_BYTECODE, "IN_SET_FIELD_BKT %hd\n", field ); - - treeDownref( prg, sp, val ); - break; - } - case IN_PTR_DEREF_BKT: { - Tree *ptr; - read_tree( ptr ); - - debug( REALM_BYTECODE, "IN_PTR_DEREF_BKT\n" ); - - treeDownref( prg, sp, ptr ); - break; - } - case IN_SET_TOKEN_DATA_BKT: { - Word oldval; - read_word( oldval ); - - debug( REALM_BYTECODE, "IN_SET_TOKEN_DATA_BKT\n" ); - - Head *head = (Head*)oldval; - stringFree( prg, head ); - break; - } - case IN_LIST_APPEND_BKT: { - debug( REALM_BYTECODE, "IN_LIST_APPEND_BKT\n" ); - break; - } - case IN_LIST_REMOVE_END_BKT: { - Tree *val; - read_tree( val ); - - debug( REALM_BYTECODE, "IN_LIST_REMOVE_END_BKT\n" ); - - treeDownref( prg, sp, val ); - break; - } - case IN_GET_LIST_MEM_BKT: { - short field; - read_half( field ); - - debug( REALM_BYTECODE, "IN_GET_LIST_MEM_BKT %hd\n", field ); - break; - } - case IN_SET_LIST_MEM_BKT: { - Half field; - Tree *val; - read_half( field ); - read_tree( val ); - - debug( REALM_BYTECODE, "IN_SET_LIST_MEM_BKT %hd\n", field ); - - treeDownref( prg, sp, val ); - break; - } - case IN_MAP_INSERT_BKT: { - /* uchar inserted; */ - Tree *key; - consume_byte(); - read_tree( key ); - - debug( REALM_BYTECODE, "IN_MAP_INSERT_BKT\n" ); - - treeDownref( prg, sp, 
key ); - break; - } - case IN_MAP_STORE_BKT: { - Tree *key, *val; - read_tree( key ); - read_tree( val ); - - debug( REALM_BYTECODE,"IN_MAP_STORE_BKT\n" ); - - treeDownref( prg, sp, key ); - treeDownref( prg, sp, val ); - break; - } - case IN_MAP_REMOVE_BKT: { - Tree *key, *val; - read_tree( key ); - read_tree( val ); - - debug( REALM_BYTECODE, "IN_MAP_REMOVE_BKT\n" ); - - treeDownref( prg, sp, key ); - treeDownref( prg, sp, val ); - break; - } - case IN_STOP: { - return; - } - default: { - fatal( "UNKNOWN INSTRUCTION 0x%2x: -- reverse code downref\n", *(instr-1)); - assert(false); - break; - } - } - goto again; -} - -void mainExecution( Program *prg, Execution *exec, Code *code ) -{ - Tree **sp = prg->vm_root; - - /* Set up the stack as if we have called. We allow a return value. */ - vm_push( 0 ); - vm_push( 0 ); - vm_push( 0 ); - vm_push( 0 ); - - /* Execution loop. */ - executeCode( prg, exec, sp, code ); - - vm_pop_ignore(); - vm_pop_ignore(); - prg->returnVal = vm_pop(); -} - -int makeReverseCode( PdaRun *pdaRun ) -{ - RtCodeVect *reverseCode = &pdaRun->reverseCode; - RtCodeVect *rcodeCollect = &pdaRun->rcodeCollect; - - /* Do we need to revert the left hand side? */ - - /* Check if there was anything generated. */ - if ( rcodeCollect->tabLen == 0 ) - return false; - - if ( pdaRun->rcBlockCount == 0 ) { - /* One reverse code run for the DECK terminator. */ - append( reverseCode, IN_PCR_END_DECK ); - append( reverseCode, IN_PCR_RET ); - appendWord( reverseCode, 2 ); - pdaRun->rcBlockCount += 1; - incrementSteps( pdaRun ); - } - - long startLength = reverseCode->tabLen; - - /* Go backwards, group by group, through the reverse code. Push each group - * to the global reverse code stack. */ - Code *p = rcodeCollect->data + rcodeCollect->tabLen; - while ( p != rcodeCollect->data ) { - p--; - long len = *p; - p = p - len; - append2( reverseCode, p, len ); - } - - /* Stop, then place a total length in the global stack. 
*/ - append( reverseCode, IN_PCR_RET ); - long length = reverseCode->tabLen - startLength; - appendWord( reverseCode, length ); - - /* Clear the revere code buffer. */ - rcodeCollect->tabLen = 0; - - pdaRun->rcBlockCount += 1; - incrementSteps( pdaRun ); - - return true; -} - -void transferReverseCode( PdaRun *pdaRun, ParseTree *parseTree ) -{ - if ( pdaRun->rcBlockCount > 0 ) { - debug( REALM_PARSE, "attaching reverse code to token\n" ); - parseTree->flags |= PF_HAS_RCODE; - pdaRun->rcBlockCount = 0; - } -} - -Code *popReverseCode( RtCodeVect *allRev ) -{ - /* Read the length */ - Code *prcode = allRev->data + allRev->tabLen - SIZEOF_WORD; - Word len; - read_word_p( len, prcode ); - - /* Find the start of block. */ - long start = allRev->tabLen - len - SIZEOF_WORD; - prcode = allRev->data + start; - - /* Backup over it. */ - allRev->tabLen -= len + SIZEOF_WORD; - return prcode; -} - -Tree **executeCode( Program *prg, Execution *exec, Tree **sp, Code *instr ) -{ - /* When we exit we are going to verify that we did not eat up any stack - * space. 
*/ - Tree **root = sp; - Code c; - -again: - c = *instr++; - //debug( REALM_BYTECODE, "--in 0x%x\n", c ); - - switch ( c ) { - case IN_RESTORE_LHS: { - Tree *restore; - read_tree( restore ); - - debug( REALM_BYTECODE, "IN_RESTORE_LHS\n" ); - treeDownref( prg, sp, exec->pdaRun->parseInput->shadow->tree ); - exec->pdaRun->parseInput->shadow->tree = restore; - break; - } - case IN_LOAD_NIL: { - debug( REALM_BYTECODE, "IN_LOAD_NIL\n" ); - vm_push( 0 ); - break; - } - case IN_LOAD_TREE: { - debug( REALM_BYTECODE, "IN_LOAD_TREE\n" ); - Tree *tree; - read_tree( tree ); - vm_push( tree ); - break; - } - case IN_LOAD_WORD: { - debug( REALM_BYTECODE, "IN_LOAD_WORD\n" ); - Word w; - read_word( w ); - vm_push( (SW)w ); - break; - } - case IN_LOAD_TRUE: { - debug( REALM_BYTECODE, "IN_LOAD_TRUE\n" ); - treeUpref( prg->trueVal ); - vm_push( prg->trueVal ); - break; - } - case IN_LOAD_FALSE: { - debug( REALM_BYTECODE, "IN_LOAD_FALSE\n" ); - treeUpref( prg->falseVal ); - vm_push( prg->falseVal ); - break; - } - case IN_LOAD_INT: { - Word i; - read_word( i ); - - debug( REALM_BYTECODE, "IN_LOAD_INT %d\n", i ); - - Tree *tree = constructInteger( prg, i ); - treeUpref( tree ); - vm_push( tree ); - break; - } - case IN_LOAD_STR: { - Word offset; - read_word( offset ); - - debug( REALM_BYTECODE, "IN_LOAD_STR %d\n", offset ); - - Head *lit = makeLiteral( prg, offset ); - Tree *tree = constructString( prg, lit ); - treeUpref( tree ); - vm_push( tree ); - break; - } - case IN_PRINT: { - int n; - read_byte( n ); - debug( REALM_BYTECODE, "IN_PRINT %d\n", n ); - - while ( n-- > 0 ) { - Tree *tree = vm_pop(); - printTreeFile( prg, sp, stdout, tree, true ); - treeDownref( prg, sp, tree ); - } - break; - } - case IN_PRINT_XML_AC: { - int n; - read_byte( n ); - - debug( REALM_BYTECODE, "IN_PRINT_XML_AC %d\n", n ); - - while ( n-- > 0 ) { - Tree *tree = vm_pop(); - printXmlStdout( prg, sp, tree, true, true ); - treeDownref( prg, sp, tree ); - } - break; - } - case IN_PRINT_XML: { - int n; - 
read_byte( n ); - debug( REALM_BYTECODE, "IN_PRINT_XML %d", n ); - - while ( n-- > 0 ) { - Tree *tree = vm_pop(); - printXmlStdout( prg, sp, tree, false, true ); - treeDownref( prg, sp, tree ); - } - break; - } - case IN_PRINT_STREAM: { - int n; - read_byte( n ); - debug( REALM_BYTECODE, "IN_PRINT_STREAM\n" ); - - Stream *stream = (Stream*)vm_pop(); - while ( n-- > 0 ) { - Tree *tree = vm_pop(); - printTreeFile( prg, sp, stream->file, tree, true ); - treeDownref( prg, sp, tree ); - } - treeDownref( prg, sp, (Tree*)stream ); - break; - } - case IN_LOAD_CONTEXT_R: { - debug( REALM_BYTECODE, "IN_LOAD_CONTEXT_R\n" ); - - treeUpref( exec->pdaRun->context ); - vm_push( exec->pdaRun->context ); - break; - } - case IN_LOAD_CONTEXT_WV: { - debug( REALM_BYTECODE, "IN_LOAD_CONTEXT_WV\n" ); - - treeUpref( exec->pdaRun->context ); - vm_push( exec->pdaRun->context ); - - /* Set up the reverse instruction. */ - append( &exec->pdaRun->rcodeCollect, IN_LOAD_CONTEXT_BKT ); - exec->rcodeUnitLen = SIZEOF_CODE; - break; - } - case IN_LOAD_CONTEXT_WC: { - debug( REALM_BYTECODE, "IN_LOAD_CONTEXT_WC\n" ); - - /* This is identical to the _R version, but using it for writing - * would be confusing. */ - treeUpref( exec->pdaRun->context ); - vm_push( exec->pdaRun->context ); - break; - } - case IN_LOAD_CONTEXT_BKT: { - debug( REALM_BYTECODE, "IN_LOAD_CONTEXT_BKT\n" ); - - treeUpref( exec->pdaRun->context ); - vm_push( exec->pdaRun->context ); - break; - } - case IN_LOAD_GLOBAL_R: { - debug( REALM_BYTECODE, "IN_LOAD_GLOBAL_R\n" ); - - treeUpref( prg->global ); - vm_push( prg->global ); - break; - } - case IN_LOAD_GLOBAL_WV: { - debug( REALM_BYTECODE, "IN_LOAD_GLOBAL_WV\n" ); - - treeUpref( prg->global ); - vm_push( prg->global ); - - /* Set up the reverse instruction. 
*/ - append( &exec->pdaRun->rcodeCollect, IN_LOAD_GLOBAL_BKT ); - exec->rcodeUnitLen = SIZEOF_CODE; - break; - } - case IN_LOAD_GLOBAL_WC: { - debug( REALM_BYTECODE, "IN_LOAD_GLOBAL_WC\n" ); - - /* This is identical to the _R version, but using it for writing - * would be confusing. */ - treeUpref( prg->global ); - vm_push( prg->global ); - break; - } - case IN_LOAD_GLOBAL_BKT: { - debug( REALM_BYTECODE, "IN_LOAD_GLOBAL_BKT\n" ); - - treeUpref( prg->global ); - vm_push( prg->global ); - break; - } - case IN_LOAD_ACCUM_R: { - debug( REALM_BYTECODE, "IN_LOAD_ACCUM_R\n" ); - - treeUpref( (Tree*)exec->parser ); - vm_push( (Tree*)exec->parser ); - assert( exec->parser != 0 ); - break; - } - case IN_LOAD_ACCUM_WV: { - debug( REALM_BYTECODE, "IN_LOAD_ACCUM_WV\n" ); - - treeUpref( (Tree*)exec->parser ); - vm_push( (Tree*)exec->parser ); - assert( exec->parser != 0 ); - - /* Set up the reverse instruction. */ - append( &exec->pdaRun->rcodeCollect, IN_LOAD_ACCUM_BKT ); - appendWord( &exec->pdaRun->rcodeCollect, (Word)exec->parser ); - exec->rcodeUnitLen = SIZEOF_CODE + SIZEOF_WORD; - break; - } - case IN_LOAD_ACCUM_WC: { - debug( REALM_BYTECODE, "IN_LOAD_ACCUM_WC\n" ); - - /* This is identical to the _R version, but using it for writing - * would be confusing. 
*/ - treeUpref( (Tree*)exec->parser ); - vm_push( (Tree*)exec->parser ); - assert( exec->parser != 0 ); - break; - } - case IN_LOAD_ACCUM_BKT: { - Tree *parser; - read_tree( parser ); - - debug( REALM_BYTECODE, "IN_LOAD_ACCUM_BKT\n" ); - - treeUpref( parser ); - vm_push( parser ); - break; - } - case IN_LOAD_INPUT_R: { - debug( REALM_BYTECODE, "IN_LOAD_INPUT_R\n" ); - - assert( exec->parser != 0 ); - treeUpref( (Tree*)exec->parser->input ); - vm_push( (Tree*)exec->parser->input ); - break; - } - case IN_LOAD_INPUT_WV: { - debug( REALM_BYTECODE, "IN_LOAD_INPUT_WV\n" ); - - assert( exec->parser != 0 ); - treeUpref( (Tree*)exec->parser->input ); - vm_push( (Tree*)exec->parser->input ); - - /* Set up the reverse instruction. */ - append( &exec->pdaRun->rcodeCollect, IN_LOAD_INPUT_BKT ); - appendWord( &exec->pdaRun->rcodeCollect, (Word)exec->parser->input ); - exec->rcodeUnitLen = SIZEOF_CODE + SIZEOF_WORD; - break; - } - case IN_LOAD_INPUT_WC: { - debug( REALM_BYTECODE, "IN_LOAD_INPUT_WC\n" ); - - /* This is identical to the _R version, but using it for writing - * would be confusing. */ - assert( exec->parser != 0 ); - treeUpref( (Tree*)exec->parser->input ); - vm_push( (Tree*)exec->parser->input ); - break; - } - case IN_LOAD_INPUT_BKT: { - Tree *accumStream; - read_tree( accumStream ); - - debug( REALM_BYTECODE, "IN_LOAD_INPUT_BKT\n" ); - - treeUpref( accumStream ); - vm_push( accumStream ); - break; - } - case IN_LOAD_CTX_R: { - debug( REALM_BYTECODE, "IN_LOAD_CTX_R\n" ); - - treeUpref( exec->pdaRun->context ); - vm_push( exec->pdaRun->context ); - break; - } - case IN_LOAD_CTX_WV: { - debug( REALM_BYTECODE, "IN_LOAD_CTX_WV\n" ); - - treeUpref( exec->pdaRun->context ); - vm_push( exec->pdaRun->context ); - - /* Set up the reverse instruction. 
*/ - append( &exec->pdaRun->rcodeCollect, IN_LOAD_ACCUM_BKT ); - appendWord( &exec->pdaRun->rcodeCollect, (Word)exec->parser ); - exec->rcodeUnitLen = SIZEOF_CODE + SIZEOF_WORD; - break; - } - case IN_LOAD_CTX_WC: { - debug( REALM_BYTECODE, "IN_LOAD_CTX_WC\n" ); - - /* This is identical to the _R version, but using it for writing - * would be confusing. */ - treeUpref( exec->pdaRun->context ); - vm_push( exec->pdaRun->context ); - break; - } - case IN_LOAD_CTX_BKT: { - debug( REALM_BYTECODE, "IN_LOAD_CTX_BKT\n" ); - - treeUpref( exec->pdaRun->context ); - vm_push( exec->pdaRun->context ); - break; - } - case IN_INIT_CAPTURES: { - /* uchar ncaps; */ - consume_byte(); - - debug( REALM_BYTECODE, "IN_INIT_CAPTURES\n" ); - - /* If there are captures (this is a translate block) then copy them into - * the local frame now. */ - LangElInfo *lelInfo = prg->rtd->lelInfo; - char **mark = exec->fsmRun->mark; - - int i; - for ( i = 0; i < lelInfo[exec->pdaRun->tokenId].numCaptureAttr; i++ ) { - CaptureAttr *ca = &prg->rtd->captureAttr[lelInfo[exec->pdaRun->tokenId].captureAttr + i]; - Head *data = stringAllocFull( prg, - mark[ca->mark_enter], mark[ca->mark_leave] - mark[ca->mark_enter] ); - Tree *string = constructString( prg, data ); - treeUpref( string ); - setLocal( exec->framePtr, -1 - i, string ); - } - break; - } - case IN_INIT_RHS_EL: { - Half position; - short field; - read_half( position ); - read_half( field ); - - debug( REALM_BYTECODE, "IN_INIT_RHS_EL %hd\n", field ); - - Tree *val = getRhsEl( prg, exec->pdaRun->redLel->shadow->tree, position ); - treeUpref( val ); - vm_local(field) = val; - break; - } - - case IN_INIT_LHS_EL: { - short field; - read_half( field ); - - debug( REALM_BYTECODE, "IN_INIT_LHS_EL %hd\n", field ); - - /* We transfer it to to the local field. Possibly take a copy. */ - Tree *val = exec->pdaRun->redLel->shadow->tree; - - /* Save it. 
*/ - treeUpref( val ); - exec->pdaRun->parsed = val; - - exec->pdaRun->redLel->shadow->tree = 0; - vm_local(field) = val; - break; - } - case IN_STORE_LHS_EL: { - short field; - read_half( field ); - - debug( REALM_BYTECODE, "IN_STORE_LHS_EL %hd\n", field ); - - Tree *val = vm_local(field); - vm_local(field) = 0; - exec->pdaRun->redLel->shadow->tree = val; - break; - } - case IN_UITER_ADVANCE: { - short field; - read_half( field ); - - debug( REALM_BYTECODE, "IN_UITER_ADVANCE\n" ); - - /* Get the iterator. */ - UserIter *uiter = (UserIter*) vm_local(field); - - long stackSize = uiter->stackRoot - vm_ptop(); - assert( uiter->stackSize == stackSize ); - - /* Fix the return instruction pointer. */ - uiter->stackRoot[-IFR_AA + IFR_RIN] = (SW)instr; - - instr = uiter->resume; - exec->framePtr = uiter->frame; - exec->iframePtr = &uiter->stackRoot[-IFR_AA]; - break; - } - case IN_UITER_GET_CUR_R: { - short field; - read_half( field ); - - debug( REALM_BYTECODE, "IN_UITER_GET_CUR_R\n" ); - - UserIter *uiter = (UserIter*) vm_local(field); - Tree *val = uiter->ref.kid->tree; - treeUpref( val ); - vm_push( val ); - break; - } - case IN_UITER_GET_CUR_WC: { - short field; - read_half( field ); - - debug( REALM_BYTECODE, "IN_UITER_GET_CUR_WC\n" ); - - UserIter *uiter = (UserIter*) vm_local(field); - splitRef( prg, &sp, &uiter->ref ); - Tree *split = uiter->ref.kid->tree; - treeUpref( split ); - vm_push( split ); - break; - } - case IN_UITER_SET_CUR_WC: { - short field; - read_half( field ); - - debug( REALM_BYTECODE, "IN_UITER_SET_CUR_WC\n" ); - - Tree *t = vm_pop(); - UserIter *uiter = (UserIter*) vm_local(field); - splitRef( prg, &sp, &uiter->ref ); - Tree *old = uiter->ref.kid->tree; - setUiterCur( prg, uiter, t ); - treeDownref( prg, sp, old ); - break; - } - case IN_GET_LOCAL_R: { - short field; - read_half( field ); - - debug( REALM_BYTECODE, "IN_GET_LOCAL_R\n" ); - - Tree *val = vm_local(field); - treeUpref( val ); - vm_push( val ); - break; - } - case IN_GET_LOCAL_WC: { 
- short field; - read_half( field ); - - debug( REALM_BYTECODE, "IN_GET_LOCAL_WC\n" ); - - Tree *split = getLocalSplit( prg, exec->framePtr, field ); - treeUpref( split ); - vm_push( split ); - break; - } - case IN_SET_LOCAL_WC: { - short field; - read_half( field ); - - debug( REALM_BYTECODE, "IN_SET_LOCAL_WC %d\n", field ); - - Tree *val = vm_pop(); - treeDownref( prg, sp, vm_local(field) ); - setLocal( exec->framePtr, field, val ); - break; - } - case IN_SAVE_RET: { - debug( REALM_BYTECODE, "IN_SAVE_RET\n" ); - - Tree *val = vm_pop(); - vm_local(FR_RV) = val; - break; - } - case IN_GET_LOCAL_REF_R: { - short field; - read_half( field ); - - debug( REALM_BYTECODE, "IN_GET_LOCAL_REF_R\n" ); - - Ref *ref = (Ref*) vm_plocal(field); - Tree *val = ref->kid->tree; - treeUpref( val ); - vm_push( val ); - break; - } - case IN_GET_LOCAL_REF_WC: { - short field; - read_half( field ); - - debug( REALM_BYTECODE, "IN_GET_LOCAL_REF_WC\n" ); - - Ref *ref = (Ref*) vm_plocal(field); - splitRef( prg, &sp, ref ); - Tree *val = ref->kid->tree; - treeUpref( val ); - vm_push( val ); - break; - } - case IN_SET_LOCAL_REF_WC: { - short field; - read_half( field ); - - debug( REALM_BYTECODE, "IN_SET_LOCAL_REF_WC\n" ); - - Tree *val = vm_pop(); - Ref *ref = (Ref*) vm_plocal(field); - splitRef( prg, &sp, ref ); - refSetValue( ref, val ); - break; - } - case IN_GET_FIELD_R: { - short field; - read_half( field ); - - debug( REALM_BYTECODE, "IN_GET_FIELD_R %d\n", field ); - - Tree *obj = vm_pop(); - treeDownref( prg, sp, obj ); - - Tree *val = getField( obj, field ); - treeUpref( val ); - vm_push( val ); - break; - } - case IN_GET_FIELD_WC: { - short field; - read_half( field ); - - debug( REALM_BYTECODE, "IN_GET_FIELD_WC %d\n", field ); - - Tree *obj = vm_pop(); - treeDownref( prg, sp, obj ); - - Tree *split = getFieldSplit( prg, obj, field ); - treeUpref( split ); - vm_push( split ); - break; - } - case IN_GET_FIELD_WV: { - short field; - read_half( field ); - - debug( REALM_BYTECODE, 
"IN_GET_FIELD_WV\n" ); - - Tree *obj = vm_pop(); - treeDownref( prg, sp, obj ); - - Tree *split = getFieldSplit( prg, obj, field ); - treeUpref( split ); - vm_push( split ); - - /* Set up the reverse instruction. */ - append( &exec->pdaRun->rcodeCollect, IN_GET_FIELD_BKT ); - appendHalf( &exec->pdaRun->rcodeCollect, field ); - exec->rcodeUnitLen += SIZEOF_CODE + SIZEOF_HALF; - break; - } - case IN_GET_FIELD_BKT: { - short field; - read_half( field ); - - debug( REALM_BYTECODE, "IN_GET_FIELD_BKT\n" ); - - Tree *obj = vm_pop(); - treeDownref( prg, sp, obj ); - - Tree *split = getFieldSplit( prg, obj, field ); - treeUpref( split ); - vm_push( split ); - break; - } - case IN_SET_FIELD_WC: { - short field; - read_half( field ); - - debug( REALM_BYTECODE, "IN_SET_FIELD_WC %d\n", field ); - - Tree *obj = vm_pop(); - Tree *val = vm_pop(); - treeDownref( prg, sp, obj ); - - /* Downref the old value. */ - Tree *prev = getField( obj, field ); - treeDownref( prg, sp, prev ); - - setField( prg, obj, field, val ); - break; - } - case IN_SET_FIELD_WV: { - short field; - read_half( field ); - - debug( REALM_BYTECODE, "IN_SET_FIELD_WV %d\n", field ); - - Tree *obj = vm_pop(); - Tree *val = vm_pop(); - treeDownref( prg, sp, obj ); - - /* Save the old value, then set the field. */ - Tree *prev = getField( obj, field ); - setField( prg, obj, field, val ); - - /* Set up the reverse instruction. */ - append( &exec->pdaRun->rcodeCollect, IN_SET_FIELD_BKT ); - appendHalf( &exec->pdaRun->rcodeCollect, field ); - appendWord( &exec->pdaRun->rcodeCollect, (Word)prev ); - exec->rcodeUnitLen += SIZEOF_CODE + SIZEOF_HALF + SIZEOF_WORD; - append( &exec->pdaRun->rcodeCollect, exec->rcodeUnitLen ); - /* FLUSH */ - break; - } - case IN_SET_FIELD_BKT: { - short field; - Tree *val; - read_half( field ); - read_tree( val ); - - debug( REALM_BYTECODE, "IN_SET_FIELD_BKT\n" ); - - Tree *obj = vm_pop(); - treeDownref( prg, sp, obj ); - - /* Downref the old value. 
*/ - Tree *prev = getField( obj, field ); - treeDownref( prg, sp, prev ); - - setField( prg, obj, field, val ); - break; - } - case IN_SET_FIELD_LEAVE_WC: { - short field; - read_half( field ); - - debug( REALM_BYTECODE, "IN_SET_FIELD_LEAVE_WC\n" ); - - /* Note that we don't downref the object here because we are - * leaving it on the stack. */ - Tree *obj = vm_pop(); - Tree *val = vm_pop(); - - /* Downref the old value. */ - Tree *prev = getField( obj, field ); - treeDownref( prg, sp, prev ); - - /* Set the field. */ - setField( prg, obj, field, val ); - - /* Leave the object on the top of the stack. */ - vm_push( obj ); - break; - } - case IN_GET_RHS_VAL_R: { - debug( REALM_BYTECODE, "IN_GET_RHS_VAL_R\n" ); - int i, done = 0; - uchar len; - - Tree *obj = vm_pop(), *val = 0; - treeDownref( prg, sp, obj ); - - read_byte( len ); - for ( i = 0; i < len; i++ ) { - uchar prodNum, childNum; - read_byte( prodNum ); - read_byte( childNum ); - if ( !done && obj->prodNum == prodNum ) { - val = getRhsEl( prg, obj, childNum ); - done = 1; - } - } - - treeUpref( val ); - vm_push( val ); - break; - } - case IN_POP: { - debug( REALM_BYTECODE, "IN_POP\n" ); - - Tree *val = vm_pop(); - treeDownref( prg, sp, val ); - break; - } - case IN_POP_N_WORDS: { - short n; - read_half( n ); - - debug( REALM_BYTECODE, "IN_POP_N_WORDS\n" ); - - vm_popn( n ); - break; - } - case IN_SPRINTF: { - debug( REALM_BYTECODE, "IN_SPRINTF\n" ); - - Tree *f = vm_pop(); - f++; - Tree *integer = vm_pop(); - Tree *format = vm_pop(); - Head *res = stringSprintf( prg, (Str*)format, (Int*)integer ); - Tree *str = constructString( prg, res ); - treeUpref( str ); - vm_push( str ); - treeDownref( prg, sp, integer ); - treeDownref( prg, sp, format ); - break; - } - case IN_STR_ATOI: { - debug( REALM_BYTECODE, "IN_STR_ATOI\n" ); - - Str *str = (Str*)vm_pop(); - Word res = strAtoi( str->value ); - Tree *integer = constructInteger( prg, res ); - treeUpref( integer ); - vm_push( integer ); - treeDownref( prg, sp, 
(Tree*)str ); - break; - } - case IN_INT_TO_STR: { - debug( REALM_BYTECODE, "IN_INT_TO_STR\n" ); - - Int *i = (Int*)vm_pop(); - Head *res = intToStr( prg, i->value ); - Tree *str = constructString( prg, res ); - treeUpref( str ); - vm_push( str ); - treeDownref( prg, sp, (Tree*) i ); - break; - } - case IN_TREE_TO_STR: { - debug( REALM_BYTECODE, "IN_TREE_TO_STR\n" ); - - Tree *tree = vm_pop(); - Head *res = treeToStr( prg, sp, tree, true ); - Tree *str = constructString( prg, res ); - treeUpref( str ); - vm_push( str ); - treeDownref( prg, sp, tree ); - break; - } - case IN_TREE_TO_STR_NOTRIM: { - debug( REALM_BYTECODE, "IN_TREE_TO_STR_NOTRIM\n" ); - - Tree *tree = vm_pop(); - Head *res = treeToStr( prg, sp, tree, false ); - Tree *str = constructString( prg, res ); - treeUpref( str ); - vm_push( str ); - treeDownref( prg, sp, tree ); - break; - } - case IN_TREE_TRIM: { - debug( REALM_BYTECODE, "IN_TREE_TRIM\n" ); - - Tree *tree = vm_pop(); - Tree *trimmed = treeTrim( prg, sp, tree ); - vm_push( trimmed ); - break; - } - case IN_CONCAT_STR: { - debug( REALM_BYTECODE, "IN_CONCAT_STR\n" ); - - Str *s2 = (Str*)vm_pop(); - Str *s1 = (Str*)vm_pop(); - Head *res = concatStr( s1->value, s2->value ); - Tree *str = constructString( prg, res ); - treeUpref( str ); - treeDownref( prg, sp, (Tree*)s1 ); - treeDownref( prg, sp, (Tree*)s2 ); - vm_push( str ); - break; - } - case IN_STR_UORD8: { - debug( REALM_BYTECODE, "IN_STR_UORD8\n" ); - - Str *str = (Str*)vm_pop(); - Word res = strUord8( str->value ); - Tree *tree = constructInteger( prg, res ); - treeUpref( tree ); - vm_push( tree ); - treeDownref( prg, sp, (Tree*)str ); - break; - } - case IN_STR_UORD16: { - debug( REALM_BYTECODE, "IN_STR_UORD16\n" ); - - Str *str = (Str*)vm_pop(); - Word res = strUord16( str->value ); - Tree *tree = constructInteger( prg, res ); - treeUpref( tree ); - vm_push( tree ); - treeDownref( prg, sp, (Tree*)str ); - break; - } - - case IN_STR_LENGTH: { - debug( REALM_BYTECODE, "IN_STR_LENGTH\n" ); - 
- Str *str = (Str*)vm_pop(); - long len = stringLength( str->value ); - Tree *res = constructInteger( prg, len ); - treeUpref( res ); - vm_push( res ); - treeDownref( prg, sp, (Tree*)str ); - break; - } - case IN_JMP_FALSE: { - short dist; - read_half( dist ); - - debug( REALM_BYTECODE, "IN_JMP_FALSE %d\n", dist ); - - Tree *tree = vm_pop(); - if ( testFalse( prg, tree ) ) - instr += dist; - treeDownref( prg, sp, tree ); - break; - } - case IN_JMP_TRUE: { - short dist; - read_half( dist ); - - debug( REALM_BYTECODE, "IN_JMP_TRUE %d\n", dist ); - - Tree *tree = vm_pop(); - if ( !testFalse( prg, tree ) ) - instr += dist; - treeDownref( prg, sp, tree ); - break; - } - case IN_JMP: { - short dist; - read_half( dist ); - - debug( REALM_BYTECODE, "IN_JMP\n" ); - - instr += dist; - break; - } - case IN_REJECT: { - debug( REALM_BYTECODE, "IN_REJECT\n" ); - exec->pdaRun->reject = true; - break; - } - - /* - * Binary comparison operators. - */ - case IN_TST_EQL: { - debug( REALM_BYTECODE, "IN_TST_EQL\n" ); - - Tree *o2 = vm_pop(); - Tree *o1 = vm_pop(); - long r = cmpTree( prg, o1, o2 ); - Tree *val = r ? prg->falseVal : prg->trueVal; - treeUpref( val ); - vm_push( val ); - treeDownref( prg, sp, o1 ); - treeDownref( prg, sp, o2 ); - break; - } - case IN_TST_NOT_EQL: { - debug( REALM_BYTECODE, "IN_TST_NOT_EQL\n" ); - - Tree *o2 = vm_pop(); - Tree *o1 = vm_pop(); - long r = cmpTree( prg, o1, o2 ); - Tree *val = r ? prg->trueVal : prg->falseVal; - treeUpref( val ); - vm_push( val ); - treeDownref( prg, sp, o1 ); - treeDownref( prg, sp, o2 ); - break; - } - case IN_TST_LESS: { - debug( REALM_BYTECODE, "IN_TST_LESS\n" ); - - Tree *o2 = vm_pop(); - Tree *o1 = vm_pop(); - long r = cmpTree( prg, o1, o2 ); - Tree *val = r < 0 ? 
prg->trueVal : prg->falseVal; - treeUpref( val ); - vm_push( val ); - treeDownref( prg, sp, o1 ); - treeDownref( prg, sp, o2 ); - break; - } - case IN_TST_LESS_EQL: { - debug( REALM_BYTECODE, "IN_TST_LESS_EQL\n" ); - - Tree *o2 = vm_pop(); - Tree *o1 = vm_pop(); - long r = cmpTree( prg, o1, o2 ); - Tree *val = r <= 0 ? prg->trueVal : prg->falseVal; - treeUpref( val ); - vm_push( val ); - treeDownref( prg, sp, o1 ); - treeDownref( prg, sp, o2 ); - } - case IN_TST_GRTR: { - debug( REALM_BYTECODE, "IN_TST_GRTR\n" ); - - Tree *o2 = vm_pop(); - Tree *o1 = vm_pop(); - long r = cmpTree( prg, o1, o2 ); - Tree *val = r > 0 ? prg->trueVal : prg->falseVal; - treeUpref( val ); - vm_push( val ); - treeDownref( prg, sp, o1 ); - treeDownref( prg, sp, o2 ); - break; - } - case IN_TST_GRTR_EQL: { - debug( REALM_BYTECODE, "IN_TST_GRTR_EQL\n" ); - - Tree *o2 = (Tree*)vm_pop(); - Tree *o1 = (Tree*)vm_pop(); - long r = cmpTree( prg, o1, o2 ); - Tree *val = r >= 0 ? prg->trueVal : prg->falseVal; - treeUpref( val ); - vm_push( val ); - treeDownref( prg, sp, o1 ); - treeDownref( prg, sp, o2 ); - break; - } - case IN_TST_LOGICAL_AND: { - debug( REALM_BYTECODE, "IN_TST_LOGICAL_AND\n" ); - - Tree *o2 = vm_pop(); - Tree *o1 = vm_pop(); - long v2 = !testFalse( prg, o2 ); - long v1 = !testFalse( prg, o1 ); - Word r = v1 && v2; - Tree *val = r ? prg->trueVal : prg->falseVal; - treeUpref( val ); - vm_push( val ); - treeDownref( prg, sp, o1 ); - treeDownref( prg, sp, o2 ); - break; - } - case IN_TST_LOGICAL_OR: { - debug( REALM_BYTECODE, "IN_TST_LOGICAL_OR\n" ); - - Tree *o2 = vm_pop(); - Tree *o1 = vm_pop(); - long v2 = !testFalse( prg, o2 ); - long v1 = !testFalse( prg, o1 ); - Word r = v1 || v2; - Tree *val = r ? prg->trueVal : prg->falseVal; - treeUpref( val ); - vm_push( val ); - treeDownref( prg, sp, o1 ); - treeDownref( prg, sp, o2 ); - break; - } - case IN_NOT: { - debug( REALM_BYTECODE, "IN_NOT\n" ); - - Tree *tree = (Tree*)vm_pop(); - long r = testFalse( prg, tree ); - Tree *val = r ? 
prg->trueVal : prg->falseVal; - treeUpref( val ); - vm_push( val ); - treeDownref( prg, sp, tree ); - break; - } - - case IN_ADD_INT: { - debug( REALM_BYTECODE, "IN_ADD_INT\n" ); - - Int *o2 = (Int*)vm_pop(); - Int *o1 = (Int*)vm_pop(); - long r = o1->value + o2->value; - Tree *tree = constructInteger( prg, r ); - treeUpref( tree ); - vm_push( tree ); - treeDownref( prg, sp, (Tree*)o1 ); - treeDownref( prg, sp, (Tree*)o2 ); - break; - } - case IN_MULT_INT: { - debug( REALM_BYTECODE, "IN_MULT_INT\n" ); - - Int *o2 = (Int*)vm_pop(); - Int *o1 = (Int*)vm_pop(); - long r = o1->value * o2->value; - Tree *tree = constructInteger( prg, r ); - treeUpref( tree ); - vm_push( tree ); - treeDownref( prg, sp, (Tree*)o1 ); - treeDownref( prg, sp, (Tree*)o2 ); - break; - } - case IN_DIV_INT: { - debug( REALM_BYTECODE, "IN_DIV_INT\n" ); - - Int *o2 = (Int*)vm_pop(); - Int *o1 = (Int*)vm_pop(); - long r = o1->value / o2->value; - Tree *tree = constructInteger( prg, r ); - treeUpref( tree ); - vm_push( tree ); - treeDownref( prg, sp, (Tree*)o1 ); - treeDownref( prg, sp, (Tree*)o2 ); - break; - } - case IN_SUB_INT: { - debug( REALM_BYTECODE, "IN_SUB_INT\n" ); - - Int *o2 = (Int*)vm_pop(); - Int *o1 = (Int*)vm_pop(); - long r = o1->value - o2->value; - Tree *tree = constructInteger( prg, r ); - treeUpref( tree ); - vm_push( tree ); - treeDownref( prg, sp, (Tree*)o1 ); - treeDownref( prg, sp, (Tree*)o2 ); - break; - } - case IN_DUP_TOP_OFF: { - short off; - read_half( off ); - - debug( REALM_BYTECODE, "IN_DUP_TOP_OFF %hd\n", off ); - - Tree *val = vm_top_off(off); - treeUpref( val ); - vm_push( val ); - break; - } - case IN_DUP_TOP: { - debug( REALM_BYTECODE, "IN_DUP_TOP\n" ); - - Tree *val = vm_top(); - treeUpref( val ); - vm_push( val ); - break; - } - case IN_TRITER_FROM_REF: { - short field; - Half searchTypeId; - read_half( field ); - read_half( searchTypeId ); - - debug( REALM_BYTECODE, "IN_TRITER_FROM_REF\n" ); - - Ref rootRef; - rootRef.kid = (Kid*)vm_pop(); - rootRef.next = 
(Ref*)vm_pop(); - void *mem = vm_plocal(field); - initTreeIter( (TreeIter*)mem, &rootRef, searchTypeId, vm_ptop() ); - break; - } - case IN_TRITER_DESTROY: { - short field; - read_half( field ); - - debug( REALM_BYTECODE, "IN_TRITER_DESTROY\n" ); - - TreeIter *iter = (TreeIter*) vm_plocal(field); - treeIterDestroy( &sp, iter ); - break; - } - case IN_REV_TRITER_FROM_REF: { - short field; - Half searchTypeId; - read_half( field ); - read_half( searchTypeId ); - - debug( REALM_BYTECODE, "IN_REV_TRITER_FROM_REF\n" ); - - Ref rootRef; - rootRef.kid = (Kid*)vm_pop(); - rootRef.next = (Ref*)vm_pop(); - - Tree **stackRoot = vm_ptop(); - - int children = 0; - Kid *kid = treeChild( prg, rootRef.kid->tree ); - while ( kid != 0 ) { - children++; - vm_push( (SW) kid ); - kid = kid->next; - } - - void *mem = vm_plocal(field); - initRevTreeIter( (RevTreeIter*)mem, &rootRef, searchTypeId, stackRoot, children ); - break; - } - case IN_REV_TRITER_DESTROY: { - short field; - read_half( field ); - - debug( REALM_BYTECODE, "IN_REV_TRITER_DESTROY\n" ); - - RevTreeIter *iter = (RevTreeIter*) vm_plocal(field); - long curStackSize = iter->stackRoot - vm_ptop(); - assert( iter->stackSize == curStackSize ); - vm_popn( iter->stackSize ); - break; - } - case IN_TREE_SEARCH: { - Word id; - read_word( id ); - - debug( REALM_BYTECODE, "IN_TREE_SEARCH\n" ); - - Tree *tree = vm_pop(); - Tree *res = treeSearch2( prg, tree, id ); - treeUpref( res ); - vm_push( res ); - treeDownref( prg, sp, tree ); - break; - } - case IN_TRITER_ADVANCE: { - short field; - read_half( field ); - - debug( REALM_BYTECODE, "IN_TRITER_ADVANCE\n" ); - - TreeIter *iter = (TreeIter*) vm_plocal(field); - Tree *res = treeIterAdvance( prg, &sp, iter ); - treeUpref( res ); - vm_push( res ); - break; - } - case IN_TRITER_NEXT_CHILD: { - short field; - read_half( field ); - - debug( REALM_BYTECODE, "IN_TRITER_NEXT_CHILD\n" ); - - TreeIter *iter = (TreeIter*) vm_plocal(field); - Tree *res = treeIterNextChild( prg, &sp, iter ); - 
treeUpref( res ); - vm_push( res ); - break; - } - case IN_REV_TRITER_PREV_CHILD: { - short field; - read_half( field ); - - debug( REALM_BYTECODE, "IN_REV_TRITER_PREV_CHILD\n" ); - - RevTreeIter *iter = (RevTreeIter*) vm_plocal(field); - Tree *res = treeRevIterPrevChild( prg, &sp, iter ); - treeUpref( res ); - vm_push( res ); - break; - } - case IN_TRITER_NEXT_REPEAT: { - short field; - read_half( field ); - - debug( REALM_BYTECODE, "IN_TRITER_NEXT_REPEAT\n" ); - - TreeIter *iter = (TreeIter*) vm_plocal(field); - Tree *res = treeIterNextRepeat( prg, &sp, iter ); - treeUpref( res ); - vm_push( res ); - break; - } - case IN_TRITER_PREV_REPEAT: { - short field; - read_half( field ); - - debug( REALM_BYTECODE, "IN_TRITER_PREV_REPEAT\n" ); - - TreeIter *iter = (TreeIter*) vm_plocal(field); - Tree *res = treeIterPrevRepeat( prg, &sp, iter ); - treeUpref( res ); - vm_push( res ); - break; - } - case IN_TRITER_GET_CUR_R: { - short field; - read_half( field ); - - debug( REALM_BYTECODE, "IN_TRITER_GET_CUR_R\n" ); - - TreeIter *iter = (TreeIter*) vm_plocal(field); - Tree *tree = treeIterDerefCur( iter ); - treeUpref( tree ); - vm_push( tree ); - break; - } - case IN_TRITER_GET_CUR_WC: { - short field; - read_half( field ); - - debug( REALM_BYTECODE, "IN_TRITER_GET_CUR_WC\n" ); - - TreeIter *iter = (TreeIter*) vm_plocal(field); - splitIterCur( prg, &sp, iter ); - Tree *tree = treeIterDerefCur( iter ); - treeUpref( tree ); - vm_push( tree ); - break; - } - case IN_TRITER_SET_CUR_WC: { - short field; - read_half( field ); - - debug( REALM_BYTECODE, "IN_TRITER_SET_CUR_WC\n" ); - - Tree *tree = vm_pop(); - TreeIter *iter = (TreeIter*) vm_plocal(field); - splitIterCur( prg, &sp, iter ); - Tree *old = treeIterDerefCur( iter ); - setTriterCur( prg, iter, tree ); - treeDownref( prg, sp, old ); - break; - } - case IN_MATCH: { - Half patternId; - read_half( patternId ); - - debug( REALM_BYTECODE, "IN_MATCH\n" ); - - Tree *tree = vm_pop(); - - /* Run the match, push the result. 
*/ - int rootNode = prg->rtd->patReplInfo[patternId].offset; - - /* Bindings are indexed starting at 1. Zero bindId to represent no - * binding. We make a space for it here rather than do math at - * access them. */ - long numBindings = prg->rtd->patReplInfo[patternId].numBindings; - Tree *bindings[1+numBindings]; - memset( bindings, 0, sizeof(Tree*)*(1+numBindings) ); - - Kid kid; - kid.tree = tree; - kid.next = 0; - int matched = matchPattern( bindings, prg, rootNode, &kid, false ); - - if ( !matched ) - memset( bindings, 0, sizeof(Tree*)*(1+numBindings) ); - else { - int b; - for ( b = 1; b <= numBindings; b++ ) - assert( bindings[b] != 0 ); - } - - Tree *result = matched ? tree : 0; - treeUpref( result ); - vm_push( result ? tree : 0 ); - int b; - for ( b = 1; b <= numBindings; b++ ) { - treeUpref( bindings[b] ); - vm_push( bindings[b] ); - } - - treeDownref( prg, sp, tree ); - break; - } - - case IN_GET_ACCUM_CTX_R: { - debug( REALM_BYTECODE, "IN_GET_ACCUM_CTX_R\n" ); - - Tree *obj = vm_pop(); - Tree *ctx = ((Parser*)obj)->pdaRun->context; - treeUpref( ctx ); - vm_push( ctx ); - treeDownref( prg, sp, obj ); - break; - } - - case IN_SET_ACCUM_CTX_WC: { - debug( REALM_BYTECODE, "IN_SET_ACCUM_CTX_WC\n" ); - - Tree *parser = vm_pop(); - Tree *val = vm_pop(); - parserSetContext( prg, sp, (Parser*)parser, val ); - treeDownref( prg, sp, parser ); - break; - } - -// case IN_GET_ACCUM_CTX_WC: -// case IN_GET_ACCUM_CTX_WV: -// case IN_SET_ACCUM_CTX_WC: -// case IN_SET_ACCUM_CTX_WV: -// break; - - case IN_INPUT_APPEND_WC: { - debug( REALM_BYTECODE, "IN_INPUT_APPEND_WC \n" ); - - Input *accumStream = (Input*)vm_pop(); - Tree *input = vm_pop(); - streamAppend( prg, sp, input, accumStream->in ); - - vm_push( (Tree*)accumStream ); - treeDownref( prg, sp, input ); - break; - } - case IN_INPUT_APPEND_WV: { - debug( REALM_BYTECODE, "IN_INPUT_APPEND_WV \n" ); - - Input *accumStream = (Input*)vm_pop(); - Tree *input = vm_pop(); - Word len = streamAppend( prg, sp, input, 
accumStream->in ); - - treeUpref( (Tree*)accumStream ); - vm_push( (Tree*)accumStream ); - - append( &exec->pdaRun->rcodeCollect, IN_INPUT_APPEND_BKT ); - appendWord( &exec->pdaRun->rcodeCollect, (Word) accumStream ); - appendWord( &exec->pdaRun->rcodeCollect, (Word) input ); - appendWord( &exec->pdaRun->rcodeCollect, (Word) len ); - append( &exec->pdaRun->rcodeCollect, SIZEOF_CODE + 3 * SIZEOF_WORD ); - break; - } - - case IN_INPUT_APPEND_BKT: { - Tree *accumStream; - Tree *input; - Word len; - read_tree( accumStream ); - read_tree( input ); - read_word( len ); - - debug( REALM_BYTECODE, "IN_INPUT_APPEND_BKT\n" ); - - undoStreamAppend( prg, sp, 0, ((Input*)accumStream)->in, input, len ); - treeDownref( prg, sp, accumStream ); - treeDownref( prg, sp, input ); - break; - } - - case IN_PARSE_LOAD_START: { - debug( REALM_BYTECODE, "IN_PARSE_LOAD_START\n" ); - vm_push( (SW) PcrStart ); - break; - } - - case IN_PARSE_SAVE_STEPS: { - debug( REALM_BYTECODE, "IN_PARSE_SAVE_STEPS\n" ); - - Parser *parser = (Parser*)vm_pop(); - long steps = parser->pdaRun->steps; - - vm_push( (SW)steps ); - vm_push( (SW)parser ); - break; - } - - case IN_PCR_CALL: { - debug( REALM_BYTECODE, "IN_PCR_CALL\n" ); - - long pcr = (long)vm_pop(); - Parser *parser = (Parser*)vm_pop(); - long steps = (long)vm_pop(); - - vm_push( (SW)steps ); - vm_push( (SW)parser ); - vm_push( (SW)pcr ); - - vm_push( (SW)exec->parser ); - vm_push( (SW)exec->pdaRun ); - vm_push( (SW)exec->fsmRun ); - vm_push( (SW)exec->inputStream ); - vm_push( (SW)exec->framePtr ); - vm_push( (SW)exec->iframePtr ); - vm_push( (SW)exec->frameId ); - vm_push( (SW)exec->rcodeUnitLen ); - - Code *returnTo = instr - ( SIZEOF_CODE + SIZEOF_CODE + SIZEOF_HALF ); - vm_push( (SW)returnTo ); - - initExecution( exec, parser, parser->pdaRun, parser->fsmRun, parser->input->in, parser->pdaRun->frameId ); - instr = parser->pdaRun->code; - break; - } - - case IN_PCR_RET: { - debug( REALM_BYTECODE, "IN_PCR_RET\n" ); - - FrameInfo *fi = 
&prg->rtd->frameInfo[exec->frameId]; - downrefLocalTrees( prg, sp, exec->framePtr, fi->trees, fi->treesLen ); - vm_popn( fi->frameSize ); - - instr = (Code*) vm_pop(); - exec->rcodeUnitLen = ( long ) vm_pop(); - exec->frameId = ( long ) vm_pop(); - exec->iframePtr = ( Tree ** ) vm_pop(); - exec->framePtr = ( Tree ** ) vm_pop(); - exec->inputStream = ( InputStream * ) vm_pop(); - exec->fsmRun = ( FsmRun * ) vm_pop(); - exec->pdaRun = ( PdaRun * ) vm_pop(); - exec->parser = ( Parser * ) vm_pop(); - - if ( instr == 0 ) { - fflush( stdout ); - goto out; - } - break; - } - - case IN_PCR_END_DECK: { - debug( REALM_BYTECODE, "IN_PCR_END_DECK\n" ); - exec->pdaRun->onDeck = false; - break; - } - - case IN_PARSE_FRAG_WC: { - debug( REALM_BYTECODE, "IN_PARSE_FRAG_WC\n" ); - - Half stopId; - read_half( stopId ); - - long pcr = (long)vm_pop(); - Parser *parser = (Parser*)vm_pop(); - long steps = (long)vm_pop(); - - pcr = parseFrag( prg, sp, parser, stopId, pcr ); - - vm_push( (SW)steps ); - vm_push( (SW)parser ); - vm_push( (SW)pcr ); - - /* If done, jump to the terminating instruction, otherwise fall - * through to call some code, then jump back here. */ - if ( pcr == PcrDone ) - instr += SIZEOF_CODE; - break; - } - - case IN_PARSE_FRAG_WC3: { - debug( REALM_BYTECODE, "IN_PARSE_FRAG_WC3\n" ); - - vm_pop_ignore(); - Parser *parser = (Parser*)vm_pop(); - vm_pop_ignore(); - - treeDownref( prg, sp, (Tree*)parser ); - - if ( prg->induceExit ) - goto out; - - break; - } - - case IN_PARSE_FRAG_WV: { - Half stopId; - read_half( stopId ); - - debug( REALM_BYTECODE, "IN_PARSE_FRAG_WV\n" ); - - long pcr = (long)vm_pop(); - Parser *parser = (Parser*)vm_pop(); - long steps = (long)vm_pop(); - - pcr = parseFrag( prg, sp, parser, stopId, pcr ); - - vm_push( (SW)steps ); - vm_push( (SW)parser ); - vm_push( (SW)pcr ); - - /* If done, jump to the terminating instruction, otherwise fall - * through to call some code, then jump back here. 
*/ - if ( pcr == PcrDone ) - instr += SIZEOF_CODE; - break; - } - - case IN_PARSE_FRAG_WV3: { - debug( REALM_BYTECODE, "IN_PARSE_FRAG_WV3 \n" ); - - vm_pop_ignore(); - Parser *parser = (Parser*)vm_pop(); - long steps = (long)vm_pop(); - - append( &exec->pdaRun->rcodeCollect, IN_LOAD_WORD ); - appendWord( &exec->pdaRun->rcodeCollect, steps ); - append( &exec->pdaRun->rcodeCollect, IN_LOAD_TREE ); - appendWord( &exec->pdaRun->rcodeCollect, (Word)parser ); - append( &exec->pdaRun->rcodeCollect, IN_PARSE_LOAD_START ); - append( &exec->pdaRun->rcodeCollect, IN_PARSE_FRAG_BKT ); - appendHalf( &exec->pdaRun->rcodeCollect, 0 ); - append( &exec->pdaRun->rcodeCollect, IN_PCR_CALL ); - append( &exec->pdaRun->rcodeCollect, IN_PARSE_FRAG_BKT3 ); - append( &exec->pdaRun->rcodeCollect, 6 * SIZEOF_CODE + 2 * SIZEOF_WORD + SIZEOF_HALF ); - - if ( prg->induceExit ) - goto out; - break; - } - - case IN_PARSE_FRAG_BKT: { - Half stopId; - read_half( stopId ); - - debug( REALM_BYTECODE, "IN_PARSE_FRAG_BKT\n" ); - - long pcr = (long)vm_pop(); - Parser *parser = (Parser*)vm_pop(); - long steps = (long)vm_pop(); - - pcr = undoParseFrag( prg, sp, parser, steps, pcr ); - - vm_push( (SW)steps ); - vm_push( (SW)parser ); - vm_push( (SW)pcr ); - - if ( pcr == PcrDone ) - instr += SIZEOF_CODE; - break; - } - - case IN_PARSE_FRAG_BKT3: { - debug( REALM_BYTECODE, "IN_PARSE_FRAG_BKT3\n" ); - - vm_pop_ignore(); - Parser *parser = (Parser*)vm_pop(); - vm_pop_ignore(); - - treeDownref( prg, sp, (Tree*)parser ); - break; - } - - case IN_PARSE_FINISH_WC: { - Half stopId; - read_half( stopId ); - - debug( REALM_BYTECODE, "IN_PARSE_FINISH_WC\n" ); - - long pcr = (long)vm_pop(); - Parser *parser = (Parser*)vm_pop(); - long steps = (long)vm_pop(); - - parser->result = 0; - pcr = parseFinish( &parser->result, prg, sp, parser, false, pcr ); - - vm_push( (SW)steps ); - vm_push( (SW)parser ); - vm_push( (SW)pcr ); - - /* If done, jump to the terminating instruction, otherwise fall - * through to call some code, 
then jump back here. */ - if ( pcr == PcrDone ) - instr += SIZEOF_CODE; - break; - } - - case IN_PARSE_FINISH_WC3: { - debug( REALM_BYTECODE, "IN_PARSE_FINISH_WC3\n" ); - - vm_pop_ignore(); - Parser *parser = (Parser*)vm_pop(); - vm_pop_ignore(); - - vm_push( parser->result ); - debug( REALM_BYTECODE, "parser refs: %d\n", parser->refs ); - treeDownref( prg, sp, (Tree*)parser ); - if ( prg->induceExit ) - goto out; - - break; - } - - case IN_PARSE_FINISH_WV: { - Half stopId; - read_half( stopId ); - - debug( REALM_BYTECODE, "IN_PARSE_FINISH_WV\n" ); - - long pcr = (long)vm_pop(); - Parser *parser = (Parser*)vm_pop(); - long steps = (long)vm_pop(); - - parser->result = 0; - pcr = parseFinish( &parser->result, prg, sp, parser, true, pcr ); - - vm_push( (SW)steps ); - vm_push( (SW)parser ); - vm_push( (SW)pcr ); - - if ( pcr == PcrDone ) - instr += SIZEOF_CODE; - break; - } - - case IN_PARSE_FINISH_WV3: { - debug( REALM_BYTECODE, "IN_PARSE_FINISH_WV3\n" ); - - vm_pop_ignore(); - Parser *parser = (Parser*)vm_pop(); - long steps = (long)vm_pop(); - - vm_push( parser->result ); - - append( &exec->pdaRun->rcodeCollect, IN_LOAD_WORD ); - appendWord( &exec->pdaRun->rcodeCollect, steps ); - append( &exec->pdaRun->rcodeCollect, IN_LOAD_TREE ); - appendWord( &exec->pdaRun->rcodeCollect, (Word)parser ); - append( &exec->pdaRun->rcodeCollect, IN_PARSE_LOAD_START ); - append( &exec->pdaRun->rcodeCollect, IN_PARSE_FINISH_BKT ); - appendHalf( &exec->pdaRun->rcodeCollect, 0 ); - append( &exec->pdaRun->rcodeCollect, IN_PCR_CALL ); - append( &exec->pdaRun->rcodeCollect, IN_PARSE_FINISH_BKT3 ); - append( &exec->pdaRun->rcodeCollect, 6 * SIZEOF_CODE + 2 * SIZEOF_WORD + SIZEOF_HALF ); - - if ( prg->induceExit ) - goto out; - - break; - } - - case IN_PARSE_FINISH_BKT: { - Half stopId; - read_half( stopId ); - - debug( REALM_BYTECODE, "IN_PARSE_FINISH_BKT\n" ); - - long pcr = (long)vm_pop(); - Parser *parser = (Parser*)vm_pop(); - long steps = (long)vm_pop(); - - pcr = undoParseFrag( prg, 
sp, parser, steps, pcr ); - - vm_push( (SW)steps ); - vm_push( (SW)parser ); - vm_push( (SW)pcr ); - - if ( pcr == PcrDone ) - instr += SIZEOF_CODE; - break; - } - - case IN_PARSE_FINISH_BKT3: { - debug( REALM_BYTECODE, "IN_PARSE_FINISH_BKT3\n" ); - - vm_pop_ignore(); - Parser *parser = (Parser*)vm_pop(); - vm_pop_ignore(); - - unsetEof( parser->input->in ); - treeDownref( prg, sp, (Tree*)parser ); - break; - } - - case IN_INPUT_PULL_WV: { - debug( REALM_BYTECODE, "IN_INPUT_PULL_WV\n" ); - - Input *accumStream = (Input*)vm_pop(); - Tree *len = vm_pop(); - Tree *string = streamPullBc( prg, exec->fsmRun, accumStream->in, len ); - treeUpref( string ); - vm_push( string ); - - /* Single unit. */ - treeUpref( string ); - append( &exec->pdaRun->rcodeCollect, IN_INPUT_PULL_BKT ); - appendWord( &exec->pdaRun->rcodeCollect, (Word) string ); - appendWord( &exec->pdaRun->rcodeCollect, (Word) exec->fsmRun ); - exec->rcodeUnitLen += SIZEOF_CODE + 2 *SIZEOF_WORD; - append( &exec->pdaRun->rcodeCollect, exec->rcodeUnitLen ); - - treeDownref( prg, sp, (Tree*)accumStream ); - treeDownref( prg, sp, len ); - break; - } - case IN_INPUT_PULL_BKT: { - Word f; - Tree *string; - read_tree( string ); - read_word( f ); - FsmRun *fsmRun = (FsmRun*)f; - - Tree *accumStream = vm_pop(); - - debug( REALM_BYTECODE, "IN_INPUT_PULL_BKT\n" ); - - undoPull( prg, fsmRun, ((Input*)accumStream)->in, string ); - treeDownref( prg, sp, accumStream ); - treeDownref( prg, sp, string ); - break; - } - case IN_INPUT_PUSH_WV: { - debug( REALM_BYTECODE, "IN_INPUT_PUSH_WV\n" ); - - Input *input = (Input*)vm_pop(); - Tree *tree = vm_pop(); - long len = streamPush( prg, sp, 0, input->in, tree, false ); - vm_push( 0 ); - - /* Single unit. 
*/ - append( &exec->pdaRun->rcodeCollect, IN_INPUT_PUSH_BKT ); - appendWord( &exec->pdaRun->rcodeCollect, len ); - exec->rcodeUnitLen += SIZEOF_CODE + SIZEOF_WORD; - append( &exec->pdaRun->rcodeCollect, exec->rcodeUnitLen ); - - treeDownref( prg, sp, (Tree*)input ); - treeDownref( prg, sp, tree ); - break; - } - case IN_INPUT_PUSH_IGNORE_WV: { - debug( REALM_BYTECODE, "IN_INPUT_PUSH_IGNORE_WV\n" ); - - Input *input = (Input*)vm_pop(); - Tree *tree = vm_pop(); - long len = streamPush( prg, sp, 0, input->in, tree, true ); - vm_push( 0 ); - - /* Single unit. */ - append( &exec->pdaRun->rcodeCollect, IN_INPUT_PUSH_BKT ); - appendWord( &exec->pdaRun->rcodeCollect, len ); - exec->rcodeUnitLen += SIZEOF_CODE + SIZEOF_WORD; - append( &exec->pdaRun->rcodeCollect, exec->rcodeUnitLen ); - - treeDownref( prg, sp, (Tree*)input ); - treeDownref( prg, sp, tree ); - break; - } - case IN_INPUT_PUSH_BKT: { - Word len; - read_word( len ); - - Input *input = (Input*)vm_pop(); - - debug( REALM_BYTECODE, "IN_INPUT_PUSH_BKT\n" ); - - undoStreamPush( prg, sp, 0, input->in, len ); - treeDownref( prg, sp, (Tree*)input ); - break; - } - case IN_CONSTRUCT: { - Half patternId; - read_half( patternId ); - - debug( REALM_BYTECODE, "IN_CONSTRUCT\n" ); - - int rootNode = prg->rtd->patReplInfo[patternId].offset; - - /* Note that bindIds are indexed at one. Add one spot for them. 
*/ - int numBindings = prg->rtd->patReplInfo[patternId].numBindings; - Tree *bindings[1+numBindings]; - - int b; - for ( b = 1; b <= numBindings; b++ ) { - bindings[b] = vm_pop(); - assert( bindings[b] != 0 ); - } - - Tree *replTree = 0; - PatReplNode *nodes = prg->rtd->patReplNodes; - LangElInfo *lelInfo = prg->rtd->lelInfo; - long genericId = lelInfo[nodes[rootNode].id].genericId; - if ( genericId > 0 ) { - replTree = createGeneric( prg, genericId ); - treeUpref( replTree ); - } - else { - replTree = constructReplacementTree( 0, bindings, - prg, rootNode ); - } - - vm_push( replTree ); - break; - } - case IN_CONSTRUCT_INPUT: { - debug( REALM_BYTECODE, "IN_CONSTRUCT_INPUT\n" ); - - Tree *input = constructInput( prg ); - treeUpref( input ); - vm_push( input ); - break; - } - case IN_GET_INPUT: { - debug( REALM_BYTECODE, "IN_GET_INPUT\n" ); - - Parser *parser = (Parser*)vm_pop(); - treeUpref( (Tree*)parser->input ); - vm_push( (Tree*)parser->input ); - treeDownref( prg, sp, (Tree*)parser ); - break; - } - case IN_SET_INPUT: { - debug( REALM_BYTECODE, "IN_SET_INPUT\n" ); - - Parser *parser = (Parser*)vm_pop(); - Input *accumStream = (Input*)vm_pop(); - parser->input = accumStream; - treeUpref( (Tree*)accumStream ); - treeDownref( prg, sp, (Tree*)parser ); - treeDownref( prg, sp, (Tree*)accumStream ); - break; - } - case IN_CONSTRUCT_TERM: { - Half tokenId; - read_half( tokenId ); - - debug( REALM_BYTECODE, "IN_CONSTRUCT_TERM\n" ); - - /* Pop the string we are constructing the token from. 
*/ - Str *str = (Str*)vm_pop(); - Tree *res = constructTerm( prg, tokenId, str->value ); - treeUpref( res ); - vm_push( res ); - break; - } - case IN_MAKE_TOKEN: { - uchar nargs; - read_byte( nargs ); - - debug( REALM_BYTECODE, "IN_MAKE_TOKEN\n" ); - - Tree *result = constructToken( prg, sp, nargs ); - long i; - for ( i = 0; i < nargs; i++ ) { - Tree *arg = vm_pop(); - treeDownref( prg, sp, arg ); - } - vm_push( result ); - break; - } - case IN_MAKE_TREE: { - uchar nargs; - read_byte( nargs ); - - debug( REALM_BYTECODE, "IN_MAKE_TREE\n" ); - - Tree *result = makeTree( prg, sp, nargs ); - long i; - for ( i = 0; i < nargs; i++ ) { - Tree *arg = vm_pop(); - treeDownref( prg, sp, arg ); - } - vm_push( result ); - break; - } - case IN_TREE_NEW: { - debug( REALM_BYTECODE, "IN_TREE_NEW \n" ); - - Tree *tree = vm_pop(); - Tree *res = constructPointer( prg, tree ); - treeUpref( res ); - vm_push( res ); - break; - } - case IN_PTR_DEREF_R: { - debug( REALM_BYTECODE, "IN_PTR_DEREF_R\n" ); - - Pointer *ptr = (Pointer*)vm_pop(); - treeDownref( prg, sp, (Tree*)ptr ); - - Tree *dval = getPtrVal( ptr ); - treeUpref( dval ); - vm_push( dval ); - break; - } - case IN_PTR_DEREF_WC: { - debug( REALM_BYTECODE, "IN_PTR_DEREF_WC\n" ); - - Pointer *ptr = (Pointer*)vm_pop(); - treeDownref( prg, sp, (Tree*)ptr ); - - Tree *dval = getPtrValSplit( prg, ptr ); - treeUpref( dval ); - vm_push( dval ); - break; - } - case IN_PTR_DEREF_WV: { - debug( REALM_BYTECODE, "IN_PTR_DEREF_WV\n" ); - - Pointer *ptr = (Pointer*)vm_pop(); - /* Don't downref the pointer since it is going into the reverse - * instruction. */ - - Tree *dval = getPtrValSplit( prg, ptr ); - treeUpref( dval ); - vm_push( dval ); - - /* This is an initial global load. Need to reverse execute it. 
*/ - append( &exec->pdaRun->rcodeCollect, IN_PTR_DEREF_BKT ); - appendWord( &exec->pdaRun->rcodeCollect, (Word) ptr ); - exec->rcodeUnitLen = SIZEOF_CODE + SIZEOF_WORD; - break; - } - case IN_PTR_DEREF_BKT: { - Word p; - read_word( p ); - - debug( REALM_BYTECODE, "IN_PTR_DEREF_BKT\n" ); - - Pointer *ptr = (Pointer*)p; - - Tree *dval = getPtrValSplit( prg, ptr ); - treeUpref( dval ); - vm_push( dval ); - - treeDownref( prg, sp, (Tree*)ptr ); - break; - } - case IN_REF_FROM_LOCAL: { - short int field; - read_half( field ); - - debug( REALM_BYTECODE, "IN_REF_FROM_LOCAL\n" ); - - /* First push the null next pointer, then the kid pointer. */ - Tree **ptr = vm_plocal(field); - vm_push( 0 ); - vm_push( (SW)ptr ); - break; - } - case IN_REF_FROM_REF: { - short int field; - read_half( field ); - - debug( REALM_BYTECODE, "IN_REF_FROM_REF\n" ); - - Ref *ref = (Ref*)vm_plocal(field); - vm_push( (SW)ref ); - vm_push( (SW)ref->kid ); - break; - } - case IN_REF_FROM_QUAL_REF: { - short int back; - short int field; - read_half( back ); - read_half( field ); - - debug( REALM_BYTECODE, "IN_REF_FROM_QUAL_REF\n" ); - - Ref *ref = (Ref*)(sp + back); - - Tree *obj = ref->kid->tree; - Kid *attr_kid = getFieldKid( obj, field ); - - vm_push( (SW)ref ); - vm_push( (SW)attr_kid ); - break; - } - case IN_TRITER_REF_FROM_CUR: { - short int field; - read_half( field ); - - debug( REALM_BYTECODE, "IN_TRITER_REF_FROM_CUR\n" ); - - /* Push the next pointer first, then the kid. */ - TreeIter *iter = (TreeIter*) vm_plocal(field); - Ref *ref = &iter->ref; - vm_push( (SW)ref ); - vm_push( (SW)iter->ref.kid ); - break; - } - case IN_UITER_REF_FROM_CUR: { - short int field; - read_half( field ); - - debug( REALM_BYTECODE, "IN_UITER_REF_FROM_CUR\n" ); - - /* Push the next pointer first, then the kid. 
*/ - UserIter *uiter = (UserIter*) vm_local(field); - vm_push( (SW)uiter->ref.next ); - vm_push( (SW)uiter->ref.kid ); - break; - } - case IN_GET_TOKEN_DATA_R: { - debug( REALM_BYTECODE, "IN_GET_TOKEN_DATA_R\n" ); - - Tree *tree = (Tree*) vm_pop(); - Head *data = stringCopy( prg, tree->tokdata ); - Tree *str = constructString( prg, data ); - treeUpref( str ); - vm_push( str ); - treeDownref( prg, sp, tree ); - break; - } - case IN_SET_TOKEN_DATA_WC: { - debug( REALM_BYTECODE, "IN_SET_TOKEN_DATA_WC\n" ); - - Tree *tree = vm_pop(); - Tree *val = vm_pop(); - Head *head = stringCopy( prg, ((Str*)val)->value ); - stringFree( prg, tree->tokdata ); - tree->tokdata = head; - - treeDownref( prg, sp, tree ); - treeDownref( prg, sp, val ); - break; - } - case IN_SET_TOKEN_DATA_WV: { - debug( REALM_BYTECODE, "IN_SET_TOKEN_DATA_WV\n" ); - - Tree *tree = vm_pop(); - Tree *val = vm_pop(); - - Head *oldval = tree->tokdata; - Head *head = stringCopy( prg, ((Str*)val)->value ); - tree->tokdata = head; - - /* Set up reverse code. Needs no args. 
*/ - append( &exec->pdaRun->rcodeCollect, IN_SET_TOKEN_DATA_BKT ); - appendWord( &exec->pdaRun->rcodeCollect, (Word)oldval ); - exec->rcodeUnitLen += SIZEOF_CODE + SIZEOF_WORD; - append( &exec->pdaRun->rcodeCollect, exec->rcodeUnitLen ); - - treeDownref( prg, sp, tree ); - treeDownref( prg, sp, val ); - break; - } - case IN_SET_TOKEN_DATA_BKT: { - debug( REALM_BYTECODE, "IN_SET_TOKEN_DATA_BKT \n" ); - - Word oldval; - read_word( oldval ); - - Tree *tree = vm_pop(); - Head *head = (Head*)oldval; - stringFree( prg, tree->tokdata ); - tree->tokdata = head; - treeDownref( prg, sp, tree ); - break; - } - case IN_GET_TOKEN_POS_R: { - debug( REALM_BYTECODE, "IN_GET_TOKEN_POS_R\n" ); - - Tree *tree = (Tree*) vm_pop(); - Tree *integer = 0; - if ( tree->tokdata->location ) { - integer = constructInteger( prg, tree->tokdata->location->byte ); - treeUpref( integer ); - } - vm_push( integer ); - treeDownref( prg, sp, tree ); - break; - } - case IN_GET_TOKEN_LINE_R: { - debug( REALM_BYTECODE, "IN_GET_TOKEN_LINE_R\n" ); - - Tree *tree = (Tree*) vm_pop(); - Tree *integer = 0; - if ( tree->tokdata->location ) { - integer = constructInteger( prg, tree->tokdata->location->line ); - treeUpref( integer ); - } - vm_push( integer ); - treeDownref( prg, sp, tree ); - break; - } - case IN_GET_MATCH_LENGTH_R: { - debug( REALM_BYTECODE, "IN_GET_MATCH_LENGTH_R\n" ); - - Tree *integer = constructInteger( prg, stringLength(exec->pdaRun->tokdata) ); - treeUpref( integer ); - vm_push( integer ); - break; - } - case IN_GET_MATCH_TEXT_R: { - debug( REALM_BYTECODE, "IN_GET_MATCH_TEXT_R\n" ); - - Head *s = stringCopy( prg, exec->pdaRun->tokdata ); - Tree *tree = constructString( prg, s ); - treeUpref( tree ); - vm_push( tree ); - break; - } - case IN_LIST_LENGTH: { - debug( REALM_BYTECODE, "IN_LIST_LENGTH\n" ); - - List *list = (List*) vm_pop(); - long len = listLength( list ); - Tree *res = constructInteger( prg, len ); - treeDownref( prg, sp, (Tree*)list ); - treeUpref( res ); - vm_push( res ); - 
break; - } - case IN_LIST_APPEND_WV: { - debug( REALM_BYTECODE, "IN_LIST_APPEND_WV\n" ); - - Tree *obj = vm_pop(); - Tree *val = vm_pop(); - - treeDownref( prg, sp, obj ); - - listAppend2( prg, (List*)obj, val ); - treeUpref( prg->trueVal ); - vm_push( prg->trueVal ); - - /* Set up reverse code. Needs no args. */ - append( &exec->pdaRun->rcodeCollect, IN_LIST_APPEND_BKT ); - exec->rcodeUnitLen += SIZEOF_CODE; - append( &exec->pdaRun->rcodeCollect, exec->rcodeUnitLen ); - /* FLUSH */ - break; - } - case IN_LIST_APPEND_WC: { - debug( REALM_BYTECODE, "IN_LIST_APPEND_WC\n" ); - - Tree *obj = vm_pop(); - Tree *val = vm_pop(); - - treeDownref( prg, sp, obj ); - - listAppend2( prg, (List*)obj, val ); - treeUpref( prg->trueVal ); - vm_push( prg->trueVal ); - break; - } - case IN_LIST_APPEND_BKT: { - debug( REALM_BYTECODE, "IN_LIST_APPEND_BKT\n" ); - - Tree *obj = vm_pop(); - treeDownref( prg, sp, obj ); - - Tree *tree = listRemoveEnd( prg, (List*)obj ); - treeDownref( prg, sp, tree ); - break; - } - case IN_LIST_REMOVE_END_WC: { - debug( REALM_BYTECODE, "IN_LIST_REMOVE_END_WC\n" ); - - Tree *obj = vm_pop(); - treeDownref( prg, sp, obj ); - - Tree *end = listRemoveEnd( prg, (List*)obj ); - vm_push( end ); - break; - } - case IN_LIST_REMOVE_END_WV: { - debug( REALM_BYTECODE, "IN_LIST_REMOVE_END_WV\n" ); - - Tree *obj = vm_pop(); - treeDownref( prg, sp, obj ); - - Tree *end = listRemoveEnd( prg, (List*)obj ); - vm_push( end ); - - /* Set up reverse. The result comes off the list downrefed. - * Need it up referenced for the reverse code too. 
*/ - treeUpref( end ); - append( &exec->pdaRun->rcodeCollect, IN_LIST_REMOVE_END_BKT ); - appendWord( &exec->pdaRun->rcodeCollect, (Word)end ); - exec->rcodeUnitLen += SIZEOF_CODE + SIZEOF_WORD; - append( &exec->pdaRun->rcodeCollect, exec->rcodeUnitLen ); - /* FLUSH */ - break; - } - case IN_LIST_REMOVE_END_BKT: { - debug( REALM_BYTECODE, "IN_LIST_REMOVE_END_BKT\n" ); - - Tree *val; - read_tree( val ); - - Tree *obj = vm_pop(); - treeDownref( prg, sp, obj ); - - listAppend2( prg, (List*)obj, val ); - break; - } - case IN_GET_LIST_MEM_R: { - short field; - read_half( field ); - - debug( REALM_BYTECODE, "IN_GET_LIST_MEM_R\n" ); - - Tree *obj = vm_pop(); - treeDownref( prg, sp, obj ); - - Tree *val = getListMem( (List*)obj, field ); - treeUpref( val ); - vm_push( val ); - break; - } - case IN_GET_LIST_MEM_WC: { - short field; - read_half( field ); - - debug( REALM_BYTECODE, "IN_GET_LIST_MEM_WC\n" ); - - Tree *obj = vm_pop(); - treeDownref( prg, sp, obj ); - - Tree *val = getListMemSplit( prg, (List*)obj, field ); - treeUpref( val ); - vm_push( val ); - break; - } - case IN_GET_LIST_MEM_WV: { - short field; - read_half( field ); - - debug( REALM_BYTECODE, "IN_GET_LIST_MEM_WV\n" ); - - Tree *obj = vm_pop(); - treeDownref( prg, sp, obj ); - - Tree *val = getListMemSplit( prg, (List*)obj, field ); - treeUpref( val ); - vm_push( val ); - - /* Set up the reverse instruction. 
*/ - append( &exec->pdaRun->rcodeCollect, IN_GET_LIST_MEM_BKT ); - appendHalf( &exec->pdaRun->rcodeCollect, field ); - exec->rcodeUnitLen += SIZEOF_CODE + SIZEOF_HALF; - break; - } - case IN_GET_LIST_MEM_BKT: { - short field; - read_half( field ); - - debug( REALM_BYTECODE, "IN_GET_LIST_MEM_BKT\n" ); - - Tree *obj = vm_pop(); - treeDownref( prg, sp, obj ); - - Tree *res = getListMemSplit( prg, (List*)obj, field ); - treeUpref( res ); - vm_push( res ); - break; - } - case IN_SET_LIST_MEM_WC: { - Half field; - read_half( field ); - - debug( REALM_BYTECODE, "IN_SET_LIST_MEM_WC\n" ); - - Tree *obj = vm_pop(); - treeDownref( prg, sp, obj ); - - Tree *val = vm_pop(); - Tree *existing = setListMem( (List*)obj, field, val ); - treeDownref( prg, sp, existing ); - break; - } - case IN_SET_LIST_MEM_WV: { - Half field; - read_half( field ); - - debug( REALM_BYTECODE, "IN_SET_LIST_MEM_WV\n" ); - - Tree *obj = vm_pop(); - treeDownref( prg, sp, obj ); - - Tree *val = vm_pop(); - Tree *existing = setListMem( (List*)obj, field, val ); - - /* Set up the reverse instruction. */ - append( &exec->pdaRun->rcodeCollect, IN_SET_LIST_MEM_BKT ); - appendHalf( &exec->pdaRun->rcodeCollect, field ); - appendWord( &exec->pdaRun->rcodeCollect, (Word)existing ); - exec->rcodeUnitLen += SIZEOF_CODE + SIZEOF_HALF + SIZEOF_WORD; - append( &exec->pdaRun->rcodeCollect, exec->rcodeUnitLen ); - /* FLUSH */ - break; - } - case IN_SET_LIST_MEM_BKT: { - Half field; - Tree *val; - read_half( field ); - read_tree( val ); - - debug( REALM_BYTECODE, "IN_SET_LIST_MEM_BKT\n" ); - - Tree *obj = vm_pop(); - treeDownref( prg, sp, obj ); - - Tree *undid = setListMem( (List*)obj, field, val ); - treeDownref( prg, sp, undid ); - break; - } - case IN_MAP_INSERT_WV: { - debug( REALM_BYTECODE, "IN_MAP_INSERT_WV\n" ); - - Tree *obj = vm_pop(); - Tree *val = vm_pop(); - Tree *key = vm_pop(); - - treeDownref( prg, sp, obj ); - - int inserted = mapInsert( prg, (Map*)obj, key, val ); - Tree *result = inserted ? 
prg->trueVal : prg->falseVal; - treeUpref( result ); - vm_push( result ); - - /* Set up the reverse instruction. If the insert fails still need - * to pop the loaded map object. Just use the reverse instruction - * since it's nice to see it in the logs. */ - - /* Need to upref key for storage in reverse code. */ - treeUpref( key ); - append( &exec->pdaRun->rcodeCollect, IN_MAP_INSERT_BKT ); - append( &exec->pdaRun->rcodeCollect, inserted ); - appendWord( &exec->pdaRun->rcodeCollect, (Word)key ); - exec->rcodeUnitLen += SIZEOF_CODE + SIZEOF_CODE + SIZEOF_WORD; - append( &exec->pdaRun->rcodeCollect, exec->rcodeUnitLen ); - - if ( ! inserted ) { - treeDownref( prg, sp, key ); - treeDownref( prg, sp, val ); - } - break; - } - case IN_MAP_INSERT_WC: { - debug( REALM_BYTECODE, "IN_MAP_INSERT_WC\n" ); - - Tree *obj = vm_pop(); - Tree *val = vm_pop(); - Tree *key = vm_pop(); - - treeDownref( prg, sp, obj ); - - int inserted = mapInsert( prg, (Map*)obj, key, val ); - Tree *result = inserted ? prg->trueVal : prg->falseVal; - treeUpref( result ); - vm_push( result ); - - if ( ! inserted ) { - treeDownref( prg, sp, key ); - treeDownref( prg, sp, val ); - } - break; - } - case IN_MAP_INSERT_BKT: { - uchar inserted; - Tree *key; - read_byte( inserted ); - read_tree( key ); - - debug( REALM_BYTECODE, "IN_MAP_INSERT_BKT\n" ); - - Tree *obj = vm_pop(); - if ( inserted ) { - Tree *val = mapUninsert( prg, (Map*)obj, key ); - treeDownref( prg, sp, key ); - treeDownref( prg, sp, val ); - } - - treeDownref( prg, sp, obj ); - treeDownref( prg, sp, key ); - break; - } - case IN_MAP_STORE_WC: { - debug( REALM_BYTECODE, "IN_MAP_STORE_WC\n" ); - - Tree *obj = vm_pop(); - Tree *element = vm_pop(); - Tree *key = vm_pop(); - - Tree *existing = mapStore( prg, (Map*)obj, key, element ); - Tree *result = existing == 0 ? 
prg->trueVal : prg->falseVal; - treeUpref( result ); - vm_push( result ); - - treeDownref( prg, sp, obj ); - if ( existing != 0 ) { - treeDownref( prg, sp, key ); - treeDownref( prg, sp, existing ); - } - break; - } - case IN_MAP_STORE_WV: { - debug( REALM_BYTECODE, "IN_MAP_STORE_WV\n" ); - - Tree *obj = vm_pop(); - Tree *element = vm_pop(); - Tree *key = vm_pop(); - - Tree *existing = mapStore( prg, (Map*)obj, key, element ); - Tree *result = existing == 0 ? prg->trueVal : prg->falseVal; - treeUpref( result ); - vm_push( result ); - - /* Set up the reverse instruction. */ - treeUpref( key ); - treeUpref( existing ); - append( &exec->pdaRun->rcodeCollect, IN_MAP_STORE_BKT ); - appendWord( &exec->pdaRun->rcodeCollect, (Word)key ); - appendWord( &exec->pdaRun->rcodeCollect, (Word)existing ); - exec->rcodeUnitLen += SIZEOF_CODE + SIZEOF_WORD + SIZEOF_WORD; - append( &exec->pdaRun->rcodeCollect, exec->rcodeUnitLen ); - /* FLUSH */ - - treeDownref( prg, sp, obj ); - if ( existing != 0 ) { - treeDownref( prg, sp, key ); - treeDownref( prg, sp, existing ); - } - break; - } - case IN_MAP_STORE_BKT: { - Tree *key, *val; - read_tree( key ); - read_tree( val ); - - debug( REALM_BYTECODE, "IN_MAP_STORE_BKT\n" ); - - Tree *obj = vm_pop(); - Tree *stored = mapUnstore( prg, (Map*)obj, key, val ); - - treeDownref( prg, sp, stored ); - if ( val == 0 ) - treeDownref( prg, sp, key ); - - treeDownref( prg, sp, obj ); - treeDownref( prg, sp, key ); - break; - } - case IN_MAP_REMOVE_WC: { - debug( REALM_BYTECODE, "IN_MAP_REMOVE_WC\n" ); - - Tree *obj = vm_pop(); - Tree *key = vm_pop(); - TreePair pair = mapRemove( prg, (Map*)obj, key ); - - vm_push( pair.val ); - - treeDownref( prg, sp, obj ); - treeDownref( prg, sp, key ); - treeDownref( prg, sp, pair.key ); - break; - } - case IN_MAP_REMOVE_WV: { - debug( REALM_BYTECODE, "IN_MAP_REMOVE_WV\n" ); - - Tree *obj = vm_pop(); - Tree *key = vm_pop(); - TreePair pair = mapRemove( prg, (Map*)obj, key ); - - treeUpref( pair.val ); - vm_push( 
pair.val ); - - /* Reverse instruction. */ - append( &exec->pdaRun->rcodeCollect, IN_MAP_REMOVE_BKT ); - appendWord( &exec->pdaRun->rcodeCollect, (Word)pair.key ); - appendWord( &exec->pdaRun->rcodeCollect, (Word)pair.val ); - exec->rcodeUnitLen += SIZEOF_CODE + SIZEOF_WORD + SIZEOF_WORD; - append( &exec->pdaRun->rcodeCollect, exec->rcodeUnitLen ); - - treeDownref( prg, sp, obj ); - treeDownref( prg, sp, key ); - break; - } - case IN_MAP_REMOVE_BKT: { - Tree *key, *val; - read_tree( key ); - read_tree( val ); - - debug( REALM_BYTECODE, "IN_MAP_REMOVE_BKT\n" ); - - /* Either both or neither. */ - assert( ( key == 0 ) ^ ( val != 0 ) ); - - Tree *obj = vm_pop(); - if ( key != 0 ) - mapUnremove( prg, (Map*)obj, key, val ); - - treeDownref( prg, sp, obj ); - break; - } - case IN_MAP_LENGTH: { - debug( REALM_BYTECODE, "IN_MAP_LENGTH\n" ); - - Tree *obj = vm_pop(); - long len = mapLength( (Map*)obj ); - Tree *res = constructInteger( prg, len ); - treeUpref( res ); - vm_push( res ); - - treeDownref( prg, sp, obj ); - break; - } - case IN_MAP_FIND: { - debug( REALM_BYTECODE, "IN_MAP_FIND\n" ); - - Tree *obj = vm_pop(); - Tree *key = vm_pop(); - Tree *result = mapFind( prg, (Map*)obj, key ); - treeUpref( result ); - vm_push( result ); - - treeDownref( prg, sp, obj ); - treeDownref( prg, sp, key ); - break; - } - case IN_INIT_LOCALS: { - Half size; - read_half( size ); - - debug( REALM_BYTECODE, "IN_INIT_LOCALS\n" ); - - exec->framePtr = vm_ptop(); - vm_pushn( size ); - memset( vm_ptop(), 0, sizeof(Word) * size ); - break; - } - case IN_CALL_WV: { - Half funcId; - read_half( funcId ); - - FunctionInfo *fi = &prg->rtd->functionInfo[funcId]; - - debug( REALM_BYTECODE, "IN_CALL_WV %ld\n", fi->name ); - - vm_push( 0 ); /* Return value. 
*/ - vm_push( (SW)instr ); - vm_push( (SW)exec->framePtr ); - vm_push( (SW)exec->frameId ); - - instr = prg->rtd->frameInfo[fi->frameId].codeWV; - exec->framePtr = vm_ptop(); - exec->frameId = fi->frameId; - break; - } - case IN_CALL_WC: { - Half funcId; - read_half( funcId ); - - FunctionInfo *fi = &prg->rtd->functionInfo[funcId]; - - debug( REALM_BYTECODE, "IN_CALL_WC %ld\n", fi->name ); - - vm_push( 0 ); /* Return value. */ - vm_push( (SW)instr ); - vm_push( (SW)exec->framePtr ); - vm_push( (SW)exec->frameId ); - - instr = prg->rtd->frameInfo[fi->frameId].codeWC; - exec->framePtr = vm_ptop(); - exec->frameId = fi->frameId; - break; - } - case IN_YIELD: { - debug( REALM_BYTECODE, "IN_YIELD\n" ); - - Kid *kid = (Kid*)vm_pop(); - Ref *next = (Ref*)vm_pop(); - UserIter *uiter = (UserIter*) vm_plocal_iframe( IFR_AA ); - - if ( kid == 0 || kid->tree == 0 || - kid->tree->id == uiter->searchId || - uiter->searchId == prg->rtd->anyId ) - { - /* Store the yeilded value. */ - uiter->ref.kid = kid; - uiter->ref.next = next; - uiter->stackSize = uiter->stackRoot - vm_ptop(); - uiter->resume = instr; - uiter->frame = exec->framePtr; - - /* Restore the instruction and frame pointer. */ - instr = (Code*) vm_local_iframe(IFR_RIN); - exec->framePtr = (Tree**) vm_local_iframe(IFR_RFR); - exec->iframePtr = (Tree**) vm_local_iframe(IFR_RIF); - - /* Return the yield result on the top of the stack. */ - Tree *result = uiter->ref.kid != 0 ? prg->trueVal : prg->falseVal; - treeUpref( result ); - vm_push( result ); - } - break; - } - case IN_UITER_CREATE_WV: { - short field; - Half funcId, searchId; - read_half( field ); - read_half( funcId ); - read_half( searchId ); - - debug( REALM_BYTECODE, "IN_UITER_CREATE_WV\n" ); - - FunctionInfo *fi = prg->rtd->functionInfo + funcId; - UserIter *uiter = uiterCreate( prg, &sp, fi, searchId ); - vm_local(field) = (SW) uiter; - - /* This is a setup similar to as a call, only the frame structure - * is slightly different for user iterators. 
We aren't going to do - * the call. We don't need to set up the return ip because the - * uiter advance will set it. The frame we need to do because it - * is set once for the lifetime of the iterator. */ - vm_push( 0 ); /* Return instruction pointer, */ - vm_push( (SW)exec->iframePtr ); /* Return iframe. */ - vm_push( (SW)exec->framePtr ); /* Return frame. */ - - uiterInit( prg, sp, uiter, fi, true ); - break; - } - case IN_UITER_CREATE_WC: { - short field; - Half funcId, searchId; - read_half( field ); - read_half( funcId ); - read_half( searchId ); - - debug( REALM_BYTECODE, "IN_UITER_CREATE_WC\n" ); - - FunctionInfo *fi = prg->rtd->functionInfo + funcId; - UserIter *uiter = uiterCreate( prg, &sp, fi, searchId ); - vm_local(field) = (SW) uiter; - - /* This is a setup similar to as a call, only the frame structure - * is slightly different for user iterators. We aren't going to do - * the call. We don't need to set up the return ip because the - * uiter advance will set it. The frame we need to do because it - * is set once for the lifetime of the iterator. */ - vm_push( 0 ); /* Return instruction pointer, */ - vm_push( (SW)exec->iframePtr ); /* Return iframe. */ - vm_push( (SW)exec->framePtr ); /* Return frame. 
*/ - - uiterInit( prg, sp, uiter, fi, false ); - break; - } - case IN_UITER_DESTROY: { - short field; - read_half( field ); - - debug( REALM_BYTECODE, "IN_UITER_DESTROY\n" ); - - UserIter *uiter = (UserIter*) vm_local(field); - userIterDestroy( &sp, uiter ); - break; - } - case IN_RET: { - debug( REALM_BYTECODE, "IN_RET\n" ); - - FrameInfo *fi = &prg->rtd->frameInfo[exec->frameId]; - downrefLocalTrees( prg, sp, exec->framePtr, fi->trees, fi->treesLen ); - vm_popn( fi->frameSize ); - - exec->frameId = (long) vm_pop(); - exec->framePtr = (Tree**) vm_pop(); - instr = (Code*) vm_pop(); - Tree *retVal = vm_pop(); - vm_popn( fi->argSize ); - vm_push( retVal ); - break; - } - case IN_TO_UPPER: { - debug( REALM_BYTECODE, "IN_TO_UPPER\n" ); - - Tree *in = vm_pop(); - Head *head = stringToUpper( in->tokdata ); - Tree *upper = constructString( prg, head ); - treeUpref( upper ); - vm_push( upper ); - treeDownref( prg, sp, in ); - break; - } - case IN_TO_LOWER: { - debug( REALM_BYTECODE, "IN_TO_LOWER\n" ); - - Tree *in = vm_pop(); - Head *head = stringToLower( in->tokdata ); - Tree *lower = constructString( prg, head ); - treeUpref( lower ); - vm_push( lower ); - treeDownref( prg, sp, in ); - break; - } - case IN_ERROR: { - debug( REALM_BYTECODE, "IN_ERROR\n" ); - - /* Pop the global. */ - Tree *global = vm_pop(); - treeDownref( prg, sp, global ); - treeUpref( prg->lastParseError ); - vm_push( prg->lastParseError ); - break; - } - case IN_OPEN_FILE: { - debug( REALM_BYTECODE, "IN_OPEN_FILE\n" ); - - Tree *mode = vm_pop(); - Tree *name = vm_pop(); - Tree *res = (Tree*)openFile( prg, name, mode ); - treeUpref( res ); - vm_push( res ); - treeDownref( prg, sp, name ); - treeDownref( prg, sp, mode ); - break; - } - case IN_GET_STDIN: { - debug( REALM_BYTECODE, "IN_GET_STDIN\n" ); - - /* Pop the root object. 
*/ - Tree *obj = vm_pop(); - treeDownref( prg, sp, obj ); - if ( prg->stdinVal == 0 ) { - prg->stdinVal = openStreamFd( prg, 0 ); - treeUpref( (Tree*)prg->stdinVal ); - } - - treeUpref( (Tree*)prg->stdinVal ); - vm_push( (Tree*)prg->stdinVal ); - break; - } - case IN_LOAD_ARGV: { - Half field; - read_half( field ); - debug( REALM_BYTECODE, "IN_LOAD_ARGV %lu\n", field ); - - /* Tree comes back upreffed. */ - Tree *tree = constructArgv( prg, prg->argc, prg->argv ); - setField( prg, prg->global, field, tree ); - break; - } - - case IN_EXIT: { - debug( REALM_BYTECODE, "IN_EXIT\n" ); - - Tree *global = vm_pop(); - Int *status = (Int*)vm_pop(); - prg->exitStatus = status->value; - prg->induceExit = 1; - treeDownref( prg, sp, global ); - treeDownref( prg, sp, (Tree*)status ); - - while ( true ) { - FrameInfo *fi = &prg->rtd->frameInfo[exec->frameId]; - downrefLocalTrees( prg, sp, exec->framePtr, fi->trees, fi->treesLen ); - vm_popn( fi->frameSize ); - - /* Call layout. */ - exec->frameId = (long) vm_pop(); - exec->framePtr = (Tree**) vm_pop(); - instr = (Code*) vm_pop(); - Tree *retVal = vm_pop(); - vm_popn( fi->argSize ); - - treeDownref( prg, sp, retVal ); - - /* We stop on the root, which doesn't have the full function - * stack layout. */ - if ( exec->frameId == prg->rtd->rootFrameId ) - break; - } - - goto out; - } - - case IN_STOP: { - debug( REALM_BYTECODE, "IN_STOP\n" ); - - FrameInfo *fi = &prg->rtd->frameInfo[exec->frameId]; - downrefLocalTrees( prg, sp, exec->framePtr, fi->trees, fi->treesLen ); - vm_popn( fi->frameSize ); - - fflush( stdout ); - goto out; - } - - /* Halt is a default instruction given by the compiler when it is - * asked to generate and instruction it doesn't have. It is deliberate - * and can represent "not implemented" or "compiler error" because a - * variable holding instructions was not properly initialize. 
*/ - case IN_HALT: { - fatal( "IN_HALT -- compiler did something wrong\n" ); - exit(1); - break; - } - default: { - fatal( "UNKNOWN INSTRUCTION: 0x%2x -- something is wrong\n", *(instr-1) ); - assert(false); - break; - } - } - goto again; - -out: - if ( ! prg->induceExit ) - assert( sp == root ); - return sp; -} - diff --git a/colm/bytecode.h b/colm/bytecode.h deleted file mode 100644 index 2151544d..00000000 --- a/colm/bytecode.h +++ /dev/null @@ -1,487 +0,0 @@ -/* - * Copyright 2007-2012 Adrian Thurston <thurston@complang.org> - */ - -/* This file is part of Colm. - * - * Colm is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * Colm is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with Colm; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#ifndef _BYTECODE_H -#define _BYTECODE_H - -#include <colm/pdarun.h> -#include <colm/tree.h> - -#ifdef __cplusplus -extern "C" { -#endif - -#if SIZEOF_LONG != 4 && SIZEOF_LONG != 8 - #error "SIZEOF_LONG contained an unexpected value" -#endif - -typedef unsigned long ulong; -typedef unsigned char uchar; - -#define IN_LOAD_INT 0x02 -#define IN_LOAD_STR 0x03 -#define IN_LOAD_NIL 0x04 -#define IN_LOAD_TRUE 0x05 -#define IN_LOAD_FALSE 0x06 -#define IN_LOAD_TREE 0xf4 -#define IN_LOAD_WORD 0xf5 - -#define IN_ADD_INT 0x07 -#define IN_SUB_INT 0x08 -#define IN_MULT_INT 0x09 -#define IN_DIV_INT 0xd0 - -#define IN_TST_EQL 0x0a -#define IN_TST_NOT_EQL 0x0b -#define IN_TST_LESS 0x0c -#define IN_TST_GRTR 0x0d -#define IN_TST_LESS_EQL 0x0e -#define IN_TST_GRTR_EQL 0x0f -#define IN_TST_LOGICAL_AND 0x10 -#define IN_TST_LOGICAL_OR 0x11 - -#define IN_NOT 0x12 - -#define IN_JMP 0x13 -#define IN_JMP_FALSE 0x14 -#define IN_JMP_TRUE 0x15 - -#define IN_STR_ATOI 0x16 -#define IN_STR_LENGTH 0x17 -#define IN_CONCAT_STR 0x18 -#define IN_TREE_TRIM 0xfc - -#define IN_INIT_LOCALS 0x19 -#define IN_POP 0x1b -#define IN_POP_N_WORDS 0x1c -#define IN_DUP_TOP 0x1d -#define IN_DUP_TOP_OFF 0xbc -#define IN_REJECT 0x1e -#define IN_MATCH 0x1f -#define IN_CONSTRUCT 0x20 -#define IN_TREE_NEW 0x21 - -#define IN_GET_LOCAL_R 0x22 -#define IN_GET_LOCAL_WC 0x23 -#define IN_SET_LOCAL_WC 0x24 - -#define IN_GET_LOCAL_REF_R 0x25 -#define IN_GET_LOCAL_REF_WC 0x26 -#define IN_SET_LOCAL_REF_WC 0x27 - -#define IN_SAVE_RET 0x28 - -#define IN_GET_FIELD_R 0x29 -#define IN_GET_FIELD_WC 0x2a -#define IN_GET_FIELD_WV 0x2b -#define IN_GET_FIELD_BKT 0x2c - -#define IN_SET_FIELD_WV 0x2d -#define IN_SET_FIELD_WC 0x2e -#define IN_SET_FIELD_BKT 0x2f -#define IN_SET_FIELD_LEAVE_WC 0x30 - -#define IN_GET_MATCH_LENGTH_R 0x31 
-#define IN_GET_MATCH_TEXT_R 0x32 - -#define IN_GET_TOKEN_DATA_R 0x33 -#define IN_SET_TOKEN_DATA_WC 0x34 -#define IN_SET_TOKEN_DATA_WV 0x35 -#define IN_SET_TOKEN_DATA_BKT 0x36 - -#define IN_GET_TOKEN_POS_R 0x37 -#define IN_GET_TOKEN_LINE_R 0xf6 - -#define IN_INIT_RHS_EL 0x38 -#define IN_INIT_LHS_EL 0xef -#define IN_INIT_CAPTURES 0x39 -#define IN_STORE_LHS_EL 0xf0 -#define IN_RESTORE_LHS 0x01 - -#define IN_TRITER_FROM_REF 0x3a -#define IN_TRITER_ADVANCE 0x3b -#define IN_TRITER_NEXT_CHILD 0x3c -#define IN_TRITER_GET_CUR_R 0x3d -#define IN_TRITER_GET_CUR_WC 0x3e -#define IN_TRITER_SET_CUR_WC 0x3f -#define IN_TRITER_DESTROY 0x40 -#define IN_TRITER_NEXT_REPEAT 0x41 -#define IN_TRITER_PREV_REPEAT 0x42 - -#define IN_REV_TRITER_FROM_REF 0x43 -#define IN_REV_TRITER_DESTROY 0x44 -#define IN_REV_TRITER_PREV_CHILD 0x45 - -#define IN_UITER_DESTROY 0x46 -#define IN_UITER_CREATE_WV 0x47 -#define IN_UITER_CREATE_WC 0x48 -#define IN_UITER_ADVANCE 0x49 -#define IN_UITER_GET_CUR_R 0x4a -#define IN_UITER_GET_CUR_WC 0x4b -#define IN_UITER_SET_CUR_WC 0x4c - -#define IN_TREE_SEARCH 0x4d - -#define IN_LOAD_GLOBAL_R 0x4e -#define IN_LOAD_GLOBAL_WV 0x4f -#define IN_LOAD_GLOBAL_WC 0x50 -#define IN_LOAD_GLOBAL_BKT 0x51 - -#define IN_PTR_DEREF_R 0x52 -#define IN_PTR_DEREF_WV 0x53 -#define IN_PTR_DEREF_WC 0x54 -#define IN_PTR_DEREF_BKT 0x55 - -#define IN_REF_FROM_LOCAL 0x56 -#define IN_REF_FROM_REF 0x57 -#define IN_REF_FROM_QUAL_REF 0x58 -#define IN_TRITER_REF_FROM_CUR 0x59 -#define IN_UITER_REF_FROM_CUR 0x5a - -#define IN_MAP_LENGTH 0x5b -#define IN_MAP_FIND 0x5c -#define IN_MAP_INSERT_WV 0x5d -#define IN_MAP_INSERT_WC 0x5e -#define IN_MAP_INSERT_BKT 0x5f -#define IN_MAP_STORE_WV 0x60 -#define IN_MAP_STORE_WC 0x61 -#define IN_MAP_STORE_BKT 0x62 -#define IN_MAP_REMOVE_WV 0x63 -#define IN_MAP_REMOVE_WC 0x64 -#define IN_MAP_REMOVE_BKT 0x65 - -#define IN_LIST_LENGTH 0x66 -#define IN_LIST_APPEND_WV 0x67 -#define IN_LIST_APPEND_WC 0x68 -#define IN_LIST_APPEND_BKT 0x69 -#define IN_LIST_REMOVE_END_WV 
0x6a -#define IN_LIST_REMOVE_END_WC 0x6b -#define IN_LIST_REMOVE_END_BKT 0x6c - -#define IN_GET_LIST_MEM_R 0x6d -#define IN_GET_LIST_MEM_WC 0x6e -#define IN_GET_LIST_MEM_WV 0x6f -#define IN_GET_LIST_MEM_BKT 0x70 -#define IN_SET_LIST_MEM_WV 0x71 -#define IN_SET_LIST_MEM_WC 0x72 -#define IN_SET_LIST_MEM_BKT 0x73 - -#define IN_VECTOR_LENGTH 0x74 -#define IN_VECTOR_APPEND_WV 0x75 -#define IN_VECTOR_APPEND_WC 0x76 -#define IN_VECTOR_APPEND_BKT 0x77 -#define IN_VECTOR_INSERT_WV 0x78 -#define IN_VECTOR_INSERT_WC 0x79 -#define IN_VECTOR_INSERT_BKT 0x7a - -#define IN_PRINT 0x7b -#define IN_PRINT_XML_AC 0x7c -#define IN_PRINT_XML 0x7d -#define IN_PRINT_STREAM 0x7e - -#define IN_HALT 0x7f - -#define IN_CALL_WC 0x80 -#define IN_CALL_WV 0x81 -#define IN_RET 0x82 -#define IN_YIELD 0x83 -#define IN_STOP 0x84 - -#define IN_STR_UORD8 0x85 -#define IN_STR_SORD8 0x86 -#define IN_STR_UORD16 0x87 -#define IN_STR_SORD16 0x88 -#define IN_STR_UORD32 0x89 -#define IN_STR_SORD32 0x8a - -#define IN_INT_TO_STR 0x8b -#define IN_TREE_TO_STR 0x8c -#define IN_TREE_TO_STR_NOTRIM 0xfd - -#define IN_CREATE_TOKEN 0x8d -#define IN_MAKE_TOKEN 0x8e -#define IN_MAKE_TREE 0x8f -#define IN_CONSTRUCT_TERM 0x90 - -#define IN_INPUT_PULL_WV 0xf7 -#define IN_INPUT_PULL_BKT 0xf8 - -#define IN_PARSE_LOAD_START 0xf2 -#define IN_PARSE_SAVE_STEPS 0xf3 -#define IN_PARSE_FRAG_WC 0xc0 -#define IN_PARSE_FRAG_WC3 0xe1 - -#define IN_PARSE_FRAG_WV 0xc1 -#define IN_PARSE_FRAG_WV3 0xe4 - -#define IN_PARSE_FRAG_BKT 0xc2 -#define IN_PARSE_FRAG_BKT3 0xe6 - -#define IN_INPUT_APPEND_WC 0x91 -#define IN_INPUT_APPEND_WV 0x92 -#define IN_INPUT_APPEND_BKT 0x93 - -#define IN_PARSE_FINISH_WC 0x9d -#define IN_PARSE_FINISH_WC3 0xea - -#define IN_PARSE_FINISH_WV 0xbd -#define IN_PARSE_FINISH_WV3 0xeb - -#define IN_PARSE_FINISH_BKT 0xbf -#define IN_PARSE_FINISH_BKT3 0xec - -#define IN_PCR_CALL 0xe0 -#define IN_PCR_RET 0xe3 -#define IN_PCR_END_DECK 0xed - -#define IN_PARSE_EXTRACT_INPUT - -#define IN_OPEN_FILE 0x9e -#define IN_GET_STDIN 
0x9f -#define IN_GET_STDOUT 0xa0 -#define IN_GET_STDERR 0xa1 -#define IN_LOAD_ARGV 0xa2 -#define IN_TO_UPPER 0xa3 -#define IN_TO_LOWER 0xa4 -#define IN_EXIT 0xa5 -#define IN_ERROR 0xa6 - -#define IN_LOAD_ACCUM_R 0xa8 -#define IN_LOAD_ACCUM_WV 0xa9 -#define IN_LOAD_ACCUM_WC 0xaa -#define IN_LOAD_ACCUM_BKT 0xab - -#define IN_LOAD_INPUT_R 0x98 -#define IN_LOAD_INPUT_WV 0x99 -#define IN_LOAD_INPUT_WC 0x9a -#define IN_LOAD_INPUT_BKT 0x9b - -#define IN_INPUT_PUSH_WV 0xf9 -#define IN_INPUT_PUSH_BKT 0xfa -#define IN_INPUT_PUSH_IGNORE_WV 0xfb - -#define IN_LOAD_CONTEXT_R 0xac -#define IN_LOAD_CONTEXT_WV 0xad -#define IN_LOAD_CONTEXT_WC 0xae -#define IN_LOAD_CONTEXT_BKT 0xaf - -#define IN_GET_ACCUM_CTX_R 0xb0 -#define IN_GET_ACCUM_CTX_WC 0xb1 -#define IN_GET_ACCUM_CTX_WV 0xb2 -#define IN_SET_ACCUM_CTX_WC 0xb3 -#define IN_SET_ACCUM_CTX_WV 0xb4 - -#define IN_LOAD_CTX_R 0xb5 -#define IN_LOAD_CTX_WC 0xb6 -#define IN_LOAD_CTX_WV 0xb7 -#define IN_LOAD_CTX_BKT 0xb8 - -#define IN_SPRINTF 0xcf - -#define IN_GET_RHS_VAL_R 0xd1 -#define IN_GET_RHS_VAL_WC 0xd2 -#define IN_GET_RHS_VAL_WV 0xd3 -#define IN_GET_RHS_VAL_BKT 0xd4 -#define IN_SET_RHS_VAL_WC 0xd5 -#define IN_SET_RHS_VAL_WV 0xd6 -#define IN_SET_RHS_VAL_BKT 0xd7 - -#define IN_CONSTRUCT_INPUT 0x9c -#define IN_SET_INPUT 0xa7 -#define IN_GET_INPUT 0xb9 - -/* Types */ -#define TYPE_NIL 0x01 -#define TYPE_TREE 0x02 -#define TYPE_REF 0x03 -#define TYPE_PTR 0x04 -#define TYPE_ITER 0x05 -#define TYPE_IGNORE_LIST 0x06 - -/* Types of Generics. */ -#define GEN_LIST 0x10 -#define GEN_MAP 0x11 -#define GEN_VECTOR 0x12 -#define GEN_PARSER 0x13 - -/* Virtual machine stack size, number of pointers. - * This will be mmapped. */ -#define VM_STACK_SIZE (SIZEOF_WORD*1024ll*1024ll) - -/* Known language element ids. */ -#define LEL_ID_PTR 1 -#define LEL_ID_BOOL 2 -#define LEL_ID_INT 3 -#define LEL_ID_STR 4 -#define LEL_ID_STREAM 5 -#define LEL_ID_INPUT 6 -#define LEL_ID_IGNORE 7 - -/* - * Flags - */ - -/* A tree that has been generated by a termDup. 
*/ -#define PF_TERM_DUP 0x0001 - -/* Has been processed by the commit function. All children have also been - * processed. */ -#define PF_COMMITTED 0x0002 - -/* Created by a token generation action, not made from the input. */ -#define PF_ARTIFICIAL 0x0004 - -/* Named node from a pattern or constructor. */ -#define PF_NAMED 0x0008 - -/* There is reverse code associated with this tree node. */ -#define PF_HAS_RCODE 0x0010 - -#define PF_RIGHT_IGNORE 0x0020 - -#define PF_LEFT_IL_ATTACHED 0x0400 -#define PF_RIGHT_IL_ATTACHED 0x0800 - -#define AF_LEFT_IGNORE 0x0100 -#define AF_RIGHT_IGNORE 0x0200 - -#define AF_SUPPRESS_LEFT 0x4000 -#define AF_SUPPRESS_RIGHT 0x8000 - -/* - * Call stack. - */ - -/* Number of spots in the frame, after the args. */ -#define FR_AA 4 - -/* Positions relative to the frame pointer. */ -#define FR_RV 3 /* return value */ -#define FR_RI 2 /* return instruction */ -#define FR_RFP 1 /* return frame pointer */ -#define FR_RFD 0 /* return frame id. */ - -/* - * Calling Convention: - * a1 - * a2 - * a3 - * ... - * return value FR_RV - * return instr FR_RI - * return frame ptr FR_RFP - * return frame id FR_RFD - */ - -/* - * User iterator call stack. - * Adds an iframe pointer, removes the return value. - */ - -/* Number of spots in the frame, after the args. */ -#define IFR_AA 3 - -/* Positions relative to the frame pointer. */ -#define IFR_RIN 2 /* return instruction */ -#define IFR_RIF 1 /* return iframe pointer */ -#define IFR_RFR 0 /* return frame pointer */ - -/* Exported to modules other than bytecode.c */ -#define vm_push(i) /*if ( sp == prg->se ) vm_grow( prg ); */ (*(--sp) = (i)) -#define vm_pop() (*sp++) -#define vm_top() (*sp) -#define vm_ptop() (sp) -#define vm_pop_ignore() (sp++) - -void vm_grow( struct ColmProgram * ); - -typedef Tree *SW; -typedef Tree **StackPtr; - - -/* Can't use sizeof() because we have used types that are bigger than the - * serial representation. 
*/ -#define SIZEOF_CODE 1 -#define SIZEOF_HALF 2 -#define SIZEOF_WORD sizeof(Word) - -typedef struct _Execution -{ - Parser *parser; - PdaRun *pdaRun; - FsmRun *fsmRun; - InputStream *inputStream; - Tree **framePtr; - Tree **iframePtr; - long frameId; - long rcodeUnitLen; -} Execution; - -long stringLength( Head *str ); -const char *stringData( Head *str ); -Head *stringAllocFull( struct ColmProgram *prg, const char *data, long length ); -Head *stringCopy( struct ColmProgram *prg, Head *head ); -void stringFree( struct ColmProgram *prg, Head *head ); -void stringShorten( Head *tokdata, long newlen ); -Head *concatStr( Head *s1, Head *s2 ); -Word strAtoi( Head *str ); -Word strUord16( Head *head ); -Word strUord8( Head *head ); -Word cmpString( Head *s1, Head *s2 ); -Head *stringToUpper( Head *s ); -Head *stringToLower( Head *s ); -Head *stringSprintf( struct ColmProgram *prg, Str *format, Int *integer ); - -Head *makeLiteral( struct ColmProgram *prg, long litoffset ); -Head *intToStr( struct ColmProgram *prg, Word i ); - -Tree *constructString( struct ColmProgram *prg, Head *s ); - -void initExecution( Execution *exec, Parser *parser, PdaRun *pdaRun, FsmRun *fsmRun, InputStream *inputStream, int frameId ); - -void mainExecution( struct ColmProgram *prg, Execution *exec, Code *code ); -void reductionExecution( Execution *exec, Tree **sp ); -void generationExecution( Execution *exec, Tree **sp ); -void reverseExecution( Execution *exec, Tree **sp, RtCodeVect *allRev ); - -Kid *allocAttrs( struct ColmProgram *prg, long length ); -void freeAttrs( struct ColmProgram *prg, Kid *attrs ); -void setAttr( Tree *tree, long pos, Tree *val ); -Kid *getAttrKid( Tree *tree, long pos ); - -Tree *splitTree( struct ColmProgram *prg, Tree *t ); -void rcodeDownrefAll( struct ColmProgram *prg, Tree **sp, RtCodeVect *cv ); -void commitFull( struct ColmProgram *prg, Tree **sp, PdaRun *pdaRun, long commitReduce ); -Tree *getParsedRoot( PdaRun *pdaRun, int stop ); -void splitRef( struct 
ColmProgram *prg, Tree ***sp, Ref *fromRef ); - -void allocGlobal( struct ColmProgram *prg ); -Tree **executeCode( struct ColmProgram *prg, Execution *exec, Tree **sp, Code *instr ); -void rcodeDownref( struct ColmProgram *prg, Tree **sp, Code *instr ); -Tree **stackAlloc(); -Code *popReverseCode( RtCodeVect *allRev ); -void sendBackBuffered( FsmRun *fsmRun, InputStream *inputStream ); - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/colm/closure.cc b/colm/closure.cc deleted file mode 100644 index 37b0e259..00000000 --- a/colm/closure.cc +++ /dev/null @@ -1,458 +0,0 @@ -/* - * Copyright 2005-2012 Adrian Thurston <thurston@complang.org> - */ - -/* This file is part of Colm. - * - * Colm is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * Colm is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Colm; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include "global.h" -#include "parsedata.h" - -#include "vector.h" -#include <assert.h> -#include <string.h> -#include <iostream> - -using std::endl; -using std::cerr; - -void Compiler::lr0BringInItem( PdaGraph *pdaGraph, PdaState *dest, PdaState *prodState, - PdaTrans *expandFrom, Definition *prod ) -{ - /* We use dot sets for finding unique states. In the future, should merge - * dots sets with the stateSet pointer (only need one of these). */ - assert( dest != prodState ); - dest->dotSet.insert( prodState->dotSet ); - - /* Get the epsilons, context, out priorities. 
*/ - dest->pendingCommits.insert( prodState->pendingCommits ); - //if ( prodState->pendingCommits.length() > 0 ) - // cerr << "THERE ARE PENDING COMMITS DRAWN IN" << endl; - - if ( prodState->transMap.length() > 0 ) { - assert( prodState->transMap.length() == 1 ); - PdaTrans *srcTrans = prodState->transMap[0].value; - - /* Look for the source in the destination. */ - TransMapEl *destTel = dest->transMap.find( srcTrans->lowKey ); - if ( destTel == 0 ) { - /* Make a new state and transition to it. */ - PdaState *newState = pdaGraph->addState(); - PdaTrans *newTrans = new PdaTrans(); - - /* Attach the new transition to the new state. */ - newTrans->lowKey = srcTrans->lowKey; - pdaGraph->attachTrans( dest, newState, newTrans ); - pdaGraph->addInTrans( newTrans, srcTrans ); - - /* The transitions we make during lr0 closure are all shifts. */ - assert( newTrans->isShift ); - assert( srcTrans->isShift ); - - /* The new state must have its state set setup. */ - newState->stateSet = new PdaStateSet; - newState->stateSet->insert( srcTrans->toState ); - - /* Insert the transition into the map. Be sure to set destTel, it - * is needed below. */ - dest->transMap.insert( srcTrans->lowKey, newTrans, &destTel ); - - /* If the item is a non-term, queue it for closure. */ - LangEl *langEl = langElIndex[srcTrans->lowKey]; - if ( langEl != 0 && langEl->type == LangEl::NonTerm ) { - pdaGraph->transClosureQueue.append( newTrans ); - //cerr << "put to trans closure queue" << endl; - } - } - else { - //cerr << "merging transitions" << endl; - destTel->value->toState->stateSet->insert( srcTrans->toState ); - pdaGraph->addInTrans( destTel->value, srcTrans ); - } - - /* If this is an expansion then we may need to bring in commits. 
*/ - if ( expandFrom != 0 && expandFrom->commits.length() > 0 ) { - //cerr << "SETTING COMMIT ON CLOSURE ROUND" << endl; - destTel->value->commits.insert( expandFrom->commits ); - - expandFrom->commits.empty(); - } - } - else { - /* ProdState does not have any transitions out. It is at the end of a - * production. */ - if ( expandFrom != 0 && expandFrom->commits.length() > 0 ) { - //cerr << "SETTING COMMIT IN PENDING LOOKAHEAD" << endl; - for ( LongSet::Iter len = expandFrom->commits; len.lte(); len++ ) - dest->pendingCommits.insert( ProdIdPair( prod->prodId, *len ) ); - - expandFrom->commits.empty(); - } - } -} - -void Compiler::lr0InvokeClosure( PdaGraph *pdaGraph, PdaState *state ) -{ - /* State should not already be closed. */ - assert( !state->inClosedMap ); - - /* This is used each time we invoke closure, it must be cleared. */ - pdaGraph->transClosureQueue.abandon(); - - /* Drag in the core items. */ - for ( PdaStateSet::Iter ssi = *state->stateSet; ssi.lte(); ssi++ ) - lr0BringInItem( pdaGraph, state, *ssi, 0, 0 ); - - /* Now bring in the derived items. */ - while ( pdaGraph->transClosureQueue.length() > 0 ) { - PdaTrans *toClose = pdaGraph->transClosureQueue.detachFirst(); - //cerr << "have a transition to derive" << endl; - - /* Get the langEl. */ - LangEl *langEl = langElIndex[toClose->lowKey]; - - /* Make graphs for all of the productions that the non - * terminal goes to that are not already in the state's dotSet. */ - for ( LelDefList::Iter prod = langEl->defList; prod.lte(); prod++ ) { - /* Bring in the start state of the production. */ - lr0BringInItem( pdaGraph, state, prod->fsm->startState, toClose, prod ); - } - } - - /* Try and insert into the closed dict. */ - DotSetMapEl *lastFound; - if ( pdaGraph->closedMap.insert( state, &lastFound ) ) { - /* Insertion into closed dict succeeded. There is no state with the - * same dot set. The state is now closed. 
It is guaranteed a spot in - * the closed dict and it will never go away (states never deleted - * during closure). */ - pdaGraph->stateClosedList.append( state ); - state->inClosedMap = true; - - /* Add all of the states in the out transitions to the closure queue. - * This will give us a depth first search of the graph. */ - for ( TransMap::Iter trans = state->transMap; trans.lte(); trans++ ) { - /* Get the state the transEl goes to. */ - PdaState *targ = trans->value->toState; - - /* If the state on this tranisition has not already been slated - * for closure, then add it to the queue. */ - if ( !targ->onClosureQueue && !targ->inClosedMap ) { - pdaGraph->stateClosureQueue.append( targ ); - targ->onClosureQueue = true; - } - } - } - else { - /* Insertion into closed dict failed. There is an existing state - * with the same dot set. Get the existing state. */ - pdaGraph->inTransMove( lastFound, state ); - for ( TransMap::Iter tel = state->transMap; tel.lte(); tel++ ) { - pdaGraph->stateList.detach( tel->value->toState ); - delete tel->value->toState; - delete tel->value; - } - pdaGraph->stateList.detach( state ); - delete state; - } -} - -/* Invoke cloure on the graph. We use a queue here to achieve a breadth - * first search of the tree we build. Note, there are back edges in this - * tree. They are the edges made when upon closure, a dot set exists - * already. */ -void Compiler::lr0CloseAllStates( PdaGraph *pdaGraph ) -{ - /* While there are items on the closure queue. */ - while ( pdaGraph->stateClosureQueue.length() > 0 ) { - /* Pop the first item off. */ - PdaState *state = pdaGraph->stateClosureQueue.detachFirst(); - state->onClosureQueue = false; - - /* Invoke closure upon the state. 
*/ - lr0InvokeClosure( pdaGraph, state ); - } -} - -void Compiler::transferCommits( PdaGraph *pdaGraph, PdaTrans *trans, - PdaState *state, long prodId ) -{ - ProdIdPairSet &pendingCommits = state->pendingCommits; - for ( ProdIdPairSet::Iter pi = pendingCommits; pi.lte(); pi++ ) { - if ( pi->onReduce == prodId ) - trans->commits.insert( pi->length ); - } -} - -void Compiler::lalr1AddFollow2( PdaGraph *pdaGraph, PdaTrans *trans, FollowToAdd &followKeys ) -{ - for ( ExpandToSet::Iter ets = trans->expandTo; ets.lte(); ets++ ) { - int prodId = ets->prodId; - PdaState *expandTo = ets->state; - - for ( FollowToAdd::Iter fkey = followKeys; fkey.lte(); fkey++ ) { - TransMapEl *transEl = expandTo->transMap.find( fkey->key ); - - if ( transEl != 0 ) { - /* Set up the follow transition. */ - PdaTrans *destTrans = transEl->value; - - transferCommits( pdaGraph, destTrans, expandTo, prodId ); - - pdaGraph->addInReduction( destTrans, prodId, fkey->value ); - } - else { - /* Set up the follow transition. */ - PdaTrans *followTrans = new PdaTrans; - followTrans->lowKey = fkey->key; - followTrans->isShift = false; - followTrans->reductions.insert( prodId, fkey->value ); - - transferCommits( pdaGraph, followTrans, expandTo, prodId ); - - pdaGraph->attachTrans( expandTo, actionDestState, followTrans ); - expandTo->transMap.insert( followTrans->lowKey, followTrans ); - pdaGraph->transClosureQueue.append( followTrans ); - } - } - } -} - -long PdaTrans::maxPrior() -{ - long prior = LONG_MIN; - if ( isShift && shiftPrior > prior ) - prior = shiftPrior; - for ( ReductionMap::Iter red = reductions; red.lte(); red++ ) { - if ( red->value > prior ) - prior = red->value; - } - return prior; -} - -void Compiler::lalr1AddFollow1( PdaGraph *pdaGraph, PdaState *state ) -{ - /* Finding non-terminals into the state. 
*/ - for ( PdaTransInList::Iter in = state->inRange; in.lte(); in++ ) { - long key = in->lowKey; - LangEl *langEl = langElIndex[key]; - if ( langEl != 0 && langEl->type == LangEl::NonTerm ) { - /* Finding the following transitions. */ - FollowToAdd followKeys; - for ( TransMap::Iter fout = state->transMap; fout.lte(); fout++ ) { - int fkey = fout->key; - LangEl *flel = langElIndex[fkey]; - if ( flel == 0 || flel->type == LangEl::Term ) { - long prior = fout->value->maxPrior(); - followKeys.insert( fkey, prior ); - } - } - - if ( followKeys.length() > 0 ) - lalr1AddFollow2( pdaGraph, in, followKeys ); - } - } -} - -void Compiler::lalr1AddFollow2( PdaGraph *pdaGraph, PdaTrans *trans, - long followKey, long prior ) -{ - for ( ExpandToSet::Iter ets = trans->expandTo; ets.lte(); ets++ ) { - int prodId = ets->prodId; - PdaState *expandTo = ets->state; - - TransMapEl *transEl = expandTo->transMap.find( followKey ); - if ( transEl != 0 ) { - /* Add in the reductions, or in the shift. */ - PdaTrans *destTrans = transEl->value; - - transferCommits( pdaGraph, destTrans, expandTo, prodId ); - - pdaGraph->addInReduction( destTrans, prodId, prior ); - } - else { - /* Set up the follow transition. */ - PdaTrans *followTrans = new PdaTrans; - followTrans->lowKey = followKey; - followTrans->isShift = false; - followTrans->reductions.insert( prodId, prior ); - - transferCommits( pdaGraph, followTrans, expandTo, prodId ); - - pdaGraph->attachTrans( expandTo, actionDestState, followTrans ); - expandTo->transMap.insert( followTrans->lowKey, followTrans ); - pdaGraph->transClosureQueue.append( followTrans ); - } - } -} - -void Compiler::lalr1AddFollow1( PdaGraph *pdaGraph, PdaTrans *trans ) -{ - PdaState *state = trans->fromState; - int fkey = trans->lowKey; - LangEl *flel = langElIndex[fkey]; - if ( flel == 0 || flel->type == LangEl::Term ) { - /* Finding non-terminals into the state. 
*/ - for ( PdaTransInList::Iter in = state->inRange; in.lte(); in++ ) { - long key = in->lowKey; - LangEl *langEl = langElIndex[key]; - if ( langEl != 0 && langEl->type == LangEl::NonTerm ) { - //cerr << "FOLLOW PRIOR TRANSFER 2: " << prior << endl; - long prior = trans->maxPrior(); - lalr1AddFollow2( pdaGraph, in, fkey, prior ); - } - } - } -} - -/* Add follow sets to an LR(0) graph to make it LALR(1). */ -void Compiler::lalr1AddFollowSets( PdaGraph *pdaGraph, LangElSet &parserEls ) -{ - /* Make the state that all reduction actions go to. Since a reduction pops - * states of the stack and sets the new target state, this state is - * actually never reached. Just here to link the trans to. */ - actionDestState = pdaGraph->addState(); - pdaGraph->setFinState( actionDestState ); - - for ( LangElSet::Iter pe = parserEls; pe.lte(); pe++ ) { - /* Get the entry into the graph and traverse over start. */ - PdaState *overStart = pdaGraph->followFsm( (*pe)->startState, (*pe)->rootDef->fsm ); - - /* Add _eof after the initial _start. */ - PdaTrans *eofTrans = pdaGraph->insertNewTrans( overStart, actionDestState, - (*pe)->eofLel->id, (*pe)->eofLel->id ); - eofTrans->isShift = true; - } - - /* This was used during lr0 table construction. */ - pdaGraph->transClosureQueue.abandon(); - - /* Need to pass over every state initially. */ - for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) - lalr1AddFollow1( pdaGraph, state ); - - /* While the closure queue has items, pop them off and add follow - * characters. */ - while ( pdaGraph->transClosureQueue.length() > 0 ) { - /* Pop the first item off and add Follow for it . */ - PdaTrans *trans = pdaGraph->transClosureQueue.detachFirst(); - lalr1AddFollow1( pdaGraph, trans ); - } -} - -void Compiler::linkExpansions( PdaGraph *pdaGraph ) -{ - pdaGraph->setStateNumbers(); - for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) { - /* Find transitions out on non terminals. 
*/ - for ( TransMap::Iter trans = state->transMap; trans.lte(); trans++ ) { - long key = trans->key; - LangEl *langEl = langElIndex[key]; - if ( langEl != 0 && langEl->type == LangEl::NonTerm ) { - /* For each production that the non terminal expand to ... */ - for ( LelDefList::Iter prod = langEl->defList; prod.lte(); prod++ ) { - /* Follow the production and add to the trans's expand to set. */ - PdaState *followRes = pdaGraph->followFsm( state, prod->fsm ); - - //LangEl *lel = langElIndex[key]; - //cerr << state->stateNum << ", "; - //if ( lel != 0 ) - // cerr << lel->data; - //else - // cerr << (char)key; - //cerr << " -> " << (*fto)->stateNum << " on " << - // prod->data << " (fss = " << fin.pos() << ")" << endl; - trans->value->expandTo.insert( ExpandToEl( followRes, prod->prodId ) ); - } - } - } - } -} - -/* Add terminal versions of all nonterminal transitions. */ -void Compiler::addDupTerms( PdaGraph *pdaGraph ) -{ - for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) { - PdaTransList newTranitions; - for ( TransMap::Iter trans = state->transMap; trans.lte(); trans++ ) { - LangEl *lel = langElIndex[trans->value->lowKey]; - if ( lel->type == LangEl::NonTerm ) { - PdaTrans *dupTrans = new PdaTrans; - dupTrans->lowKey = lel->termDup->id; - dupTrans->isShift = true; - - /* Save the target state in to state. In the next loop when we - * attach the transition we must clear this because the - * attaching code requires the transition to be unattached. */ - dupTrans->toState = trans->value->toState; - newTranitions.append( dupTrans ); - - /* Commit code used? */ - //transferCommits( pdaGraph, followTrans, expandTo, prodId ); - } - } - - for ( PdaTrans *dup = newTranitions.head; dup != 0; ) { - PdaTrans *next = dup->next; - PdaState *toState = dup->toState; - dup->toState = 0; - pdaGraph->attachTrans( state, toState, dup ); - state->transMap.insert( dup->lowKey, dup ); - dup = next; - } - } -} - -/* Generate a LALR(1) graph. 
*/ -void Compiler::lalr1GenerateParser( PdaGraph *pdaGraph, LangElSet &parserEls ) -{ - /* Make the intial graph. */ - pdaGraph->langElIndex = langElIndex; - - for ( Vector<LangEl*>::Iter r = parserEls; r.lte(); r++ ) { - /* Create the entry point. */ - PdaState *rs = pdaGraph->addState(); - pdaGraph->entryStateSet.insert( rs ); - - /* State set of just one state. */ - rs->stateSet = new PdaStateSet; - rs->stateSet->insert( (*r)->rootDef->fsm->startState ); - - /* Queue the start state for closure. */ - rs->onClosureQueue = true; - pdaGraph->stateClosureQueue.append( rs ); - - (*r)->startState = rs; - } - - /* Run the lr0 closure. */ - lr0CloseAllStates( pdaGraph ); - - /* Add terminal versions of all nonterminal transitions. */ - addDupTerms( pdaGraph ); - - /* Link production expansions to the place they expand to. */ - linkExpansions( pdaGraph ); - - /* Walk the graph adding follow sets to the LR(0) graph. */ - lalr1AddFollowSets( pdaGraph, parserEls ); - -// /* Set the commit on the final eof shift. */ -// PdaTrans *overStart = pdaGraph->startState->findTrans( rootEl->id ); -// PdaTrans *eofTrans = overStart->toState->findTrans( eofLangEl->id ); -// eofTrans->afterShiftCommits.insert( 2 ); -} diff --git a/colm/codegen.cc b/colm/codegen.cc deleted file mode 100644 index 4403cf8f..00000000 --- a/colm/codegen.cc +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Copyright 2006-2012 Adrian Thurston <thurston@complang.org> - */ - -/* This file is part of Colm. - * - * Colm is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * Colm is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with Colm; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include "parsedata.h" -#include "fsmcodegen.h" -#include "redfsm.h" -#include "bstmap.h" -#include "fsmrun.h" -#include "debug.h" -#include <sstream> -#include <string> - - -void FsmCodeGen::writeMain() -{ - out << - "int main( int argc, const char **argv )\n" - "{\n" - " struct ColmProgram *prg;\n" - " int exitStatus;\n" - " colmInit( " << colmActiveRealm << " );\n" - " prg = colmNewProgram( &main_runtimeData, argc, argv );\n" - " colmRunProgram( prg );\n" - " exitStatus = colmDeleteProgram( prg );\n" - " return exitStatus;\n" - "}\n" - "\n"; - - out.flush(); -} - - diff --git a/colm/codevect.c b/colm/codevect.c deleted file mode 100644 index a05c7ea4..00000000 --- a/colm/codevect.c +++ /dev/null @@ -1,183 +0,0 @@ -/* - * Copyright 2010-2012 Adrian Thurston <thurston@complang.org> - */ - -/* This file is part of Aapl. - * - * Aapl is free software; you can redistribute it and/or modify it under the - * terms of the GNU Lesser General Public License as published by the Free - * Software Foundation; either version 2.1 of the License, or (at your option) - * any later version. - * - * Aapl is distributed in the hope that it will be useful, but WITHOUT ANY - * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS - * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for - * more details. 
- * - * You should have received a copy of the GNU Lesser General Public License - * along with Aapl; if not, write to the Free Software Foundation, Inc., 59 - * Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include <colm/rtvector.h> -#include <colm/pdarun.h> - -#include <string.h> -#include <stdlib.h> -#include <assert.h> - - -void initRtCodeVect( RtCodeVect *vect ) -{ - vect->data = 0; - vect->tabLen = 0; - vect->allocLen = 0; -} - -static long newSizeUp( long existing, long needed ) -{ - return needed > existing ? (needed<<1) : existing; -} - -static long newSizeDown( long existing, long needed ) -{ - return needed < (existing>>2) ? (needed<<1) : existing; -} - -/* Up resize the data for len elements using Resize::upResize to tell us the - * new tabLen. Reads and writes allocLen. Does not read or write tabLen. */ -static void upResize( RtCodeVect *vect, long len ) -{ - /* Ask the resizer what the new tabLen will be. */ - long newLen = newSizeUp(vect->allocLen, len); - - /* Did the data grow? */ - if ( newLen > vect->allocLen ) { - vect->allocLen = newLen; - if ( vect->data != 0 ) { - /* Table exists already, resize it up. */ - vect->data = (Code*) realloc( vect->data, sizeof(Code) * newLen ); - //if ( vect->data == 0 ) - // throw std::bad_alloc(); - } - else { - /* Create the data. */ - vect->data = (Code*) malloc( sizeof(Code) * newLen ); - //if ( vect->data == 0 ) - // throw std::bad_alloc(); - } - } -} - -/* Down resize the data for len elements using Resize::downResize to determine - * the new tabLen. Reads and writes allocLen. Does not read or write tabLen. */ -static void downResize( RtCodeVect *vect, long len) -{ - /* Ask the resizer what the new tabLen will be. */ - long newLen = newSizeDown( vect->allocLen, len ); - - /* Did the data shrink? */ - if ( newLen < vect->allocLen ) { - vect->allocLen = newLen; - if ( newLen == 0 ) { - /* Simply free the data. 
*/ - free( vect->data ); - vect->data = 0; - } - else { - /* Not shrinking to size zero, realloc it to the smaller size. */ - vect->data = (Code*) realloc( vect->data, sizeof(Code) * newLen ); - //if ( vect->data == 0 ) - // throw std::bad_alloc(); - } - } -} - - -void rtCodeVectEmpty( RtCodeVect *vect ) -{ - if ( vect->data != 0 ) { - /* Free the data space. */ - free( vect->data ); - vect->data = 0; - vect->tabLen = vect->allocLen = 0; - } -} - -void rtCodeVectReplace( RtCodeVect *vect, long pos, const Code *val, long len ) -{ - long endPos, i; - //Code *item; - - /* If we are given a negative position to replace at then - * treat it as a position relative to the length. */ - if ( pos < 0 ) - pos = vect->tabLen + pos; - - /* The end is the one past the last item that we want - * to write to. */ - endPos = pos + len; - - /* Make sure we have enough space. */ - if ( endPos > vect->tabLen ) { - upResize( vect, endPos ); - - /* Delete any objects we need to delete. */ - //item = vect->data + pos; - //for ( i = pos; i < vect->tabLen; i++, item++ ) - // item->~Code(); - - /* We are extending the vector, set the new data length. */ - vect->tabLen = endPos; - } - else { - /* Delete any objects we need to delete. */ - //item = vect->data + pos; - //for ( i = pos; i < endPos; i++, item++ ) - // item->~Code(); - } - - /* Copy data in using copy constructor. */ - Code *dst = vect->data + pos; - const Code *src = val; - for ( i = 0; i < len; i++, dst++, src++ ) - *dst = *src; -} - -void rtCodeVectRemove( RtCodeVect *vect, long pos, long len ) -{ - long newLen, lenToSlideOver, endPos; - Code *dst;//, *item; - - /* If we are given a negative position to remove at then - * treat it as a position relative to the length. */ - if ( pos < 0 ) - pos = vect->tabLen + pos; - - /* The first position after the last item deleted. */ - endPos = pos + len; - - /* The new data length. */ - newLen = vect->tabLen - len; - - /* The place in the data we are deleting at. 
*/ - dst = vect->data + pos; - - /* Call Destructors. */ - //item = dst; - //for ( long i = 0; i < len; i += 1, item += 1 ) - // item->~Code(); - - /* Shift data over if necessary. */ - lenToSlideOver = vect->tabLen - endPos; - if ( len > 0 && lenToSlideOver > 0 ) - memmove(dst, dst + len, sizeof(Code)*lenToSlideOver); - - /* Shrink the data if necessary. */ - downResize( vect, newLen ); - - /* Set the new data length. */ - vect->tabLen = newLen; -} - - diff --git a/colm/colm.h b/colm/colm.h deleted file mode 100644 index 4f169254..00000000 --- a/colm/colm.h +++ /dev/null @@ -1,55 +0,0 @@ -#ifndef __COLM_COLM_H -#define __COLM_COLM_H - -#ifdef __cplusplus -extern "C" { -#endif - -struct ColmTree; -struct ColmKid; -struct ColmProgram; -struct ColmRuntimeData; - -void colmInit( long debugRealm ); -struct ColmProgram *colmNewProgram( struct ColmRuntimeData *rtd, int argc, const char **argv ); -void colmRunProgram( struct ColmProgram *prg ); -int colmDeleteProgram( struct ColmProgram *prg ); - -struct ColmPrintArgs -{ - void *arg; - int comm; - int attr; - int trim; - void (*out)( struct ColmPrintArgs *args, const char *data, int length ); - void (*openTree)( struct ColmProgram *prg, struct ColmTree **sp, - struct ColmPrintArgs *args, struct ColmKid *parent, struct ColmKid *kid ); - void (*printTerm)( struct ColmProgram *prg, struct ColmTree **sp, - struct ColmPrintArgs *args, struct ColmKid *kid ); - void (*closeTree)( struct ColmProgram *prg, struct ColmTree **sp, - struct ColmPrintArgs *args, struct ColmKid *parent, struct ColmKid *kid ); -}; - -void printNull( struct ColmProgram *prg, struct ColmTree **sp, - struct ColmPrintArgs *args, struct ColmKid *parent, struct ColmKid *kid ); -void printTermTree( struct ColmProgram *prg, struct ColmTree **sp, - struct ColmPrintArgs *printArgs, struct ColmKid *kid ); - -struct ColmTree **vm_root( struct ColmProgram *prg ); -struct ColmTree *returnVal( struct ColmProgram *prg ); -void printTreeArgs( struct ColmProgram *prg, 
struct ColmTree **sp, - struct ColmPrintArgs *printArgs, struct ColmTree *tree ); - -int repeatEnd( struct ColmTree *tree ); -int listLast( struct ColmTree *tree ); -struct ColmTree *getRhsVal( struct ColmProgram *prg, struct ColmTree *tree, int *a ); -struct ColmTree *getAttr( struct ColmTree *tree, long pos ); -struct ColmTree *getGlobal( struct ColmProgram *prg, long pos ); -struct ColmTree *getRepeatNext( struct ColmTree *tree ); -struct ColmTree *getRepeatVal( struct ColmTree *tree ); - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/colm/compiler.cc b/colm/compiler.cc deleted file mode 100644 index c1e775f2..00000000 --- a/colm/compiler.cc +++ /dev/null @@ -1,1496 +0,0 @@ -/* - * Copyright 2006-2012 Adrian Thurston <thurston@complang.org> - */ - -/* This file is part of Colm. - * - * Colm is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * Colm is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with Colm; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include <iostream> -#include <iomanip> -#include <errno.h> -#include <stdlib.h> -#include <limits.h> -#include <sstream> - -#include "global.h" -#include "lmparse.h" -#include "parsedata.h" -#include "parsetree.h" -#include "mergesort.h" -#include "redbuild.h" -#include "pdacodegen.h" -#include "fsmcodegen.h" -#include "fsmrun.h" -#include "pdarun.h" -#include "colm.h" -#include "pool.h" - -using namespace std; -using std::ostringstream; - -char machineMain[] = "main"; -exit_object endp; -void operator<<( ostream &out, exit_object & ) -{ - out << endl; - exit(1); -} - -/* Perform minimization after an operation according - * to the command line args. */ -void afterOpMinimize( FsmGraph *fsm, bool lastInSeq ) -{ - /* Switch on the prefered minimization algorithm. */ - if ( lastInSeq ) { - /* First clean up the graph. FsmGraph operations may leave these - * lying around. There should be no dead end states. The subtract - * intersection operators are the only places where they may be - * created and those operators clean them up. */ - fsm->removeUnreachableStates(); - fsm->minimizePartition2(); - } -} - -/* Count the transitions in the fsm by walking the state list. */ -int countTransitions( FsmGraph *fsm ) -{ - int numTrans = 0; - FsmState *state = fsm->stateList.head; - while ( state != 0 ) { - numTrans += state->outList.length(); - state = state->next; - } - return numTrans; -} - -Key makeFsmKeyHex( char *str, const InputLoc &loc, Compiler *pd ) -{ - /* Reset errno so we can check for overflow or underflow. In the event of - * an error, sets the return val to the upper or lower bound being tested - * against. 
*/ - errno = 0; - unsigned int size = keyOps->alphType->size; - bool unusedBits = size < sizeof(unsigned long); - - unsigned long ul = strtoul( str, 0, 16 ); - - if ( errno == ERANGE || (unusedBits && ul >> (size * 8)) ) { - error(loc) << "literal " << str << " overflows the alphabet type" << endl; - ul = 1 << (size * 8); - } - - if ( unusedBits && keyOps->alphType->isSigned && ul >> (size * 8 - 1) ) - ul |= (0xffffffff >> (size*8 ) ) << (size*8); - - return Key( (long)ul ); -} - -Key makeFsmKeyDec( char *str, const InputLoc &loc, Compiler *pd ) -{ - /* Convert the number to a decimal. First reset errno so we can check - * for overflow or underflow. */ - errno = 0; - long long minVal = keyOps->alphType->minVal; - long long maxVal = keyOps->alphType->maxVal; - - long long ll = strtoll( str, 0, 10 ); - - /* Check for underflow. */ - if ( (errno == ERANGE && ll < 0) || ll < minVal) { - error(loc) << "literal " << str << " underflows the alphabet type" << endl; - ll = minVal; - } - /* Check for overflow. */ - else if ( (errno == ERANGE && ll > 0) || ll > maxVal ) { - error(loc) << "literal " << str << " overflows the alphabet type" << endl; - ll = maxVal; - } - - if ( keyOps->alphType->isSigned ) - return Key( (long)ll ); - else - return Key( (unsigned long)ll ); -} - -/* Make an fsm key in int format (what the fsm graph uses) from an alphabet - * number returned by the parser. Validates that the number doesn't overflow - * the alphabet type. */ -Key makeFsmKeyNum( char *str, const InputLoc &loc, Compiler *pd ) -{ - /* Switch on hex/decimal format. */ - if ( str[0] == '0' && str[1] == 'x' ) - return makeFsmKeyHex( str, loc, pd ); - else - return makeFsmKeyDec( str, loc, pd ); -} - -/* Make an fsm int format (what the fsm graph uses) from a single character. - * Performs proper conversion depending on signed/unsigned property of the - * alphabet. */ -Key makeFsmKeyChar( char c, Compiler *pd ) -{ - if ( keyOps->isSigned ) { - /* Copy from a char type. 
*/ - return Key( c ); - } - else { - /* Copy from an unsigned byte type. */ - return Key( (unsigned char)c ); - } -} - -/* Make an fsm key array in int format (what the fsm graph uses) from a string - * of characters. Performs proper conversion depending on signed/unsigned - * property of the alphabet. */ -void makeFsmKeyArray( Key *result, char *data, int len, Compiler *pd ) -{ - if ( keyOps->isSigned ) { - /* Copy from a char star type. */ - char *src = data; - for ( int i = 0; i < len; i++ ) - result[i] = Key(src[i]); - } - else { - /* Copy from an unsigned byte ptr type. */ - unsigned char *src = (unsigned char*) data; - for ( int i = 0; i < len; i++ ) - result[i] = Key(src[i]); - } -} - -/* Like makeFsmKeyArray except the result has only unique keys. They ordering - * will be changed. */ -void makeFsmUniqueKeyArray( KeySet &result, char *data, int len, - bool caseInsensitive, Compiler *pd ) -{ - /* Use a transitions list for getting unique keys. */ - if ( keyOps->isSigned ) { - /* Copy from a char star type. */ - char *src = data; - for ( int si = 0; si < len; si++ ) { - Key key( src[si] ); - result.insert( key ); - if ( caseInsensitive ) { - if ( key.isLower() ) - result.insert( key.toUpper() ); - else if ( key.isUpper() ) - result.insert( key.toLower() ); - } - } - } - else { - /* Copy from an unsigned byte ptr type. */ - unsigned char *src = (unsigned char*) data; - for ( int si = 0; si < len; si++ ) { - Key key( src[si] ); - result.insert( key ); - if ( caseInsensitive ) { - if ( key.isLower() ) - result.insert( key.toUpper() ); - else if ( key.isUpper() ) - result.insert( key.toLower() ); - } - } - } -} - -FsmGraph *dotFsm( Compiler *pd ) -{ - FsmGraph *retFsm = new FsmGraph(); - retFsm->rangeFsm( keyOps->minKey, keyOps->maxKey ); - return retFsm; -} - -FsmGraph *dotStarFsm( Compiler *pd ) -{ - FsmGraph *retFsm = new FsmGraph(); - retFsm->rangeStarFsm( keyOps->minKey, keyOps->maxKey ); - return retFsm; -} - -/* Make a builtin type. 
Depends on the signed nature of the alphabet type. */ -FsmGraph *makeBuiltin( BuiltinMachine builtin, Compiler *pd ) -{ - /* FsmGraph created to return. */ - FsmGraph *retFsm = 0; - bool isSigned = keyOps->isSigned; - - switch ( builtin ) { - case BT_Any: { - /* All characters. */ - retFsm = dotFsm( pd ); - break; - } - case BT_Ascii: { - /* Ascii characters 0 to 127. */ - retFsm = new FsmGraph(); - retFsm->rangeFsm( 0, 127 ); - break; - } - case BT_Extend: { - /* Ascii extended characters. This is the full byte range. Dependent - * on signed, vs no signed. If the alphabet is one byte then just use - * dot fsm. */ - if ( isSigned ) { - retFsm = new FsmGraph(); - retFsm->rangeFsm( -128, 127 ); - } - else { - retFsm = new FsmGraph(); - retFsm->rangeFsm( 0, 255 ); - } - break; - } - case BT_Alpha: { - /* Alpha [A-Za-z]. */ - FsmGraph *upper = new FsmGraph(), *lower = new FsmGraph(); - upper->rangeFsm( 'A', 'Z' ); - lower->rangeFsm( 'a', 'z' ); - upper->unionOp( lower ); - upper->minimizePartition2(); - retFsm = upper; - break; - } - case BT_Digit: { - /* Digits [0-9]. */ - retFsm = new FsmGraph(); - retFsm->rangeFsm( '0', '9' ); - break; - } - case BT_Alnum: { - /* Alpha numerics [0-9A-Za-z]. */ - FsmGraph *digit = new FsmGraph(), *lower = new FsmGraph(); - FsmGraph *upper = new FsmGraph(); - digit->rangeFsm( '0', '9' ); - upper->rangeFsm( 'A', 'Z' ); - lower->rangeFsm( 'a', 'z' ); - digit->unionOp( upper ); - digit->unionOp( lower ); - digit->minimizePartition2(); - retFsm = digit; - break; - } - case BT_Lower: { - /* Lower case characters. */ - retFsm = new FsmGraph(); - retFsm->rangeFsm( 'a', 'z' ); - break; - } - case BT_Upper: { - /* Upper case characters. */ - retFsm = new FsmGraph(); - retFsm->rangeFsm( 'A', 'Z' ); - break; - } - case BT_Cntrl: { - /* Control characters. 
*/ - FsmGraph *cntrl = new FsmGraph(); - FsmGraph *highChar = new FsmGraph(); - cntrl->rangeFsm( 0, 31 ); - highChar->concatFsm( 127 ); - cntrl->unionOp( highChar ); - cntrl->minimizePartition2(); - retFsm = cntrl; - break; - } - case BT_Graph: { - /* Graphical ascii characters [!-~]. */ - retFsm = new FsmGraph(); - retFsm->rangeFsm( '!', '~' ); - break; - } - case BT_Print: { - /* Printable characters. Same as graph except includes space. */ - retFsm = new FsmGraph(); - retFsm->rangeFsm( ' ', '~' ); - break; - } - case BT_Punct: { - /* Punctuation. */ - FsmGraph *range1 = new FsmGraph(); - FsmGraph *range2 = new FsmGraph(); - FsmGraph *range3 = new FsmGraph(); - FsmGraph *range4 = new FsmGraph(); - range1->rangeFsm( '!', '/' ); - range2->rangeFsm( ':', '@' ); - range3->rangeFsm( '[', '`' ); - range4->rangeFsm( '{', '~' ); - range1->unionOp( range2 ); - range1->unionOp( range3 ); - range1->unionOp( range4 ); - range1->minimizePartition2(); - retFsm = range1; - break; - } - case BT_Space: { - /* Whitespace: [\t\v\f\n\r ]. */ - FsmGraph *cntrl = new FsmGraph(); - FsmGraph *space = new FsmGraph(); - cntrl->rangeFsm( '\t', '\r' ); - space->concatFsm( ' ' ); - cntrl->unionOp( space ); - cntrl->minimizePartition2(); - retFsm = cntrl; - break; - } - case BT_Xdigit: { - /* Hex digits [0-9A-Fa-f]. */ - FsmGraph *digit = new FsmGraph(); - FsmGraph *upper = new FsmGraph(); - FsmGraph *lower = new FsmGraph(); - digit->rangeFsm( '0', '9' ); - upper->rangeFsm( 'A', 'F' ); - lower->rangeFsm( 'a', 'f' ); - digit->unionOp( upper ); - digit->unionOp( lower ); - digit->minimizePartition2(); - retFsm = digit; - break; - } - case BT_Lambda: { - retFsm = new FsmGraph(); - retFsm->lambdaFsm(); - break; - } - case BT_Empty: { - retFsm = new FsmGraph(); - retFsm->emptyFsm(); - break; - }} - - return retFsm; -} - -/* Check if this name inst or any name inst below is referenced. */ -bool NameInst::anyRefsRec() -{ - if ( numRefs > 0 ) - return true; - - /* Recurse on children until true. 
*/ - for ( NameVect::Iter ch = childVect; ch.lte(); ch++ ) { - if ( (*ch)->anyRefsRec() ) - return true; - } - - return false; -} - -/* - * Compiler - */ - -/* Initialize the structure that will collect info during the parse of a - * machine. */ -Compiler::Compiler( const String &fileName, const String §ionName, - const InputLoc §ionLoc, ostream &out ) -: - nextPriorKey(0), - nextLocalErrKey(1), /* 0 is reserved for global error actions. */ - nextNameId(0), - alphTypeSet(false), - getKeyExpr(0), - accessExpr(0), - curStateExpr(0), - lowerNum(0), - upperNum(0), - fileName(fileName), - sectionName(sectionName), - sectionLoc(sectionLoc), - errorCount(0), - curActionOrd(0), - curPriorOrd(0), - nextEpsilonResolvedLink(0), - nextTokenId(1), - rootCodeBlock(0), - mainReturnUT(0), - parserName(sectionName), - out(out), - access(0), - tokenStruct(0), - rootLangEl(0), - eofLangEl(0), - errorLangEl(0), - defaultCharLangEl(0), - rootRegion(0), - defaultRegion(0), - firstNonTermId(0), - prodIdIndex(0), - nextPatReplId(0), - nextGenericId(1), - nextFuncId(0), - loopCleanup(0), - nextObjectId(1), /* 0 is reserved for no object. */ - nextFrameId(0), - nextParserId(0), - nextLabelId(0), - revertOn(true), - predValue(0), - nextMatchEndNum(0), - argvTypeRef(0), - context(0) -{ -} - -/* Clean up the data collected during a parse. */ -Compiler::~Compiler() -{ - /* Delete all the nodes in the action list. Will cause all the - * string data that represents the actions to be deallocated. */ - actionList.empty(); -} - -/* Make a name id in the current name instantiation scope if it is not - * already there. */ -NameInst *Compiler::addNameInst( const InputLoc &loc, char *data, bool isLabel ) -{ - /* Create the name instantitaion object and insert it. 
*/ - NameInst *newNameInst = new NameInst( loc, curNameInst, data, nextNameId++, isLabel ); - curNameInst->childVect.append( newNameInst ); - if ( data != 0 ) - curNameInst->children.insertMulti( data, newNameInst ); - return newNameInst; -} - -void Compiler::initNameWalk( NameInst *rootName ) -{ - curNameInst = rootName; - curNameChild = 0; -} - -/* Goes into the next child scope. The number of the child is already set up. - * We need this for the syncronous name tree and parse tree walk to work - * properly. It is reset on entry into a scope and advanced on poping of a - * scope. A call to enterNameScope should be accompanied by a corresponding - * popNameScope. */ -NameFrame Compiler::enterNameScope( bool isLocal, int numScopes ) -{ - /* Save off the current data. */ - NameFrame retFrame; - retFrame.prevNameInst = curNameInst; - retFrame.prevNameChild = curNameChild; - retFrame.prevLocalScope = localNameScope; - - /* Enter into the new name scope. */ - for ( int i = 0; i < numScopes; i++ ) { - curNameInst = curNameInst->childVect[curNameChild]; - curNameChild = 0; - } - - if ( isLocal ) - localNameScope = curNameInst; - - return retFrame; -} - -/* Return from a child scope to a parent. The parent info must be specified as - * an argument and is obtained from the corresponding call to enterNameScope. - * */ -void Compiler::popNameScope( const NameFrame &frame ) -{ - /* Pop the name scope. */ - curNameInst = frame.prevNameInst; - curNameChild = frame.prevNameChild+1; - localNameScope = frame.prevLocalScope; -} - -void Compiler::resetNameScope( const NameFrame &frame ) -{ - /* Pop the name scope. */ - curNameInst = frame.prevNameInst; - curNameChild = frame.prevNameChild; - localNameScope = frame.prevLocalScope; -} - - -void Compiler::unsetObsoleteEntries( FsmGraph *graph ) -{ - /* Loop the reference names and increment the usage. Names that are no - * longer needed will be unset in graph. 
*/ - for ( NameVect::Iter ref = curNameInst->referencedNames; ref.lte(); ref++ ) { - /* Get the name. */ - NameInst *name = *ref; - name->numUses += 1; - - /* If the name is no longer needed unset its corresponding entry. */ - if ( name->numUses == name->numRefs ) { - assert( graph->entryPoints.find( name->id ) != 0 ); - graph->unsetEntry( name->id ); - } - } -} - -NameSet Compiler::resolvePart( NameInst *refFrom, const char *data, bool recLabelsOnly ) -{ - /* Queue needed for breadth-first search, load it with the start node. */ - NameInstList nameQueue; - nameQueue.append( refFrom ); - - NameSet result; - while ( nameQueue.length() > 0 ) { - /* Pull the next from location off the queue. */ - NameInst *from = nameQueue.detachFirst(); - - /* Look for the name. */ - NameMapEl *low, *high; - if ( from->children.findMulti( data, low, high ) ) { - /* Record all instances of the name. */ - for ( ; low <= high; low++ ) - result.insert( low->value ); - } - - /* Name not there, do breadth-first operation of appending all - * childrent to the processing queue. */ - for ( NameVect::Iter name = from->childVect; name.lte(); name++ ) { - if ( !recLabelsOnly || (*name)->isLabel ) - nameQueue.append( *name ); - } - } - - /* Queue exhausted and name never found. */ - return result; -} - -void Compiler::resolveFrom( NameSet &result, NameInst *refFrom, - const NameRef &nameRef, int namePos ) -{ - /* Look for the name in the owning scope of the factor with aug. */ - NameSet partResult = resolvePart( refFrom, nameRef[namePos], false ); - - /* If there are more parts to the name then continue on. */ - if ( ++namePos < nameRef.length() ) { - /* There are more components to the name, search using all the part - * results as the base. */ - for ( NameSet::Iter name = partResult; name.lte(); name++ ) - resolveFrom( result, *name, nameRef, namePos ); - } - else { - /* This is the last component, append the part results to the final - * results. 
*/ - result.insert( partResult ); - } -} - -ostream &operator<<( ostream &out, const Token &token ) -{ - out << token.data; - return out; -} - -/* Write out a name reference. */ -ostream &operator<<( ostream &out, const NameRef &nameRef ) -{ - int pos = 0; - if ( nameRef[pos] == 0 ) { - out << "::"; - pos += 1; - } - out << nameRef[pos++]; - for ( ; pos < nameRef.length(); pos++ ) - out << "::" << nameRef[pos]; - return out; -} - -ostream &operator<<( ostream &out, const NameInst &nameInst ) -{ - /* Count the number fully qualified name parts. */ - int numParents = 0; - NameInst *curParent = nameInst.parent; - while ( curParent != 0 ) { - numParents += 1; - curParent = curParent->parent; - } - - /* Make an array and fill it in. */ - curParent = nameInst.parent; - NameInst **parents = new NameInst*[numParents]; - for ( int p = numParents-1; p >= 0; p-- ) { - parents[p] = curParent; - curParent = curParent->parent; - } - - /* Write the parents out, skip the root. */ - for ( int p = 1; p < numParents; p++ ) - out << "::" << ( parents[p]->name != 0 ? parents[p]->name : "<ANON>" ); - - /* Write the name and cleanup. */ - out << "::" << ( nameInst.name != 0 ? 
nameInst.name : "<ANON>" ); - delete[] parents; - return out; -} - -struct CmpNameInstLoc -{ - static int compare( const NameInst *ni1, const NameInst *ni2 ) - { - if ( ni1->loc.line < ni2->loc.line ) - return -1; - else if ( ni1->loc.line > ni2->loc.line ) - return 1; - else if ( ni1->loc.col < ni2->loc.col ) - return -1; - else if ( ni1->loc.col > ni2->loc.col ) - return 1; - return 0; - } -}; - -void errorStateLabels( const NameSet &resolved ) -{ - MergeSort<NameInst*, CmpNameInstLoc> mergeSort; - mergeSort.sort( resolved.data, resolved.length() ); - for ( NameSet::Iter res = resolved; res.lte(); res++ ) - error((*res)->loc) << " -> " << **res << endl; -} - - -void Compiler::referenceRegions( NameInst *rootName ) -{ - for ( NameVect::Iter inst = rootName->childVect; inst.lte(); inst++ ) { - /* Inc the reference in the name. This will cause the entry point to - * survive to the end of the graph generating walk. */ - (*inst)->numRefs += 1; - } -} - -/* Walk a name tree starting at from and fill the name index. */ -void Compiler::fillNameIndex( NameInst **nameIndex, NameInst *from ) -{ - /* Fill the value for from in the name index. */ - nameIndex[from->id] = from; - - /* Recurse on the implicit final state and then all children. */ - if ( from->final != 0 ) - fillNameIndex( nameIndex, from->final ); - for ( NameVect::Iter name = from->childVect; name.lte(); name++ ) - fillNameIndex( nameIndex, *name ); -} - -NameInst **Compiler::makeNameIndex( NameInst *rootName ) -{ - /* The number of nodes in the tree can now be given by nextNameId. Put a - * null pointer on the end of the list to terminate it. 
*/ - NameInst **nameIndex = new NameInst*[nextNameId+1]; - memset( nameIndex, 0, sizeof(NameInst*)*(nextNameId+1) ); - fillNameIndex( nameIndex, rootName ); - return nameIndex; -} - -void Compiler::createBuiltin( const char *name, BuiltinMachine builtin ) -{ - Expression *expression = new Expression( builtin ); - Join *join = new Join( expression ); - VarDef *varDef = new VarDef( name, join ); - GraphDictEl *graphDictEl = new GraphDictEl( name, varDef ); - rootNamespace->rlMap.insert( graphDictEl ); -} - -/* Initialize the graph dict with builtin types. */ -void Compiler::initGraphDict( ) -{ - createBuiltin( "any", BT_Any ); - createBuiltin( "ascii", BT_Ascii ); - createBuiltin( "extend", BT_Extend ); - createBuiltin( "alpha", BT_Alpha ); - createBuiltin( "digit", BT_Digit ); - createBuiltin( "alnum", BT_Alnum ); - createBuiltin( "lower", BT_Lower ); - createBuiltin( "upper", BT_Upper ); - createBuiltin( "cntrl", BT_Cntrl ); - createBuiltin( "graph", BT_Graph ); - createBuiltin( "print", BT_Print ); - createBuiltin( "punct", BT_Punct ); - createBuiltin( "space", BT_Space ); - createBuiltin( "xdigit", BT_Xdigit ); - createBuiltin( "null", BT_Lambda ); - createBuiltin( "zlen", BT_Lambda ); - createBuiltin( "empty", BT_Empty ); -} - -/* Initialize the key operators object that will be referenced by all fsms - * created. */ -void Compiler::initKeyOps( ) -{ - /* Signedness and bounds. */ - HostType *alphType = alphTypeSet ? userAlphType : hostLang->defaultAlphType; - thisKeyOps.setAlphType( alphType ); - - if ( lowerNum != 0 ) { - /* If ranges are given then interpret the alphabet type. */ - thisKeyOps.minKey = makeFsmKeyNum( lowerNum, rangeLowLoc, this ); - thisKeyOps.maxKey = makeFsmKeyNum( upperNum, rangeHighLoc, this ); - } - - thisCondData.nextCondKey = thisKeyOps.maxKey; - thisCondData.nextCondKey.increment(); -} - -void Compiler::printNameInst( NameInst *nameInst, int level ) -{ - for ( int i = 0; i < level; i++ ) - cerr << " "; - cerr << (nameInst->name != 0 ? 
nameInst->name : "<ANON>") << - " id: " << nameInst->id << - " refs: " << nameInst->numRefs << endl; - for ( NameVect::Iter name = nameInst->childVect; name.lte(); name++ ) - printNameInst( *name, level+1 ); -} - -/* Remove duplicates of unique actions from an action table. */ -void Compiler::removeDups( ActionTable &table ) -{ - /* Scan through the table looking for unique actions to - * remove duplicates of. */ - for ( int i = 0; i < table.length(); i++ ) { - /* Remove any duplicates ahead of i. */ - for ( int r = i+1; r < table.length(); ) { - if ( table[r].value == table[i].value ) - table.vremove(r); - else - r += 1; - } - } -} - -/* Remove duplicates from action lists. This operates only on transition and - * eof action lists and so should be called once all actions have been - * transfered to their final resting place. */ -void Compiler::removeActionDups( FsmGraph *graph ) -{ - /* Loop all states. */ - for ( StateList::Iter state = graph->stateList; state.lte(); state++ ) { - /* Loop all transitions. */ - for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) - removeDups( trans->actionTable ); - removeDups( state->toStateActionTable ); - removeDups( state->fromStateActionTable ); - removeDups( state->eofActionTable ); - } -} - -Action *Compiler::newAction( const String &name, InlineList *inlineList ) -{ - InputLoc loc; - loc.line = 1; - loc.col = 1; - loc.fileName = 0; - - Action *action = new Action( loc, name, inlineList ); - actionList.append( action ); - return action; -} - -void Compiler::initLongestMatchData() -{ - if ( regionList.length() > 0 ) { - /* The initActId action gives act a default value. */ - InlineList *il4 = new InlineList; - il4->append( new InlineItem( InputLoc(), InlineItem::LmInitAct ) ); - initActId = newAction( "initact", il4 ); - initActId->isLmAction = true; - - /* The setTokStart action sets tokstart. 
*/ - InlineList *il5 = new InlineList; - il5->append( new InlineItem( InputLoc(), InlineItem::LmSetTokStart ) ); - setTokStart = newAction( "tokstart", il5 ); - setTokStart->isLmAction = true; - - /* The setTokEnd action sets tokend. */ - InlineList *il3 = new InlineList; - il3->append( new InlineItem( InputLoc(), InlineItem::LmSetTokEnd ) ); - setTokEnd = newAction( "tokend", il3 ); - setTokEnd->isLmAction = true; - - /* The action will also need an ordering: ahead of all user action - * embeddings. */ - initActIdOrd = curActionOrd++; - setTokStartOrd = curActionOrd++; - setTokEndOrd = curActionOrd++; - } -} - -void Compiler::finishGraphBuild( FsmGraph *graph ) -{ - /* Resolve any labels that point to multiple states. Any labels that are - * still around are referenced only by gotos and calls and they need to be - * made into deterministic entry points. */ - graph->deterministicEntry(); - - /* - * All state construction is now complete. - */ - - /* Transfer global error actions. */ - for ( StateList::Iter state = graph->stateList; state.lte(); state++ ) - graph->transferErrorActions( state, 0 ); - - removeActionDups( graph ); - - /* Remove unreachable states. There should be no dead end states. The - * subtract and intersection operators are the only places where they may - * be created and those operators clean them up. */ - graph->removeUnreachableStates(); - - /* No more fsm operations are to be done. Action ordering numbers are - * no longer of use and will just hinder minimization. Clear them. */ - graph->nullActionKeys(); - - /* Transition priorities are no longer of use. We can clear them - * because they will just hinder minimization as well. Clear them. */ - graph->clearAllPriorities(); - - /* Minimize here even if we minimized at every op. Now that function - * keys have been cleared we may get a more minimal fsm. 
*/ - graph->minimizePartition2(); - graph->compressTransitions(); -} - -void Compiler::printNameTree( NameInst *rootName ) -{ - /* Print the name instance map. */ - cerr << "name tree:" << endl; - for ( NameVect::Iter name = rootName->childVect; name.lte(); name++ ) - printNameInst( *name, 0 ); -} - -void Compiler::printNameIndex( NameInst **nameIndex ) -{ - /* The name index is terminated with a null pointer. */ - cerr << "name index:" << endl; - for ( int ni = 0; nameIndex[ni]; ni++ ) { - cerr << ni << ": "; - char *name = nameIndex[ni]->name; - cerr << ( name != 0 ? name : "<ANON>" ) << endl; - } -} - - -/* Build the name tree and supporting data structures. */ -NameInst *Compiler::makeNameTree() -{ - /* Create the root name. */ - nextNameId = 0; - NameInst *rootName = new NameInst( InputLoc(), 0, 0, nextNameId++, false ); - - /* First make the name tree. */ - initNameWalk( rootName ); - for ( RegionGraphList::Iter glel = instanceList; glel.lte(); glel++ ) { - /* Recurse on the instance. */ - glel->value->makeNameTree( glel->loc, this ); - } - - return rootName; -} - -FsmGraph *Compiler::makeAllRegions() -{ - /* Build the name tree and supporting data structures. */ - NameInst *rootName = makeNameTree( ); - NameInst **nameIndex = makeNameIndex( rootName ); - - /* Resovle the implicit name references to the nfa instantiations. */ - referenceRegions( rootName ); - - int numGraphs = 0; - FsmGraph **graphs = new FsmGraph*[instanceList.length()]; - - /* Make all the instantiations, we know that main exists in this list. */ - initNameWalk( rootName ); - for ( RegionGraphList::Iter glel = instanceList; glel.lte(); glel++ ) { - /* Build the graph from a walk of the parse tree. */ - FsmGraph *newGraph = glel->value->walk( this ); - - /* Wrap up the construction. */ - finishGraphBuild( newGraph ); - - /* Save off the new graph. 
*/ - graphs[numGraphs++] = newGraph; - } - - /* NOTE: If putting in minimization here we need to include eofTarget - * into the minimization algorithm. It is currently set by the longest - * match operator and not considered anywhere else. */ - - /* Add all the other graphs into the first. */ - FsmGraph *all = graphs[0]; - all->globOp( graphs+1, numGraphs-1 ); - delete[] graphs; - - /* Go through all the token regions and check for lmRequiresErrorState. */ - for ( RegionList::Iter reg = regionList; reg.lte(); reg++ ) { - if ( reg->lmSwitchHandlesError ) - all->lmRequiresErrorState = true; - } - - all->rootName = rootName; - all->nameIndex = nameIndex; - - return all; -} - -void Compiler::analyzeAction( Action *action, InlineList *inlineList ) -{ - /* FIXME: Actions used as conditions should be very constrained. */ - for ( InlineList::Iter item = *inlineList; item.lte(); item++ ) { - //if ( item->type == InlineItem::Call || item->type == InlineItem::CallExpr ) - // action->anyCall = true; - - /* Need to recurse into longest match items. */ - if ( item->type == InlineItem::LmSwitch ) { - TokenRegion *lm = item->tokenRegion; - for ( TokenDefListReg::Iter lmi = lm->tokenDefList; lmi.lte(); lmi++ ) { - if ( lmi->action != 0 ) - analyzeAction( action, lmi->action->inlineList ); - } - } - - if ( item->type == InlineItem::LmOnLast || - item->type == InlineItem::LmOnNext || - item->type == InlineItem::LmOnLagBehind ) - { - TokenDef *lmi = item->longestMatchPart; - if ( lmi->action != 0 ) - analyzeAction( action, lmi->action->inlineList ); - } - - if ( item->children != 0 ) - analyzeAction( action, item->children ); - } -} - -void Compiler::analyzeGraph( FsmGraph *graph ) -{ - for ( ActionList::Iter act = actionList; act.lte(); act++ ) - analyzeAction( act, act->inlineList ); - - for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) { - /* The transition list. 
*/ - for ( TransList::Iter trans = st->outList; trans.lte(); trans++ ) { - for ( ActionTable::Iter at = trans->actionTable; at.lte(); at++ ) - at->value->numTransRefs += 1; - } - - for ( ActionTable::Iter at = st->toStateActionTable; at.lte(); at++ ) - at->value->numToStateRefs += 1; - - for ( ActionTable::Iter at = st->fromStateActionTable; at.lte(); at++ ) - at->value->numFromStateRefs += 1; - - for ( ActionTable::Iter at = st->eofActionTable; at.lte(); at++ ) - at->value->numEofRefs += 1; - - for ( StateCondList::Iter sc = st->stateCondList; sc.lte(); sc++ ) { - for ( CondSet::Iter sci = sc->condSpace->condSet; sci.lte(); sci++ ) - (*sci)->numCondRefs += 1; - } - } -} - -FsmGraph *Compiler::makeScanner() -{ - /* Make the graph, do minimization. */ - FsmGraph *fsmGraph = makeAllRegions(); - - /* If any errors have occured in the input file then don't write anything. */ - if ( gblErrorCount > 0 ) - return 0; - - analyzeGraph( fsmGraph ); - - /* Decide if an error state is necessary. - * 1. There is an error transition - * 2. There is a gap in the transitions - * 3. The longest match operator requires it. */ - if ( fsmGraph->lmRequiresErrorState || fsmGraph->hasErrorTrans() ) - fsmGraph->errState = fsmGraph->addState(); - - /* State numbers need to be assigned such that all final states have a - * larger state id number than all non-final states. This enables the - * first_final mechanism to function correctly. We also want states to be - * ordered in a predictable fashion. So we first apply a depth-first - * search, then do a stable sort by final state status, then assign - * numbers. */ - - fsmGraph->depthFirstOrdering(); - fsmGraph->sortStatesByFinal(); - fsmGraph->setStateNumbers( 0 ); - - return fsmGraph; -} - -void Compiler::createDefaultScanner() -{ - InputLoc loc = { 0, 0, 0 }; - - const char *name = "___DEFAULT_SCANNER"; - - /* Create the default namespace. 
*/ - defaultNamespace = new Namespace( InputLoc(), name, - namespaceList.length(), 0 ); - namespaceList.append( defaultNamespace ); - - /* Create a scanner which will be used when no other scanner can be - * figured out. It returns single characters. */ - defaultRegion = new TokenRegion( InputLoc(), name, - regionList.length(), 0 ); - regionList.append( defaultRegion ); - - /* Insert the machine definition into the graph dictionary. */ - RegionGraphDictEl *newEl = rootNamespace->graphDict.insert( name ); - assert( newEl != 0 ); - newEl->value = new RegionDef( name, defaultRegion ); - newEl->isInstance = true; - instanceList.append( newEl ); - - Join *join = new Join( new Expression( BT_Any ) ); - - TokenDef *tokenDef = new TokenDef( name, String(), false, false, - join, 0, loc, nextTokenId++, - rootNamespace, defaultRegion, 0, 0, 0 ); - - defaultRegion->tokenDefList.append( tokenDef ); - - /* Now create the one and only token -> "<chr>" / any / */ - name = "___DEFAULT_SCANNER_CHR"; - defaultCharLangEl = addLangEl( this, defaultNamespace, name, LangEl::Term ); - - tokenDef->tdLangEl = defaultCharLangEl; - defaultCharLangEl->tokenDef = tokenDef; -} - -LangEl *Compiler::makeRepeatProd( Namespace *nspace, const String &repeatName, - NamespaceQual *nspaceQual, const String &name ) -{ - LangEl *prodName = addLangEl( this, nspace, repeatName, LangEl::NonTerm ); - prodName->isRepeat = true; - - ProdElList *prodElList1 = new ProdElList; - - /* Build the first production of the repeat. 
*/ - TypeRef *typeRef1 = new TypeRef( InputLoc(), nspaceQual, name ); - ProdEl *factor1 = new ProdEl( ProdEl::ReferenceType, InputLoc(), 0, false, typeRef1, 0 ); - - UniqueType *prodNameUT = findUniqueType( TYPE_TREE, prodName ); - TypeRef *typeRef2 = new TypeRef( InputLoc(), prodNameUT ); - ProdEl *factor2 = new ProdEl( ProdEl::ReferenceType, InputLoc(), 0, false, typeRef2, 0 ); - - prodElList1->append( factor1 ); - prodElList1->append( factor2 ); - - Definition *newDef1 = new Definition( InputLoc(), - prodName, prodElList1, false, 0, - prodList.length(), prodName->defList.length(), - Definition::Production ); - - prodName->defList.append( newDef1 ); - prodList.append( newDef1 ); - - /* Build the second production of the repeat. */ - ProdElList *prodElList2 = new ProdElList; - - Definition *newDef2 = new Definition( InputLoc(), - prodName, prodElList2, false, 0, - prodList.length(), prodName->defList.length(), - Definition::Production ); - - prodName->defList.append( newDef2 ); - prodList.append( newDef2 ); - - return prodName; -} - -LangEl *Compiler::makeListProd( Namespace *nspace, const String &listName, NamespaceQual *nspaceQual, const String &name ) -{ - LangEl *prodName = addLangEl( this, nspace, listName, LangEl::NonTerm ); - prodName->isList = true; - - /* Build the first production of the list. 
*/ - TypeRef *typeRef1 = new TypeRef( InputLoc(), nspaceQual, name ); - ProdEl *factor1 = new ProdEl( ProdEl::ReferenceType, InputLoc(), 0, false, typeRef1, 0 ); - - UniqueType *prodNameUT = findUniqueType( TYPE_TREE, prodName ); - TypeRef *typeRef2 = new TypeRef( InputLoc(), prodNameUT ); - ProdEl *factor2 = new ProdEl( ProdEl::ReferenceType, InputLoc(), 0, false, typeRef2, 0 ); - - ProdElList *prodElList1 = new ProdElList; - prodElList1->append( factor1 ); - prodElList1->append( factor2 ); - - Definition *newDef1 = new Definition( InputLoc(), - prodName, prodElList1, false, 0, - prodList.length(), prodName->defList.length(), - Definition::Production ); - - prodName->defList.append( newDef1 ); - prodList.append( newDef1 ); - - /* Build the second production of the list. */ - TypeRef *typeRef3 = new TypeRef( InputLoc(), nspaceQual, name ); - ProdEl *factor3 = new ProdEl( ProdEl::ReferenceType, InputLoc(), 0, false, typeRef3, 0 ); - - ProdElList *prodElList2 = new ProdElList; - prodElList2->append( factor3 ); - - Definition *newDef2 = new Definition( InputLoc(), - prodName, prodElList2, false, 0, - prodList.length(), prodName->defList.length(), - Definition::Production ); - - prodName->defList.append( newDef2 ); - prodList.append( newDef2 ); - - return prodName; -} - -LangEl *Compiler::makeOptProd( Namespace *nspace, const String &optName, NamespaceQual *nspaceQual, const String &name ) -{ - LangEl *prodName = addLangEl( this, nspace, optName, LangEl::NonTerm ); - prodName->isOpt = true; - - ProdElList *prodElList1 = new ProdElList; - - /* Build the first production of the repeat. 
*/ - TypeRef *typeRef1 = new TypeRef( InputLoc(), nspaceQual, name ); - ProdEl *factor1 = new ProdEl( ProdEl::ReferenceType, InputLoc(), 0, false, typeRef1, 0 ); - prodElList1->append( factor1 ); - - Definition *newDef1 = new Definition( InputLoc(), - prodName, prodElList1, false, 0, - prodList.length(), prodName->defList.length(), - Definition::Production ); - - prodName->defList.append( newDef1 ); - prodList.append( newDef1 ); - - /* Build the second production of the repeat. */ - ProdElList *prodElList2 = new ProdElList; - - Definition *newDef2 = new Definition( InputLoc(), - prodName, prodElList2, false, 0, - prodList.length(), prodName->defList.length(), - Definition::Production ); - - prodName->defList.append( newDef2 ); - prodList.append( newDef2 ); - - return prodName; -} - -Namespace *Namespace::findNamespace( const String &name ) -{ - for ( NamespaceVect::Iter c = childNamespaces; c.lte(); c++ ) { - if ( strcmp( name, (*c)->name ) == 0 ) - return *c; - } - return 0; -} - -/* Search from a previously resolved qualification. (name 1+ in a qual list). */ -Namespace *NamespaceQual::searchFrom( Namespace *from, StringVect::Iter &qualPart ) -{ - /* While there are still parts in the qualification. */ - while ( qualPart.lte() ) { - Namespace *child = from->findNamespace( *qualPart ); - if ( child == 0 ) - return 0; - - from = child; - qualPart.increment(); - } - - return from; -} - -Namespace *NamespaceQual::getQual( Compiler *pd ) -{ - /* Do the search only once. */ - if ( cachedNspaceQual != 0 ) - return cachedNspaceQual; - - if ( qualNames.length() == 0 ) { - /* No qualification, use the region the qualification was - * declared in. */ - cachedNspaceQual = declInNspace; - } - else if ( strcmp( qualNames[0], "root" ) == 0 ) { - /* First item is "root." Start the downward search from there. 
*/ - StringVect::Iter qualPart = qualNames; - qualPart.increment(); - cachedNspaceQual = searchFrom( pd->rootNamespace, qualPart ); - return cachedNspaceQual; - } - else { - /* Have a qualification. Move upwards through the declared - * regions looking for the first part. */ - StringVect::Iter qualPart = qualNames; - Namespace *parentNamespace = declInNspace; - while ( parentNamespace != 0 ) { - /* Search for the first part underneath the current parent. */ - Namespace *child = parentNamespace->findNamespace( *qualPart ); - - if ( child != 0 ) { - /* Found the first part. Start going below the result. */ - qualPart.increment(); - cachedNspaceQual = searchFrom( child, qualPart ); - return cachedNspaceQual; - } - - /* Not found, move up to the parent. */ - parentNamespace = parentNamespace->parentNamespace; - } - - /* Failed to find the place to start from. */ - cachedNspaceQual = 0; - } - - return cachedNspaceQual; -} - -void Compiler::initEmptyScanners() -{ - for ( RegionList::Iter reg = regionList; reg.lte(); reg++ ) { - if ( reg->tokenDefList.length() == 0 ) { - reg->wasEmpty = true; - - static int def = 1; - InputLoc loc = { 0, 0, 0 }; - String name( reg->name.length() + 16, "__%s_DEF_PAT_%d", reg->name.data, def++ ); - - Join *join = new Join( new Expression( BT_Any ) ); - - TokenDef *tokenDef = new TokenDef( name, String(), false, false, join, - 0, loc, nextTokenId++, rootNamespace, reg, 0, 0, 0 ); - reg->tokenDefList.append( tokenDef ); - - /* These do not go in the namespace so so they cannot get declared - * in the declare pass. */ - LangEl *lel = addLangEl( this, rootNamespace, name, LangEl::Term ); - - tokenDef->tdLangEl = lel; - lel->tokenDef = tokenDef; - } - } -} - - -void Compiler::parsePatterns() -{ - Program *prg = colmNewProgram( runtimeData, 0, 0 ); - - /* Turn off context-dependent parsing. 
*/ - prg->ctxDepParsing = 0; - - Tree **vm_stack = stackAlloc(); - Tree **root = &vm_stack[VM_STACK_SIZE]; - - for ( ReplList::Iter repl = replList; repl.lte(); repl++ ) { - if ( colm_log_compile ) { - cerr << "parsing replacement at " << - repl->loc.line << ' ' << repl->loc.col << endl; - } - - InputStream *in = new InputStream; - FsmRun *fsmRun = new FsmRun; - repl->pdaRun = new PdaRun; - - initInputStream( in ); - initPdaRun( repl->pdaRun, prg, pdaTables, fsmRun, repl->langEl->parserId, 0, false, 0 ); - initFsmRun( fsmRun, prg ); - - Stream *res = streamAllocate( prg ); - res->id = LEL_ID_STREAM; - res->in = newSourceStreamRepl( repl ); - appendStream( in, (Tree*)res ); - setEof( in ); - - newToken( prg, repl->pdaRun, fsmRun ); - long pcr = parseLoop( prg, root, repl->pdaRun, fsmRun, in, PcrStart ); - assert( pcr == PcrDone ); - if ( repl->pdaRun->parseError ) - cout << "parse error" << endp; - } - - for ( PatternList::Iter pat = patternList; pat.lte(); pat++ ) { - if ( colm_log_compile ) { - cerr << "parsing pattern at " << - pat->loc.line << ' ' << pat->loc.col << endl; - } - - InputStream *in = new InputStream; - FsmRun *fsmRun = new FsmRun; - pat->pdaRun = new PdaRun; - - initInputStream( in ); - initPdaRun( pat->pdaRun, prg, pdaTables, fsmRun, pat->langEl->parserId, 0, false, 0 ); - initFsmRun( fsmRun, prg ); - - Stream *res = streamAllocate( prg ); - res->id = LEL_ID_STREAM; - res->in = newSourceStreamPattern( pat ); - appendStream( in, (Tree*)res ); - setEof( in ); - - newToken( prg, pat->pdaRun, fsmRun ); - long pcr = parseLoop( prg, root, pat->pdaRun, fsmRun, in, PcrStart ); - assert( pcr == PcrDone ); - if ( pat->pdaRun->parseError ) - cout << "parse error" << endp; - } - - fillInPatterns( prg ); -} - -void Compiler::collectParserEls( BstSet<LangEl*> &parserEls ) -{ - for ( PatternList::Iter pat = patternList; pat.lte(); pat++ ) { - /* We assume the reduction action compilation phase was run before - * pattern parsing and it decorated the pattern with 
the target type. */ - assert( pat->langEl != 0 ); - if ( pat->langEl->type != LangEl::NonTerm ) - error(pat->loc) << "pattern type is not a non-terminal" << endp; - - if ( pat->langEl->parserId < 0 ) { - /* Make a parser for the language element. */ - parserEls.insert( pat->langEl ); - pat->langEl->parserId = nextParserId++; - } - } - - for ( ReplList::Iter repl = replList; repl.lte(); repl++ ) { - /* We assume the reduction action compilation phase was run before - * replacement parsing decorated the replacement with the target type. */ - assert( repl->langEl != 0 ); - - if ( repl->langEl->parserId < 0 ) { - /* Make a parser for the language element. */ - parserEls.insert( repl->langEl ); - repl->langEl->parserId = nextParserId++; - } - } - - /* Make parsers that we need. */ - for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) { - if ( lel->parserId >= 0 ) - parserEls.insert( lel ); - } -} - - -void Compiler::generateOutput() -{ - FsmCodeGen *fsmGen = new FsmCodeGen("<INPUT>", sectionName, - *outStream, redFsm, fsmTables ); - - PdaCodeGen *pdaGen = new PdaCodeGen( outputFileName, "parser", this, *outStream ); - - fsmGen->writeIncludes(); - pdaGen->defineRuntime(); - fsmGen->writeCode(); - - /* Make parsers that we need. */ - pdaGen->writeParserData( 0, pdaTables ); - - /* Write the runtime data. */ - pdaGen->writeRuntimeData( runtimeData, pdaTables ); - - if ( !gblLibrary ) - fsmGen->writeMain(); - - outStream->flush(); -} - - -void Compiler::prepGrammar() -{ - /* This will create language elements. */ - wrapNonTerminals(); - - makeLangElIds(); - makeLangElNames(); - makeDefinitionNames(); - noUndefindLangEls(); - - /* Put the language elements in an index by language element id. */ - langElIndex = new LangEl*[nextSymbolId+1]; - memset( langElIndex, 0, sizeof(LangEl*)*(nextSymbolId+1) ); - for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) - langElIndex[lel->id] = lel; - - makeProdFsms(); - - /* Allocate the Runtime data now. 
Every PdaTable that we make - * will reference it, but it will be filled in after all the tables are - * built. */ - runtimeData = new RuntimeData; -} - -void Compiler::compile() -{ - beginProcessing(); - initKeyOps(); - - - /* Type declaration. */ - typeDeclaration(); - - /* Type resolving. */ - typeResolve(); - - makeTerminalWrappers(); - makeEofElements(); - - /* - * Parsers - */ - - /* Init the longest match data */ - initLongestMatchData(); - FsmGraph *fsmGraph = makeScanner(); - - if ( colm_log_compile ) { - printNameTree( fsmGraph->rootName ); - printNameIndex( fsmGraph->nameIndex ); - } - - prepGrammar(); - - /* Compile bytecode. */ - compileByteCode(); - - /* Make the reduced fsm. */ - RedFsmBuild reduce( sectionName, this, fsmGraph ); - redFsm = reduce.reduceMachine(); - - BstSet<LangEl*> parserEls; - collectParserEls( parserEls ); - - makeParser( parserEls ); - - /* Make the scanner tables. */ - fsmTables = redFsm->makeFsmTables(); - - /* Now that all parsers are built, make the global runtimeData. */ - makeRuntimeData(); - - /* - * All compilation is now complete. - */ - - /* Parse patterns and replacements. */ - parsePatterns(); -} - diff --git a/colm/ctinput.cc b/colm/ctinput.cc deleted file mode 100644 index b5086268..00000000 --- a/colm/ctinput.cc +++ /dev/null @@ -1,439 +0,0 @@ -/* - * Copyright 2007-2012 Adrian Thurston <thurston@complang.org> - */ - -/* This file is part of Colm. - * - * Colm is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * Colm is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with Colm; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include "parsedata.h" -#include "parsetree.h" -#include "input.h" -#include "fsmrun.h" -#include "debug.h" -#include "pool.h" - -#include <iostream> - -using std::cerr; -using std::endl; - -SourceFuncs patternFuncs; -SourceFuncs replFuncs; - -/* - * Pattern - */ - -SourceStream *newSourceStreamPattern( Pattern *pattern ) -{ - SourceStream *is = (SourceStream*)malloc(sizeof(SourceStream)); - memset( is, 0, sizeof(SourceStream) ); - is->handlesLine = true; - is->pattern = pattern; - is->patItem = pattern->list->head; - is->funcs = &patternFuncs; - return is; -} - -LangEl *inputStreamPatternGetLangEl( SourceStream *is, long *bindId, char **data, long *length ) -{ - LangEl *klangEl = is->patItem->factor->langEl; - *bindId = is->patItem->bindId; - *data = 0; - *length = 0; - is->line = is->patItem->loc.line; - - is->patItem = is->patItem->next; - is->offset = 0; - return klangEl; -} - -int inputStreamPatternGetData( SourceStream *is, int skip, char *dest, int length, int *copied ) -{ - *copied = 0; - - PatternItem *buf = is->patItem; - int offset = is->offset; - - while ( true ) { - if ( buf == 0 ) - return INPUT_EOD; - - if ( buf->type == PatternItem::FactorType ) - return INPUT_LANG_EL; - - if ( offset == 0 ) - is->line = buf->loc.line; - - assert ( buf->type == PatternItem::InputText ); - int avail = buf->data.length() - offset; - - if ( avail > 0 ) { - /* The source data from the current buffer. */ - char *src = &buf->data[offset]; - int slen = avail <= length ? avail : length; - - /* Need to skip? */ - if ( skip > 0 && slen <= skip ) { - /* Skipping the the whole source. */ - skip -= slen; - } - else { - /* Either skip is zero, or less than slen. Skip goes to zero. - * Some data left over, copy it. 
*/ - src += skip; - slen -= skip; - skip = 0; - - memcpy( dest, src, slen ) ; - *copied += slen; - break; - } - } - - buf = buf->next; - offset = 0; - } - - return INPUT_DATA; -} - -void inputStreamPatternBackup( SourceStream *is ) -{ - if ( is->patItem == 0 ) - is->patItem = is->pattern->list->tail; - else - is->patItem = is->patItem->prev; -} - -void inputStreamPatternPushBackBuf( SourceStream *is, RunBuf *runBuf ) -{ - char *data = runBuf->data + runBuf->offset; - long length = runBuf->length; - - if ( length == 0 ) - return; - - /* While pushing back past the current pattern item start. */ - while ( length > is->offset ) { - length -= is->offset; - if ( is->offset > 0 ) - assert( memcmp( is->patItem->data, data-length, is->offset ) == 0 ); - inputStreamPatternBackup( is ); - is->offset = is->patItem->data.length(); - } - - is->offset -= length; - assert( memcmp( &is->patItem->data[is->offset], data, length ) == 0 ); -} - -void inputStreamPatternUndoConsumeLangEl( SourceStream *is ) -{ - inputStreamPatternBackup( is ); - is->offset = is->patItem->data.length(); -} - -int inputStreamPatternConsumeData( SourceStream *is, int length ) -{ - debug( REALM_INPUT, "consuming %ld bytes\n", length ); - - int consumed = 0; - - while ( true ) { - if ( is->patItem == 0 ) - break; - - int avail = is->patItem->data.length() - is->offset; - - if ( length >= avail ) { - /* Read up to the end of the data. Advance the - * pattern item. 
*/ - is->patItem = is->patItem->next; - is->offset = 0; - - length -= avail; - consumed += avail; - - if ( length == 0 ) - break; - } - else { - is->offset += length; - consumed += length; - break; - } - } - - return consumed; -} - -int inputStreamPatternUndoConsumeData( SourceStream *is, const char *data, int length ) -{ - is->offset -= length; - return length; -} - -extern "C" void initPatternFuncs() -{ - memset( &patternFuncs, 0, sizeof(SourceFuncs) ); - - patternFuncs.getData = &inputStreamPatternGetData; - patternFuncs.consumeData = &inputStreamPatternConsumeData; - patternFuncs.undoConsumeData = &inputStreamPatternUndoConsumeData; - - patternFuncs.consumeLangEl = &inputStreamPatternGetLangEl; - patternFuncs.undoConsumeLangEl = &inputStreamPatternUndoConsumeLangEl; -} - - -/* - * Replacement - */ - -SourceStream *newSourceStreamRepl( Replacement *replacement ) -{ - SourceStream *is = (SourceStream*)malloc(sizeof(SourceStream)); - memset( is, 0, sizeof(SourceStream) ); - is->handlesLine = true; - is->replacement = replacement; - is->replItem = replacement->list->head; - is->funcs = &replFuncs; - return is; -} - -LangEl *inputStreamReplGetLangEl( SourceStream *is, long *bindId, char **data, long *length ) -{ - LangEl *klangEl = is->replItem->type == ReplItem::ExprType ? 
- is->replItem->langEl : is->replItem->factor->langEl; - *bindId = is->replItem->bindId; - - *data = 0; - *length = 0; - is->line = is->replItem->loc.line; - - if ( is->replItem->type == ReplItem::FactorType ) { - if ( is->replItem->factor->typeRef->pdaLiteral != 0 ) { - bool unusedCI; - prepareLitString( is->replItem->data, unusedCI, - is->replItem->factor->typeRef->pdaLiteral->token.data, - is->replItem->factor->typeRef->pdaLiteral->token.loc ); - - *data = is->replItem->data; - *length = is->replItem->data.length(); - } - } - - is->replItem = is->replItem->next; - is->offset = 0; - return klangEl; -} - -int inputStreamReplGetData( SourceStream *is, int skip, char *dest, int length, int *copied ) -{ - *copied = 0; - - ReplItem *buf = is->replItem; - int offset = is->offset; - - while ( true ) { - if ( buf == 0 ) - return INPUT_EOD; - - if ( buf->type == ReplItem::ExprType || buf->type == ReplItem::FactorType ) - return INPUT_LANG_EL; - - if ( offset == 0 ) - is->line = buf->loc.line; - - assert ( buf->type == ReplItem::InputText ); - int avail = buf->data.length() - offset; - - if ( avail > 0 ) { - /* The source data from the current buffer. */ - char *src = &buf->data[offset]; - int slen = avail <= length ? avail : length; - - /* Need to skip? */ - if ( skip > 0 && slen <= skip ) { - /* Skipping the the whole source. */ - skip -= slen; - } - else { - /* Either skip is zero, or less than slen. Skip goes to zero. - * Some data left over, copy it. 
*/ - src += skip; - slen -= skip; - skip = 0; - - memcpy( dest, src, slen ) ; - *copied += slen; - break; - } - } - - buf = buf->next; - offset = 0; - } - - return INPUT_DATA; -} - -void inputStreamReplBackup( SourceStream *is ) -{ - if ( is->replItem == 0 ) - is->replItem = is->replacement->list->tail; - else - is->replItem = is->replItem->prev; -} - -void inputStreamReplPushBackBuf( SourceStream *is, RunBuf *runBuf ) -{ - char *data = runBuf->data + runBuf->offset; - long length = runBuf->length; - - if ( colm_log_parse ) { - cerr << "push back data: "; - cerr.write( data, length ); - cerr << endl; - } - - if ( length == 0 ) - return; - - /* While pushing back past the current pattern item start. */ - while ( length > is->offset ) { - length -= is->offset; - if ( is->offset > 0 ) - assert( memcmp( is->replItem->data, data-length, is->offset ) == 0 ); - inputStreamReplBackup( is ); - is->offset = is->replItem->data.length(); - } - - is->offset -= length; - assert( memcmp( &is->replItem->data[is->offset], data, length ) == 0 ); -} - -void inputStreamReplUndoConsumeLangEl( SourceStream *is ) -{ - inputStreamReplBackup( is ); - is->offset = is->replItem->data.length(); -} - -int inputStreamReplConsumeData( SourceStream *is, int length ) -{ - int consumed = 0; - - while ( true ) { - if ( is->replItem == 0 ) - break; - - int avail = is->replItem->data.length() - is->offset; - - if ( length >= avail ) { - /* Read up to the end of the data. Advance the - * pattern item. 
*/ - is->replItem = is->replItem->next; - is->offset = 0; - - length -= avail; - consumed += avail; - - if ( length == 0 ) - break; - } - else { - is->offset += length; - consumed += length; - break; - } - } - - return consumed; -} - -int inputStreamReplUndoConsumeData( SourceStream *is, const char *data, int length ) -{ - is->offset -= length; - return length; -} - -extern "C" void initReplFuncs() -{ - memset( &replFuncs, 0, sizeof(SourceFuncs) ); - - replFuncs.getData = &inputStreamReplGetData; - replFuncs.consumeData = &inputStreamReplConsumeData; - replFuncs.undoConsumeData = &inputStreamReplUndoConsumeData; - - replFuncs.consumeLangEl = &inputStreamReplGetLangEl; - replFuncs.undoConsumeLangEl = &inputStreamReplUndoConsumeLangEl; -} - -void sendNamedLangEl( Program *prg, Tree **sp, PdaRun *pdaRun, FsmRun *fsmRun, InputStream *inputStream ) -{ - /* All three set by consumeLangEl. */ - long bindId; - char *data; - long length; - - LangEl *klangEl = consumeLangEl( inputStream, &bindId, &data, &length ); - - #ifdef COLM_LOG_PARSE - if ( colm_log_parse ) { - cerr << "named langEl: " << prg->rtd->lelInfo[klangEl->id].name << endl; - } - #endif - - /* Copy the token data. */ - Head *tokdata = 0; - if ( data != 0 ) - tokdata = stringAllocFull( prg, data, length ); - - Kid *input = makeTokenWithData( prg, pdaRun, fsmRun, inputStream, klangEl->id, tokdata ); - - incrementSteps( pdaRun ); - - ParseTree *parseTree = parseTreeAllocate( prg ); - parseTree->id = input->tree->id; - parseTree->flags |= PF_NAMED; - parseTree->shadow = input; - - if ( bindId > 0 ) - pushBinding( pdaRun, parseTree ); - - pdaRun->parseInput = parseTree; -} - -void initBindings( PdaRun *pdaRun ) -{ - /* Bindings are indexed at 1. Need a no-binding. */ - pdaRun->bindings = new Bindings; - pdaRun->bindings->push(0); -} - -void pushBinding( PdaRun *pdaRun, ParseTree *parseTree ) -{ - /* If the item is bound then store it in the bindings array. 
*/ - pdaRun->bindings->push( parseTree ); -} - -void popBinding( PdaRun *pdaRun, ParseTree *parseTree ) -{ - ParseTree *lastBound = pdaRun->bindings->top(); - if ( lastBound == parseTree ) - pdaRun->bindings->pop(); -} diff --git a/colm/debug.c b/colm/debug.c deleted file mode 100644 index 6d9689e0..00000000 --- a/colm/debug.c +++ /dev/null @@ -1,78 +0,0 @@ -/* - * Copyright 2010-2012 Adrian Thurston <thurston@complang.org> - */ - -/* This file is part of Colm. - * - * Colm is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * Colm is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Colm; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include <colm/debug.h> -#include <stdarg.h> -#include <stdio.h> -#include <stdlib.h> - -long colmActiveRealm = 0; -const char *colmRealmNames[REALMS] = - { - "BYTECODE", - "PARSE", - "MATCH", - "COMPILE", - "POOL", - "PRINT", - "INPUT", - "SCAN", - }; - -int _debug( long realm, const char *fmt, ... ) -{ - int result = 0; - if ( colmActiveRealm & realm ) { - /* Compute the index by shifting. */ - int ind = 0; - while ( (realm & 0x1) != 0x1 ) { - realm >>= 1; - ind += 1; - } - - fprintf( stderr, "%s: ", colmRealmNames[ind] ); - va_list args; - va_start( args, fmt ); - result = vfprintf( stderr, fmt, args ); - va_end( args ); - } - - return result; -} - -void fatal( const char *fmt, ... 
) -{ - va_list args; - fprintf( stderr, "fatal: " ); - va_start( args, fmt ); - vfprintf( stderr, fmt, args ); - va_end( args ); - exit(1); -} - -void message( const char *fmt, ... ) -{ - va_list args; - fprintf( stderr, "message: " ); - va_start( args, fmt ); - vfprintf( stderr, fmt, args ); - va_end( args ); -} diff --git a/colm/debug.h b/colm/debug.h deleted file mode 100644 index 3fd9bb8e..00000000 --- a/colm/debug.h +++ /dev/null @@ -1,58 +0,0 @@ -/* - * Copyright 2010-2012 Adrian Thurston <thurston@complang.org> - */ - -/* This file is part of Colm. - * - * Colm is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * Colm is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Colm; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#ifdef __cplusplus -extern "C" { -#endif - -#include "config.h" - -void fatal( const char *fmt, ... ); - -#ifdef DEBUG -#define debug( realm, ... ) _debug( realm, __VA_ARGS__ ) -#define check_realm( realm ) _check_realm( realm ) -#else -#define debug( realm, ... ) -#define check_realm( realm ) -#endif - -int _debug( long realm, const char *fmt, ... ); - -void message( const char *fmt, ... 
); - -#define REALM_BYTECODE 0x00000001 -#define REALM_PARSE 0x00000002 -#define REALM_MATCH 0x00000004 -#define REALM_COMPILE 0x00000008 -#define REALM_POOL 0x00000010 -#define REALM_PRINT 0x00000020 -#define REALM_INPUT 0x00000040 -#define REALM_SCAN 0x00000080 - -#define REALMS 32 - -extern long colmActiveRealm; -extern const char *colmRealmNames[REALMS]; - -#ifdef __cplusplus -} -#endif diff --git a/colm/declare.cc b/colm/declare.cc deleted file mode 100644 index 167fe050..00000000 --- a/colm/declare.cc +++ /dev/null @@ -1,383 +0,0 @@ -/* - * Copyright 2012 Adrian Thurston <thurston@complang.org> - */ - -/* This file is part of Colm. - * - * Colm is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * Colm is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Colm; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include "bytecode.h" -#include "parsedata.h" -#include "fsmrun.h" -#include <iostream> -#include <assert.h> - -LangEl *declareLangEl( Compiler *pd, Namespace *nspace, const String &data, LangEl::Type type ) -{ - /* If the id is already in the dict, it will be placed in last found. If - * it is not there then it will be inserted and last found will be set to it. */ - TypeMapEl *inDict = nspace->typeMap.find( data ); - if ( inDict != 0 ) - error() << "'" << data << "' already defined as something else" << endp; - - /* Language element not there. Make the new lang el and insert.. 
*/ - LangEl *langEl = new LangEl( nspace, data, type ); - TypeMapEl *typeMapEl = new TypeMapEl( data, langEl ); - nspace->typeMap.insert( typeMapEl ); - pd->langEls.append( langEl ); - - return langEl; -} - -/* Does not map the new language element. */ -LangEl *addLangEl( Compiler *pd, Namespace *nspace, const String &data, LangEl::Type type ) -{ - LangEl *langEl = new LangEl( nspace, data, type ); - pd->langEls.append( langEl ); - return langEl; -} - -void declareTypeAlias( Compiler *pd, Namespace *nspace, const String &data, TypeRef *typeRef ) -{ - /* If the id is already in the dict, it will be placed in last found. If - * it is not there then it will be inserted and last found will be set to it. */ - TypeMapEl *inDict = nspace->typeMap.find( data ); - if ( inDict != 0 ) - error() << "'" << data << "' already defined as something else" << endp; - - /* Language element not there. Make the new lang el and insert.. */ - TypeMapEl *typeMapEl = new TypeMapEl( data, typeRef ); - nspace->typeMap.insert( typeMapEl ); -} - -LangEl *findType( Compiler *pd, Namespace *nspace, const String &data ) -{ - /* If the id is already in the dict, it will be placed in last found. If - * it is not there then it will be inserted and last found will be set to it. */ - TypeMapEl *inDict = nspace->typeMap.find( data ); - - if ( inDict == 0 ) - error() << "'" << data << "' not declared as anything" << endp; - - return inDict->value; -} - - -void Compiler::declareBaseLangEls() -{ - /* Order here is important because we make assumptions about the inbuild - * language elements in the runtime. Note tokens are have identifiers set - * in an initial pass. */ - - /* Make a "_notoken" language element. This element is used when a - * generation action fails to generate anything, but there is reverse code - * that needs to be associated with a language element. This allows us to - * always associate reverse code with the first language element produced - * after a generation action. 
*/ - noTokenLangEl = declareLangEl( this, rootNamespace, "_notoken", LangEl::Term ); - noTokenLangEl->ignore = true; - - /* Make the "stream" language element */ - ptrLangEl = declareLangEl( this, rootNamespace, "ptr", LangEl::Term ); - boolLangEl = declareLangEl( this, rootNamespace, "bool", LangEl::Term ); - intLangEl = declareLangEl( this, rootNamespace, "int", LangEl::Term ); - strLangEl = declareLangEl( this, rootNamespace, "str", LangEl::Term ); - streamLangEl = declareLangEl( this, rootNamespace, "stream", LangEl::Term ); - inputLangEl = declareLangEl( this, rootNamespace, "accum_stream", LangEl::Term ); - ignoreLangEl = declareLangEl( this, rootNamespace, "il", LangEl::Term ); - - /* Make the EOF language element. */ - eofLangEl = 0; - - /* Make the "any" language element */ - anyLangEl = declareLangEl( this, rootNamespace, "any", LangEl::NonTerm ); -} - - -void Compiler::addProdRedObjectVar( ObjectDef *localFrame, LangEl *nonTerm ) -{ - UniqueType *prodNameUT = findUniqueType( TYPE_TREE, nonTerm ); - TypeRef *typeRef = new TypeRef( InputLoc(), prodNameUT ); - ObjField *el = new ObjField( InputLoc(), typeRef, "lhs" ); - - el->isLhsEl = true; - - initLocalInstructions( el ); - - localFrame->insertField( el->name, el ); -} - -void Compiler::addProdLHSLoad( Definition *prod, CodeVect &code, long &insertPos ) -{ - ObjField *lhsField = prod->redBlock->localFrame->findField("lhs"); - assert( lhsField != 0 ); - - CodeVect loads; - if ( lhsField->beenReferenced ) { - loads.append( IN_INIT_LHS_EL ); - loads.appendHalf( lhsField->offset ); - } - - code.insert( insertPos, loads ); - insertPos += loads.length(); -} - -void Compiler::addPushBackLHS( Definition *prod, CodeVect &code, long &insertPos ) -{ - CodeBlock *block = prod->redBlock; - - /* If the lhs tree is dirty then we will need to save off the old lhs - * before it gets modified. We want to avoid this for attribute - * modifications. The computation of dirtyTree should deal with this for - * us. 
*/ - ObjField *lhsField = block->localFrame->findField("lhs"); - assert( lhsField != 0 ); - - if ( lhsField->beenReferenced ) { - code.append( IN_STORE_LHS_EL ); - code.appendHalf( lhsField->offset ); - } -} - -void Compiler::addProdRHSVars( ObjectDef *localFrame, ProdElList *prodElList ) -{ - long position = 1; - for ( ProdElList::Iter rhsEl = *prodElList; rhsEl.lte(); rhsEl++, position++ ) { - if ( rhsEl->type == ProdEl::ReferenceType ) { - /* Use an offset of zero. For frame objects we compute the offset on - * demand. */ - String name( 8, "r%d", position ); - ObjField *el = new ObjField( InputLoc(), rhsEl->typeRef, name ); - rhsEl->objField = el; - - /* Right hand side elements are constant. */ - el->isConst = true; - el->isRhsEl = true; - - /* Only ever fetch for reading since they are constant. */ - el->inGetR = IN_GET_LOCAL_R; - - localFrame->insertField( el->name, el ); - } - } -} - -void Compiler::addProdRHSLoads( Definition *prod, CodeVect &code, long &insertPos ) -{ - CodeVect loads; - long elPos = 0; - for ( ProdElList::Iter rhsEl = *prod->prodElList; rhsEl.lte(); rhsEl++, elPos++ ) { - if ( rhsEl->type == ProdEl::ReferenceType ) { - if ( rhsEl->objField->beenReferenced ) { - loads.append ( IN_INIT_RHS_EL ); - loads.appendHalf( elPos ); - loads.appendHalf( rhsEl->objField->offset ); - } - } - } - - /* Insert and update the insert position. */ - code.insert( insertPos, loads ); - insertPos += loads.length(); -} - -void GenericType::declare( Compiler *pd, Namespace *nspace ) -{ - //std::cout << "generic " << g->name << std::endl; - - LangEl *langEl = declareLangEl( pd, nspace, name, LangEl::NonTerm ); - - /* Add one empty production. 
*/ - ProdElList *emptyList = new ProdElList; - //addProduction( g->loc, langEl, emptyList, false, 0, 0 ); - - { - LangEl *prodName = langEl; - assert( prodName->type == LangEl::NonTerm ); - - Definition *newDef = new Definition( InputLoc(), prodName, - emptyList, false, 0, - pd->prodList.length(), prodName->defList.length(), - Definition::Production ); - - prodName->defList.append( newDef ); - pd->prodList.append( newDef ); - newDef->predOf = 0; - } - - langEl->generic = this; - this->langEl = langEl; -} - -void Namespace::declare( Compiler *pd ) -{ - for ( GenericList::Iter g = genericList; g.lte(); g++ ) - g->declare( pd, this ); - - for ( LiteralDict::Iter l = literalDict; l.lte(); l++ ) { - if ( l->value->dupOf != 0 ) { - /* Duplicate of another. Use the lang el of that token. */ - assert( l->value->dupOf->tdLangEl != 0 ); - l->value->tdLangEl = l->value->dupOf->tdLangEl; - } - else { - if ( l->value->isZero ) { - l->value->tdLangEl = l->value->tokenRegion->ciLel; - assert( l->value->tokenRegion->ciLel != 0 ); - } - else { - /* Original. Create a token for the literal. 
*/ - LangEl *newLangEl = declareLangEl( pd, this, l->value->name, LangEl::Term ); - - newLangEl->lit = l->value->literal; - newLangEl->isLiteral = true; - newLangEl->tokenDef = l->value; - - l->value->tdLangEl = newLangEl; - - if ( l->value->noPreIgnore ) - newLangEl->noPreIgnore = true; - if ( l->value->noPostIgnore ) - newLangEl->noPostIgnore = true; - } - } - } - - for ( ContextDefList::Iter c = contextDefList; c.lte(); c++ ) { - LangEl *lel = declareLangEl( pd, this, c->name, LangEl::NonTerm ); - ProdElList *emptyList = new ProdElList; - //addProduction( c->context->loc, c->name, emptyList, false, 0, 0 ); - - { - LangEl *prodName = lel; - assert( prodName->type == LangEl::NonTerm ); - - Definition *newDef = new Definition( loc, prodName, - emptyList, false, 0, - pd->prodList.length(), prodName->defList.length(), - Definition::Production ); - - prodName->defList.append( newDef ); - pd->prodList.append( newDef ); - newDef->predOf = 0; - - /* If the token has the same name as the region it is in, then also - * insert it into the symbol map for the parent region. */ - if ( strcmp( c->name, this->name ) == 0 ) { - /* Insert the name into the top of the region stack after popping the - * region just created. We need it in the parent. */ - TypeMapEl *typeMapEl = new TypeMapEl( c->name, prodName ); - this->parentNamespace->typeMap.insert( typeMapEl ); - } - } - - c->context->lel = lel; - lel->contextDef = c->context; - lel->objectDef = c->context->contextObjDef; - } - - for ( TokenDefListNs::Iter t = tokenDefList; t.lte(); t++ ) { - /* Literals already taken care of. */ - if ( ! t->isLiteral ) { - if ( t->dupOf != 0 ) { - /* Duplicate of another. Use the lang el of that token. */ - assert( t->dupOf->tdLangEl != 0 ); - t->tdLangEl = t->dupOf->tdLangEl; - } - else { - /* Create the token. 
*/ - LangEl *tokEl = declareLangEl( pd, this, t->name, LangEl::Term ); - tokEl->ignore = t->ignore; - tokEl->transBlock = t->codeBlock; - tokEl->objectDef = t->objectDef; - tokEl->contextIn = t->contextIn; - tokEl->tokenDef = t; - - if ( t->noPreIgnore ) - tokEl->noPreIgnore = true; - if ( t->noPostIgnore ) - tokEl->noPostIgnore = true; - - t->tdLangEl = tokEl; - } - } - } - - for ( NtDefList::Iter n = ntDefList; n.lte(); n++ ) { - /* Get the language element. */ - LangEl *langEl = declareLangEl( pd, this, n->name, LangEl::NonTerm ); - //$$->langEl = langEl; - - /* Get the language element. */ - langEl->objectDef = n->objectDef; - langEl->reduceFirst = n->reduceFirst; - langEl->contextIn = n->contextIn; - langEl->defList.transfer( *n->defList ); - - for ( LelDefList::Iter d = langEl->defList; d.lte(); d++ ) { - d->prodName = langEl; - - if ( d->redBlock != 0 ) { - pd->addProdRedObjectVar( d->redBlock->localFrame, langEl ); - pd->addProdRHSVars( d->redBlock->localFrame, d->prodElList ); - } - - /* References to the reduce item. */ - } - } - - for ( TypeAliasList::Iter ta = typeAliasList; ta.lte(); ta++ ) - declareTypeAlias( pd, this, ta->name, ta->typeRef ); - - /* Go into child aliases. */ - for ( NamespaceVect::Iter c = childNamespaces; c.lte(); c++ ) - (*c)->declare( pd ); -} - -void Compiler::setPrecedence() -{ - for ( PredDeclList::Iter predDecl = predDeclList; predDecl != 0; predDecl++ ) { - predDecl->typeRef->lookupType( this ); - - LangEl *langEl = predDecl->typeRef->uniqueType->langEl; - langEl->predType = predDecl->predType; - langEl->predValue = predDecl->predValue; - } -} - -/* - * Type Declaration Root. - */ -void Compiler::typeDeclaration() -{ - /* These must be declared first, since the runtime assumes their identifiers. */ - declareBaseLangEls(); - - makeIgnoreCollectors(); - - rootNamespace->declare( this ); - - /* Fill any empty scanners with a default token. 
*/ - initEmptyScanners(); - - /* Create the default scanner which will return single characters for us - * when we have no other scanner */ - createDefaultScanner(); - - initUniqueTypes(); - - setPrecedence(); -} diff --git a/colm/defs.h.in b/colm/defs.h.in deleted file mode 100644 index 06a3f9df..00000000 --- a/colm/defs.h.in +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Copyright 2001 Adrian Thurston <thurston@complang.org> - */ - -/* This file is part of Ragel. - * - * Ragel is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * Ragel is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Ragel; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#ifndef _CONFIG_H -#define _CONFIG_H - -/* Configuration */ -#undef COLM_LOG -#undef COLM_LOG_BYTECODE -#undef COLM_LOG_PARSE -#undef COLM_LOG_MATCH -#undef COLM_LOG_COMPILE - -/* If COLM_LOG is defined then turn on all logging options. */ -#ifdef COLM_LOG -#define COLM_LOG_BYTECODE 1 -#define COLM_LOG_PARSE 1 -#define COLM_LOG_MATCH 1 -#define COLM_LOG_COMPILE 1 -#endif - -extern int colm_log_bytecode; -extern int colm_log_parse; -extern int colm_log_match; -extern int colm_log_compile; -extern int colm_log_conds; - -/* The size of `long', as computed by sizeof. 
*/ -#undef SIZEOF_LONG - -#endif /* _CONFIG_H */ diff --git a/colm/dotgen.cc b/colm/dotgen.cc deleted file mode 100644 index e4474958..00000000 --- a/colm/dotgen.cc +++ /dev/null @@ -1,113 +0,0 @@ -/* - * Copyright 2006-2012 Adrian Thurston <thurston@complang.org> - */ - -/* This file is part of Colm. - * - * Colm is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * Colm is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Colm; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - - -#include "global.h" -#include "parsedata.h" - -using namespace std; - - -void Compiler::writeTransList( PdaState *state ) -{ - for ( TransMap::Iter trans = state->transMap; trans.lte(); trans++ ) { - /* Write out the from and to states. */ - out << "\t" << state->stateNum << " -> " << trans->value->toState->stateNum; - - /* Begin the label. 
*/ - out << " [ label = \""; - long key = trans->key; - LangEl *lel = langElIndex[key]; - if ( lel != 0 ) - out << lel->name; - else - out << (char)key; - - if ( trans->value->actions.length() > 0 ) { - out << " / "; - for ( ActDataList::Iter act = trans->value->actions; act.lte(); act++ ) { - switch ( *act & 0x3 ) { - case 1: - out << "S(" << trans->value->actOrds[act.pos()] << ")"; - break; - case 2: { - out << "R(" << prodIdIndex[(*act >> 2)]->data << - ", " << trans->value->actOrds[act.pos()] << ")"; - break; - } - case 3: { - out << "SR(" << prodIdIndex[(*act >> 2)]->data << - ", " << trans->value->actOrds[act.pos()] << ")"; - break; - }} - if ( ! act.last() ) - out << ", "; - } - } - - out << "\" ];\n"; - } -} - -void Compiler::writeDotFile( PdaGraph *graph ) -{ - out << - "digraph " << parserName << " {\n" - " rankdir=LR;\n" - " ranksep=\"0\"\n" - " nodesep=\"0.25\"\n" - "\n"; - - /* Define the psuedo states. Transitions will be done after the states - * have been defined as either final or not final. */ - out << - " node [ shape = point ];\n"; - - for ( int i = 0; i < graph->entryStateSet.length(); i++ ) - out << "\tENTRY" << i << " [ label = \"\" ];\n"; - - out << - "\n" - " node [ shape = circle, fixedsize = true, height = 0.6 ];\n"; - - /* Walk the states. */ - for ( PdaStateList::Iter st = graph->stateList; st.lte(); st++ ) - out << " " << st->stateNum << " [ label = \"" << st->stateNum << "\" ];\n"; - - out << "\n"; - - /* Walk the states. */ - for ( PdaStateList::Iter st = graph->stateList; st.lte(); st++ ) - writeTransList( st ); - - /* Start state and other entry points. 
*/ - for ( PdaStateSet::Iter st = graph->entryStateSet; st.lte(); st++ ) - out << "\tENTRY" << st.pos() << " -> " << (*st)->stateNum << " [ label = \"\" ];\n"; - - out << - "}\n"; -} - -void Compiler::writeDotFile() -{ - writeDotFile( pdaGraph ); -} - diff --git a/colm/dotgen.h b/colm/dotgen.h deleted file mode 100644 index d05a2410..00000000 --- a/colm/dotgen.h +++ /dev/null @@ -1,51 +0,0 @@ -/* - * Copyright 2001-2012 Adrian Thurston <thurston@complang.org> - */ - -/* This file is part of Colm. - * - * Colm is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * Colm is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Colm; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#ifndef _GVDOTGEN_H -#define _GVDOTGEN_H - -#include <iostream> - -#if 0 - -class GraphvizDotGen : public CodeGenData -{ -public: - GraphvizDotGen( ostream &out ) : CodeGenData(out) { } - - /* Print an fsm to out stream. */ - void writeTransList( RedState *state ); - void writeDotFile( ); - - virtual void finishRagelDef(); - -private: - /* Writing labels and actions. 
*/ - std::ostream &ONCHAR( Key lowKey, Key highKey ); - std::ostream &TRANS_ACTION( RedState *fromState, RedTrans *trans ); - std::ostream &ACTION( RedAction *action ); - std::ostream &KEY( Key key ); -}; - -#endif - - -#endif /* _GVDOTGEN_H */ diff --git a/colm/exports.cc b/colm/exports.cc deleted file mode 100644 index f5153330..00000000 --- a/colm/exports.cc +++ /dev/null @@ -1,285 +0,0 @@ -/* - * Copyright 2006-2012 Adrian Thurston <thurston@complang.org> - */ - -/* This file is part of Colm. - * - * Colm is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * Colm is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Colm; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include "parsedata.h" -#include "fsmcodegen.h" -#include "redfsm.h" -#include "bstmap.h" -#include "fsmrun.h" -#include "debug.h" -#include <sstream> -#include <string> - -using std::ostream; -using std::ostringstream; -using std::string; -using std::cerr; -using std::endl; - -/* Write "namespace NAME { " for nspace and for each enclosing namespace, - * outermost first. Nothing is written for the default and root namespaces. */ -void Compiler::openNameSpace( ostream &out, Namespace *nspace ) -{ - if ( nspace == defaultNamespace || nspace == rootNamespace ) - return; - - openNameSpace( out, nspace->parentNamespace ); - out << "namespace " << nspace->name << " { "; -} - -/* Write one " }" for nspace and for each enclosing namespace, balancing the - * braces written by openNameSpace. The recursion has to go through - * closeNameSpace: recursing into openNameSpace would reopen the enclosing - * namespaces and leave nested namespaces unbalanced. */ -void Compiler::closeNameSpace( ostream &out, Namespace *nspace ) -{ - if ( nspace == defaultNamespace || nspace == rootNamespace ) - return; - - closeNameSpace( out, nspace->parentNamespace ); - out << " }"; -} - -void Compiler::generateExports() -{ - ostream 
&out = *outStream; - - out << - "#ifndef _EXPORTS_H\n" - "#define _EXPORTS_H\n" - "\n" - "#include <colm/colm.h>\n" - "#include <string>\n" - "\n"; - - out << - "inline void appendString( ColmPrintArgs *args, const char *data, int length )\n" - "{\n" - " std::string *str = (std::string*)args->arg;\n" - " *str += std::string( data, length );\n" - "}\n" - "\n"; - - out << - "inline std::string printTreeStr( ColmProgram *prg, ColmTree *tree, bool trim )\n" - "{\n" - " std::string str;\n" - " ColmPrintArgs printArgs = { &str, 1, 0, trim, &appendString, \n" - " &printNull, &printTermTree, &printNull };\n" - " printTreeArgs( prg, vm_root(prg), &printArgs, tree );\n" - " return str;\n" - "}\n" - "\n"; - - /* Declare. */ - for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) { - if ( lel->isEOF ) { - out << "// isEOF\n"; - continue; - } - if ( lel->tokenDef != 0 && lel->tokenDef->tokenRegion != 0 && lel->tokenDef->tokenRegion->isTokenOnly ) { - out << "// isTokenOnly\n"; - continue; - } - if ( lel->tokenDef != 0 && lel->tokenDef->tokenRegion != 0 && lel->tokenDef->tokenRegion->isIgnoreOnly ) { - out << "// isIgnoreOnly\n"; - continue; - } - if ( lel->tokenDef != 0 && lel->tokenDef->tokenRegion != 0 && lel->tokenDef->tokenRegion->isCiOnly ) { - out << "// isCiOnly\n"; - continue; - } - if ( lel->ciRegion != 0 ) { - out << "// ciRegion != 0\n"; - continue; - } - openNameSpace( out, lel->nspace ); - out << "struct " << lel->fullName << ";"; - closeNameSpace( out, lel->nspace ); - out << "\n"; - } - - /* Class definitions. 
*/ - for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) { - if ( lel->isEOF ) { - /* Label the skip with its actual cause, as the declaration loop does. */ - out << "// isEOF\n"; - continue; - } - if ( lel->tokenDef != 0 && lel->tokenDef->tokenRegion != 0 && lel->tokenDef->tokenRegion->isTokenOnly ) { - out << "// isTokenOnly\n"; - continue; - } - if ( lel->tokenDef != 0 && lel->tokenDef->tokenRegion != 0 && lel->tokenDef->tokenRegion->isIgnoreOnly ) { - out << "// isIgnoreOnly\n"; - continue; - } - if ( lel->tokenDef != 0 && lel->tokenDef->tokenRegion != 0 && lel->tokenDef->tokenRegion->isCiOnly ) { - out << "// isCiOnly\n"; - continue; - } - if ( lel->ciRegion != 0 ) { - out << "// ciRegion != 0\n"; - continue; - } - - openNameSpace( out, lel->nspace ); - out << "struct " << lel->fullName << "\n"; - out << "{\n"; - out << " std::string text() { return printTreeStr( prg, tree, true ); }\n"; - out << " std::string text_notrim() { return printTreeStr( prg, tree, false ); }\n"; - out << " operator ColmTree *() { return tree; }\n"; - out << " ColmProgram *prg;\n"; - out << " ColmTree *tree;\n"; - - if ( mainReturnUT != 0 && mainReturnUT->langEl == lel ) { - out << " " << lel->fullName << "( ColmProgram *prg ) : prg(prg), tree(returnVal(prg)) {}\n"; - } - out << " " << lel->fullName << "( ColmProgram *prg, ColmTree *tree ) : prg(prg), tree(tree) {}\n"; - - if ( lel->objectDef != 0 && lel->objectDef->objFieldList != 0 ) { - ObjFieldList *objFieldList = lel->objectDef->objFieldList; - for ( ObjFieldList::Iter ofi = *objFieldList; ofi.lte(); ofi++ ) { - ObjField *field = ofi->value; - if ( field->useOffset && field->typeRef != 0 ) { - UniqueType *ut = field->typeRef->lookupType( this ); - - if ( ut != 0 && ut->typeId == TYPE_TREE ) { - out << " " << ut->langEl->refName << " " << field->name << "();\n"; - } - } - - if ( field->isRhsGet ) { - UniqueType *ut = field->typeRef->lookupType( this ); - - if ( ut != 0 && ut->typeId == TYPE_TREE ) { - out << " " << ut->langEl->refName << " " << field->name << "();\n"; - } - } - } - } - - if 
lel->isRepeat ) { - out << " " << "int end() { return repeatEnd( tree ); }\n"; - out << " " << lel->refName << " next();\n"; - out << " " << lel->repeatOf->refName << " value();\n"; - } - - if ( lel->isList ) { - out << " " << "int last() { return listLast( tree ); }\n"; - out << " " << lel->refName << " next();\n"; - out << " " << lel->repeatOf->refName << " value();\n"; - } - out << "};"; - closeNameSpace( out, lel->nspace ); - out << "\n"; - } - - for ( ObjFieldList::Iter of = *globalObjectDef->objFieldList; of.lte(); of++ ) { - ObjField *field = of->value; - if ( field->isExport ) { - UniqueType *ut = field->typeRef->lookupType(this); - if ( ut != 0 && ut->typeId == TYPE_TREE ) { - out << ut->langEl->refName << " " << field->name << "( ColmProgram *prg );\n"; - } - } - } - - out << "#endif\n"; -} - -void Compiler::generateExportsImpl() -{ - ostream &out = *outStream; - - if ( gblExportTo != 0 ) { - out << "#include \"" << gblExportTo << "\"\n"; - } - - /* Function implementations. */ - for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) { - if ( lel->objectDef != 0 && lel->objectDef->objFieldList != 0 ) { - ObjFieldList *objFieldList = lel->objectDef->objFieldList; - for ( ObjFieldList::Iter ofi = *objFieldList; ofi.lte(); ofi++ ) { - ObjField *field = ofi->value; - if ( field->useOffset && field->typeRef != 0 ) { - UniqueType *ut = field->typeRef->lookupType( this ); - - if ( ut != 0 && ut->typeId == TYPE_TREE ) { - out << ut->langEl->refName << " " << lel->declName << "::" << field->name << - "() { return " << ut->langEl->refName << - "( prg, getAttr( tree, " << field->offset << ") ); }\n"; - } - } - - if ( field->isRhsGet ) { - UniqueType *ut = field->typeRef->lookupType( this ); - - if ( ut != 0 && ut->typeId == TYPE_TREE ) { - out << ut->langEl->refName << " " << lel->declName << "::" << field->name << - "() { static int a[] = {"; - - /* Need to place the array computing the val. 
*/ - out << field->rhsVal.length(); - for ( Vector<RhsVal>::Iter rg = field->rhsVal; rg.lte(); rg++ ) { - out << ", " << rg->prodNum; - out << ", " << rg->childNum; - } - - out << "}; return " << ut->langEl->refName << - "( prg, getRhsVal( prg, tree, a ) ); }\n"; - } - } - } - } - - if ( lel->isRepeat ) { - out << lel->refName << " " << lel->declName << "::" << " next" - "() { return " << lel->refName << - "( prg, getRepeatNext( tree ) ); }\n"; - - out << lel->repeatOf->refName << " " << lel->declName << "::" << " value" - "() { return " << lel->repeatOf->refName << - "( prg, getRepeatVal( tree ) ); }\n"; - - } - - if ( lel->isList ) { - out << lel->refName << " " << lel->declName << "::" << " next" - "() { return " << lel->refName << - "( prg, getRepeatNext( tree ) ); }\n"; - - out << lel->repeatOf->refName << " " << lel->declName << "::" << " value" - "() { return " << lel->repeatOf->refName << - "( prg, getRepeatVal( tree ) ); }\n"; - } - } - - out << "\n"; - - for ( ObjFieldList::Iter of = *globalObjectDef->objFieldList; of.lte(); of++ ) { - ObjField *field = of->value; - if ( field->isExport ) { - UniqueType *ut = field->typeRef->lookupType(this); - if ( ut != 0 && ut->typeId == TYPE_TREE ) { - out << - ut->langEl->refName << " " << field->name << "(ColmProgram *prg)\n" - "{ return " << ut->langEl->refName << "( prg, getGlobal( prg, " << - field->offset << ") ); }\n"; - } - } - } -} - - diff --git a/colm/fsmap.cc b/colm/fsmap.cc deleted file mode 100644 index a4c072b6..00000000 --- a/colm/fsmap.cc +++ /dev/null @@ -1,856 +0,0 @@ -/* - * Copyright 2006-2012 Adrian Thurston <thurston@complang.org> - */ - -/* This file is part of Colm. - * - * Colm is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. 
- * - * Colm is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Colm; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include "config.h" -#include "defs.h" -#include "fsmgraph.h" -#include <iostream> - -using std::cerr; -using std::endl; - -CondData *condData = 0; -KeyOps *keyOps = 0; - -/* Insert an action into an action table. */ -void ActionTable::setAction( int ordering, Action *action ) -{ - /* Multi-insert in case specific instances of an action appear in a - * transition more than once. */ - insertMulti( ordering, action ); -} - -/* Set all the action from another action table in this table. */ -void ActionTable::setActions( const ActionTable &other ) -{ - for ( ActionTable::Iter action = other; action.lte(); action++ ) - insertMulti( action->key, action->value ); -} - -void ActionTable::setActions( int *orderings, Action **actions, int nActs ) -{ - for ( int a = 0; a < nActs; a++ ) - insertMulti( orderings[a], actions[a] ); -} - -bool ActionTable::hasAction( Action *action ) -{ - for ( int a = 0; a < length(); a++ ) { - if ( data[a].value == action ) - return true; - } - return false; -} - -/* Insert an action into an action table. */ -void LmActionTable::setAction( int ordering, TokenDef *action ) -{ - /* Multi-insert in case specific instances of an action appear in a - * transition more than once. */ - insertMulti( ordering, action ); -} - -/* Set all the action from another action table in this table. 
*/ -void LmActionTable::setActions( const LmActionTable &other ) -{ - for ( LmActionTable::Iter action = other; action.lte(); action++ ) - insertMulti( action->key, action->value ); -} - -void ErrActionTable::setAction( int ordering, Action *action, int transferPoint ) -{ - insertMulti( ErrActionTableEl( action, ordering, transferPoint ) ); -} - -void ErrActionTable::setActions( const ErrActionTable &other ) -{ - for ( ErrActionTable::Iter act = other; act.lte(); act++ ) - insertMulti( ErrActionTableEl( act->action, act->ordering, act->transferPoint ) ); -} - -/* Insert a priority into this priority table. Looks out for priorities on - * duplicate keys. */ -void PriorTable::setPrior( int ordering, PriorDesc *desc ) -{ - PriorEl *lastHit = 0; - PriorEl *insed = insert( PriorEl(ordering, desc), &lastHit ); - if ( insed == 0 ) { - /* This already has a priority on the same key as desc. Overwrite the - * priority if the ordering is larger (later in time). */ - if ( ordering >= lastHit->ordering ) - *lastHit = PriorEl( ordering, desc ); - } -} - -/* Set all the priorities from a priorTable in this table. */ -void PriorTable::setPriors( const PriorTable &other ) -{ - /* Loop src priorities once to overwrite duplicates. */ - PriorTable::Iter priorIt = other; - for ( ; priorIt.lte(); priorIt++ ) - setPrior( priorIt->ordering, priorIt->desc ); -} - -/* Set the priority of starting transitions. Isolates the start state so it has - * no other entry points, then sets the priorities of all the transitions out - * of the start state. If the start state is final, then the outPrior of the - * start state is also set. The idea is that a machine that accepts the null - * string can still specify the starting trans prior for when it accepts the - * null word. */ -void FsmGraph::startFsmPrior( int ordering, PriorDesc *prior ) -{ - /* Make sure the start state has no other entry points. */ - isolateStartState(); - - /* Walk all transitions out of the start state. 
*/ - for ( TransList::Iter trans = startState->outList; trans.lte(); trans++ ) { - if ( trans->toState != 0 ) - trans->priorTable.setPrior( ordering, prior ); - } -} - -/* Set the priority of all transitions in a graph. Walks all transition lists - * and all def transitions. */ -void FsmGraph::allTransPrior( int ordering, PriorDesc *prior ) -{ - /* Walk the list of all states. */ - for ( StateList::Iter state = stateList; state.lte(); state++ ) { - /* Walk the out list of the state. */ - for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) { - if ( trans->toState != 0 ) - trans->priorTable.setPrior( ordering, prior ); - } - } -} - -/* Set the priority of all transitions that go into a final state. Note that if - * any entry states are final, we will not be setting the priority of any - * transitions that may go into those states in the future. The graph does not - * support pending in transitions in the same way pending out transitions are - * supported. */ -void FsmGraph::finishFsmPrior( int ordering, PriorDesc *prior ) -{ - /* Walk all final states. */ - for ( StateSet::Iter state = finStateSet; state.lte(); state++ ) { - /* Walk all in transitions of the final state. */ - for ( TransInList::Iter trans = (*state)->inList; trans.lte(); trans++ ) - trans->priorTable.setPrior( ordering, prior ); - } -} - -/* Set the priority of any future out transitions that may be made going out of - * this state machine. */ -void FsmGraph::leaveFsmPrior( int ordering, PriorDesc *prior ) -{ - /* Set priority in all final states. */ - for ( StateSet::Iter state = finStateSet; state.lte(); state++ ) - (*state)->outPriorTable.setPrior( ordering, prior ); -} - - -/* Set actions to execute on starting transitions. Isolates the start state - * so it has no other entry points, then adds to the transition functions - * of all the transitions out of the start state. If the start state is final, - * then the func is also added to the start state's out func list. 
The idea is - * that a machine that accepts the null string can execute a start func when it - * matches the null word, which can only be done when leaving the start/final - * state. */ -void FsmGraph::startFsmAction( int ordering, Action *action ) -{ - /* Make sure the start state has no other entry points. */ - isolateStartState(); - - /* Walk the start state's transitions, setting functions. */ - for ( TransList::Iter trans = startState->outList; trans.lte(); trans++ ) { - if ( trans->toState != 0 ) - trans->actionTable.setAction( ordering, action ); - } -} - -/* Set functions to execute on all transitions. Walks the out lists of all - * states. */ -void FsmGraph::allTransAction( int ordering, Action *action ) -{ - /* Walk all states. */ - for ( StateList::Iter state = stateList; state.lte(); state++ ) { - /* Walk the out list of the state. */ - for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) { - if ( trans->toState != 0 ) - trans->actionTable.setAction( ordering, action ); - } - } -} - -/* Specify functions to execute upon entering final states. If the start state - * is final we can't really specify a function to execute upon entering that - * final state the first time. So function really means whenever entering a - * final state from within the same fsm. */ -void FsmGraph::finishFsmAction( int ordering, Action *action ) -{ - /* Walk all final states. */ - for ( StateSet::Iter state = finStateSet; state.lte(); state++ ) { - /* Walk the final state's in list. */ - for ( TransInList::Iter trans = (*state)->inList; trans.lte(); trans++ ) - trans->actionTable.setAction( ordering, action ); - } -} - -/* Add functions to any future out transitions that may be made going out of - * this state machine. */ -void FsmGraph::leaveFsmAction( int ordering, Action *action ) -{ - /* Insert the action in the outActionTable of all final states. 
*/ - for ( StateSet::Iter state = finStateSet; state.lte(); state++ ) - (*state)->outActionTable.setAction( ordering, action ); -} - -/* Add functions to the longest match action table for constructing scanners. */ -void FsmGraph::longMatchAction( int ordering, TokenDef *lmPart ) -{ - /* Walk all final states. */ - for ( StateSet::Iter state = finStateSet; state.lte(); state++ ) { - /* Walk the final state's in list. */ - for ( TransInList::Iter trans = (*state)->inList; trans.lte(); trans++ ) - trans->lmActionTable.setAction( ordering, lmPart ); - } -} - -void FsmGraph::fillGaps( FsmState *state ) -{ - if ( state->outList.length() == 0 ) { - /* Add the range on the lower and upper bound. */ - attachNewTrans( state, 0, keyOps->minKey, keyOps->maxKey ); - } - else { - TransList srcList; - srcList.transfer( state->outList ); - - /* Check for a gap at the beginning. */ - TransList::Iter trans = srcList, next; - if ( keyOps->minKey < trans->lowKey ) { - /* Make the high key and append. */ - Key highKey = trans->lowKey; - highKey.decrement(); - - attachNewTrans( state, 0, keyOps->minKey, highKey ); - } - - /* Write the transition. */ - next = trans.next(); - state->outList.append( trans ); - - /* Keep the last high end. */ - Key lastHigh = trans->highKey; - - /* Loop each source range. */ - for ( trans = next; trans.lte(); trans = next ) { - /* Make the next key following the last range. */ - Key nextKey = lastHigh; - nextKey.increment(); - - /* Check for a gap from last up to here. */ - if ( nextKey < trans->lowKey ) { - /* Make the high end of the range that fills the gap. */ - Key highKey = trans->lowKey; - highKey.decrement(); - - attachNewTrans( state, 0, nextKey, highKey ); - } - - /* Reduce the transition. If it reduced to anything then add it. */ - next = trans.next(); - state->outList.append( trans ); - - /* Keep the last high end. */ - lastHigh = trans->highKey; - } - - /* Now check for a gap on the end to fill. 
*/ - if ( lastHigh < keyOps->maxKey ) { - /* Get a copy of the default. */ - lastHigh.increment(); - - attachNewTrans( state, 0, lastHigh, keyOps->maxKey ); - } - } -} - -void FsmGraph::setErrorAction( FsmState *state, int ordering, Action *action ) -{ - /* Fill any gaps in the out list with an error transition. */ - fillGaps( state ); - - /* Set error transitions in the transitions that go to error. */ - for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) { - if ( trans->toState == 0 ) - trans->actionTable.setAction( ordering, action ); - } -} - -void FsmGraph::setErrorActions( FsmState *state, const ActionTable &other ) -{ - /* Fill any gaps in the out list with an error transition. */ - fillGaps( state ); - - /* Set error transitions in the transitions that go to error. */ - for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) { - if ( trans->toState == 0 ) - trans->actionTable.setActions( other ); - } -} - - -/* Give a target state for error transitions. */ -void FsmGraph::setErrorTarget( FsmState *state, FsmState *target, int *orderings, - Action **actions, int nActs ) -{ - /* Fill any gaps in the out list with an error transition. */ - fillGaps( state ); - - /* Set error target in the transitions that go to error. */ - for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) { - if ( trans->toState == 0 ) { - /* The trans goes to error, redirect it. */ - redirectErrorTrans( trans->fromState, target, trans ); - trans->actionTable.setActions( orderings, actions, nActs ); - } - } -} - -void FsmGraph::transferErrorActions( FsmState *state, int transferPoint ) -{ - for ( int i = 0; i < state->errActionTable.length(); ) { - ErrActionTableEl *act = state->errActionTable.data + i; - if ( act->transferPoint == transferPoint ) { - /* Transfer the error action and remove it. 
*/ - setErrorAction( state, act->ordering, act->action ); - state->errActionTable.vremove( i ); - } - else { - /* Not transfering and deleting, skip over the item. */ - i += 1; - } - } -} - -/* Set error actions in the start state. */ -void FsmGraph::startErrorAction( int ordering, Action *action, int transferPoint ) -{ - /* Make sure the start state has no other entry points. */ - isolateStartState(); - - /* Add the actions. */ - startState->errActionTable.setAction( ordering, action, transferPoint ); -} - -/* Set error actions in all states where there is a transition out. */ -void FsmGraph::allErrorAction( int ordering, Action *action, int transferPoint ) -{ - /* Insert actions in the error action table of all states. */ - for ( StateList::Iter state = stateList; state.lte(); state++ ) - state->errActionTable.setAction( ordering, action, transferPoint ); -} - -/* Set error actions in final states. */ -void FsmGraph::finalErrorAction( int ordering, Action *action, int transferPoint ) -{ - /* Add the action to the error table of final states. */ - for ( StateSet::Iter state = finStateSet; state.lte(); state++ ) - (*state)->errActionTable.setAction( ordering, action, transferPoint ); -} - -void FsmGraph::notStartErrorAction( int ordering, Action *action, int transferPoint ) -{ - for ( StateList::Iter state = stateList; state.lte(); state++ ) { - if ( state != startState ) - state->errActionTable.setAction( ordering, action, transferPoint ); - } -} - -void FsmGraph::notFinalErrorAction( int ordering, Action *action, int transferPoint ) -{ - for ( StateList::Iter state = stateList; state.lte(); state++ ) { - if ( ! state->isFinState() ) - state->errActionTable.setAction( ordering, action, transferPoint ); - } -} - -/* Set error actions in the states that have transitions into a final state. 
*/ -void FsmGraph::middleErrorAction( int ordering, Action *action, int transferPoint ) -{ - /* Isolate the start state in case it is reachable from in inside the - * machine, in which case we don't want it set. */ - for ( StateList::Iter state = stateList; state.lte(); state++ ) { - if ( state != startState && ! state->isFinState() ) - state->errActionTable.setAction( ordering, action, transferPoint ); - } -} - -/* Set EOF actions in the start state. */ -void FsmGraph::startEOFAction( int ordering, Action *action ) -{ - /* Make sure the start state has no other entry points. */ - isolateStartState(); - - /* Add the actions. */ - startState->eofActionTable.setAction( ordering, action ); -} - -/* Set EOF actions in all states where there is a transition out. */ -void FsmGraph::allEOFAction( int ordering, Action *action ) -{ - /* Insert actions in the EOF action table of all states. */ - for ( StateList::Iter state = stateList; state.lte(); state++ ) - state->eofActionTable.setAction( ordering, action ); -} - -/* Set EOF actions in final states. */ -void FsmGraph::finalEOFAction( int ordering, Action *action ) -{ - /* Add the action to the error table of final states. */ - for ( StateSet::Iter state = finStateSet; state.lte(); state++ ) - (*state)->eofActionTable.setAction( ordering, action ); -} - -void FsmGraph::notStartEOFAction( int ordering, Action *action ) -{ - for ( StateList::Iter state = stateList; state.lte(); state++ ) { - if ( state != startState ) - state->eofActionTable.setAction( ordering, action ); - } -} - -void FsmGraph::notFinalEOFAction( int ordering, Action *action ) -{ - for ( StateList::Iter state = stateList; state.lte(); state++ ) { - if ( ! state->isFinState() ) - state->eofActionTable.setAction( ordering, action ); - } -} - -/* Set EOF actions in the states that have transitions into a final state. 
*/ -void FsmGraph::middleEOFAction( int ordering, Action *action ) -{ - /* Set the actions in all states that are not the start state and not final. */ - for ( StateList::Iter state = stateList; state.lte(); state++ ) { - if ( state != startState && ! state->isFinState() ) - state->eofActionTable.setAction( ordering, action ); - } -} - -/* - * Set To State Actions. - */ - -/* Set to state actions in the start state. */ -void FsmGraph::startToStateAction( int ordering, Action *action ) -{ - /* Make sure the start state has no other entry points. */ - isolateStartState(); - startState->toStateActionTable.setAction( ordering, action ); -} - -/* Set to state actions in all states. */ -void FsmGraph::allToStateAction( int ordering, Action *action ) -{ - /* Insert the action on all states. */ - for ( StateList::Iter state = stateList; state.lte(); state++ ) - state->toStateActionTable.setAction( ordering, action ); -} - -/* Set to state actions in final states. */ -void FsmGraph::finalToStateAction( int ordering, Action *action ) -{ - /* Add the action to the error table of final states. */ - for ( StateSet::Iter state = finStateSet; state.lte(); state++ ) - (*state)->toStateActionTable.setAction( ordering, action ); -} - -void FsmGraph::notStartToStateAction( int ordering, Action *action ) -{ - for ( StateList::Iter state = stateList; state.lte(); state++ ) { - if ( state != startState ) - state->toStateActionTable.setAction( ordering, action ); - } -} - -void FsmGraph::notFinalToStateAction( int ordering, Action *action ) -{ - for ( StateList::Iter state = stateList; state.lte(); state++ ) { - if ( ! state->isFinState() ) - state->toStateActionTable.setAction( ordering, action ); - } -} - -/* Set to state actions in states that are not final and not the start state. */ -void FsmGraph::middleToStateAction( int ordering, Action *action ) -{ - /* Set the action in all states that are not the start state and not final. 
*/ - for ( StateList::Iter state = stateList; state.lte(); state++ ) { - if ( state != startState && ! state->isFinState() ) - state->toStateActionTable.setAction( ordering, action ); - } -} - -/* - * Set From State Actions. - */ - -void FsmGraph::startFromStateAction( int ordering, Action *action ) -{ - /* Make sure the start state has no other entry points. */ - isolateStartState(); - startState->fromStateActionTable.setAction( ordering, action ); -} - -void FsmGraph::allFromStateAction( int ordering, Action *action ) -{ - /* Insert the action on all states. */ - for ( StateList::Iter state = stateList; state.lte(); state++ ) - state->fromStateActionTable.setAction( ordering, action ); -} - -void FsmGraph::finalFromStateAction( int ordering, Action *action ) -{ - /* Add the action to the error table of final states. */ - for ( StateSet::Iter state = finStateSet; state.lte(); state++ ) - (*state)->fromStateActionTable.setAction( ordering, action ); -} - -void FsmGraph::notStartFromStateAction( int ordering, Action *action ) -{ - for ( StateList::Iter state = stateList; state.lte(); state++ ) { - if ( state != startState ) - state->fromStateActionTable.setAction( ordering, action ); - } -} - -void FsmGraph::notFinalFromStateAction( int ordering, Action *action ) -{ - for ( StateList::Iter state = stateList; state.lte(); state++ ) { - if ( ! state->isFinState() ) - state->fromStateActionTable.setAction( ordering, action ); - } -} - -void FsmGraph::middleFromStateAction( int ordering, Action *action ) -{ - /* Set the action in all states that are not the start state and not final. */ - for ( StateList::Iter state = stateList; state.lte(); state++ ) { - if ( state != startState && ! state->isFinState() ) - state->fromStateActionTable.setAction( ordering, action ); - } -} - -/* Shift the function ordering of the start transitions to start - * at fromOrder and increase in units of 1. Useful before staring. - * Returns the maximum number of order numbers used. 
*/ -int FsmGraph::shiftStartActionOrder( int fromOrder ) -{ - int maxUsed = 0; - - /* Walk the start state's transitions, shifting function ordering. */ - for ( TransList::Iter trans = startState->outList; trans.lte(); trans++ ) { - /* Walk the function data for the transition and set the keys to - * increasing values starting at fromOrder. */ - int curFromOrder = fromOrder; - ActionTable::Iter action = trans->actionTable; - for ( ; action.lte(); action++ ) - action->key = curFromOrder++; - - /* Keep track of the max number of orders used. */ - if ( curFromOrder - fromOrder > maxUsed ) - maxUsed = curFromOrder - fromOrder; - } - - return maxUsed; -} - -/* Remove all priorities. */ -void FsmGraph::clearAllPriorities() -{ - for ( StateList::Iter state = stateList; state.lte(); state++ ) { - /* Clear out priority data. */ - state->outPriorTable.empty(); - - /* Clear transition data from the out transitions. */ - for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) - trans->priorTable.empty(); - } -} - -/* Zeros out the function ordering keys. This may be called before minimization - * when it is known that no more fsm operations are going to be done. This - * will achieve greater reduction as states will not be separated on the basis - * of function ordering. */ -void FsmGraph::nullActionKeys( ) -{ - /* For each state... */ - for ( StateList::Iter state = stateList; state.lte(); state++ ) { - /* Walk the transitions for the state. */ - for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) { - /* Walk the action table for the transition. */ - for ( ActionTable::Iter action = trans->actionTable; - action.lte(); action++ ) - action->key = 0; - - /* Walk the action table for the transition. */ - for ( LmActionTable::Iter action = trans->lmActionTable; - action.lte(); action++ ) - action->key = 0; - } - - /* Null the action keys of the to state action table. 
*/ - for ( ActionTable::Iter action = state->toStateActionTable; - action.lte(); action++ ) - action->key = 0; - - /* Null the action keys of the from state action table. */ - for ( ActionTable::Iter action = state->fromStateActionTable; - action.lte(); action++ ) - action->key = 0; - - /* Null the action keys of the out transtions. */ - for ( ActionTable::Iter action = state->outActionTable; - action.lte(); action++ ) - action->key = 0; - - /* Null the action keys of the error action table. */ - for ( ErrActionTable::Iter action = state->errActionTable; - action.lte(); action++ ) - action->ordering = 0; - - /* Null the action keys eof action table. */ - for ( ActionTable::Iter action = state->eofActionTable; - action.lte(); action++ ) - action->key = 0; - } -} - -/* Walk the list of states and verify that non final states do not have out - * data, that all stateBits are cleared, and that there are no states with - * zero foreign in transitions. */ -void FsmGraph::verifyStates() -{ - for ( StateList::Iter state = stateList; state.lte(); state++ ) { - /* Non final states should not have leaving data. */ - if ( ! (state->stateBits & SB_ISFINAL) ) { - assert( state->outActionTable.length() == 0 ); - assert( state->outCondSet.length() == 0 ); - assert( state->outPriorTable.length() == 0 ); - } - - /* Data used in algorithms should be cleared. */ - assert( (state->stateBits & SB_BOTH) == 0 ); - assert( state->foreignInTrans > 0 ); - } -} - -/* Compare two transitions according to their relative priority. Since the - * base transition has no priority associated with it, the default is to - * return equal. */ -int FsmGraph::comparePrior( const PriorTable &priorTable1, const PriorTable &priorTable2 ) -{ - /* Looking for differing priorities on same keys. Need to concurrently - * scan the priority lists. */ - PriorTable::Iter pd1 = priorTable1; - PriorTable::Iter pd2 = priorTable2; - while ( pd1.lte() && pd2.lte() ) { - /* Check keys. 
*/ - if ( pd1->desc->key < pd2->desc->key ) - pd1.increment(); - else if ( pd1->desc->key > pd2->desc->key ) - pd2.increment(); - /* Keys are the same, check priorities. */ - else if ( pd1->desc->priority < pd2->desc->priority ) - return -1; - else if ( pd1->desc->priority > pd2->desc->priority ) - return 1; - else { - /* Keys and priorities are equal, advance both. */ - pd1.increment(); - pd2.increment(); - } - } - - /* No differing priorities on the same key. */ - return 0; -} - -/* Compares two transitions according to priority and functions. Pointers - * should not be null. Does not consider to state or from state. Compare two - * transitions according to the data contained in the transitions. Data means - * any properties added to user transitions that may differentiate them. Since - * the base transition has no data, the default is to return equal. */ -int FsmGraph::compareTransData( FsmTrans *trans1, FsmTrans *trans2 ) -{ - /* Compare the prior table. */ - int cmpRes = CmpPriorTable::compare( trans1->priorTable, - trans2->priorTable ); - if ( cmpRes != 0 ) - return cmpRes; - - /* Compare longest match action tables. */ - cmpRes = CmpLmActionTable::compare(trans1->lmActionTable, - trans2->lmActionTable); - if ( cmpRes != 0 ) - return cmpRes; - - /* Compare action tables. */ - return CmpActionTable::compare(trans1->actionTable, - trans2->actionTable); -} - -/* Callback invoked when another trans (or possibly this) is added into this - * transition during the merging process. Draw in any properties of srcTrans - * into this transition. AddInTrans is called when a new transitions is made - * that will be a duplicate of another transition or a combination of several - * other transitions. AddInTrans will be called for each transition that the - * new transition is to represent. */ -void FsmGraph::addInTrans( FsmTrans *destTrans, FsmTrans *srcTrans ) -{ - /* Protect against adding in from ourselves. 
*/ - if ( srcTrans == destTrans ) { - /* Adding in ourselves, need to make a copy of the source transitions. - * The priorities are not copied in as that would have no effect. */ - destTrans->lmActionTable.setActions( LmActionTable(srcTrans->lmActionTable) ); - destTrans->actionTable.setActions( ActionTable(srcTrans->actionTable) ); - } - else { - /* Not a copy of ourself, get the functions and priorities. */ - destTrans->lmActionTable.setActions( srcTrans->lmActionTable ); - destTrans->actionTable.setActions( srcTrans->actionTable ); - destTrans->priorTable.setPriors( srcTrans->priorTable ); - } -} - -/* Compare the properties of states that are embedded by users. Compares out - * priorities, out transitions, to, from, out, error and eof action tables. */ -int FsmGraph::compareStateData( const FsmState *state1, const FsmState *state2 ) -{ - /* Compare the out priority table. */ - int cmpRes = CmpPriorTable:: - compare( state1->outPriorTable, state2->outPriorTable ); - if ( cmpRes != 0 ) - return cmpRes; - - /* Test to state action tables. */ - cmpRes = CmpActionTable::compare( state1->toStateActionTable, - state2->toStateActionTable ); - if ( cmpRes != 0 ) - return cmpRes; - - /* Test from state action tables. */ - cmpRes = CmpActionTable::compare( state1->fromStateActionTable, - state2->fromStateActionTable ); - if ( cmpRes != 0 ) - return cmpRes; - - /* Test out action tables. */ - cmpRes = CmpActionTable::compare( state1->outActionTable, - state2->outActionTable ); - if ( cmpRes != 0 ) - return cmpRes; - - /* Test out condition sets. */ - cmpRes = CmpActionSet::compare( state1->outCondSet, - state2->outCondSet ); - if ( cmpRes != 0 ) - return cmpRes; - - /* Test out error action tables. */ - cmpRes = CmpErrActionTable::compare( state1->errActionTable, - state2->errActionTable ); - if ( cmpRes != 0 ) - return cmpRes; - - /* Test eof action tables. 
*/ - return CmpActionTable::compare( state1->eofActionTable, - state2->eofActionTable ); -} - - -/* Invoked when a state looses its final state status and the leaving - * transition embedding data should be deleted. */ -void FsmGraph::clearOutData( FsmState *state ) -{ - /* Kill the out actions and priorities. */ - state->outActionTable.empty(); - state->outCondSet.empty(); - state->outPriorTable.empty(); -} - -bool FsmGraph::hasOutData( FsmState *state ) -{ - return ( state->outActionTable.length() > 0 || - state->outCondSet.length() > 0 || - state->outPriorTable.length() > 0 ); -} - -/* - * Setting Conditions. - */ - -void logNewExpansion( Expansion *exp ); -void logCondSpace( CondSpace *condSpace ); - -CondSpace *FsmGraph::addCondSpace( const CondSet &condSet ) -{ - CondSpace *condSpace = condData->condSpaceMap.find( condSet ); - if ( condSpace == 0 ) { - Key baseKey = condData->nextCondKey; - condData->nextCondKey += (1 << condSet.length() ) * keyOps->alphSize(); - - condSpace = new CondSpace( condSet ); - condSpace->baseKey = baseKey; - condData->condSpaceMap.insert( condSpace ); - - #ifdef COLM_LOG_CONDS - if ( colm_log_conds ) { - cerr << "adding new condition space" << endl; - cerr << " condition set: "; - logCondSpace( condSpace ); - cerr << endl; - cerr << " baseKey: " << baseKey.getVal() << endl; - } - #endif - } - return condSpace; -} - -void FsmGraph::startFsmCondition( Action *condAction ) -{ - /* Make sure the start state has no other entry points. 
*/ - isolateStartState(); - embedCondition( startState, condAction ); -} - -void FsmGraph::allTransCondition( Action *condAction ) -{ - for ( StateList::Iter state = stateList; state.lte(); state++ ) - embedCondition( state, condAction ); -} - -void FsmGraph::leaveFsmCondition( Action *condAction ) -{ - for ( StateSet::Iter state = finStateSet; state.lte(); state++ ) - (*state)->outCondSet.insert( condAction ); -} diff --git a/colm/fsmattach.cc b/colm/fsmattach.cc deleted file mode 100644 index a58ed9a4..00000000 --- a/colm/fsmattach.cc +++ /dev/null @@ -1,425 +0,0 @@ -/* - * Copyright 2006-2012 Adrian Thurston <thurston@complang.org> - */ - -/* This file is part of Colm. - * - * Colm is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * Colm is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Colm; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include <string.h> -#include <assert.h> -#include "fsmgraph.h" - -#include <iostream> -using namespace std; - -/* Insert a transition into an inlist. The head must be supplied. */ -void FsmGraph::attachToInList( FsmState *from, FsmState *to, - FsmTrans *&head, FsmTrans *trans ) -{ - trans->ilnext = head; - trans->ilprev = 0; - - /* If in trans list is not empty, set the head->prev to trans. */ - if ( head != 0 ) - head->ilprev = trans; - - /* Now insert ourselves at the front of the list. */ - head = trans; - - /* Keep track of foreign transitions for from and to. 
*/ - if ( from != to ) { - if ( misfitAccounting ) { - /* If the number of foreign in transitions is about to go up to 1 then - * move it from the misfit list to the main list. */ - if ( to->foreignInTrans == 0 ) - stateList.append( misfitList.detach( to ) ); - } - - to->foreignInTrans += 1; - } -}; - -/* Detach a transition from an inlist. The head of the inlist must be supplied. */ -void FsmGraph::detachFromInList( FsmState *from, FsmState *to, - FsmTrans *&head, FsmTrans *trans ) -{ - /* Detach in the inTransList. */ - if ( trans->ilprev == 0 ) - head = trans->ilnext; - else - trans->ilprev->ilnext = trans->ilnext; - - if ( trans->ilnext != 0 ) - trans->ilnext->ilprev = trans->ilprev; - - /* Keep track of foreign transitions for from and to. */ - if ( from != to ) { - to->foreignInTrans -= 1; - - if ( misfitAccounting ) { - /* If the number of foreign in transitions goes down to 0 then move it - * from the main list to the misfit list. */ - if ( to->foreignInTrans == 0 ) - misfitList.append( stateList.detach( to ) ); - } - } -} - -/* Attach states on the default transition, range list or on out/in list key. - * First makes a new transition. If there is already a transition out from - * fromState on the default, then will assertion fail. */ -FsmTrans *FsmGraph::attachNewTrans( FsmState *from, FsmState *to, Key lowKey, Key highKey ) -{ - /* Make the new transition. */ - FsmTrans *retVal = new FsmTrans(); - - /* The transition is now attached. Remember the parties involved. */ - retVal->fromState = from; - retVal->toState = to; - - /* Make the entry in the out list for the transitions. */ - from->outList.append( retVal ); - - /* Set the the keys of the new trans. */ - retVal->lowKey = lowKey; - retVal->highKey = highKey; - - /* Attach using inList as the head pointer. */ - if ( to != 0 ) - attachToInList( from, to, to->inList.head, retVal ); - - return retVal; -} - -/* Attach for range lists or for the default transition. 
This attach should - * be used when a transition already is allocated and must be attached to a - * target state. Does not handle adding the transition into the out list. */ -void FsmGraph::attachTrans( FsmState *from, FsmState *to, FsmTrans *trans ) -{ - assert( trans->fromState == 0 && trans->toState == 0 ); - trans->fromState = from; - trans->toState = to; - - if ( to != 0 ) { - /* Attach using the inList pointer as the head pointer. */ - attachToInList( from, to, to->inList.head, trans ); - } -} - -/* Redirect a transition away from error and towards some state. This is just - * like attachTrans except it requires fromState to be set and does not touch - * it. */ -void FsmGraph::redirectErrorTrans( FsmState *from, FsmState *to, FsmTrans *trans ) -{ - assert( trans->fromState != 0 && trans->toState == 0 ); - trans->toState = to; - - if ( to != 0 ) { - /* Attach using the inList pointer as the head pointer. */ - attachToInList( from, to, to->inList.head, trans ); - } -} - -/* Detach for out/in lists or for default transition. */ -void FsmGraph::detachTrans( FsmState *from, FsmState *to, FsmTrans *trans ) -{ - assert( trans->fromState == from && trans->toState == to ); - trans->fromState = 0; - trans->toState = 0; - - if ( to != 0 ) { - /* Detach using to's inList pointer as the head. */ - detachFromInList( from, to, to->inList.head, trans ); - } -} - - -/* Detach a state from the graph. Detaches and deletes transitions in and out - * of the state. Empties inList and outList. Removes the state from the final - * state set. A detached state becomes useless and should be deleted. */ -void FsmGraph::detachState( FsmState *state ) -{ - /* Detach the in transitions from the inList list of transitions. */ - while ( state->inList.head != 0 ) { - /* Get pointers to the trans and the state. */ - FsmTrans *trans = state->inList.head; - FsmState *fromState = trans->fromState; - - /* Detach the transitions from the source state. 
*/ - detachTrans( fromState, state, trans ); - - /* Ok to delete the transition. */ - fromState->outList.detach( trans ); - delete trans; - } - - /* Remove the entry points in on the machine. */ - while ( state->entryIds.length() > 0 ) - unsetEntry( state->entryIds[0], state ); - - /* Detach out range transitions. */ - for ( TransList::Iter trans = state->outList; trans.lte(); ) { - TransList::Iter next = trans.next(); - detachTrans( state, trans->toState, trans ); - delete trans; - trans = next; - } - - /* Delete all of the out range pointers. */ - state->outList.abandon(); - - /* Unset final stateness before detaching from graph. */ - if ( state->stateBits & SB_ISFINAL ) - finStateSet.remove( state ); -} - - -/* Duplicate a transition. Makes a new transition that is attached to the same - * dest as srcTrans. The new transition has functions and priority taken from - * srcTrans. Used for merging a transition in to a free spot. The trans can - * just be dropped in. It does not conflict with an existing trans and need - * not be crossed. Returns the new transition. */ -FsmTrans *FsmGraph::dupTrans( FsmState *from, FsmTrans *srcTrans ) -{ - /* Make a new transition. */ - FsmTrans *newTrans = new FsmTrans(); - - /* We can attach the transition, one does not exist. */ - attachTrans( from, srcTrans->toState, newTrans ); - - /* Call the user callback to add in the original source transition. */ - addInTrans( newTrans, srcTrans ); - - return newTrans; -} - -/* In crossing, src trans and dest trans both go to existing states. Make one - * state from the sets of states that src and dest trans go to. */ -FsmTrans *FsmGraph::fsmAttachStates( MergeData &md, FsmState *from, - FsmTrans *destTrans, FsmTrans *srcTrans ) -{ - /* The priorities are equal. We must merge the transitions. Does the - * existing trans go to the state we are to attach to? ie, are we to - * simply double up the transition? 
*/ - FsmState *toState = srcTrans->toState; - FsmState *existingState = destTrans->toState; - - if ( existingState == toState ) { - /* The transition is a double up to the same state. Copy the src - * trans into itself. We don't need to merge in the from out trans - * data, that was done already. */ - addInTrans( destTrans, srcTrans ); - } - else { - /* The trans is not a double up. Dest trans cannot be the same as src - * trans. Set up the state set. */ - StateSet stateSet; - - /* We go to all the states the existing trans goes to, plus... */ - if ( existingState->stateDictEl == 0 ) - stateSet.insert( existingState ); - else - stateSet.insert( existingState->stateDictEl->stateSet ); - - /* ... all the states that we have been told to go to. */ - if ( toState->stateDictEl == 0 ) - stateSet.insert( toState ); - else - stateSet.insert( toState->stateDictEl->stateSet ); - - /* Look for the state. If it is not there already, make it. */ - StateDictEl *lastFound; - if ( md.stateDict.insert( stateSet, &lastFound ) ) { - /* Make a new state representing the combination of states in - * stateSet. It gets added to the fill list. This means that we - * need to fill in it's transitions sometime in the future. We - * don't do that now (ie, do not recurse). */ - FsmState *combinState = addState(); - - /* Link up the dict element and the state. */ - lastFound->targState = combinState; - combinState->stateDictEl = lastFound; - - /* Add to the fill list. */ - md.fillListAppend( combinState ); - } - - /* Get the state insertted/deleted. */ - FsmState *targ = lastFound->targState; - - /* Detach the state from existing state. */ - detachTrans( from, existingState, destTrans ); - - /* Re-attach to the new target. */ - attachTrans( from, targ, destTrans ); - - /* Add in src trans to the existing transition that we redirected to - * the new state. We don't need to merge in the from out trans data, - * that was done already. 
*/ - addInTrans( destTrans, srcTrans ); - } - - return destTrans; -} - -/* Two transitions are to be crossed, handle the possibility of either going - * to the error state. */ -FsmTrans *FsmGraph::mergeTrans( MergeData &md, FsmState *from, - FsmTrans *destTrans, FsmTrans *srcTrans ) -{ - FsmTrans *retTrans = 0; - if ( destTrans->toState == 0 && srcTrans->toState == 0 ) { - /* Error added into error. */ - addInTrans( destTrans, srcTrans ); - retTrans = destTrans; - } - else if ( destTrans->toState == 0 && srcTrans->toState != 0 ) { - /* Non error added into error we need to detach and reattach, */ - detachTrans( from, destTrans->toState, destTrans ); - attachTrans( from, srcTrans->toState, destTrans ); - addInTrans( destTrans, srcTrans ); - retTrans = destTrans; - } - else if ( srcTrans->toState == 0 ) { - /* Dest goes somewhere but src doesn't, just add it it in. */ - addInTrans( destTrans, srcTrans ); - retTrans = destTrans; - } - else { - /* Both go somewhere, run the actual cross. */ - retTrans = fsmAttachStates( md, from, destTrans, srcTrans ); - } - - return retTrans; -} - -/* Find the trans with the higher priority. If src is lower priority then dest then - * src is ignored. If src is higher priority than dest, then src overwrites dest. If - * the priorities are equal, then they are merged. */ -FsmTrans *FsmGraph::crossTransitions( MergeData &md, FsmState *from, - FsmTrans *destTrans, FsmTrans *srcTrans ) -{ - FsmTrans *retTrans; - - /* Compare the priority of the dest and src transitions. */ - int compareRes = comparePrior( destTrans->priorTable, srcTrans->priorTable ); - if ( compareRes < 0 ) { - /* Src trans has a higher priority than dest, src overwrites dest. - * Detach dest and return a copy of src. */ - detachTrans( from, destTrans->toState, destTrans ); - retTrans = dupTrans( from, srcTrans ); - } - else if ( compareRes > 0 ) { - /* The dest trans has a higher priority, use dest. 
*/ - retTrans = destTrans; - } - else { - /* Src trans and dest trans have the same priority, they must be merged. */ - retTrans = mergeTrans( md, from, destTrans, srcTrans ); - } - - /* Return the transition that resulted from the cross. */ - return retTrans; -} - -/* Copy the transitions in srcList to the outlist of dest. The srcList should - * not be the outList of dest, otherwise you would be copying the contents of - * srcList into itself as it's iterated: bad news. */ -void FsmGraph::outTransCopy( MergeData &md, FsmState *dest, FsmTrans *srcList ) -{ - /* The destination list. */ - TransList destList; - - /* Set up an iterator to stop at breaks. */ - PairIter<FsmTrans> outPair( dest->outList.head, srcList ); - for ( ; !outPair.end(); outPair++ ) { - switch ( outPair.userState ) { - case RangeInS1: { - /* The pair iter is the authority on the keys. It may have needed - * to break the dest range. */ - FsmTrans *destTrans = outPair.s1Tel.trans; - destTrans->lowKey = outPair.s1Tel.lowKey; - destTrans->highKey = outPair.s1Tel.highKey; - destList.append( destTrans ); - break; - } - case RangeInS2: { - /* Src range may get crossed with dest's default transition. */ - FsmTrans *newTrans = dupTrans( dest, outPair.s2Tel.trans ); - - /* Set up the transition's keys and append to the dest list. */ - newTrans->lowKey = outPair.s2Tel.lowKey; - newTrans->highKey = outPair.s2Tel.highKey; - destList.append( newTrans ); - break; - } - case RangeOverlap: { - /* Exact overlap, cross them. */ - FsmTrans *newTrans = crossTransitions( md, dest, - outPair.s1Tel.trans, outPair.s2Tel.trans ); - - /* Set up the transition's keys and append to the dest list. */ - newTrans->lowKey = outPair.s1Tel.lowKey; - newTrans->highKey = outPair.s1Tel.highKey; - destList.append( newTrans ); - break; - } - case BreakS1: { - /* Since we are always writing to the dest trans, the dest needs - * to be copied when it is broken. The copy goes into the first - * half of the break to "break it off". 
*/ - outPair.s1Tel.trans = dupTrans( dest, outPair.s1Tel.trans ); - break; - } - case BreakS2: - break; - } - } - - /* Abandon the old outList and transfer destList into it. */ - dest->outList.transfer( destList ); -} - - -/* Move all the transitions that go into src so that they go into dest. */ -void FsmGraph::inTransMove( FsmState *dest, FsmState *src ) -{ - /* Do not try to move in trans to and from the same state. */ - assert( dest != src ); - - /* If src is the start state, dest becomes the start state. */ - if ( src == startState ) { - unsetStartState(); - setStartState( dest ); - } - - /* For each entry point into, create an entry point into dest, when the - * state is detached, the entry points to src will be removed. */ - for ( EntryIdSet::Iter enId = src->entryIds; enId.lte(); enId++ ) - changeEntry( *enId, dest, src ); - - /* Move the transitions in inList. */ - while ( src->inList.head != 0 ) { - /* Get trans and from state. */ - FsmTrans *trans = src->inList.head; - FsmState *fromState = trans->fromState; - - /* Detach from src, reattach to dest. */ - detachTrans( fromState, src, trans ); - attachTrans( fromState, dest, trans ); - } -} diff --git a/colm/fsmbase.cc b/colm/fsmbase.cc deleted file mode 100644 index 90341039..00000000 --- a/colm/fsmbase.cc +++ /dev/null @@ -1,602 +0,0 @@ -/* - * Copyright 2006-2012 Adrian Thurston <thurston@complang.org> - */ - -/* This file is part of Colm. - * - * Colm is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * Colm is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with Colm; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include <string.h> -#include <assert.h> -#include "fsmgraph.h" - -/* Simple singly linked list append routine for the fill list. The new state - * goes to the end of the list. */ -void MergeData::fillListAppend( FsmState *state ) -{ - state->alg.next = 0; - - if ( stfillHead == 0 ) { - /* List is empty, state becomes head and tail. */ - stfillHead = state; - stfillTail = state; - } - else { - /* List is not empty, state goes after last element. */ - stfillTail->alg.next = state; - stfillTail = state; - } -} - -/* Graph constructor. */ -FsmGraph::FsmGraph() -: - /* No start state. */ - startState(0), - errState(0), - - /* Misfit accounting is a switch, turned on only at specific times. It - * controls what happens when states have no way in from the outside - * world.. */ - misfitAccounting(false), - - lmRequiresErrorState(false) -{ -} - -/* Copy all graph data including transitions. */ -FsmGraph::FsmGraph( const FsmGraph &graph ) -: - /* Lists start empty. Will be filled by copy. */ - stateList(), - misfitList(), - - /* Copy in the entry points, - * pointers will be resolved later. */ - entryPoints(graph.entryPoints), - startState(graph.startState), - errState(0), - - /* Will be filled by copy. */ - finStateSet(), - - /* Misfit accounting is only on during merging. */ - misfitAccounting(false), - - lmRequiresErrorState(graph.lmRequiresErrorState) -{ - /* Create the states and record their map in the original state. */ - StateList::Iter origState = graph.stateList; - for ( ; origState.lte(); origState++ ) { - /* Make the new state. */ - FsmState *newState = new FsmState( *origState ); - - /* Add the state to the list. */ - stateList.append( newState ); - - /* Set the mapsTo item of the old state. 
*/ - origState->alg.stateMap = newState; - } - - /* Derefernce all the state maps. */ - for ( StateList::Iter state = stateList; state.lte(); state++ ) { - for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) { - /* The points to the original in the src machine. The taget's duplicate - * is in the statemap. */ - FsmState *toState = trans->toState != 0 ? trans->toState->alg.stateMap : 0; - - /* Attach The transition to the duplicate. */ - trans->toState = 0; - attachTrans( state, toState, trans ); - } - } - - /* Fix the state pointers in the entry points array. */ - EntryMapEl *eel = entryPoints.data; - for ( int e = 0; e < entryPoints.length(); e++, eel++ ) { - /* Get the duplicate of the state. */ - eel->value = eel->value->alg.stateMap; - - /* Foreign in transitions must be built up when duping machines so - * increment it here. */ - eel->value->foreignInTrans += 1; - } - - /* Fix the start state pointer and the new start state's count of in - * transiions. */ - startState = startState->alg.stateMap; - startState->foreignInTrans += 1; - - /* Build the final state set. */ - StateSet::Iter st = graph.finStateSet; - for ( ; st.lte(); st++ ) - finStateSet.insert((*st)->alg.stateMap); -} - -/* Deletes all transition data then deletes each state. */ -FsmGraph::~FsmGraph() -{ - /* Delete all the transitions. */ - for ( StateList::Iter state = stateList; state.lte(); state++ ) { - /* Iterate the out transitions, deleting them. */ - state->outList.empty(); - } - - /* Delete all the states. */ - stateList.empty(); -} - -/* Set a state final. The state has its isFinState set to true and the state - * is added to the finStateSet. */ -void FsmGraph::setFinState( FsmState *state ) -{ - /* Is it already a fin state. */ - if ( state->stateBits & SB_ISFINAL ) - return; - - state->stateBits |= SB_ISFINAL; - finStateSet.insert( state ); -} - -/* Set a state non-final. The has its isFinState flag set false and the state - * is removed from the final state set. 
*/ -void FsmGraph::unsetFinState( FsmState *state ) -{ - /* Is it already a non-final state? */ - if ( ! (state->stateBits & SB_ISFINAL) ) - return; - - /* When a state looses its final state status it must relinquish all the - * properties that are allowed only for final states. */ - clearOutData( state ); - - state->stateBits &= ~ SB_ISFINAL; - finStateSet.remove( state ); -} - -/* Set and unset a state as the start state. */ -void FsmGraph::setStartState( FsmState *state ) -{ - /* Sould change from unset to set. */ - assert( startState == 0 ); - startState = state; - - if ( misfitAccounting ) { - /* If the number of foreign in transitions is about to go up to 1 then - * take it off the misfit list and put it on the head list. */ - if ( state->foreignInTrans == 0 ) - stateList.append( misfitList.detach( state ) ); - } - - /* Up the foreign in transitions to the state. */ - state->foreignInTrans += 1; -} - -void FsmGraph::unsetStartState() -{ - /* Should change from set to unset. */ - assert( startState != 0 ); - - /* Decrement the entry's count of foreign entries. */ - startState->foreignInTrans -= 1; - - if ( misfitAccounting ) { - /* If the number of foreign in transitions just went down to 0 then take - * it off the main list and put it on the misfit list. */ - if ( startState->foreignInTrans == 0 ) - misfitList.append( stateList.detach( startState ) ); - } - - startState = 0; -} - -/* Associate an id with a state. Makes the state a named entry point. Has no - * effect if the entry point is already mapped to the state. */ -void FsmGraph::setEntry( int id, FsmState *state ) -{ - /* Insert the id into the state. If the state is already labelled with id, - * nothing to do. */ - if ( state->entryIds.insert( id ) ) { - /* Insert the entry and assert that it succeeds. 
*/ - entryPoints.insertMulti( id, state ); - - if ( misfitAccounting ) { - /* If the number of foreign in transitions is about to go up to 1 then - * take it off the misfit list and put it on the head list. */ - if ( state->foreignInTrans == 0 ) - stateList.append( misfitList.detach( state ) ); - } - - /* Up the foreign in transitions to the state. */ - state->foreignInTrans += 1; - } -} - -/* Remove the association of an id with a state. The state looses it's entry - * point status. Assumes that the id is indeed mapped to state. */ -void FsmGraph::unsetEntry( int id, FsmState *state ) -{ - /* Find the entry point in on id. */ - EntryMapEl *enLow = 0, *enHigh = 0; - entryPoints.findMulti( id, enLow, enHigh ); - while ( enLow->value != state ) - enLow += 1; - - /* Remove the record from the map. */ - entryPoints.remove( enLow ); - - /* Remove the state's sense of the link. */ - state->entryIds.remove( id ); - state->foreignInTrans -= 1; - if ( misfitAccounting ) { - /* If the number of foreign in transitions just went down to 0 then take - * it off the main list and put it on the misfit list. */ - if ( state->foreignInTrans == 0 ) - misfitList.append( stateList.detach( state ) ); - } -} - -/* Remove all association of an id with states. Assumes that the id is indeed - * mapped to a state. */ -void FsmGraph::unsetEntry( int id ) -{ - /* Find the entry point in on id. */ - EntryMapEl *enLow = 0, *enHigh = 0; - entryPoints.findMulti( id, enLow, enHigh ); - for ( EntryMapEl *mel = enLow; mel <= enHigh; mel++ ) { - /* Remove the state's sense of the link. */ - mel->value->entryIds.remove( id ); - mel->value->foreignInTrans -= 1; - if ( misfitAccounting ) { - /* If the number of foreign in transitions just went down to 0 - * then take it off the main list and put it on the misfit list. */ - if ( mel->value->foreignInTrans == 0 ) - misfitList.append( stateList.detach( mel->value ) ); - } - } - - /* Remove the records from the entry points map. 
*/ - entryPoints.removeMulti( enLow, enHigh ); -} - - -void FsmGraph::changeEntry( int id, FsmState *to, FsmState *from ) -{ - /* Find the entry in the entry map. */ - EntryMapEl *enLow = 0, *enHigh = 0; - entryPoints.findMulti( id, enLow, enHigh ); - while ( enLow->value != from ) - enLow += 1; - - /* Change it to the new target. */ - enLow->value = to; - - /* Remove from's sense of the link. */ - from->entryIds.remove( id ); - from->foreignInTrans -= 1; - if ( misfitAccounting ) { - /* If the number of foreign in transitions just went down to 0 then take - * it off the main list and put it on the misfit list. */ - if ( from->foreignInTrans == 0 ) - misfitList.append( stateList.detach( from ) ); - } - - /* Add to's sense of the link. */ - if ( to->entryIds.insert( id ) != 0 ) { - if ( misfitAccounting ) { - /* If the number of foreign in transitions is about to go up to 1 then - * take it off the misfit list and put it on the head list. */ - if ( to->foreignInTrans == 0 ) - stateList.append( misfitList.detach( to ) ); - } - - /* Up the foreign in transitions to the state. */ - to->foreignInTrans += 1; - } -} - - -/* Clear all entry points from a machine. */ -void FsmGraph::unsetAllEntryPoints() -{ - for ( EntryMap::Iter en = entryPoints; en.lte(); en++ ) { - /* Kill all the state's entry points at once. */ - if ( en->value->entryIds.length() > 0 ) { - en->value->foreignInTrans -= en->value->entryIds.length(); - - if ( misfitAccounting ) { - /* If the number of foreign in transitions just went down to 0 - * then take it off the main list and put it on the misfit - * list. */ - if ( en->value->foreignInTrans == 0 ) - misfitList.append( stateList.detach( en->value ) ); - } - - /* Clear the set of ids out all at once. */ - en->value->entryIds.empty(); - } - } - - /* Now clear out the entry map all at once. */ - entryPoints.empty(); -} - -/* Assigning an epsilon transition into final states. 
*/ -void FsmGraph::epsilonTrans( int id ) -{ - for ( StateSet::Iter fs = finStateSet; fs.lte(); fs++ ) - (*fs)->epsilonTrans.append( id ); -} - -/* Mark all states reachable from state. Traverses transitions forward. Used - * for removing states that have no path into them. */ -void FsmGraph::markReachableFromHere( FsmState *state ) -{ - /* Base case: return; */ - if ( state->stateBits & SB_ISMARKED ) - return; - - /* Set this state as processed. We are going to visit all states that this - * state has a transition to. */ - state->stateBits |= SB_ISMARKED; - - /* Recurse on all out transitions. */ - for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) { - if ( trans->toState != 0 ) - markReachableFromHere( trans->toState ); - } -} - -void FsmGraph::markReachableFromHereStopFinal( FsmState *state ) -{ - /* Base case: return; */ - if ( state->stateBits & SB_ISMARKED ) - return; - - /* Set this state as processed. We are going to visit all states that this - * state has a transition to. */ - state->stateBits |= SB_ISMARKED; - - /* Recurse on all out transitions. */ - for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) { - FsmState *toState = trans->toState; - if ( toState != 0 && !toState->isFinState() ) - markReachableFromHereStopFinal( toState ); - } -} - -/* Mark all states reachable from state. Traverse transitions backwards. Used - * for removing dead end paths in graphs. */ -void FsmGraph::markReachableFromHereReverse( FsmState *state ) -{ - /* Base case: return; */ - if ( state->stateBits & SB_ISMARKED ) - return; - - /* Set this state as processed. We are going to visit all states with - * transitions into this state. */ - state->stateBits |= SB_ISMARKED; - - /* Recurse on all items in transitions. */ - for ( TransInList::Iter trans = state->inList; trans.lte(); trans++ ) - markReachableFromHereReverse( trans->fromState ); -} - -/* Determine if there are any entry points into a start state other than the - * start state. 
Setting starting transitions requires that the start state be - * isolated. In most cases a start state will already be isolated. */ -bool FsmGraph::isStartStateIsolated() -{ - /* If there are any in transitions then the state is not isolated. */ - if ( startState->inList.head != 0 ) - return false; - - /* If there are any entry points then isolated. */ - if ( startState->entryIds.length() > 0 ) - return false; - - return true; -} - -/* Bring in other's entry points. Assumes others states are going to be - * copied into this machine. */ -void FsmGraph::copyInEntryPoints( FsmGraph *other ) -{ - /* Use insert multi because names are not unique. */ - for ( EntryMap::Iter en = other->entryPoints; en.lte(); en++ ) - entryPoints.insertMulti( en->key, en->value ); -} - - -void FsmGraph::unsetAllFinStates() -{ - for ( StateSet::Iter st = finStateSet; st.lte(); st++ ) - (*st)->stateBits &= ~ SB_ISFINAL; - finStateSet.empty(); -} - -void FsmGraph::setFinBits( int finStateBits ) -{ - for ( int s = 0; s < finStateSet.length(); s++ ) - finStateSet.data[s]->stateBits |= finStateBits; -} - - -/* Tests the integrity of the transition lists and the fromStates. */ -void FsmGraph::verifyIntegrity() -{ - for ( StateList::Iter state = stateList; state.lte(); state++ ) { - /* Walk the out transitions and assert fromState is correct. */ - for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) - assert( trans->fromState == state ); - - /* Walk the inlist and assert toState is correct. */ - for ( TransInList::Iter trans = state->inList; trans.lte(); trans++ ) - assert( trans->toState == state ); - } -} - -void FsmGraph::verifyReachability() -{ - /* Mark all the states that can be reached - * through the set of entry points. */ - markReachableFromHere( startState ); - for ( EntryMap::Iter en = entryPoints; en.lte(); en++ ) - markReachableFromHere( en->value ); - - /* Check that everything got marked. 
*/ - for ( StateList::Iter st = stateList; st.lte(); st++ ) { - /* Assert it got marked and then clear the mark. */ - assert( st->stateBits & SB_ISMARKED ); - st->stateBits &= ~ SB_ISMARKED; - } -} - -void FsmGraph::verifyNoDeadEndStates() -{ - /* Mark all states that have paths to the final states. */ - for ( StateSet::Iter pst = finStateSet; pst.lte(); pst++ ) - markReachableFromHereReverse( *pst ); - - /* Start state gets honorary marking. Must be done AFTER recursive call. */ - startState->stateBits |= SB_ISMARKED; - - /* Make sure everything got marked. */ - for ( StateList::Iter st = stateList; st.lte(); st++ ) { - /* Assert the state got marked and unmark it. */ - assert( st->stateBits & SB_ISMARKED ); - st->stateBits &= ~ SB_ISMARKED; - } -} - -void FsmGraph::depthFirstOrdering( FsmState *state ) -{ - /* Nothing to do if the state is already on the list. */ - if ( state->stateBits & SB_ONLIST ) - return; - - /* Doing depth first, put state on the list. */ - state->stateBits |= SB_ONLIST; - stateList.append( state ); - - /* Recurse on everything ranges. */ - for ( TransList::Iter tel = state->outList; tel.lte(); tel++ ) { - if ( tel->toState != 0 ) - depthFirstOrdering( tel->toState ); - } -} - -/* Ordering states by transition connections. */ -void FsmGraph::depthFirstOrdering() -{ - /* Init on state list flags. */ - for ( StateList::Iter st = stateList; st.lte(); st++ ) - st->stateBits &= ~SB_ONLIST; - - /* Clear out the state list, we will rebuild it. */ - int stateListLen = stateList.length(); - stateList.abandon(); - - /* Add back to the state list from the start state and all other entry - * points. */ - if ( errState != 0 ) - depthFirstOrdering( errState ); - depthFirstOrdering( startState ); - for ( EntryMap::Iter en = entryPoints; en.lte(); en++ ) - depthFirstOrdering( en->value ); - - /* Make sure we put everything back on. */ - assert( stateListLen == stateList.length() ); -} - -/* Stable sort the states by final state status. 
*/ -void FsmGraph::sortStatesByFinal() -{ - /* Move forward through the list and throw final states onto the end. */ - FsmState *state = 0; - FsmState *next = stateList.head; - FsmState *last = stateList.tail; - while ( state != last ) { - /* Move forward and load up the next. */ - state = next; - next = state->next; - - /* Throw to the end? */ - if ( state->isFinState() ) { - stateList.detach( state ); - stateList.append( state ); - } - } -} - -void FsmGraph::setStateNumbers( int base ) -{ - for ( StateList::Iter state = stateList; state.lte(); state++ ) - state->alg.stateNum = base++; -} - - -bool FsmGraph::checkErrTrans( FsmState *state, FsmTrans *trans ) -{ - /* Might go directly to error state. */ - if ( trans->toState == 0 ) - return true; - - if ( trans->prev == 0 ) { - /* If this is the first transition. */ - if ( keyOps->minKey < trans->lowKey ) - return true; - } - else { - /* Not the first transition. Compare against the prev. */ - FsmTrans *prev = trans->prev; - Key nextKey = prev->highKey; - nextKey.increment(); - if ( nextKey < trans->lowKey ) - return true; - } - return false; -} - -bool FsmGraph::checkErrTransFinish( FsmState *state ) -{ - /* Check if there are any ranges already. */ - if ( state->outList.length() == 0 ) - return true; - else { - /* Get the last and check for a gap on the end. 
*/ - FsmTrans *last = state->outList.tail; - if ( last->highKey < keyOps->maxKey ) - return true; - } - return 0; -} - -bool FsmGraph::hasErrorTrans() -{ - bool result; - for ( StateList::Iter st = stateList; st.lte(); st++ ) { - for ( TransList::Iter tr = st->outList; tr.lte(); tr++ ) { - result = checkErrTrans( st, tr ); - if ( result ) - return true; - } - result = checkErrTransFinish( st ); - if ( result ) - return true; - } - return false; -} diff --git a/colm/fsmcodegen.cc b/colm/fsmcodegen.cc deleted file mode 100644 index 86302c31..00000000 --- a/colm/fsmcodegen.cc +++ /dev/null @@ -1,1098 +0,0 @@ -/* - * Copyright 2006-2012 Adrian Thurston <thurston@complang.org> - */ - -/* This file is part of Colm. - * - * Colm is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * Colm is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Colm; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include "parsedata.h" -#include "fsmcodegen.h" -#include "redfsm.h" -#include "bstmap.h" -#include "fsmrun.h" -#include <sstream> -#include <string> -#include <assert.h> - - -using std::ostream; -using std::ostringstream; -using std::string; -using std::cerr; -using std::endl; - - -/* Init code gen with in parameters. 
*/ -FsmCodeGen::FsmCodeGen( const char *sourceFileName, const char *fsmName, ostream &out, - RedFsm *redFsm, FsmTables *fsmTables ) -: - sourceFileName(sourceFileName), - fsmName(fsmName), - out(out), - redFsm(redFsm), - fsmTables(fsmTables), - codeGenErrCount(0), - dataPrefix(true), - writeFirstFinal(true), - writeErr(true) -{ -} - -unsigned int FsmCodeGen::arrayTypeSize( unsigned long maxVal ) -{ - long long maxValLL = (long long) maxVal; - HostType *arrayType = keyOps->typeSubsumes( maxValLL ); - assert( arrayType != 0 ); - return arrayType->size; -} - -string FsmCodeGen::ARRAY_TYPE( unsigned long maxVal ) -{ - long long maxValLL = (long long) maxVal; - HostType *arrayType = keyOps->typeSubsumes( maxValLL ); - assert( arrayType != 0 ); - - string ret = arrayType->data1; - if ( arrayType->data2 != 0 ) { - ret += " "; - ret += arrayType->data2; - } - return ret; -} - - -/* Write out the fsm name. */ -string FsmCodeGen::FSM_NAME() -{ - return fsmName; -} - -/* Emit the offset of the start state as a decimal integer. */ -string FsmCodeGen::START_STATE_ID() -{ - ostringstream ret; - ret << redFsm->startState->id; - return ret.str(); -}; - -/* Write out the array of actions. */ -std::ostream &FsmCodeGen::ACTIONS_ARRAY() -{ - out << "\t0, "; - int totalActions = 1; - for ( GenActionTableMap::Iter act = redFsm->actionMap; act.lte(); act++ ) { - /* Write out the length, which will never be the last character. */ - out << act->key.length() << ", "; - /* Put in a line break every 8 */ - if ( totalActions++ % 8 == 7 ) - out << "\n\t"; - - for ( GenActionTable::Iter item = act->key; item.lte(); item++ ) { - out << item->value->actionId; - if ( ! (act.last() && item.last()) ) - out << ", "; - - /* Put in a line break every 8 */ - if ( totalActions++ % 8 == 7 ) - out << "\n\t"; - } - } - out << "\n"; - return out; -} - - -string FsmCodeGen::CS() -{ - ostringstream ret; - /* Expression for retrieving the key, use simple dereference. 
*/ - ret << ACCESS() << "cs"; - return ret.str(); -} - -string FsmCodeGen::GET_WIDE_KEY() -{ - if ( redFsm->anyConditions() ) - return "_widec"; - else - return GET_KEY(); -} - -string FsmCodeGen::GET_WIDE_KEY( RedState *state ) -{ - if ( state->stateCondList.length() > 0 ) - return "_widec"; - else - return GET_KEY(); -} - -string FsmCodeGen::GET_KEY() -{ - ostringstream ret; - /* Expression for retrieving the key, use simple dereference. */ - ret << "(*" << P() << ")"; - return ret.str(); -} - -/* Write out level number of tabs. Makes the nested binary search nice - * looking. */ -string FsmCodeGen::TABS( int level ) -{ - string result; - while ( level-- > 0 ) - result += "\t"; - return result; -} - -/* Write out a key from the fsm code gen. Depends on wether or not the key is - * signed. */ -string FsmCodeGen::KEY( Key key ) -{ - ostringstream ret; - if ( keyOps->isSigned || !hostLang->explicitUnsigned ) - ret << key.getVal(); - else - ret << (unsigned long) key.getVal() << 'u'; - return ret.str(); -} - -void FsmCodeGen::SET_ACT( ostream &ret, InlineItem *item ) -{ - ret << ACT() << " = " << item->longestMatchPart->longestMatchId << ";"; -} - -void FsmCodeGen::SET_TOKEND( ostream &ret, InlineItem *item ) -{ - /* The tokend action sets tokend. 
*/ - ret << TOKEND() << " = " << P() << "+1;"; -} -void FsmCodeGen::INIT_TOKSTART( ostream &ret, InlineItem *item ) -{ - ret << TOKSTART() << " = 0;"; -} - -void FsmCodeGen::INIT_ACT( ostream &ret, InlineItem *item ) -{ - ret << ACT() << " = 0;"; -} - -void FsmCodeGen::SET_TOKSTART( ostream &ret, InlineItem *item ) -{ - ret << TOKSTART() << " = " << P() << ";"; -} - -void FsmCodeGen::EMIT_TOKEN( ostream &ret, LangEl *token ) -{ - ret << " " << MATCHED_TOKEN() << " = " << token->id << ";\n"; -} - -void FsmCodeGen::LM_SWITCH( ostream &ret, InlineItem *item, - int targState, int inFinish ) -{ - ret << - " " << P() << " = " << TOKEND() << ";\n" - " switch( " << ACT() << " ) {\n"; - - /* If the switch handles error then we also forced the error state. It - * will exist. */ - if ( item->tokenRegion->lmSwitchHandlesError ) { - ret << " case 0: " << P() << " = " << TOKSTART() << - "; goto st" << redFsm->errState->id << ";\n"; - } - - for ( TokenDefListReg::Iter lmi = item->tokenRegion->tokenDefList; lmi.lte(); lmi++ ) { - if ( lmi->inLmSelect ) { - assert( lmi->tdLangEl != 0 ); - ret << " case " << lmi->longestMatchId << ":\n"; - EMIT_TOKEN( ret, lmi->tdLangEl ); - ret << " break;\n"; - } - } - - ret << - " }\n" - "\t" - " return;\n"; -} - -void FsmCodeGen::LM_ON_LAST( ostream &ret, InlineItem *item ) -{ - assert( item->longestMatchPart->tdLangEl != 0 ); - - ret << " " << P() << " += 1;\n"; - EMIT_TOKEN( ret, item->longestMatchPart->tdLangEl ); - ret << " return;\n"; -} - -void FsmCodeGen::LM_ON_NEXT( ostream &ret, InlineItem *item ) -{ - assert( item->longestMatchPart->tdLangEl != 0 ); - - EMIT_TOKEN( ret, item->longestMatchPart->tdLangEl ); - ret << " return;\n"; -} - -void FsmCodeGen::LM_ON_LAG_BEHIND( ostream &ret, InlineItem *item ) -{ - assert( item->longestMatchPart->tdLangEl != 0 ); - - ret << " " << P() << " = " << TOKEND() << ";\n"; - EMIT_TOKEN( ret, item->longestMatchPart->tdLangEl ); - ret << " return;\n"; -} - - -/* Write out an inline tree structure. 
Walks the list and possibly calls out - * to virtual functions than handle language specific items in the tree. */ -void FsmCodeGen::INLINE_LIST( ostream &ret, InlineList *inlineList, - int targState, bool inFinish ) -{ - for ( InlineList::Iter item = *inlineList; item.lte(); item++ ) { - switch ( item->type ) { - case InlineItem::Text: - assert( false ); - break; - case InlineItem::LmSetActId: - SET_ACT( ret, item ); - break; - case InlineItem::LmSetTokEnd: - SET_TOKEND( ret, item ); - break; - case InlineItem::LmInitTokStart: - assert( false ); - break; - case InlineItem::LmInitAct: - INIT_ACT( ret, item ); - break; - case InlineItem::LmSetTokStart: - SET_TOKSTART( ret, item ); - break; - case InlineItem::LmSwitch: - LM_SWITCH( ret, item, targState, inFinish ); - break; - case InlineItem::LmOnLast: - LM_ON_LAST( ret, item ); - break; - case InlineItem::LmOnNext: - LM_ON_NEXT( ret, item ); - break; - case InlineItem::LmOnLagBehind: - LM_ON_LAG_BEHIND( ret, item ); - break; - } - } -} - -/* Write out paths in line directives. Escapes any special characters. */ -string FsmCodeGen::LDIR_PATH( char *path ) -{ - ostringstream ret; - for ( char *pc = path; *pc != 0; pc++ ) { - if ( *pc == '\\' ) - ret << "\\\\"; - else - ret << *pc; - } - return ret.str(); -} - -void FsmCodeGen::ACTION( ostream &ret, GenAction *action, int targState, bool inFinish ) -{ - /* Write the block and close it off. 
*/ - ret << "\t{"; - INLINE_LIST( ret, action->inlineList, targState, inFinish ); - - if ( action->markId > 0 ) - ret << "mark[" << action->markId-1 << "] = " << P() << ";\n"; - - ret << "}\n"; - -} - -void FsmCodeGen::CONDITION( ostream &ret, GenAction *condition ) -{ - ret << "\n"; - INLINE_LIST( ret, condition->inlineList, 0, false ); -} - -string FsmCodeGen::ERROR_STATE() -{ - ostringstream ret; - if ( redFsm->errState != 0 ) - ret << redFsm->errState->id; - else - ret << "-1"; - return ret.str(); -} - -string FsmCodeGen::FIRST_FINAL_STATE() -{ - ostringstream ret; - if ( redFsm->firstFinState != 0 ) - ret << redFsm->firstFinState->id; - else - ret << redFsm->nextStateId; - return ret.str(); -} - -string FsmCodeGen::DATA_PREFIX() -{ - if ( dataPrefix ) - return FSM_NAME() + "_"; - return ""; -} - -/* Emit the alphabet data type. */ -string FsmCodeGen::ALPH_TYPE() -{ - string ret = keyOps->alphType->data1; - if ( keyOps->alphType->data2 != 0 ) { - ret += " "; - ret += + keyOps->alphType->data2; - } - return ret; -} - -/* Emit the alphabet data type. 
*/ -string FsmCodeGen::WIDE_ALPH_TYPE() -{ - string ret; - if ( redFsm->maxKey <= keyOps->maxKey ) - ret = ALPH_TYPE(); - else { - long long maxKeyVal = redFsm->maxKey.getLongLong(); - HostType *wideType = keyOps->typeSubsumes( keyOps->isSigned, maxKeyVal ); - assert( wideType != 0 ); - - ret = wideType->data1; - if ( wideType->data2 != 0 ) { - ret += " "; - ret += wideType->data2; - } - } - return ret; -} - - -string FsmCodeGen::PTR_CONST() -{ - return "const "; -} - -std::ostream &FsmCodeGen::OPEN_ARRAY( string type, string name ) -{ - out << "static const " << type << " " << name << "[] = {\n"; - return out; -} - -std::ostream &FsmCodeGen::CLOSE_ARRAY() -{ - return out << "};\n"; -} - -std::ostream &FsmCodeGen::STATIC_VAR( string type, string name ) -{ - out << "static const " << type << " " << name; - return out; -} - -string FsmCodeGen::UINT( ) -{ - return "unsigned int"; -} - -string FsmCodeGen::ARR_OFF( string ptr, string offset ) -{ - return ptr + " + " + offset; -} - -string FsmCodeGen::CAST( string type ) -{ - return "(" + type + ")"; -} - -std::ostream &FsmCodeGen::TO_STATE_ACTION_SWITCH() -{ - /* Walk the list of functions, printing the cases. */ - for ( GenActionList::Iter act = redFsm->genActionList; act.lte(); act++ ) { - /* Write out referenced actions. */ - if ( act->numToStateRefs > 0 ) { - /* Write the case label, the action and the case break. */ - out << "\tcase " << act->actionId << ":\n"; - ACTION( out, act, 0, false ); - out << "\tbreak;\n"; - } - } - - return out; -} - -std::ostream &FsmCodeGen::FROM_STATE_ACTION_SWITCH() -{ - /* Walk the list of functions, printing the cases. */ - for ( GenActionList::Iter act = redFsm->genActionList; act.lte(); act++ ) { - /* Write out referenced actions. */ - if ( act->numFromStateRefs > 0 ) { - /* Write the case label, the action and the case break. 
*/ - out << "\tcase " << act->actionId << ":\n"; - ACTION( out, act, 0, false ); - out << "\tbreak;\n"; - } - } - - return out; -} - -std::ostream &FsmCodeGen::ACTION_SWITCH() -{ - /* Walk the list of functions, printing the cases. */ - for ( GenActionList::Iter act = redFsm->genActionList; act.lte(); act++ ) { - /* Write out referenced actions. */ - if ( act->numTransRefs > 0 ) { - /* Write the case label, the action and the case break. */ - out << "\tcase " << act->actionId << ":\n"; - ACTION( out, act, 0, false ); - out << "\tbreak;\n"; - } - } - - return out; -} - -void FsmCodeGen::emitSingleSwitch( RedState *state ) -{ - /* Load up the singles. */ - int numSingles = state->outSingle.length(); - RedTransEl *data = state->outSingle.data; - - if ( numSingles == 1 ) { - /* If there is a single single key then write it out as an if. */ - out << "\tif ( " << GET_WIDE_KEY(state) << " == " << - KEY(data[0].lowKey) << " )\n\t\t"; - - /* Virtual function for writing the target of the transition. */ - TRANS_GOTO(data[0].value, 0) << "\n"; - } - else if ( numSingles > 1 ) { - /* Write out single keys in a switch if there is more than one. */ - out << "\tswitch( " << GET_WIDE_KEY(state) << " ) {\n"; - - /* Write out the single indicies. */ - for ( int j = 0; j < numSingles; j++ ) { - out << "\t\tcase " << KEY(data[j].lowKey) << ": "; - TRANS_GOTO(data[j].value, 0) << "\n"; - } - - /* Close off the transition switch. */ - out << "\t}\n"; - } -} - -void FsmCodeGen::emitRangeBSearch( RedState *state, int level, int low, int high ) -{ - /* Get the mid position, staying on the lower end of the range. */ - int mid = (low + high) >> 1; - RedTransEl *data = state->outRange.data; - - /* Determine if we need to look higher or lower. */ - bool anyLower = mid > low; - bool anyHigher = mid < high; - - /* Determine if the keys at mid are the limits of the alphabet. 
*/ - bool limitLow = data[mid].lowKey == keyOps->minKey; - bool limitHigh = data[mid].highKey == keyOps->maxKey; - - if ( anyLower && anyHigher ) { - /* Can go lower and higher than mid. */ - out << TABS(level) << "if ( " << GET_WIDE_KEY(state) << " < " << - KEY(data[mid].lowKey) << " ) {\n"; - emitRangeBSearch( state, level+1, low, mid-1 ); - out << TABS(level) << "} else if ( " << GET_WIDE_KEY(state) << " > " << - KEY(data[mid].highKey) << " ) {\n"; - emitRangeBSearch( state, level+1, mid+1, high ); - out << TABS(level) << "} else\n"; - TRANS_GOTO(data[mid].value, level+1) << "\n"; - } - else if ( anyLower && !anyHigher ) { - /* Can go lower than mid but not higher. */ - out << TABS(level) << "if ( " << GET_WIDE_KEY(state) << " < " << - KEY(data[mid].lowKey) << " ) {\n"; - emitRangeBSearch( state, level+1, low, mid-1 ); - - /* if the higher is the highest in the alphabet then there is no - * sense testing it. */ - if ( limitHigh ) { - out << TABS(level) << "} else\n"; - TRANS_GOTO(data[mid].value, level+1) << "\n"; - } - else { - out << TABS(level) << "} else if ( " << GET_WIDE_KEY(state) << " <= " << - KEY(data[mid].highKey) << " )\n"; - TRANS_GOTO(data[mid].value, level+1) << "\n"; - } - } - else if ( !anyLower && anyHigher ) { - /* Can go higher than mid but not lower. */ - out << TABS(level) << "if ( " << GET_WIDE_KEY(state) << " > " << - KEY(data[mid].highKey) << " ) {\n"; - emitRangeBSearch( state, level+1, mid+1, high ); - - /* If the lower end is the lowest in the alphabet then there is no - * sense testing it. */ - if ( limitLow ) { - out << TABS(level) << "} else\n"; - TRANS_GOTO(data[mid].value, level+1) << "\n"; - } - else { - out << TABS(level) << "} else if ( " << GET_WIDE_KEY(state) << " >= " << - KEY(data[mid].lowKey) << " )\n"; - TRANS_GOTO(data[mid].value, level+1) << "\n"; - } - } - else { - /* Cannot go higher or lower than mid. It's mid or bust. What - * tests to do depends on limits of alphabet. 
*/ - if ( !limitLow && !limitHigh ) { - out << TABS(level) << "if ( " << KEY(data[mid].lowKey) << " <= " << - GET_WIDE_KEY(state) << " && " << GET_WIDE_KEY(state) << " <= " << - KEY(data[mid].highKey) << " )\n"; - TRANS_GOTO(data[mid].value, level+1) << "\n"; - } - else if ( limitLow && !limitHigh ) { - out << TABS(level) << "if ( " << GET_WIDE_KEY(state) << " <= " << - KEY(data[mid].highKey) << " )\n"; - TRANS_GOTO(data[mid].value, level+1) << "\n"; - } - else if ( !limitLow && limitHigh ) { - out << TABS(level) << "if ( " << KEY(data[mid].lowKey) << " <= " << - GET_WIDE_KEY(state) << " )\n"; - TRANS_GOTO(data[mid].value, level+1) << "\n"; - } - else { - /* Both high and low are at the limit. No tests to do. */ - TRANS_GOTO(data[mid].value, level+1) << "\n"; - } - } -} - -void FsmCodeGen::COND_TRANSLATE( GenStateCond *stateCond, int level ) -{ - GenCondSpace *condSpace = stateCond->condSpace; - out << TABS(level) << "_widec = " << CAST(WIDE_ALPH_TYPE()) << "(" << - KEY(condSpace->baseKey) << " + (" << GET_KEY() << - " - " << KEY(keyOps->minKey) << "));\n"; - - for ( GenCondSet::Iter csi = condSpace->condSet; csi.lte(); csi++ ) { - out << TABS(level) << "if ( "; - CONDITION( out, *csi ); - Size condValOffset = ((1 << csi.pos()) * keyOps->alphSize()); - out << " ) _widec += " << condValOffset << ";\n"; - } -} - -void FsmCodeGen::emitCondBSearch( RedState *state, int level, int low, int high ) -{ - /* Get the mid position, staying on the lower end of the range. */ - int mid = (low + high) >> 1; - GenStateCond **data = state->stateCondVect.data; - - /* Determine if we need to look higher or lower. */ - bool anyLower = mid > low; - bool anyHigher = mid < high; - - /* Determine if the keys at mid are the limits of the alphabet. */ - bool limitLow = data[mid]->lowKey == keyOps->minKey; - bool limitHigh = data[mid]->highKey == keyOps->maxKey; - - if ( anyLower && anyHigher ) { - /* Can go lower and higher than mid. 
*/ - out << TABS(level) << "if ( " << GET_KEY() << " < " << - KEY(data[mid]->lowKey) << " ) {\n"; - emitCondBSearch( state, level+1, low, mid-1 ); - out << TABS(level) << "} else if ( " << GET_KEY() << " > " << - KEY(data[mid]->highKey) << " ) {\n"; - emitCondBSearch( state, level+1, mid+1, high ); - out << TABS(level) << "} else {\n"; - COND_TRANSLATE(data[mid], level+1); - out << TABS(level) << "}\n"; - } - else if ( anyLower && !anyHigher ) { - /* Can go lower than mid but not higher. */ - out << TABS(level) << "if ( " << GET_KEY() << " < " << - KEY(data[mid]->lowKey) << " ) {\n"; - emitCondBSearch( state, level+1, low, mid-1 ); - - /* if the higher is the highest in the alphabet then there is no - * sense testing it. */ - if ( limitHigh ) { - out << TABS(level) << "} else {\n"; - COND_TRANSLATE(data[mid], level+1); - out << TABS(level) << "}\n"; - } - else { - out << TABS(level) << "} else if ( " << GET_KEY() << " <= " << - KEY(data[mid]->highKey) << " ) {\n"; - COND_TRANSLATE(data[mid], level+1); - out << TABS(level) << "}\n"; - } - } - else if ( !anyLower && anyHigher ) { - /* Can go higher than mid but not lower. */ - out << TABS(level) << "if ( " << GET_KEY() << " > " << - KEY(data[mid]->highKey) << " ) {\n"; - emitCondBSearch( state, level+1, mid+1, high ); - - /* If the lower end is the lowest in the alphabet then there is no - * sense testing it. */ - if ( limitLow ) { - out << TABS(level) << "} else {\n"; - COND_TRANSLATE(data[mid], level+1); - out << TABS(level) << "}\n"; - } - else { - out << TABS(level) << "} else if ( " << GET_KEY() << " >= " << - KEY(data[mid]->lowKey) << " ) {\n"; - COND_TRANSLATE(data[mid], level+1); - out << TABS(level) << "}\n"; - } - } - else { - /* Cannot go higher or lower than mid. It's mid or bust. What - * tests to do depends on limits of alphabet. 
*/ - if ( !limitLow && !limitHigh ) { - out << TABS(level) << "if ( " << KEY(data[mid]->lowKey) << " <= " << - GET_KEY() << " && " << GET_KEY() << " <= " << - KEY(data[mid]->highKey) << " ) {\n"; - COND_TRANSLATE(data[mid], level+1); - out << TABS(level) << "}\n"; - } - else if ( limitLow && !limitHigh ) { - out << TABS(level) << "if ( " << GET_KEY() << " <= " << - KEY(data[mid]->highKey) << " ) {\n"; - COND_TRANSLATE(data[mid], level+1); - out << TABS(level) << "}\n"; - } - else if ( !limitLow && limitHigh ) { - out << TABS(level) << "if ( " << KEY(data[mid]->lowKey) << " <= " << - GET_KEY() << " )\n {"; - COND_TRANSLATE(data[mid], level+1); - out << TABS(level) << "}\n"; - } - else { - /* Both high and low are at the limit. No tests to do. */ - COND_TRANSLATE(data[mid], level); - } - } -} - -std::ostream &FsmCodeGen::STATE_GOTOS() -{ - for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { - if ( st == redFsm->errState ) - STATE_GOTO_ERROR(); - else { - /* Writing code above state gotos. */ - GOTO_HEADER( st ); - - if ( st->stateCondVect.length() > 0 ) { - out << " _widec = " << GET_KEY() << ";\n"; - emitCondBSearch( st, 1, 0, st->stateCondVect.length() - 1 ); - } - - /* Try singles. */ - if ( st->outSingle.length() > 0 ) - emitSingleSwitch( st ); - - /* Default case is to binary search for the ranges, if that fails then */ - if ( st->outRange.length() > 0 ) - emitRangeBSearch( st, 1, 0, st->outRange.length() - 1 ); - - /* Write the default transition. 
*/ - TRANS_GOTO( st->defTrans, 1 ) << "\n"; - } - } - return out; -} - -unsigned int FsmCodeGen::TO_STATE_ACTION( RedState *state ) -{ - int act = 0; - if ( state->toStateAction != 0 ) - act = state->toStateAction->location+1; - return act; -} - -unsigned int FsmCodeGen::FROM_STATE_ACTION( RedState *state ) -{ - int act = 0; - if ( state->fromStateAction != 0 ) - act = state->fromStateAction->location+1; - return act; -} - -std::ostream &FsmCodeGen::TO_STATE_ACTIONS() -{ - /* Take one off for the psuedo start state. */ - int numStates = redFsm->stateList.length(); - unsigned int *vals = new unsigned int[numStates]; - memset( vals, 0, sizeof(unsigned int)*numStates ); - - for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) - vals[st->id] = TO_STATE_ACTION(st); - - out << "\t"; - for ( int st = 0; st < redFsm->nextStateId; st++ ) { - /* Write any eof action. */ - out << vals[st]; - if ( st < numStates-1 ) { - out << ", "; - if ( (st+1) % IALL == 0 ) - out << "\n\t"; - } - } - out << "\n"; - delete[] vals; - return out; -} - -std::ostream &FsmCodeGen::FROM_STATE_ACTIONS() -{ - /* Take one off for the psuedo start state. */ - int numStates = redFsm->stateList.length(); - unsigned int *vals = new unsigned int[numStates]; - memset( vals, 0, sizeof(unsigned int)*numStates ); - - for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) - vals[st->id] = FROM_STATE_ACTION(st); - - out << "\t"; - for ( int st = 0; st < redFsm->nextStateId; st++ ) { - /* Write any eof action. */ - out << vals[st]; - if ( st < numStates-1 ) { - out << ", "; - if ( (st+1) % IALL == 0 ) - out << "\n\t"; - } - } - out << "\n"; - delete[] vals; - return out; -} - -bool FsmCodeGen::IN_TRANS_ACTIONS( RedState *state ) -{ - /* Emit any transitions that have actions and that go to this state. 
*/ - for ( int it = 0; it < state->numInTrans; it++ ) { - RedTrans *trans = state->inTrans[it]; - if ( trans->action != 0 && trans->labelNeeded ) { - /* Write the label for the transition so it can be jumped to. */ - out << "tr" << trans->id << ":\n"; - - /* If the action contains a next, then we must preload the current - * state since the action may or may not set it. */ - if ( trans->action->anyNextStmt() ) - out << " " << CS() << " = " << trans->targ->id << ";\n"; - - /* Write each action in the list. */ - for ( GenActionTable::Iter item = trans->action->key; item.lte(); item++ ) - ACTION( out, item->value, trans->targ->id, false ); - - out << "\tgoto st" << trans->targ->id << ";\n"; - } - } - - return 0; -} - -/* Called from FsmCodeGen::STATE_GOTOS just before writing the gotos for each - * state. */ -void FsmCodeGen::GOTO_HEADER( RedState *state ) -{ - IN_TRANS_ACTIONS( state ); - - if ( state->labelNeeded ) - out << "st" << state->id << ":\n"; - - if ( state->toStateAction != 0 ) { - /* Remember that we wrote an action. Write every action in the list. */ - for ( GenActionTable::Iter item = state->toStateAction->key; item.lte(); item++ ) - ACTION( out, item->value, state->id, false ); - } - - /* Give the state a switch case. */ - out << "case " << state->id << ":\n"; - - /* Advance and test buffer pos. */ - out << - " if ( ++" << P() << " == " << PE() << " )\n" - " goto out" << state->id << ";\n"; - - if ( state->fromStateAction != 0 ) { - /* Remember that we wrote an action. Write every action in the list. */ - for ( GenActionTable::Iter item = state->fromStateAction->key; item.lte(); item++ ) - ACTION( out, item->value, state->id, false ); - } - - /* Record the prev state if necessary. */ - if ( state->anyRegCurStateRef() ) - out << " _ps = " << state->id << ";\n"; -} - -void FsmCodeGen::STATE_GOTO_ERROR() -{ - /* In the error state we need to emit some stuff that usually goes into - * the header. 
*/ - RedState *state = redFsm->errState; - IN_TRANS_ACTIONS( state ); - - if ( state->labelNeeded ) - out << "st" << state->id << ":\n"; - - /* We do not need a case label here because the the error state is checked - * at the head of the loop. */ - - /* Break out here. */ - out << " goto out" << state->id << ";\n"; -} - - -/* Emit the goto to take for a given transition. */ -std::ostream &FsmCodeGen::TRANS_GOTO( RedTrans *trans, int level ) -{ - if ( trans->action != 0 ) { - /* Go to the transition which will go to the state. */ - out << TABS(level) << "goto tr" << trans->id << ";"; - } - else { - /* Go directly to the target state. */ - out << TABS(level) << "goto st" << trans->targ->id << ";"; - } - return out; -} - -std::ostream &FsmCodeGen::EXIT_STATES() -{ - for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { - out << " case " << st->id << ": out" << st->id << ": "; - if ( st->eofTrans != 0 ) { - out << "if ( " << PE() << " == " << PEOF() << " ) {"; - TRANS_GOTO( st->eofTrans, 0 ); - out << "\n"; - out << "}"; - } - - /* Exit. */ - out << CS() << " = " << st->id << "; goto out; \n"; - } - return out; -} - -/* Set up labelNeeded flag for each state. */ -void FsmCodeGen::setLabelsNeeded() -{ - /* Do not use all labels by default, init all labelNeeded vars to false. */ - for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) - st->labelNeeded = false; - - if ( redFsm->errState != 0 && redFsm->anyLmSwitchError() ) - redFsm->errState->labelNeeded = true; - - /* Walk all transitions and set only those that have targs. */ - for ( RedTransSet::Iter trans = redFsm->transSet; trans.lte(); trans++ ) { - /* If there is no action with a next statement, then the label will be - * needed. 
*/ - if ( trans->action == 0 || !trans->action->anyNextStmt() ) - trans->targ->labelNeeded = true; - } - - for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) - st->outNeeded = st->labelNeeded; -} - -void FsmCodeGen::writeData() -{ - out << "#define " << START() << " " << START_STATE_ID() << "\n"; - out << "#define " << FIRST_FINAL() << " " << FIRST_FINAL_STATE() << "\n"; - out << "#define " << ERROR() << " " << ERROR_STATE() << "\n"; - out << "#define false 0\n"; - out << "#define true 1\n"; - out << "\n"; - - out << "long " << ENTRY_BY_REGION() << "[] = {\n\t"; - for ( int i = 0; i < fsmTables->numRegions; i++ ) { - out << fsmTables->entryByRegion[i]; - - if ( i < fsmTables->numRegions-1 ) { - out << ", "; - if ( (i+1) % 8 == 0 ) - out << "\n\t"; - } - } - out << "\n};\n\n"; - - out << - "FsmTables fsmTables_start =\n" - "{\n" - " 0, " /* actions */ - " 0, " /* keyOffsets */ - " 0, " /* transKeys */ - " 0, " /* singleLengths */ - " 0, " /* rangeLengths */ - " 0, " /* indexOffsets */ - " 0, " /* transTargsWI */ - " 0, " /* transActionsWI */ - " 0, " /* toStateActions */ - " 0, " /* fromStateActions */ - " 0, " /* eofActions */ - " 0,\n" /* eofTargs */ - " " << ENTRY_BY_REGION() << ",\n" - - "\n" - " 0, " /* numStates */ - " 0, " /* numActions */ - " 0, " /* numTransKeys */ - " 0, " /* numSingleLengths */ - " 0, " /* numRangeLengths */ - " 0, " /* numIndexOffsets */ - " 0, " /* numTransTargsWI */ - " 0,\n" /* numTransActionsWI */ - " " << redFsm->regionToEntry.length() << ",\n" - "\n" - " " << START() << ",\n" - " " << FIRST_FINAL() << ",\n" - " " << ERROR() << ",\n" - "\n" - " 0,\n" /* actionSwitch */ - " 0\n" /* numActionSwitch */ - "};\n" - "\n"; -} - -void FsmCodeGen::writeInit() -{ - out << - " " << CS() << " = " << START() << ";\n"; - - /* If there are any calls, then the stack top needs initialization. 
*/ - if ( redFsm->anyActionCalls() || redFsm->anyActionRets() ) - out << "\t" << TOP() << " = 0;\n"; - - out << - " " << TOKSTART() << " = 0;\n" - " " << TOKEND() << " = 0;\n" - " " << ACT() << " = 0;\n"; - - out << "\n"; -} - -void FsmCodeGen::writeExec() -{ - setLabelsNeeded(); - - out << - "void fsmExecute( FsmRun *fsmRun, InputStream *inputStream )\n" - "{\n" - "/*_resume:*/\n"; - - if ( redFsm->errState != 0 ) { - out << - " if ( " << CS() << " == " << redFsm->errState->id << " )\n" - " goto out;\n"; - } - - out << - " if ( " << P() << " == " << PE() << " )\n" - " goto out_switch;\n" - " --" << P() << ";\n" - "\n" - " switch ( " << CS() << " )\n {\n"; - STATE_GOTOS() << - " }\n"; - - out << - "out_switch:\n" - " switch ( " << CS() << " )\n {\n"; - EXIT_STATES() << - " }\n"; - - out << - " out: {}\n" - "}\n" - "\n"; -} - -void FsmCodeGen::writeIncludes() -{ - out << - "#include <colm/pdarun.h>\n" - "#include <colm/fsmrun.h>\n" - "#include <colm/debug.h>\n" - "#include <colm/bytecode.h>\n" - "#include <stdio.h>\n" - "#include <stdlib.h>\n" - "#include <string.h>\n" - "#include <assert.h>\n" - "#include <colm/config.h>\n" - "#include <colm/defs.h>\n" - "#include <colm/input.h>\n" - "#include <colm/tree.h>\n" - "#include <colm/program.h>\n" - "#include <colm/colm.h>\n" - "\n" - "\n"; -} - -void FsmCodeGen::writeCode() -{ - redFsm->depthFirstOrdering(); - - - writeData(); - writeExec(); - - /* Referenced in the runtime lib, but used only in the compiler. Probably - * should use the preprocessor to make these go away. 
*/ - out << - "void sendNamedLangEl( Program *prg, Tree **tree, PdaRun *pdaRun,\n" - " FsmRun *fsmRun, InputStream *inputStream ) { }\n" - "void initBindings( PdaRun *pdaRun ) {}\n" - "void pushBinding( PdaRun *pdaRun, ParseTree *parseTree ) {}\n" - "void popBinding( PdaRun *pdaRun, ParseTree *tree ) {}\n" - "void initStaticFuncs() {}\n" - "void initPatternFuncs() {}\n" - "void initReplFuncs() {}\n" - "void initInputFuncs();\n" - "\n" - "\n"; -} - -ostream &FsmCodeGen::source_warning( const InputLoc &loc ) -{ - cerr << sourceFileName << ":" << loc.line << ":" << loc.col << ": warning: "; - return cerr; -} - -ostream &FsmCodeGen::source_error( const InputLoc &loc ) -{ - codeGenErrCount += 1; - assert( sourceFileName != 0 ); - cerr << sourceFileName << ":" << loc.line << ":" << loc.col << ": "; - return cerr; -} - - diff --git a/colm/fsmcodegen.h b/colm/fsmcodegen.h deleted file mode 100644 index 41cd88ec..00000000 --- a/colm/fsmcodegen.h +++ /dev/null @@ -1,212 +0,0 @@ -/* - * Copyright 2001-2012 Adrian Thurston <thurston@complang.org> - */ - -/* This file is part of Colm. - * - * Colm is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * Colm is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with Colm; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#ifndef _FSMCODEGEN_H -#define _FSMCODEGEN_H - -#include <iostream> -#include <string> -#include <stdio.h> -#include "keyops.h" -#include "parsedata.h" -#include "redfsm.h" -#include "fsmrun.h" - -using std::string; -using std::ostream; - -/* Integer array line length. */ -#define IALL 8 - -/* Forwards. */ -struct RedFsm; -struct RedState; -struct GenAction; -struct NameInst; -struct RedAction; -struct LongestMatch; -struct TokenDef; -struct InlineList; -struct InlineItem; -struct NameInst; -struct FsmCodeGen; - -typedef unsigned long ulong; -typedef unsigned char uchar; - - -/* - * The interface to the parser - */ - -std::ostream *openOutput( char *inputFile ); - -inline string itoa( int i ) -{ - char buf[16]; - sprintf( buf, "%i", i ); - return buf; -} - -/* - * class FsmCodeGen - */ -class FsmCodeGen -{ -public: - FsmCodeGen( const char *sourceFileName, const char *fsmName, ostream &out, - RedFsm *redFsm, FsmTables *fsmTables ); - -protected: - string FSM_NAME(); - string START_STATE_ID(); - ostream &ACTIONS_ARRAY(); - string GET_WIDE_KEY(); - string GET_WIDE_KEY( RedState *state ); - string TABS( int level ); - string KEY( Key key ); - string LDIR_PATH( char *path ); - void ACTION( ostream &ret, GenAction *action, int targState, bool inFinish ); - void CONDITION( ostream &ret, GenAction *condition ); - string ALPH_TYPE(); - string WIDE_ALPH_TYPE(); - string ARRAY_TYPE( unsigned long maxVal ); - - string ARR_OFF( string ptr, string offset ); - string CAST( string type ); - string UINT(); - string GET_KEY(); - - string ACCESS() { return "fsmRun->"; } - - string P() { return ACCESS() + "p"; } - string PE() { return ACCESS() + "pe"; } - string PEOF() { return ACCESS() + "peof"; } - - string CS(); - string TOP() { return ACCESS() + "top"; } - string 
TOKSTART() { return ACCESS() + "tokstart"; } - string TOKEND() { return ACCESS() + "tokend"; } - string ACT() { return ACCESS() + "act"; } - string MATCHED_TOKEN() { return ACCESS() + "matchedToken"; } - - string DATA_PREFIX(); - - string START() { return DATA_PREFIX() + "start"; } - string ERROR() { return DATA_PREFIX() + "error"; } - string FIRST_FINAL() { return DATA_PREFIX() + "first_final"; } - - string ENTRY_BY_REGION() { return DATA_PREFIX() + "entryByRegion"; } - - - void INLINE_LIST( ostream &ret, InlineList *inlineList, - int targState, bool inFinish ); - void EXEC_TOKEND( ostream &ret, InlineItem *item, int targState, int inFinish ); - void EXECTE( ostream &ret, InlineItem *item, int targState, int inFinish ); - void LM_SWITCH( ostream &ret, InlineItem *item, int targState, int inFinish ); - void SET_ACT( ostream &ret, InlineItem *item ); - void INIT_TOKSTART( ostream &ret, InlineItem *item ); - void INIT_ACT( ostream &ret, InlineItem *item ); - void SET_TOKSTART( ostream &ret, InlineItem *item ); - void SET_TOKEND( ostream &ret, InlineItem *item ); - void GET_TOKEND( ostream &ret, InlineItem *item ); - void SUB_ACTION( ostream &ret, InlineItem *item, int targState, bool inFinish ); - void LM_ON_LAST( ostream &ret, InlineItem *item ); - void LM_ON_NEXT( ostream &ret, InlineItem *item ); - void LM_ON_LAG_BEHIND( ostream &ret, InlineItem *item ); - void EXEC_TOKEND( ostream &ret ); - void EMIT_TOKEN( ostream &ret, LangEl *token ); - - string ERROR_STATE(); - string FIRST_FINAL_STATE(); - - string PTR_CONST(); - ostream &OPEN_ARRAY( string type, string name ); - ostream &CLOSE_ARRAY(); - ostream &STATIC_VAR( string type, string name ); - - string CTRL_FLOW(); - - ostream &source_warning(const InputLoc &loc); - ostream &source_error(const InputLoc &loc); - - unsigned int arrayTypeSize( unsigned long maxVal ); - -/* subclass */ - -public: - const char *sourceFileName; - const char *fsmName; - ostream &out; - RedFsm *redFsm; - FsmTables *fsmTables; - int 
codeGenErrCount; - - /* Write options. */ - bool dataPrefix; - bool writeFirstFinal; - bool writeErr; - - std::ostream &TO_STATE_ACTION_SWITCH(); - std::ostream &FROM_STATE_ACTION_SWITCH(); - std::ostream &ACTION_SWITCH(); - std::ostream &STATE_GOTOS(); - std::ostream &TRANSITIONS(); - std::ostream &EXEC_FUNCS(); - - unsigned int TO_STATE_ACTION( RedState *state ); - unsigned int FROM_STATE_ACTION( RedState *state ); - - std::ostream &TO_STATE_ACTIONS(); - std::ostream &FROM_STATE_ACTIONS(); - - void COND_TRANSLATE( GenStateCond *stateCond, int level ); - void emitCondBSearch( RedState *state, int level, int low, int high ); - void STATE_CONDS( RedState *state, bool genDefault ); - - void emitSingleSwitch( RedState *state ); - void emitRangeBSearch( RedState *state, int level, int low, int high ); - - std::ostream &EXIT_STATES(); - std::ostream &TRANS_GOTO( RedTrans *trans, int level ); - std::ostream &FINISH_CASES(); - - void writeIncludes(); - void writeData(); - void writeInit(); - void writeExec(); - void writeCode(); - void writeMain(); - -protected: - bool useAgainLabel(); - - /* Called from GotoCodeGen::STATE_GOTOS just before writing the gotos for - * each state. */ - bool IN_TRANS_ACTIONS( RedState *state ); - void GOTO_HEADER( RedState *state ); - void STATE_GOTO_ERROR(); - - /* Set up labelNeeded flag for each state. */ - void setLabelsNeeded(); -}; - -#endif /* _FSMCODEGEN_H */ diff --git a/colm/fsmexec.cc b/colm/fsmexec.cc deleted file mode 100644 index f922c7a4..00000000 --- a/colm/fsmexec.cc +++ /dev/null @@ -1,208 +0,0 @@ -/* - * Copyright 2007-2012 Adrian Thurston <thurston@complang.org> - */ - -/* This file is part of Colm. - * - * Colm is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. 
- * - * Colm is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Colm; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include <string.h> -#include <iostream> - -#include "config.h" -#include "defs.h" -#include "fsmrun.h" -#include "redfsm.h" -#include "parsedata.h" -#include "parsetree.h" -#include "pdarun.h" -#include "global.h" - -void execAction( FsmRun *fsmRun, GenAction *genAction ) -{ - for ( InlineList::Iter item = *genAction->inlineList; item.lte(); item++ ) { - switch ( item->type ) { - case InlineItem::Text: - assert(false); - break; - case InlineItem::LmSetActId: - fsmRun->act = item->longestMatchPart->longestMatchId; - break; - case InlineItem::LmSetTokEnd: - fsmRun->tokend = fsmRun->p + 1; - break; - case InlineItem::LmInitTokStart: - assert(false); - break; - case InlineItem::LmInitAct: - fsmRun->act = 0; - break; - case InlineItem::LmSetTokStart: - fsmRun->tokstart = fsmRun->p; - break; - case InlineItem::LmSwitch: - /* If the switch handles error then we also forced the error state. It - * will exist. 
*/ - fsmRun->p = fsmRun->tokend; - if ( item->tokenRegion->lmSwitchHandlesError && fsmRun->act == 0 ) { - fsmRun->p = fsmRun->tokstart; - fsmRun->cs = fsmRun->tables->errorState; - } - else { - for ( TokenDefListReg::Iter lmi = item->tokenRegion->tokenDefList; - lmi.lte(); lmi++ ) - { - if ( lmi->inLmSelect && fsmRun->act == lmi->longestMatchId ) - fsmRun->matchedToken = lmi->tdLangEl->id; - } - } - fsmRun->returnResult = true; - break; - case InlineItem::LmOnLast: - fsmRun->p += 1; - fsmRun->matchedToken = item->longestMatchPart->tdLangEl->id; - fsmRun->returnResult = true; - break; - case InlineItem::LmOnNext: - fsmRun->matchedToken = item->longestMatchPart->tdLangEl->id; - fsmRun->returnResult = true; - break; - case InlineItem::LmOnLagBehind: - fsmRun->p = fsmRun->tokend; - fsmRun->matchedToken = item->longestMatchPart->tdLangEl->id; - fsmRun->returnResult = true; - break; - } - } - - if ( genAction->markType == MarkMark ) - fsmRun->mark[genAction->markId-1] = fsmRun->p; -} - -void fsmExecute( FsmRun *fsmRun, InputStream *inputStream ) -{ - int _klen; - unsigned int _trans; - const long *_acts; - unsigned int _nacts; - const char *_keys; - - /* Init the token match to nothing (the sentinal). 
*/ - fsmRun->matchedToken = 0; - -/*_resume:*/ - if ( fsmRun->cs == fsmRun->tables->errorState ) - goto out; - - if ( fsmRun->p == fsmRun->pe ) - goto out; - -_loop_head: - _acts = fsmRun->tables->actions + fsmRun->tables->fromStateActions[fsmRun->cs]; - _nacts = (unsigned int) *_acts++; - while ( _nacts-- > 0 ) - execAction( fsmRun, fsmRun->tables->actionSwitch[*_acts++] ); - - _keys = fsmRun->tables->transKeys + fsmRun->tables->keyOffsets[fsmRun->cs]; - _trans = fsmRun->tables->indexOffsets[fsmRun->cs]; - - _klen = fsmRun->tables->singleLengths[fsmRun->cs]; - if ( _klen > 0 ) { - const char *_lower = _keys; - const char *_mid; - const char *_upper = _keys + _klen - 1; - while (1) { - if ( _upper < _lower ) - break; - - _mid = _lower + ((_upper-_lower) >> 1); - if ( (*fsmRun->p) < *_mid ) - _upper = _mid - 1; - else if ( (*fsmRun->p) > *_mid ) - _lower = _mid + 1; - else { - _trans += (_mid - _keys); - goto _match; - } - } - _keys += _klen; - _trans += _klen; - } - - _klen = fsmRun->tables->rangeLengths[fsmRun->cs]; - if ( _klen > 0 ) { - const char *_lower = _keys; - const char *_mid; - const char *_upper = _keys + (_klen<<1) - 2; - while (1) { - if ( _upper < _lower ) - break; - - _mid = _lower + (((_upper-_lower) >> 1) & ~1); - if ( (*fsmRun->p) < _mid[0] ) - _upper = _mid - 2; - else if ( (*fsmRun->p) > _mid[1] ) - _lower = _mid + 2; - else { - _trans += ((_mid - _keys)>>1); - goto _match; - } - } - _trans += _klen; - } - -_match: - fsmRun->cs = fsmRun->tables->transTargsWI[_trans]; - - if ( fsmRun->tables->transActionsWI[_trans] == 0 ) - goto _again; - - fsmRun->returnResult = false; - _acts = fsmRun->tables->actions + fsmRun->tables->transActionsWI[_trans]; - _nacts = (unsigned int) *_acts++; - while ( _nacts-- > 0 ) - execAction( fsmRun, fsmRun->tables->actionSwitch[*_acts++] ); - if ( fsmRun->returnResult ) - return; - -_again: - _acts = fsmRun->tables->actions + fsmRun->tables->toStateActions[fsmRun->cs]; - _nacts = (unsigned int) *_acts++; - while ( 
_nacts-- > 0 ) - execAction( fsmRun, fsmRun->tables->actionSwitch[*_acts++] ); - - if ( fsmRun->cs == fsmRun->tables->errorState ) - goto out; - - if ( ++fsmRun->p != fsmRun->pe ) - goto _loop_head; -out: - if ( fsmRun->p == fsmRun->peof ) { - fsmRun->returnResult = false; - _acts = fsmRun->tables->actions + fsmRun->tables->eofActions[fsmRun->cs]; - _nacts = (unsigned int) *_acts++; - - if ( fsmRun->tables->eofTargs[fsmRun->cs] >= 0 ) - fsmRun->cs = fsmRun->tables->eofTargs[fsmRun->cs]; - - while ( _nacts-- > 0 ) - execAction( fsmRun, fsmRun->tables->actionSwitch[*_acts++] ); - if ( fsmRun->returnResult ) - return; - } -} - - diff --git a/colm/fsmgraph.cc b/colm/fsmgraph.cc deleted file mode 100644 index 590d7902..00000000 --- a/colm/fsmgraph.cc +++ /dev/null @@ -1,1408 +0,0 @@ -/* - * Copyright 2006-2012 Adrian Thurston <thurston@complang.org> - */ - -/* This file is part of Colm. - * - * Colm is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * Colm is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Colm; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include <assert.h> -#include <iostream> - -#include "config.h" -#include "defs.h" -#include "fsmgraph.h" -#include "mergesort.h" - -using std::cerr; -using std::endl; - -/* Make a new state. The new state will be put on the graph's - * list of state. The new state can be created final or non final. */ -FsmState *FsmGraph::addState() -{ - /* Make the new state to return. 
*/ - FsmState *state = new FsmState(); - - if ( misfitAccounting ) { - /* Create the new state on the misfit list. All states are created - * with no foreign in transitions. */ - misfitList.append( state ); - } - else { - /* Create the new state. */ - stateList.append( state ); - } - - return state; -} - -/* Construct an FSM that is the concatenation of an array of characters. A new - * machine will be made that has len+1 states with one transition between each - * state for each integer in str. IsSigned determines if the integers are to - * be considered as signed or unsigned ints. */ -void FsmGraph::concatFsm( Key *str, int len ) -{ - /* Make the first state and set it as the start state. */ - FsmState *last = addState(); - setStartState( last ); - - /* Attach subsequent states. */ - for ( int i = 0; i < len; i++ ) { - FsmState *newState = addState(); - attachNewTrans( last, newState, str[i], str[i] ); - last = newState; - } - - /* Make the last state the final state. */ - setFinState( last ); -} - -/* Case insensitive version of concatFsm. */ -void FsmGraph::concatFsmCI( Key *str, int len ) -{ - /* Make the first state and set it as the start state. */ - FsmState *last = addState(); - setStartState( last ); - - /* Attach subsequent states. */ - for ( int i = 0; i < len; i++ ) { - FsmState *newState = addState(); - - KeySet keySet; - if ( str[i].isLower() ) - keySet.insert( str[i].toUpper() ); - if ( str[i].isUpper() ) - keySet.insert( str[i].toLower() ); - keySet.insert( str[i] ); - - for ( int i = 0; i < keySet.length(); i++ ) - attachNewTrans( last, newState, keySet[i], keySet[i] ); - - last = newState; - } - - /* Make the last state the final state. */ - setFinState( last ); -} - -/* Construct a machine that matches one character. A new machine will be made - * that has two states with a single transition between the states. IsSigned - * determines if the integers are to be considered as signed or unsigned ints. 
*/ -void FsmGraph::concatFsm( Key chr ) -{ - /* Two states first start, second final. */ - setStartState( addState() ); - - FsmState *end = addState(); - setFinState( end ); - - /* Attach on the character. */ - attachNewTrans( startState, end, chr, chr ); -} - -/* Construct a machine that matches any character in set. A new machine will - * be made that has two states and len transitions between the them. The set - * should be ordered correctly accroding to KeyOps and should not contain - * any duplicates. */ -void FsmGraph::orFsm( Key *set, int len ) -{ - /* Two states first start, second final. */ - setStartState( addState() ); - - FsmState *end = addState(); - setFinState( end ); - - for ( int i = 1; i < len; i++ ) - assert( set[i-1] < set[i] ); - - /* Attach on all the integers in the given string of ints. */ - for ( int i = 0; i < len; i++ ) - attachNewTrans( startState, end, set[i], set[i] ); -} - -/* Construct a machine that matches a range of characters. A new machine will - * be made with two states and a range transition between them. The range will - * match any characters from low to high inclusive. Low should be less than or - * equal to high otherwise undefined behaviour results. IsSigned determines - * if the integers are to be considered as signed or unsigned ints. */ -void FsmGraph::rangeFsm( Key low, Key high ) -{ - /* Two states first start, second final. */ - setStartState( addState() ); - - FsmState *end = addState(); - setFinState( end ); - - /* Attach using the range of characters. */ - attachNewTrans( startState, end, low, high ); -} - -/* Construct a machine that a repeated range of characters. */ -void FsmGraph::rangeStarFsm( Key low, Key high) -{ - /* One state which is final and is the start state. */ - setStartState( addState() ); - setFinState( startState ); - - /* Attach start to start using range of characters. */ - attachNewTrans( startState, startState, low, high ); -} - -/* Construct a machine that matches the empty string. 
A new machine will be - * made with only one state. The new state will be both a start and final - * state. IsSigned determines if the machine has a signed or unsigned - * alphabet. Fsm operations must be done on machines with the same alphabet - * signedness. */ -void FsmGraph::lambdaFsm( ) -{ - /* Give it one state with no transitions making it - * the start state and final state. */ - setStartState( addState() ); - setFinState( startState ); -} - -/* Construct a machine that matches nothing at all. A new machine will be - * made with only one state. It will not be final. */ -void FsmGraph::emptyFsm( ) -{ - /* Give it one state with no transitions making it - * the start state and final state. */ - setStartState( addState() ); -} - -void FsmGraph::transferOutData( FsmState *destState, FsmState *srcState ) -{ - for ( TransList::Iter trans = destState->outList; trans.lte(); trans++ ) { - if ( trans->toState != 0 ) { - /* Get the actions data from the outActionTable. */ - trans->actionTable.setActions( srcState->outActionTable ); - - /* Get the priorities from the outPriorTable. */ - trans->priorTable.setPriors( srcState->outPriorTable ); - } - } -} - -/* Kleene star operator. Makes this machine the kleene star of itself. Any - * transitions made going out of the machine and back into itself will be - * notified that they are leaving transitions by having the leavingFromState - * callback invoked. */ -void FsmGraph::starOp( ) -{ - /* For the merging process. */ - MergeData md; - - /* Turn on misfit accounting to possibly catch the old start state. */ - setMisfitAccounting( true ); - - /* Create the new new start state. It will be set final after the merging - * of the final states with the start state is complete. */ - FsmState *prevStartState = startState; - unsetStartState(); - setStartState( addState() ); - - /* Merge the new start state with the old one to isolate it. 
*/ - mergeStates( md, startState, prevStartState ); - - /* Merge the start state into all final states. Except the start state on - * the first pass. If the start state is set final we will be doubling up - * its transitions, which will get transfered to any final states that - * follow it in the final state set. This will be determined by the order - * of items in the final state set. To prevent this we just merge with the - * start on a second pass. */ - for ( StateSet::Iter st = finStateSet; st.lte(); st++ ) { - if ( *st != startState ) - mergeStatesLeaving( md, *st, startState ); - } - - /* Now it is safe to merge the start state with itself (provided it - * is set final). */ - if ( startState->isFinState() ) - mergeStatesLeaving( md, startState, startState ); - - /* Now ensure the new start state is a final state. */ - setFinState( startState ); - - /* Fill in any states that were newed up as combinations of others. */ - fillInStates( md ); - - /* Remove the misfits and turn off misfit accounting. */ - removeMisfits(); - setMisfitAccounting( false ); -} - -void FsmGraph::repeatOp( int times ) -{ - /* Must be 1 and up. 0 produces null machine and requires deleting this. */ - assert( times > 0 ); - - /* A repeat of one does absolutely nothing. */ - if ( times == 1 ) - return; - - /* Make a machine to make copies from. */ - FsmGraph *copyFrom = new FsmGraph( *this ); - - /* Concatentate duplicates onto the end up until before the last. */ - for ( int i = 1; i < times-1; i++ ) { - FsmGraph *dup = new FsmGraph( *copyFrom ); - doConcat( dup, 0, false ); - } - - /* Now use the copyFrom on the end. */ - doConcat( copyFrom, 0, false ); -} - -void FsmGraph::optionalRepeatOp( int times ) -{ - /* Must be 1 and up. 0 produces null machine and requires deleting this. */ - assert( times > 0 ); - - /* A repeat of one optional merely allows zero string. */ - if ( times == 1 ) { - setFinState( startState ); - return; - } - - /* Make a machine to make copies from. 
*/ - FsmGraph *copyFrom = new FsmGraph( *this ); - - /* The state set used in the from end of the concatenation. Starts with - * the initial final state set, then after each concatenation, gets set to - * the final states that come from the duplicate. */ - StateSet lastFinSet( finStateSet ); - - /* Set the start state final to allow zero copies. */ - setFinState( startState ); - - /* Concatenate duplicates onto the end up until before the last. */ - for ( int i = 1; i < times-1; i++ ) { - /* Make a duplicate for concating and set the fin bits to graph 2 so we - * can pick out its final states after the optional style concat. */ - FsmGraph *dup = new FsmGraph( *copyFrom ); - dup->setFinBits( SB_GRAPH2 ); - doConcat( dup, &lastFinSet, true ); - - /* Clear the last final state set and make the new one by taking only - * the final states that come from graph 2. Note that the index of the - * loop below shadows the outer loop counter, which is unaffected. */ - lastFinSet.empty(); - for ( int i = 0; i < finStateSet.length(); i++ ) { - /* If the state came from graph 2, add it to the last set and clear - * the bits. */ - FsmState *fs = finStateSet[i]; - if ( fs->stateBits & SB_GRAPH2 ) { - lastFinSet.insert( fs ); - fs->stateBits &= ~SB_GRAPH2; - } - } - } - - /* Now use the copyFrom on the end, no bits set, no bits to clear. */ - doConcat( copyFrom, &lastFinSet, true ); -} - - -/* Fsm concatenation worker. Supports treating the concatenation as optional, - * which essentially leaves the final states of machine one as final. */ -void FsmGraph::doConcat( FsmGraph *other, StateSet *fromStates, bool optional ) -{ - /* For the merging process. NOTE(review): startStateSet appears to be unused - * in this function. */ - StateSet finStateSetCopy, startStateSet; - MergeData md; - - /* Turn on misfit accounting for both graphs. */ - setMisfitAccounting( true ); - other->setMisfitAccounting( true ); - - /* Get the other's start state. */ - FsmState *otherStartState = other->startState; - - /* Unset other's start state before bringing in the entry points. 
*/ - other->unsetStartState(); - - /* Bring in the rest of other's entry points. */ - copyInEntryPoints( other ); - other->entryPoints.empty(); - - /* Bring in other's states into our state lists. */ - stateList.append( other->stateList ); - misfitList.append( other->misfitList ); - - /* If from states is not set, then get a copy of our final state set before - * we clobber it and use it instead. */ - if ( fromStates == 0 ) { - finStateSetCopy = finStateSet; - fromStates = &finStateSetCopy; - } - - /* Unless the concatenation is optional, unset all of our final states. - * Either way, take on the final states from other. */ - if ( !optional ) - unsetAllFinStates(); - finStateSet.insert( other->finStateSet ); - - /* Since other's lists are empty, we can delete the fsm without - * affecting any states. */ - delete other; - - /* Merge our former final states with the start state of other. */ - for ( int i = 0; i < fromStates->length(); i++ ) { - FsmState *state = fromStates->data[i]; - - /* Merge the former final state with other's start state. */ - mergeStatesLeaving( md, state, otherStartState ); - - /* If the former final state was not reset final then we must clear - * the state's out trans data. If it got reset final then it gets to - * keep its out trans data. This must be done before fillInStates gets - * called to prevent the data from being sourced. */ - if ( ! state->isFinState() ) - clearOutData( state ); - } - - /* Fill in any new states made from merging. */ - fillInStates( md ); - - /* Remove the misfits and turn off misfit accounting. */ - removeMisfits(); - setMisfitAccounting( false ); -} - -/* Concatenates other to the end of this machine. Other is deleted. Any - * transitions made leaving this machine and entering into other are notified - * that they are leaving transitions by having the leavingFromState callback - * invoked. */ -void FsmGraph::concatOp( FsmGraph *other ) -{ - /* Assert same signedness and return graph concatenation op. 
*/ - doConcat( other, 0, false ); -} - -/* Worker shared by the union, intersect and subtract operators. Moves the - * states, entry points and final states of other into this machine, deletes - * other, then merges both original start states into a newly added start - * state. Misfit accounting is left to the caller. */ -void FsmGraph::doOr( FsmGraph *other ) -{ - /* For the merging process. */ - MergeData md; - - /* Build a state set consisting of both start states. */ - StateSet startStateSet; - startStateSet.insert( startState ); - startStateSet.insert( other->startState ); - - /* Both of the original start states lose their start state status. */ - unsetStartState(); - other->unsetStartState(); - - /* Bring in the rest of other's entry points. */ - copyInEntryPoints( other ); - other->entryPoints.empty(); - - /* Merge the lists. This will move all the states from other - * into this. No states will be deleted. */ - stateList.append( other->stateList ); - misfitList.append( other->misfitList ); - - /* Move the final set data from other into this. */ - finStateSet.insert(other->finStateSet); - other->finStateSet.empty(); - - /* Since other's list is empty, we can delete the fsm without - * affecting any states. */ - delete other; - - /* Create a new start state. */ - setStartState( addState() ); - - /* Merge the start states. */ - mergeStates( md, startState, startStateSet.data, startStateSet.length() ); - - /* Fill in any new states made from merging. */ - fillInStates( md ); -} - -/* Unions other with this machine. Other is deleted. */ -void FsmGraph::unionOp( FsmGraph *other ) -{ - /* Turn on misfit accounting for both graphs. */ - setMisfitAccounting( true ); - other->setMisfitAccounting( true ); - - /* Call Worker routine. */ - doOr( other ); - - /* Remove the misfits and turn off misfit accounting. */ - removeMisfits(); - setMisfitAccounting( false ); -} - -/* Intersects other with this machine. Other is deleted. */ -void FsmGraph::intersectOp( FsmGraph *other ) -{ - /* Turn on misfit accounting for both graphs. */ - setMisfitAccounting( true ); - other->setMisfitAccounting( true ); - - /* Set the fin bits on this and other to want each other. 
*/ - setFinBits( SB_GRAPH1 ); - other->setFinBits( SB_GRAPH2 ); - - /* Call worker Or routine. */ - doOr( other ); - - /* Unset any final states that are no longer to - * be final due to final bits. */ - unsetIncompleteFinals(); - - /* Remove the misfits and turn off misfit accounting. */ - removeMisfits(); - setMisfitAccounting( false ); - - /* Remove states that have no path to a final state. */ - removeDeadEndStates(); -} - -/* Set subtracts other machine from this machine. Other is deleted. */ -void FsmGraph::subtractOp( FsmGraph *other ) -{ - /* Turn on misfit accounting for both graphs. */ - setMisfitAccounting( true ); - other->setMisfitAccounting( true ); - - /* Set the fin bits of other to be killers. */ - other->setFinBits( SB_GRAPH1 ); - - /* Call worker Or routine. */ - doOr( other ); - - /* Unset any final states that are no longer to - * be final due to final bits. */ - unsetKilledFinals(); - - /* Remove the misfits and turn off misfit accounting. */ - removeMisfits(); - setMisfitAccounting( false ); - - /* Remove states that have no path to a final state. */ - removeDeadEndStates(); -} -/* Return true if eptVect is non-null and holds an element whose targ is - * state. A null eptVect is treated as empty. */ -bool FsmGraph::inEptVect( EptVect *eptVect, FsmState *state ) -{ - if ( eptVect != 0 ) { - /* Vect is there, walk it looking for state. */ - for ( int i = 0; i < eptVect->length(); i++ ) { - if ( eptVect->data[i].targ == state ) - return true; - } - } - return false; -} - -/* Fill epsilon vectors in a root state from a given starting point. Employs - * a depth first search through the graph of epsilon transitions. */ -void FsmGraph::epsilonFillEptVectFrom( FsmState *root, FsmState *from, bool parentLeaving ) -{ - /* Walk the epsilon transitions out of the state. */ - for ( EpsilonTrans::Iter ep = from->epsilonTrans; ep.lte(); ep++ ) { - /* Find the entry point, if it does not resolve, ignore it. */ - EntryMapEl *enLow, *enHigh; - if ( entryPoints.findMulti( *ep, enLow, enHigh ) ) { - /* Loop the targets. 
*/ - for ( EntryMapEl *en = enLow; en <= enHigh; en++ ) { - /* Skip the state we are expanding from and any state already in - * the eptVect of the root. */ - FsmState *targ = en->value; - if ( targ != from && !inEptVect(root->eptVect, targ) ) { - /* Maybe need to create the eptVect. */ - if ( root->eptVect == 0 ) - root->eptVect = new EptVect(); - - /* If moving to a different graph or if any parent is - * leaving then we are leaving. */ - bool leaving = parentLeaving || - root->owningGraph != targ->owningGraph; - - /* All ok, add the target epsilon and recurse. */ - root->eptVect->append( EptVectEl(targ, leaving) ); - epsilonFillEptVectFrom( root, targ, leaving ); - } - } - } - } -} -/* For every eptVect entry whose target has an eptVect of its own, make a - * shadow copy of the target (once per target) and point the entry at the - * shadow. */ -void FsmGraph::shadowReadWriteStates( MergeData &md ) -{ - /* Init isolatedShadow algorithm data. */ - for ( StateList::Iter st = stateList; st.lte(); st++ ) - st->isolatedShadow = 0; - - /* Any states that may be both read from and written to must - * be shadowed. */ - for ( StateList::Iter st = stateList; st.lte(); st++ ) { - /* Find such states by looping through eptVect lists, which give us - * the states that will be read from. May cause us to visit the states - * that we are interested in more than once. */ - if ( st->eptVect != 0 ) { - /* For all states that will be read from. */ - for ( EptVect::Iter ept = *st->eptVect; ept.lte(); ept++ ) { - /* Check for read and write to the same state. */ - FsmState *targ = ept->targ; - if ( targ->eptVect != 0 ) { - /* State is to be written to, if the shadow is not already - * there, create it. */ - if ( targ->isolatedShadow == 0 ) { - FsmState *shadow = addState(); - mergeStates( md, shadow, targ ); - targ->isolatedShadow = shadow; - } - - /* Write the shadow into the eptVect entry so that it is the - * state that the epsilon transition will read from. */ - ept->targ = targ->isolatedShadow; - } - } - } - } -} -/* Fill the eptVect of every state, shadow the states that are both read - * from and written to, then merge each state with its epsilon targets and - * clear its epsilon transitions. */ -void FsmGraph::resolveEpsilonTrans( MergeData &md ) -{ - /* Walk the state list and invoke recursive worker on each state. 
*/ - for ( StateList::Iter st = stateList; st.lte(); st++ ) - epsilonFillEptVectFrom( st, st, false ); - - /* Prevent reading from and writing to the same state. */ - shadowReadWriteStates( md ); - - /* For all states that have epsilon transitions out, draw the transitions, - * clear the epsilon transitions. */ - for ( StateList::Iter st = stateList; st.lte(); st++ ) { - /* If the state has an eptVect then it has epsilon targets to merge in. */ - if ( st->eptVect != 0 ) { - /* Merge all the epsilon targets into the state. */ - for ( EptVect::Iter ept = *st->eptVect; ept.lte(); ept++ ) { - if ( ept->leaving ) - mergeStatesLeaving( md, st, ept->targ ); - else - mergeStates( md, st, ept->targ ); - } - - /* Clean up the target list. */ - delete st->eptVect; - st->eptVect = 0; - } - - /* Clear the epsilon transitions vector. */ - st->epsilonTrans.empty(); - } -} -/* Resolve the epsilon transitions of this machine. Every state is first put - * in owning graph zero. */ -void FsmGraph::epsilonOp() -{ - /* For merging process. */ - MergeData md; - - setMisfitAccounting( true ); - - for ( StateList::Iter st = stateList; st.lte(); st++ ) - st->owningGraph = 0; - - /* Perform merges. */ - resolveEpsilonTrans( md ); - - /* Epsilons can cause merges which leave behind unreachable states. */ - fillInStates( md ); - - /* Remove the misfits and turn off misfit accounting. */ - removeMisfits(); - setMisfitAccounting( false ); -} - -/* Make a new machine by joining together a bunch of machines without making - * any transitions between them. A negative finalId results in there being no - * final id. */ -void FsmGraph::joinOp( int startId, int finalId, FsmGraph **others, int numOthers ) -{ - /* For the merging process. */ - MergeData md; - - /* Set the owning machines. Start at one. Zero is reserved for the start - * and final states. 
*/ - for ( StateList::Iter st = stateList; st.lte(); st++ ) - st->owningGraph = 1; - for ( int m = 0; m < numOthers; m++ ) { - for ( StateList::Iter st = others[m]->stateList; st.lte(); st++ ) - st->owningGraph = 2+m; - } - - /* All machines lose start state status. */ - unsetStartState(); - for ( int m = 0; m < numOthers; m++ ) - others[m]->unsetStartState(); - - /* Bring the other machines into this. */ - for ( int m = 0; m < numOthers; m++ ) { - /* Bring in the rest of other's entry points. */ - copyInEntryPoints( others[m] ); - others[m]->entryPoints.empty(); - - /* Merge the lists. This will move all the states from other into - * this. No states will be deleted. The misfit list of other is - * asserted to be empty, so it is not brought in. */ - stateList.append( others[m]->stateList ); - assert( others[m]->misfitList.length() == 0 ); - - /* Move the final set data from other into this. */ - finStateSet.insert( others[m]->finStateSet ); - others[m]->finStateSet.empty(); - - /* Since other's list is empty, we can delete the fsm without - * affecting any states. */ - delete others[m]; - } - - /* Look up the start entry point. */ - EntryMapEl *enLow = 0, *enHigh = 0; - bool findRes = entryPoints.findMulti( startId, enLow, enHigh ); - if ( ! findRes ) { - /* No start state. Set a default one and proceed with the join. Note - * that the result of the join will be a very uninteresting machine. */ - setStartState( addState() ); - } - else { - /* There is at least one start state, create a state that will become - * the new start state. */ - FsmState *newStart = addState(); - setStartState( newStart ); - - /* The start state is in an owning machine class all its own. */ - newStart->owningGraph = 0; - - /* Create the set of states to merge from. */ - StateSet stateSet; - for ( EntryMapEl *en = enLow; en <= enHigh; en++ ) - stateSet.insert( en->value ); - - /* Merge in the set of start states into the new start state. 
*/ - mergeStates( md, newStart, stateSet.data, stateSet.length() ); - } - - /* Take a copy of the final state set, before unsetting them all. This - * will allow us to call clearOutData on the states that do not get - * final state status back. */ - StateSet finStateSetCopy = finStateSet; - - /* Now all final states are unset. */ - unsetAllFinStates(); - - if ( finalId >= 0 ) { - /* Create the implicit final state. */ - FsmState *finState = addState(); - setFinState( finState ); - - /* Assign an entry into the final state on the final state entry id. Note - * that there may already be an entry on this id. That is ok. Also set the - * final state owning machine id. It is in a class all its own. */ - setEntry( finalId, finState ); - finState->owningGraph = 0; - } - - /* Hand over to workers for resolving epsilon trans. This will merge states - * with the targets of their epsilon transitions. */ - resolveEpsilonTrans( md ); - - /* Clear the out data of any states that did not get final state status - * back. */ - for ( StateSet::Iter st = finStateSetCopy; st.lte(); st++ ) { - if ( !((*st)->stateBits & SB_ISFINAL) ) - clearOutData( *st ); - } - - /* Fill in any new states made from merging. */ - fillInStates( md ); - - /* Joining can be messy. Instead of having misfit accounting on (which is - * tricky here) do a full cleaning. */ - removeUnreachableStates(); -} -/* Bring the states, entry points and final states of the other machines into - * this one without merging anything. The others are deleted. The start - * state of this machine is left as it is. */ -void FsmGraph::globOp( FsmGraph **others, int numOthers ) -{ - /* All other machines lose start state status. */ - for ( int m = 0; m < numOthers; m++ ) - others[m]->unsetStartState(); - - /* Bring the other machines into this. */ - for ( int m = 0; m < numOthers; m++ ) { - /* Bring in the rest of other's entry points. */ - copyInEntryPoints( others[m] ); - others[m]->entryPoints.empty(); - - /* Merge the lists. This will move all the states from other into - * this. No states will be deleted. 
*/ - stateList.append( others[m]->stateList ); - assert( others[m]->misfitList.length() == 0 ); - - /* Move the final set data from other into this. */ - finStateSet.insert( others[m]->finStateSet ); - others[m]->finStateSet.empty(); - - /* Since other's list is empty, we can delete the fsm without - * affecting any states. */ - delete others[m]; - } -} -/* Give every entry key exactly one entry state. A key that has several entry - * states gets a newly added state that those entry states are merged into. */ -void FsmGraph::deterministicEntry() -{ - /* For the merging process. */ - MergeData md; - - /* States may lose their entry points, turn on misfit accounting. */ - setMisfitAccounting( true ); - - /* Get a copy of the entry map then clear all the entry points. As we - * iterate the old entry map finding duplicates we will add the entry - * points for the new states that we create. */ - EntryMap prevEntry = entryPoints; - unsetAllEntryPoints(); - - for ( int enId = 0; enId < prevEntry.length(); ) { - /* Count the number of states on this entry key. */ - int highId = enId; - while ( highId < prevEntry.length() && prevEntry[enId].key == prevEntry[highId].key ) - highId += 1; - - int numIds = highId - enId; - if ( numIds == 1 ) { - /* Only a single entry point, just set the entry. */ - setEntry( prevEntry[enId].key, prevEntry[enId].value ); - } - else { - /* Multiple entry points, need to create a new state and merge in - * all the targets of entry points. */ - FsmState *newEntry = addState(); - for ( int en = enId; en < highId; en++ ) - mergeStates( md, newEntry, prevEntry[en].value ); - - /* Add the new state as the single entry point. */ - setEntry( prevEntry[enId].key, newEntry ); - } - - enId += numIds; - } - - /* The old entry states may be unreachable. Remove the misfits and turn off - * misfit accounting. NOTE(review): unlike the other operators that merge - * states, fillInStates is not called here. Confirm that is intended. */ - removeMisfits(); - setMisfitAccounting( false ); -} - -/* Unset any final states that carry the SB_GRAPH1 killing bit, then clear - * that bit on every state of the original final state set. */ -void FsmGraph::unsetKilledFinals() -{ - /* Duplicate the final state set before we begin modifying it. 
*/ - StateSet fin( finStateSet ); - - for ( int s = 0; s < fin.length(); s++ ) { - /* Check for killing bit. */ - FsmState *state = fin.data[s]; - if ( state->stateBits & SB_GRAPH1 ) { - /* One final state is a killer, set to non-final. */ - unsetFinState( state ); - } - - /* Clear all killing bits. Non final states should never have had those - * state bits set in the first place. */ - state->stateBits &= ~SB_GRAPH1; - } -} - -/* Unset any final states that have some but not all of the SB_BOTH bits set, - * then clear those bits on every state of the original final state set. */ -void FsmGraph::unsetIncompleteFinals() -{ - /* Duplicate the final state set before we begin modifying it. */ - StateSet fin( finStateSet ); - - for ( int s = 0; s < fin.length(); s++ ) { - /* Check for one set but not the other. */ - FsmState *state = fin.data[s]; - if ( state->stateBits & SB_BOTH && - (state->stateBits & SB_BOTH) != SB_BOTH ) - { - /* One state wants the other but it is not there. */ - unsetFinState( state ); - } - - /* Clear wanting bits. Non final states should never have had those - * state bits set in the first place. */ - state->stateBits &= ~SB_BOTH; - } -} - -/* Ensure that the start state is free of entry points (aside from the fact - * that it is the start state). If the start state has entry points then make a - * new start state by merging with the old one. Useful before modifying start - * transitions. If the existing start state has any entry points other than the - * start state entry then modifying its transitions changes more than the start - * transitions. So isolate the start state by separating it out such that it - * only has start stateness as its entry point. */ -void FsmGraph::isolateStartState( ) -{ - /* For the merging process. */ - MergeData md; - - /* Bail out if the start state is already isolated. */ - if ( isStartStateIsolated() ) - return; - - /* Turn on misfit accounting to possibly catch the old start state. */ - setMisfitAccounting( true ); - - /* This will be the new start state. 
The existing start - * state is merged with it. */ - FsmState *prevStartState = startState; - unsetStartState(); - setStartState( addState() ); - - /* Merge the new start state with the old one to isolate it. */ - mergeStates( md, startState, prevStartState ); - - /* The stfill list and stateDict will be empty because the merging of the - * old start state into the new one will not have any conflicting - * transitions. */ - assert( md.stateDict.treeSize == 0 ); - assert( md.stfillHead == 0 ); - - /* The old start state may be unreachable. Remove the misfits and turn off - * misfit accounting. */ - removeMisfits(); - setMisfitAccounting( false ); -} -/* NOTE(review): this guard uses #if while the logging call sites use - * #ifdef COLM_LOG_CONDS, so the two only agree when the macro is defined to - * a nonzero value or not defined at all. */ -#if COLM_LOG_CONDS -void logCondSpace( CondSpace *condSpace ) -{ - if ( condSpace == 0 ) - cerr << "<empty>"; - else { - for ( CondSet::Iter csi = condSpace->condSet.last(); csi.gtb(); csi-- ) { - if ( ! csi.last() ) - cerr << ','; - (*csi)->actionName( cerr ); - } - } -} - -void logNewExpansion( Expansion *exp ) -{ - cerr << "created expansion:" << endl; - cerr << " range: " << exp->lowKey.getVal() << " .. 
" << - exp->highKey.getVal() << endl; - - cerr << " fromCondSpace: "; - logCondSpace( exp->fromCondSpace ); - cerr << endl; - cerr << " fromVals: " << exp->fromVals << endl; - - cerr << " toCondSpace: "; - logCondSpace( exp->toCondSpace ); - cerr << endl; - cerr << " toValsList: "; - for ( LongVect::Iter to = exp->toValsList; to.lte(); to++ ) - cerr << " " << *to; - cerr << endl; -} -#endif - - -void FsmGraph::findTransExpansions( ExpansionList &expansionList, - FsmState *destState, FsmState *srcState ) -{ - PairIter<FsmTrans, StateCond> transCond( destState->outList.head, - srcState->stateCondList.head ); - for ( ; !transCond.end(); transCond++ ) { - if ( transCond.userState == RangeOverlap ) { - Expansion *expansion = new Expansion( transCond.s1Tel.lowKey, - transCond.s1Tel.highKey ); - expansion->fromTrans = new FsmTrans(*transCond.s1Tel.trans); - expansion->fromTrans->fromState = 0; - expansion->fromTrans->toState = transCond.s1Tel.trans->toState; - expansion->fromCondSpace = 0; - expansion->fromVals = 0; - CondSpace *srcCS = transCond.s2Tel.trans->condSpace; - expansion->toCondSpace = srcCS; - - long numTargVals = (1 << srcCS->condSet.length()); - for ( long targVals = 0; targVals < numTargVals; targVals++ ) - expansion->toValsList.append( targVals ); - - #ifdef COLM_LOG_CONDS - if ( colm_log_conds ) { - logNewExpansion( expansion ); - } - #endif - expansionList.append( expansion ); - } - } -} - -void FsmGraph::findCondExpInTrans( ExpansionList &expansionList, FsmState *state, - Key lowKey, Key highKey, CondSpace *fromCondSpace, CondSpace *toCondSpace, - long fromVals, LongVect &toValsList ) -{ - FsmTrans searchTrans; - searchTrans.lowKey = fromCondSpace->baseKey + fromVals * keyOps->alphSize() + - (lowKey - keyOps->minKey); - searchTrans.highKey = fromCondSpace->baseKey + fromVals * keyOps->alphSize() + - (highKey - keyOps->minKey); - searchTrans.prev = searchTrans.next = 0; - - PairIter<FsmTrans> pairIter( state->outList.head, &searchTrans ); - for ( ; 
!pairIter.end(); pairIter++ ) { - if ( pairIter.userState == RangeOverlap ) { - Expansion *expansion = new Expansion( lowKey, highKey ); - expansion->fromTrans = new FsmTrans(*pairIter.s1Tel.trans); - expansion->fromTrans->fromState = 0; - expansion->fromTrans->toState = pairIter.s1Tel.trans->toState; - expansion->fromCondSpace = fromCondSpace; - expansion->fromVals = fromVals; - expansion->toCondSpace = toCondSpace; - expansion->toValsList = toValsList; - - expansionList.append( expansion ); - #ifdef COLM_LOG_CONDS - if ( colm_log_conds ) { - logNewExpansion( expansion ); - } - #endif - } - } -} -/* For every key range where destState and srcState both have a state cond, - * find the conditions that only the src side has and, if there are any, - * collect the expansions that take destState into the merged cond space. */ -void FsmGraph::findCondExpansions( ExpansionList &expansionList, - FsmState *destState, FsmState *srcState ) -{ - PairIter<StateCond, StateCond> condCond( destState->stateCondList.head, - srcState->stateCondList.head ); - for ( ; !condCond.end(); condCond++ ) { - if ( condCond.userState == RangeOverlap ) { - /* The cond set on the dest side of the overlapping range. */ - CondSet &destCS = condCond.s1Tel.trans->condSpace->condSet; - long destLen = destCS.length(); - - /* Find the items in src cond set that are not in dest - * cond set. These are the items that we must expand. */ - CondSet srcOnlyCS = condCond.s2Tel.trans->condSpace->condSet; - for ( CondSet::Iter dcsi = destCS; dcsi.lte(); dcsi++ ) - srcOnlyCS.remove( *dcsi ); - long srcOnlyLen = srcOnlyCS.length(); - - if ( srcOnlyCS.length() > 0 ) { - #ifdef COLM_LOG_CONDS - if ( colm_log_conds ) { - cerr << "there are " << srcOnlyCS.length() << " item(s) that are " - "only in the srcCS" << endl; - } - #endif - - CondSet mergedCS = destCS; - mergedCS.insert( condCond.s2Tel.trans->condSpace->condSet ); - - CondSpace *fromCondSpace = addCondSpace( destCS ); - CondSpace *toCondSpace = addCondSpace( mergedCS ); - - /* Loop all values in the dest space. 
*/ - for ( long destVals = 0; destVals < (1 << destLen); destVals++ ) { /* basicVals is destVals with each bit moved to the position its - * condition has in mergedCS. */ - long basicVals = 0; - for ( CondSet::Iter csi = destCS; csi.lte(); csi++ ) { - if ( destVals & (1 << csi.pos()) ) { - Action **cim = mergedCS.find( *csi ); - long bitPos = (cim - mergedCS.data); - basicVals |= 1 << bitPos; - } - } - - /* Loop all values of the src-only conditions, adding each to basicVals. */ - LongVect expandToVals; - for ( long soVals = 0; soVals < (1 << srcOnlyLen); soVals++ ) { - long targVals = basicVals; - for ( CondSet::Iter csi = srcOnlyCS; csi.lte(); csi++ ) { - if ( soVals & (1 << csi.pos()) ) { - Action **cim = mergedCS.find( *csi ); - long bitPos = (cim - mergedCS.data); - targVals |= 1 << bitPos; - } - } - expandToVals.append( targVals ); - } - - findCondExpInTrans( expansionList, destState, - condCond.s1Tel.lowKey, condCond.s1Tel.highKey, - fromCondSpace, toCondSpace, destVals, expandToVals ); - } - } - } - } -} -/* For every expansion and every value in its toValsList, rekey the saved - * fromTrans into the toCondSpace key range and copy it into destState with - * outTransCopy. */ -void FsmGraph::doExpand( MergeData &md, FsmState *destState, ExpansionList &expList1 ) -{ - for ( ExpansionList::Iter exp = expList1; exp.lte(); exp++ ) { - for ( LongVect::Iter to = exp->toValsList; to.lte(); to++ ) { - long targVals = *to; - - /* We will use the copy of the transition that was made when the - * expansion was created. It will get used multiple times. Each - * time we must set up the keys, everything else is constant and - * and already prepared. 
*/ - FsmTrans *srcTrans = exp->fromTrans; - - srcTrans->lowKey = exp->toCondSpace->baseKey + - targVals * keyOps->alphSize() + (exp->lowKey - keyOps->minKey); - srcTrans->highKey = exp->toCondSpace->baseKey + - targVals * keyOps->alphSize() + (exp->highKey - keyOps->minKey); - - /* Temporary one-element list so that outTransCopy can be handed a list - * head. NOTE(review): abandon presumably empties the list without - * deleting srcTrans, confirm. */ - TransList srcList; - srcList.append( srcTrans ); - outTransCopy( md, destState, srcList.head ); - srcList.abandon(); - } - } -} - -/* For each expansion, remove from destState the transitions on the key range - * the expansion was made from. Transitions outside that range are kept and - * those overlapping it are detached and deleted. */ -void FsmGraph::doRemove( MergeData &md, FsmState *destState, ExpansionList &expList1 ) -{ - for ( ExpansionList::Iter exp = expList1; exp.lte(); exp++ ) { - Removal removal; - if ( exp->fromCondSpace == 0 ) { /* No from cond space: remove the plain key range. */ - removal.lowKey = exp->lowKey; - removal.highKey = exp->highKey; - } - else { /* Otherwise remove the range as keyed within the from cond space. */ - removal.lowKey = exp->fromCondSpace->baseKey + - exp->fromVals * keyOps->alphSize() + (exp->lowKey - keyOps->minKey); - removal.highKey = exp->fromCondSpace->baseKey + - exp->fromVals * keyOps->alphSize() + (exp->highKey - keyOps->minKey); - } - removal.next = 0; - - TransList destList; - PairIter<FsmTrans, Removal> pairIter( destState->outList.head, &removal ); - for ( ; !pairIter.end(); pairIter++ ) { - switch ( pairIter.userState ) { - case RangeInS1: { - FsmTrans *destTrans = pairIter.s1Tel.trans; - destTrans->lowKey = pairIter.s1Tel.lowKey; - destTrans->highKey = pairIter.s1Tel.highKey; - destList.append( destTrans ); - break; - } - case RangeInS2: - break; - case RangeOverlap: { - FsmTrans *trans = pairIter.s1Tel.trans; - detachTrans( trans->fromState, trans->toState, trans ); - delete trans; - break; - } - case BreakS1: { - pairIter.s1Tel.trans = dupTrans( destState, - pairIter.s1Tel.trans ); - break; - } - case BreakS2: - break; - } - } - destState->outList.transfer( destList ); - } -} -/* Merge the state cond list of srcState into that of destState by walking - * both lists together over their key ranges. */ -void FsmGraph::mergeStateConds( FsmState *destState, FsmState *srcState ) -{ - StateCondList destList; - PairIter<StateCond> pairIter( destState->stateCondList.head, - srcState->stateCondList.head ); - for ( ; !pairIter.end(); pairIter++ ) { - switch ( pairIter.userState ) { - case 
RangeInS1: { /* Range only in dest: keep the dest cond, set to this range. */ - StateCond *destCond = pairIter.s1Tel.trans; - destCond->lowKey = pairIter.s1Tel.lowKey; - destCond->highKey = pairIter.s1Tel.highKey; - destList.append( destCond ); - break; - } - case RangeInS2: { /* Range only in src: add a copy of the src cond for this range. */ - StateCond *newCond = new StateCond( *pairIter.s2Tel.trans ); - newCond->lowKey = pairIter.s2Tel.lowKey; - newCond->highKey = pairIter.s2Tel.highKey; - destList.append( newCond ); - break; - } - case RangeOverlap: { /* Range in both: the dest cond takes on the combined cond sets. */ - StateCond *destCond = pairIter.s1Tel.trans; - StateCond *srcCond = pairIter.s2Tel.trans; - CondSet mergedCondSet; - mergedCondSet.insert( destCond->condSpace->condSet ); - mergedCondSet.insert( srcCond->condSpace->condSet ); - destCond->condSpace = addCondSpace( mergedCondSet ); - - destCond->lowKey = pairIter.s1Tel.lowKey; - destCond->highKey = pairIter.s1Tel.highKey; - destList.append( destCond ); - break; - } - case BreakS1: /* Replace the current dest cond with a copy of itself. */ - pairIter.s1Tel.trans = new StateCond( *pairIter.s1Tel.trans ); - break; - - case BreakS2: - break; - } - } - destState->stateCondList.transfer( destList ); -} - -/* A state merge which represents the drawing in of leaving transitions. If - * there is any out data then we duplicate the souce state, transfer the out - * data, then merge in the state. The new state will be reaped because it will - * not be given any in transitions. 
*/ -void FsmGraph::mergeStatesLeaving( MergeData &md, FsmState *destState, FsmState *srcState ) -{ /* Without out data on destState a plain merge is enough. */ - if ( !hasOutData( destState ) ) - mergeStates( md, destState, srcState ); - else { /* Merge src into a fresh state, apply the out data and out conditions - * of dest to it, then merge that state into dest. */ - FsmState *ssMutable = addState(); - mergeStates( md, ssMutable, srcState ); - transferOutData( ssMutable, destState ); - - for ( ActionSet::Iter cond = destState->outCondSet; cond.lte(); cond++ ) - embedCondition( md, ssMutable, *cond ); - - mergeStates( md, destState, ssMutable ); - } -} -/* Merge each of the numSrc states in srcStates into destState, in order. */ -void FsmGraph::mergeStates( MergeData &md, FsmState *destState, - FsmState **srcStates, int numSrc ) -{ - for ( int s = 0; s < numSrc; s++ ) - mergeStates( md, destState, srcStates[s] ); -} -/* Merge srcState into destState. Finds the condition expansions in both - * directions, merges the state conds, copies in the out transitions of - * srcState, applies and then removes the expansions, and finally draws in - * state bits, final status, epsilon transitions and action tables. srcState - * may be destState itself. */ -void FsmGraph::mergeStates( MergeData &md, FsmState *destState, FsmState *srcState ) -{ - ExpansionList expList1; - ExpansionList expList2; - - findTransExpansions( expList1, destState, srcState ); - findCondExpansions( expList1, destState, srcState ); - findTransExpansions( expList2, srcState, destState ); - findCondExpansions( expList2, srcState, destState ); - - mergeStateConds( destState, srcState ); - - outTransCopy( md, destState, srcState->outList.head ); - - doExpand( md, destState, expList1 ); - doExpand( md, destState, expList2 ); - - doRemove( md, destState, expList1 ); - doRemove( md, destState, expList2 ); - - expList1.empty(); - expList2.empty(); - - /* Get its bits and final state status. */ - destState->stateBits |= ( srcState->stateBits & ~SB_ISFINAL ); - if ( srcState->isFinState() ) - setFinState( destState ); - - /* Draw in any properties of srcState into destState. */ - if ( srcState == destState ) { - /* Duplicate the list to protect against write to source. The - * priorities sets are not copied in because that would have no - * effect. */ - destState->epsilonTrans.append( EpsilonTrans( srcState->epsilonTrans ) ); - - /* Get all actions, duplicating to protect against write to source. 
*/ - destState->toStateActionTable.setActions( - ActionTable( srcState->toStateActionTable ) ); - destState->fromStateActionTable.setActions( - ActionTable( srcState->fromStateActionTable ) ); - destState->outActionTable.setActions( ActionTable( srcState->outActionTable ) ); - destState->outCondSet.insert( ActionSet( srcState->outCondSet ) ); - destState->errActionTable.setActions( ErrActionTable( srcState->errActionTable ) ); - destState->eofActionTable.setActions( ActionTable( srcState->eofActionTable ) ); - } - else { - /* Get the epsilons, out priorities. */ - destState->epsilonTrans.append( srcState->epsilonTrans ); - destState->outPriorTable.setPriors( srcState->outPriorTable ); - - /* Get all actions. */ - destState->toStateActionTable.setActions( srcState->toStateActionTable ); - destState->fromStateActionTable.setActions( srcState->fromStateActionTable ); - destState->outActionTable.setActions( srcState->outActionTable ); - destState->outCondSet.insert( srcState->outCondSet ); - destState->errActionTable.setActions( srcState->errActionTable ); - destState->eofActionTable.setActions( srcState->eofActionTable ); - } -} -/* Process the fill list that starts at md.stfillHead: merge into each state - * the states of its stateDictEl state set, then in a second pass delete - * every stateDictEl. */ -void FsmGraph::fillInStates( MergeData &md ) -{ - /* Merge any states that are awaiting merging. This will likely cause - * other states to be added to the stfill list. */ - FsmState *state = md.stfillHead; - while ( state != 0 ) { - StateSet *stateSet = &state->stateDictEl->stateSet; - mergeStates( md, state, stateSet->data, stateSet->length() ); - state = state->alg.next; - } - - /* Delete the state sets of all states that are on the fill list. */ - state = md.stfillHead; - while ( state != 0 ) { - /* Delete and reset the state set. */ - delete state->stateDictEl; - state->stateDictEl = 0; - - /* Next state in the stfill list. */ - state = state->alg.next; - } - - /* StateDict will still have its ptrs/size set but all of it's element - * will be deleted so we don't need to clean it up. 
*/ -} - -void FsmGraph::findEmbedExpansions( ExpansionList &expansionList, - FsmState *destState, Action *condAction ) -{ - StateCondList destList; - PairIter<FsmTrans, StateCond> transCond( destState->outList.head, - destState->stateCondList.head ); - for ( ; !transCond.end(); transCond++ ) { - switch ( transCond.userState ) { - case RangeInS1: { - if ( transCond.s1Tel.lowKey <= keyOps->maxKey ) { - assert( transCond.s1Tel.highKey <= keyOps->maxKey ); - - /* Make a new state cond. */ - StateCond *newStateCond = new StateCond( transCond.s1Tel.lowKey, - transCond.s1Tel.highKey ); - newStateCond->condSpace = addCondSpace( CondSet( condAction ) ); - destList.append( newStateCond ); - - /* Create the expansion. */ - Expansion *expansion = new Expansion( transCond.s1Tel.lowKey, - transCond.s1Tel.highKey ); - expansion->fromTrans = new FsmTrans(*transCond.s1Tel.trans); - expansion->fromTrans->fromState = 0; - expansion->fromTrans->toState = transCond.s1Tel.trans->toState; - expansion->fromCondSpace = 0; - expansion->fromVals = 0; - expansion->toCondSpace = newStateCond->condSpace; - expansion->toValsList.append( 1 ); - #ifdef COLM_LOG_CONDS - if ( colm_log_conds ) { - logNewExpansion( expansion ); - } - #endif - expansionList.append( expansion ); - } - break; - } - case RangeInS2: { - /* Enhance state cond and find the expansion. */ - StateCond *stateCond = transCond.s2Tel.trans; - stateCond->lowKey = transCond.s2Tel.lowKey; - stateCond->highKey = transCond.s2Tel.highKey; - - CondSet &destCS = stateCond->condSpace->condSet; - long destLen = destCS.length(); - CondSpace *fromCondSpace = stateCond->condSpace; - - CondSet mergedCS = destCS; - mergedCS.insert( condAction ); - CondSpace *toCondSpace = addCondSpace( mergedCS ); - stateCond->condSpace = toCondSpace; - destList.append( stateCond ); - - /* Loop all values in the dest space. 
*/ - for ( long destVals = 0; destVals < (1 << destLen); destVals++ ) { - long basicVals = 0; - for ( CondSet::Iter csi = destCS; csi.lte(); csi++ ) { - if ( destVals & (1 << csi.pos()) ) { - Action **cim = mergedCS.find( *csi ); - long bitPos = (cim - mergedCS.data); - basicVals |= 1 << bitPos; - } - } - - long targVals = basicVals; - Action **cim = mergedCS.find( condAction ); - long bitPos = (cim - mergedCS.data); - targVals |= 1 << bitPos; - - LongVect expandToVals( targVals ); - findCondExpInTrans( expansionList, destState, - transCond.s2Tel.lowKey, transCond.s2Tel.highKey, - fromCondSpace, toCondSpace, destVals, expandToVals ); - } - break; - } - - - case RangeOverlap: - case BreakS1: - case BreakS2: - assert( false ); - break; - } - } - - destState->stateCondList.transfer( destList ); -} - -void FsmGraph::embedCondition( FsmState *state, Action *condAction ) -{ - MergeData md; - ExpansionList expList; - - /* Turn on misfit accounting to possibly catch the old start state. */ - setMisfitAccounting( true ); - - /* Worker. */ - embedCondition( md, state, condAction ); - - /* Fill in any states that were newed up as combinations of others. */ - fillInStates( md ); - - /* Remove the misfits and turn off misfit accounting. */ - removeMisfits(); - setMisfitAccounting( false ); -} - -void FsmGraph::embedCondition( MergeData &md, FsmState *state, Action *condAction ) -{ - ExpansionList expList; - - findEmbedExpansions( expList, state, condAction ); - doExpand( md, state, expList ); - doRemove( md, state, expList ); - expList.empty(); -} diff --git a/colm/fsmgraph.h b/colm/fsmgraph.h deleted file mode 100644 index fca23cc1..00000000 --- a/colm/fsmgraph.h +++ /dev/null @@ -1,1388 +0,0 @@ -/* - * Copyright 2001-2012 Adrian Thurston <thurston@complang.org> - */ - -/* This file is part of Colm. 
- * - * Colm is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * Colm is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Colm; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#ifndef _FSMGRAPH_H -#define _FSMGRAPH_H - -#include <assert.h> -#include "keyops.h" -#include "vector.h" -#include "bstset.h" -#include "compare.h" -#include "avltree.h" -#include "dlist.h" -#include "bstmap.h" -#include "sbstmap.h" -#include "sbstset.h" -#include "sbsttable.h" -#include "avlset.h" -#include "avlmap.h" - -/* Flags that control merging. */ -#define SB_GRAPH1 0x01 -#define SB_GRAPH2 0x02 -#define SB_BOTH 0x03 -#define SB_ISFINAL 0x04 -#define SB_ISMARKED 0x08 -#define SB_ONLIST 0x10 - -struct FsmTrans; -struct FsmState; -struct FsmGraph; -struct Action; -struct TokenDef; -struct NameInst; - -/* State list element for unambiguous access to list element. */ -struct FsmListEl -{ - FsmState *prev, *next; -}; - -/* This is the marked index for a state pair. Used in minimization. It keeps - * track of whether or not the state pair is marked. */ -struct MarkIndex -{ - MarkIndex(int states); - ~MarkIndex(); - - void markPair(int state1, int state2); - bool isPairMarked(int state1, int state2); - -private: - int numStates; - bool *array; -}; - -extern KeyOps *keyOps; - -/* Transistion Action Element. */ -typedef SBstMapEl< int, Action* > ActionTableEl; - -/* Transition Action Table. 
*/ -struct ActionTable - : public SBstMap< int, Action*, CmpOrd<int> > -{ - void setAction( int ordering, Action *action ); - void setActions( int *orderings, Action **actions, int nActs ); - void setActions( const ActionTable &other ); - - bool hasAction( Action *action ); -}; - -typedef SBstSet< Action*, CmpOrd<Action*> > ActionSet; -typedef CmpSTable< Action*, CmpOrd<Action*> > CmpActionSet; - -/* Transistion Action Element. */ -typedef SBstMapEl< int, TokenDef* > LmActionTableEl; - -/* Transition Action Table. */ -struct LmActionTable - : public SBstMap< int, TokenDef*, CmpOrd<int> > -{ - void setAction( int ordering, TokenDef *action ); - void setActions( const LmActionTable &other ); -}; - -/* Compare of a whole action table element (key & value). */ -struct CmpActionTableEl -{ - static int compare( const ActionTableEl &action1, - const ActionTableEl &action2 ) - { - if ( action1.key < action2.key ) - return -1; - else if ( action1.key > action2.key ) - return 1; - else if ( action1.value < action2.value ) - return -1; - else if ( action1.value > action2.value ) - return 1; - return 0; - } -}; - -/* Compare for ActionTable. */ -typedef CmpSTable< ActionTableEl, CmpActionTableEl > CmpActionTable; - -/* Compare of a whole lm action table element (key & value). */ -struct CmpLmActionTableEl -{ - static int compare( const LmActionTableEl &lmAction1, - const LmActionTableEl &lmAction2 ) - { - if ( lmAction1.key < lmAction2.key ) - return -1; - else if ( lmAction1.key > lmAction2.key ) - return 1; - else if ( lmAction1.value < lmAction2.value ) - return -1; - else if ( lmAction1.value > lmAction2.value ) - return 1; - return 0; - } -}; - -/* Compare for ActionTable. */ -typedef CmpSTable< LmActionTableEl, CmpLmActionTableEl > CmpLmActionTable; - -/* Action table element for error action tables. Adds the encoding of transfer - * point. 
*/ -struct ErrActionTableEl -{ - ErrActionTableEl( Action *action, int ordering, int transferPoint ) - : ordering(ordering), action(action), transferPoint(transferPoint) { } - - /* Ordering and id of the action embedding. */ - int ordering; - Action *action; - - /* Id of point of transfere from Error action table to transtions and - * eofActionTable. */ - int transferPoint; - - int getKey() const { return ordering; } -}; - -struct ErrActionTable - : public SBstTable< ErrActionTableEl, int, CmpOrd<int> > -{ - void setAction( int ordering, Action *action, int transferPoint ); - void setActions( const ErrActionTable &other ); -}; - -/* Compare of an error action table element (key & value). */ -struct CmpErrActionTableEl -{ - static int compare( const ErrActionTableEl &action1, - const ErrActionTableEl &action2 ) - { - if ( action1.ordering < action2.ordering ) - return -1; - else if ( action1.ordering > action2.ordering ) - return 1; - else if ( action1.action < action2.action ) - return -1; - else if ( action1.action > action2.action ) - return 1; - else if ( action1.transferPoint < action2.transferPoint ) - return -1; - else if ( action1.transferPoint > action2.transferPoint ) - return 1; - return 0; - } -}; - -/* Compare for ErrActionTable. */ -typedef CmpSTable< ErrActionTableEl, CmpErrActionTableEl > CmpErrActionTable; - - -/* Descibe a priority, shared among PriorEls. - * Has key and whether or not used. */ -struct PriorDesc -{ - int key; - int priority; -}; - -/* Element in the arrays of priorities for transitions and arrays. Ordering is - * unique among instantiations of machines, desc is shared. */ -struct PriorEl -{ - PriorEl( int ordering, PriorDesc *desc ) - : ordering(ordering), desc(desc) { } - - int ordering; - PriorDesc *desc; -}; - -/* Compare priority elements, which are ordered by the priority descriptor - * key. 
*/ -struct PriorElCmp -{ - static inline int compare( const PriorEl &pel1, const PriorEl &pel2 ) - { - if ( pel1.desc->key < pel2.desc->key ) - return -1; - else if ( pel1.desc->key > pel2.desc->key ) - return 1; - else - return 0; - } -}; - - -/* Priority Table. */ -struct PriorTable - : public SBstSet< PriorEl, PriorElCmp > -{ - void setPrior( int ordering, PriorDesc *desc ); - void setPriors( const PriorTable &other ); -}; - -/* Compare of prior table elements for distinguising state data. */ -struct CmpPriorEl -{ - static inline int compare( const PriorEl &pel1, const PriorEl &pel2 ) - { - if ( pel1.desc < pel2.desc ) - return -1; - else if ( pel1.desc > pel2.desc ) - return 1; - else if ( pel1.ordering < pel2.ordering ) - return -1; - else if ( pel1.ordering > pel2.ordering ) - return 1; - return 0; - } -}; - -/* Compare of PriorTable distinguising state data. Using a compare of the - * pointers is a little more strict than it needs be. It requires that - * prioritiy tables have the exact same set of priority assignment operators - * (from the input lang) to be considered equal. - * - * Really only key-value pairs need be tested and ordering be merged. However - * this would require that in the fuseing of states, priority descriptors be - * chosen for the new fused state based on priority. Since the out transition - * lists and ranges aren't necessarily going to line up, this is more work for - * little gain. Final compression resets all priorities first, so this would - * only be useful for compression at every operator, which is only an - * undocumented test feature. - */ -typedef CmpSTable<PriorEl, CmpPriorEl> CmpPriorTable; - -/* Plain action list that imposes no ordering. */ -typedef Vector<int> TransFuncList; - -/* Comparison for TransFuncList. */ -typedef CmpTable< int, CmpOrd<int> > TransFuncListCompare; - -/* Transition class that implements actions and priorities. 
*/ -struct FsmTrans -{ - FsmTrans() : fromState(0), toState(0) {} - FsmTrans( const FsmTrans &other ) : - lowKey(other.lowKey), - highKey(other.highKey), - fromState(0), toState(0), - actionTable(other.actionTable), - priorTable(other.priorTable) - { - assert( lmActionTable.length() == 0 && other.lmActionTable.length() == 0 ); - } - - Key lowKey, highKey; - FsmState *fromState; - FsmState *toState; - - /* Pointers for outlist. */ - FsmTrans *prev, *next; - - /* Pointers for in-list. */ - FsmTrans *ilprev, *ilnext; - - /* The function table and priority for the transition. */ - ActionTable actionTable; - PriorTable priorTable; - - LmActionTable lmActionTable; -}; - -/* In transition list. Like DList except only has head pointers, which is all - * that is required. Insertion and deletion is handled by the graph. This - * class provides the iterator of a single list. */ -struct TransInList -{ - TransInList() : head(0) { } - - FsmTrans *head; - - struct Iter - { - /* Default construct. */ - Iter() : ptr(0) { } - - /* Construct, assign from a list. */ - Iter( const TransInList &il ) : ptr(il.head) { } - Iter &operator=( const TransInList &dl ) { ptr = dl.head; return *this; } - - /* At the end */ - bool lte() const { return ptr != 0; } - bool end() const { return ptr == 0; } - - /* At the first, last element. */ - bool first() const { return ptr && ptr->ilprev == 0; } - bool last() const { return ptr && ptr->ilnext == 0; } - - /* Cast, dereference, arrow ops. */ - operator FsmTrans*() const { return ptr; } - FsmTrans &operator *() const { return *ptr; } - FsmTrans *operator->() const { return ptr; } - - /* Increment, decrement. */ - inline void operator++(int) { ptr = ptr->ilnext; } - inline void operator--(int) { ptr = ptr->ilprev; } - - /* The iterator is simply a pointer. */ - FsmTrans *ptr; - }; -}; - -typedef DList<FsmTrans> TransList; - -/* Set of states, list of states. 
*/ -typedef BstSet<FsmState*> StateSet; -typedef DList<FsmState> StateList; - -/* A element in a state dict. */ -struct StateDictEl -: - public AvlTreeEl<StateDictEl> -{ - StateDictEl(const StateSet &stateSet) - : stateSet(stateSet) { } - - const StateSet &getKey() { return stateSet; } - StateSet stateSet; - FsmState *targState; -}; - -/* Dictionary mapping a set of states to a target state. */ -typedef AvlTree< StateDictEl, StateSet, CmpTable<FsmState*> > StateDict; - -/* Data needed for a merge operation. */ -struct MergeData -{ - MergeData() - : stfillHead(0), stfillTail(0) { } - - StateDict stateDict; - - FsmState *stfillHead; - FsmState *stfillTail; - - void fillListAppend( FsmState *state ); -}; - -struct TransEl -{ - /* Constructors. */ - TransEl() { } - TransEl( Key lowKey, Key highKey ) - : lowKey(lowKey), highKey(highKey) { } - TransEl( Key lowKey, Key highKey, FsmTrans *value ) - : lowKey(lowKey), highKey(highKey), value(value) { } - - Key lowKey, highKey; - FsmTrans *value; -}; - -struct CmpKey -{ - static int compare( const Key key1, const Key key2 ) - { - if ( key1 < key2 ) - return -1; - else if ( key1 > key2 ) - return 1; - else - return 0; - } -}; - -/* Vector based set of key items. */ -typedef BstSet<Key, CmpKey> KeySet; - -struct MinPartition -{ - MinPartition() : active(false) { } - - StateList list; - bool active; - - MinPartition *prev, *next; -}; - -/* Epsilon transition stored in a state. Specifies the target */ -typedef Vector<int> EpsilonTrans; - -/* List of states that are to be drawn into this. */ -struct EptVectEl -{ - EptVectEl( FsmState *targ, bool leaving ) - : targ(targ), leaving(leaving) { } - - FsmState *targ; - bool leaving; -}; -typedef Vector<EptVectEl> EptVect; - -/* Set of entry ids that go into this state. */ -typedef BstSet<int> EntryIdSet; - -/* Set of longest match items that may be active in a given state. */ -typedef BstSet<TokenDef*> LmItemSet; - -/* Conditions. 
*/ -typedef BstSet< Action*, CmpOrd<Action*> > CondSet; -typedef CmpTable< Action*, CmpOrd<Action*> > CmpCondSet; - -struct CondSpace - : public AvlTreeEl<CondSpace> -{ - CondSpace( const CondSet &condSet ) - : condSet(condSet) {} - - const CondSet &getKey() { return condSet; } - - CondSet condSet; - Key baseKey; - long condSpaceId; -}; - -typedef Vector<CondSpace*> CondSpaceVect; - -typedef AvlTree<CondSpace, CondSet, CmpCondSet> CondSpaceMap; - -struct StateCond -{ - StateCond( Key lowKey, Key highKey ) : - lowKey(lowKey), highKey(highKey) {} - - Key lowKey; - Key highKey; - CondSpace *condSpace; - - StateCond *prev, *next; -}; - -typedef DList<StateCond> StateCondList; -typedef Vector<long> LongVect; - -struct Expansion -{ - Expansion( Key lowKey, Key highKey ) : - lowKey(lowKey), highKey(highKey), - fromTrans(0), fromCondSpace(0), - toCondSpace(0) {} - - ~Expansion() - { - if ( fromTrans != 0 ) - delete fromTrans; - } - - Key lowKey; - Key highKey; - - FsmTrans *fromTrans; - CondSpace *fromCondSpace; - long fromVals; - - CondSpace *toCondSpace; - LongVect toValsList; - - Expansion *prev, *next; -}; - -typedef DList<Expansion> ExpansionList; - -struct Removal -{ - Key lowKey; - Key highKey; - - Removal *next; -}; - -struct CondData -{ - CondData() : nextCondKey(0) {} - - /* Condition info. */ - Key nextCondKey; - - CondSpaceMap condSpaceMap; -}; - -extern CondData *condData; - -/* State class that implements actions and priorities. */ -struct FsmState -{ - FsmState(); - FsmState(const FsmState &other); - ~FsmState(); - - /* Is the state final? */ - bool isFinState() { return stateBits & SB_ISFINAL; } - - /* Out transition list and the pointer for the default out trans. */ - TransList outList; - - /* In transition Lists. */ - TransInList inList; - - /* Entry points into the state. */ - EntryIdSet entryIds; - - /* Epsilon transitions. */ - EpsilonTrans epsilonTrans; - - /* Condition info. 
*/ - StateCondList stateCondList; - - /* Number of in transitions from states other than ourselves. */ - int foreignInTrans; - - /* Temporary data for various algorithms. */ - union { - /* When duplicating the fsm we need to map each - * state to the new state representing it. */ - FsmState *stateMap; - - /* When minimizing machines by partitioning, this maps to the group - * the state is in. */ - MinPartition *partition; - - /* When merging states (state machine operations) this next pointer is - * used for the list of states that need to be filled in. */ - FsmState *next; - - /* Identification for printing and stable minimization. */ - int stateNum; - - } alg; - - /* Data used in epsilon operation, maybe fit into alg? */ - FsmState *isolatedShadow; - int owningGraph; - - /* A pointer to a dict element that contains the set of states this state - * represents. This cannot go into alg, because alg.next is used during - * the merging process. */ - StateDictEl *stateDictEl; - - /* When drawing epsilon transitions, holds the list of states to merge - * with. */ - EptVect *eptVect; - - /* Bits controlling the behaviour of the state during collapsing to dfa. */ - int stateBits; - - /* State list elements. */ - FsmState *next, *prev; - - /* - * Priority and Action data. - */ - - /* Out priorities transfered to out transitions. */ - PriorTable outPriorTable; - - /* The following two action tables are distinguished by the fact that when - * toState actions are executed immediatly after transition actions of - * incoming transitions and the current character will be the same as the - * one available then. The fromState actions are executed immediately - * before the transition actions of outgoing transitions and the current - * character is same as the one available then. */ - - /* Actions to execute upon entering into a state. */ - ActionTable toStateActionTable; - - /* Actions to execute when going from the state to the transition. 
*/ - ActionTable fromStateActionTable; - - /* Actions to add to any future transitions that leave via this state. */ - ActionTable outActionTable; - - /* Conditions to add to any future transiions that leave via this sttate. */ - ActionSet outCondSet; - - /* Error action tables. */ - ErrActionTable errActionTable; - - /* Actions to execute on eof. */ - ActionTable eofActionTable; - - /* Set of longest match items that may be active in this state. */ - LmItemSet lmItemSet; - - FsmState *eofTarget; -}; - -template <class ListItem> struct NextTrans -{ - Key lowKey, highKey; - ListItem *trans; - ListItem *next; - - void load() { - if ( trans == 0 ) - next = 0; - else { - next = trans->next; - lowKey = trans->lowKey; - highKey = trans->highKey; - } - } - - void set( ListItem *t ) { - trans = t; - load(); - } - - void increment() { - trans = next; - load(); - } -}; - - -/* Encodes the different states that are meaningful to the of the iterator. */ -enum PairIterUserState -{ - RangeInS1, RangeInS2, - RangeOverlap, - BreakS1, BreakS2 -}; - -template <class ListItem1, class ListItem2 = ListItem1> struct PairIter -{ - /* Encodes the different states that an fsm iterator can be in. */ - enum IterState { - Begin, - ConsumeS1Range, ConsumeS2Range, - OnlyInS1Range, OnlyInS2Range, - S1SticksOut, S1SticksOutBreak, - S2SticksOut, S2SticksOutBreak, - S1DragsBehind, S1DragsBehindBreak, - S2DragsBehind, S2DragsBehindBreak, - ExactOverlap, End - }; - - PairIter( ListItem1 *list1, ListItem2 *list2 ); - - /* Query iterator. */ - bool lte() { return itState != End; } - bool end() { return itState == End; } - void operator++(int) { findNext(); } - void operator++() { findNext(); } - - /* Iterator state. 
*/ - ListItem1 *list1; - ListItem2 *list2; - IterState itState; - PairIterUserState userState; - - NextTrans<ListItem1> s1Tel; - NextTrans<ListItem2> s2Tel; - Key bottomLow, bottomHigh; - ListItem1 *bottomTrans1; - ListItem2 *bottomTrans2; - -private: - void findNext(); -}; - -/* Init the iterator by advancing to the first item. */ -template <class ListItem1, class ListItem2> PairIter<ListItem1, ListItem2>::PairIter( - ListItem1 *list1, ListItem2 *list2 ) -: - list1(list1), - list2(list2), - itState(Begin) -{ - findNext(); -} - -/* Return and re-entry for the co-routine iterators. This should ALWAYS be - * used inside of a block. */ -#define CO_RETURN(label) \ - itState = label; \ - return; \ - entry##label: {} - -/* Return and re-entry for the co-routine iterators. This should ALWAYS be - * used inside of a block. */ -#define CO_RETURN2(label, uState) \ - itState = label; \ - userState = uState; \ - return; \ - entry##label: {} - -/* Advance to the next transition. When returns, trans points to the next - * transition, unless there are no more, in which case end() returns true. */ -template <class ListItem1, class ListItem2> void PairIter<ListItem1, ListItem2>::findNext() -{ - /* Jump into the iterator routine base on the iterator state. 
*/ - switch ( itState ) { - case Begin: goto entryBegin; - case ConsumeS1Range: goto entryConsumeS1Range; - case ConsumeS2Range: goto entryConsumeS2Range; - case OnlyInS1Range: goto entryOnlyInS1Range; - case OnlyInS2Range: goto entryOnlyInS2Range; - case S1SticksOut: goto entryS1SticksOut; - case S1SticksOutBreak: goto entryS1SticksOutBreak; - case S2SticksOut: goto entryS2SticksOut; - case S2SticksOutBreak: goto entryS2SticksOutBreak; - case S1DragsBehind: goto entryS1DragsBehind; - case S1DragsBehindBreak: goto entryS1DragsBehindBreak; - case S2DragsBehind: goto entryS2DragsBehind; - case S2DragsBehindBreak: goto entryS2DragsBehindBreak; - case ExactOverlap: goto entryExactOverlap; - case End: goto entryEnd; - } - -entryBegin: - /* Set up the next structs at the head of the transition lists. */ - s1Tel.set( list1 ); - s2Tel.set( list2 ); - - /* Concurrently scan both out ranges. */ - while ( true ) { - if ( s1Tel.trans == 0 ) { - /* We are at the end of state1's ranges. Process the rest of - * state2's ranges. */ - while ( s2Tel.trans != 0 ) { - /* Range is only in s2. */ - CO_RETURN2( ConsumeS2Range, RangeInS2 ); - s2Tel.increment(); - } - break; - } - else if ( s2Tel.trans == 0 ) { - /* We are at the end of state2's ranges. Process the rest of - * state1's ranges. */ - while ( s1Tel.trans != 0 ) { - /* Range is only in s1. */ - CO_RETURN2( ConsumeS1Range, RangeInS1 ); - s1Tel.increment(); - } - break; - } - /* Both state1's and state2's transition elements are good. - * The signiture of no overlap is a back key being in front of a - * front key. */ - else if ( s1Tel.highKey < s2Tel.lowKey ) { - /* A range exists in state1 that does not overlap with state2. */ - CO_RETURN2( OnlyInS1Range, RangeInS1 ); - s1Tel.increment(); - } - else if ( s2Tel.highKey < s1Tel.lowKey ) { - /* A range exists in state2 that does not overlap with state1. */ - CO_RETURN2( OnlyInS2Range, RangeInS2 ); - s2Tel.increment(); - } - /* There is overlap, must mix the ranges in some way. 
*/ - else if ( s1Tel.lowKey < s2Tel.lowKey ) { - /* Range from state1 sticks out front. Must break it into - * non-overlaping and overlaping segments. */ - bottomLow = s2Tel.lowKey; - bottomHigh = s1Tel.highKey; - s1Tel.highKey = s2Tel.lowKey; - s1Tel.highKey.decrement(); - bottomTrans1 = s1Tel.trans; - - /* Notify the caller that we are breaking s1. This gives them a - * chance to duplicate s1Tel[0,1].value. */ - CO_RETURN2( S1SticksOutBreak, BreakS1 ); - - /* Broken off range is only in s1. */ - CO_RETURN2( S1SticksOut, RangeInS1 ); - - /* Advance over the part sticking out front. */ - s1Tel.lowKey = bottomLow; - s1Tel.highKey = bottomHigh; - s1Tel.trans = bottomTrans1; - } - else if ( s2Tel.lowKey < s1Tel.lowKey ) { - /* Range from state2 sticks out front. Must break it into - * non-overlaping and overlaping segments. */ - bottomLow = s1Tel.lowKey; - bottomHigh = s2Tel.highKey; - s2Tel.highKey = s1Tel.lowKey; - s2Tel.highKey.decrement(); - bottomTrans2 = s2Tel.trans; - - /* Notify the caller that we are breaking s2. This gives them a - * chance to duplicate s2Tel[0,1].value. */ - CO_RETURN2( S2SticksOutBreak, BreakS2 ); - - /* Broken off range is only in s2. */ - CO_RETURN2( S2SticksOut, RangeInS2 ); - - /* Advance over the part sticking out front. */ - s2Tel.lowKey = bottomLow; - s2Tel.highKey = bottomHigh; - s2Tel.trans = bottomTrans2; - } - /* Low ends are even. Are the high ends even? */ - else if ( s1Tel.highKey < s2Tel.highKey ) { - /* Range from state2 goes longer than the range from state1. We - * must break the range from state2 into an evenly overlaping - * segment. */ - bottomLow = s1Tel.highKey; - bottomLow.increment(); - bottomHigh = s2Tel.highKey; - s2Tel.highKey = s1Tel.highKey; - bottomTrans2 = s2Tel.trans; - - /* Notify the caller that we are breaking s2. This gives them a - * chance to duplicate s2Tel[0,1].value. */ - CO_RETURN2( S2DragsBehindBreak, BreakS2 ); - - /* Breaking s2 produces exact overlap. 
*/ - CO_RETURN2( S2DragsBehind, RangeOverlap ); - - /* Advance over the front we just broke off of range 2. */ - s2Tel.lowKey = bottomLow; - s2Tel.highKey = bottomHigh; - s2Tel.trans = bottomTrans2; - - /* Advance over the entire s1Tel. We have consumed it. */ - s1Tel.increment(); - } - else if ( s2Tel.highKey < s1Tel.highKey ) { - /* Range from state1 goes longer than the range from state2. We - * must break the range from state1 into an evenly overlaping - * segment. */ - bottomLow = s2Tel.highKey; - bottomLow.increment(); - bottomHigh = s1Tel.highKey; - s1Tel.highKey = s2Tel.highKey; - bottomTrans1 = s1Tel.trans; - - /* Notify the caller that we are breaking s1. This gives them a - * chance to duplicate s2Tel[0,1].value. */ - CO_RETURN2( S1DragsBehindBreak, BreakS1 ); - - /* Breaking s1 produces exact overlap. */ - CO_RETURN2( S1DragsBehind, RangeOverlap ); - - /* Advance over the front we just broke off of range 1. */ - s1Tel.lowKey = bottomLow; - s1Tel.highKey = bottomHigh; - s1Tel.trans = bottomTrans1; - - /* Advance over the entire s2Tel. We have consumed it. */ - s2Tel.increment(); - } - else { - /* There is an exact overlap. */ - CO_RETURN2( ExactOverlap, RangeOverlap ); - - s1Tel.increment(); - s2Tel.increment(); - } - } - - /* Done, go into end state. */ - CO_RETURN( End ); -} - - -/* Compare lists of epsilon transitions. Entries are name ids of targets. */ -typedef CmpTable< int, CmpOrd<int> > CmpEpsilonTrans; - -/* Compare class for the Approximate minimization. */ -class ApproxCompare -{ -public: - ApproxCompare() { } - int compare( const FsmState *pState1, const FsmState *pState2 ); -}; - -/* Compare class for the initial partitioning of a partition minimization. */ -class InitPartitionCompare -{ -public: - InitPartitionCompare() { } - int compare( const FsmState *pState1, const FsmState *pState2 ); -}; - -/* Compare class for the regular partitioning of a partition minimization. 
*/ -class PartitionCompare -{ -public: - PartitionCompare() { } - int compare( const FsmState *pState1, const FsmState *pState2 ); -}; - -/* Compare class for a minimization that marks pairs. Provides the shouldMark - * routine. */ -class MarkCompare -{ -public: - MarkCompare() { } - bool shouldMark( MarkIndex &markIndex, const FsmState *pState1, - const FsmState *pState2 ); -}; - -/* List of partitions. */ -typedef DList< MinPartition > PartitionList; - -/* List of transtions out of a state. */ -typedef Vector<TransEl> TransListVect; - -/* Entry point map used for keeping track of entry points in a machine. */ -typedef BstSet< int > EntryIdSet; -typedef BstMapEl< int, FsmState* > EntryMapEl; -typedef BstMap< int, FsmState* > EntryMap; -typedef Vector<EntryMapEl> EntryMapBase; - -/* Graph class that implements actions and priorities. */ -struct FsmGraph -{ - /* Constructors/Destructors. */ - FsmGraph( ); - FsmGraph( const FsmGraph &graph ); - ~FsmGraph(); - - /* The list of states. */ - StateList stateList; - StateList misfitList; - - /* The map of entry points. */ - EntryMap entryPoints; - - /* The start state. */ - FsmState *startState; - - /* Error state, possibly created only when the final machine has been - * created and the XML machine is about to be written. No transitions - * point to this state. */ - FsmState *errState; - - /* The set of final states. */ - StateSet finStateSet; - - /* Misfit Accounting. Are misfits put on a separate list. */ - bool misfitAccounting; - - bool lmRequiresErrorState; - NameInst *rootName; - NameInst **nameIndex; - - /* - * Transition actions and priorities. - */ - - /* Set priorities on transtions. */ - void startFsmPrior( int ordering, PriorDesc *prior ); - void allTransPrior( int ordering, PriorDesc *prior ); - void finishFsmPrior( int ordering, PriorDesc *prior ); - void leaveFsmPrior( int ordering, PriorDesc *prior ); - - /* Action setting support. 
*/ - void transferErrorActions( FsmState *state, int transferPoint ); - void setErrorAction( FsmState *state, int ordering, Action *action ); - void setErrorActions( FsmState *state, const ActionTable &other ); - - /* Fill all spaces in a transition list with an error transition. */ - void fillGaps( FsmState *state ); - - /* Similar to setErrorAction, instead gives a state to go to on error. */ - void setErrorTarget( FsmState *state, FsmState *target, int *orderings, - Action **actions, int nActs ); - - /* Set actions to execute. */ - void startFsmAction( int ordering, Action *action ); - void allTransAction( int ordering, Action *action ); - void finishFsmAction( int ordering, Action *action ); - void leaveFsmAction( int ordering, Action *action ); - void longMatchAction( int ordering, TokenDef *lmPart ); - - /* Set conditions. */ - CondSpace *addCondSpace( const CondSet &condSet ); - - void findEmbedExpansions( ExpansionList &expansionList, - FsmState *destState, Action *condAction ); - void embedCondition( MergeData &md, FsmState *state, Action *condAction ); - void embedCondition( FsmState *state, Action *condAction ); - - void startFsmCondition( Action *condAction ); - void allTransCondition( Action *condAction ); - void leaveFsmCondition( Action *condAction ); - - /* Set error actions to execute. */ - void startErrorAction( int ordering, Action *action, int transferPoint ); - void allErrorAction( int ordering, Action *action, int transferPoint ); - void finalErrorAction( int ordering, Action *action, int transferPoint ); - void notStartErrorAction( int ordering, Action *action, int transferPoint ); - void notFinalErrorAction( int ordering, Action *action, int transferPoint ); - void middleErrorAction( int ordering, Action *action, int transferPoint ); - - /* Set EOF actions. 
*/ - void startEOFAction( int ordering, Action *action ); - void allEOFAction( int ordering, Action *action ); - void finalEOFAction( int ordering, Action *action ); - void notStartEOFAction( int ordering, Action *action ); - void notFinalEOFAction( int ordering, Action *action ); - void middleEOFAction( int ordering, Action *action ); - - /* Set To State actions. */ - void startToStateAction( int ordering, Action *action ); - void allToStateAction( int ordering, Action *action ); - void finalToStateAction( int ordering, Action *action ); - void notStartToStateAction( int ordering, Action *action ); - void notFinalToStateAction( int ordering, Action *action ); - void middleToStateAction( int ordering, Action *action ); - - /* Set From State actions. */ - void startFromStateAction( int ordering, Action *action ); - void allFromStateAction( int ordering, Action *action ); - void finalFromStateAction( int ordering, Action *action ); - void notStartFromStateAction( int ordering, Action *action ); - void notFinalFromStateAction( int ordering, Action *action ); - void middleFromStateAction( int ordering, Action *action ); - - /* Shift the action ordering of the start transitions to start at - * fromOrder and increase in units of 1. Useful before kleene star - * operation. */ - int shiftStartActionOrder( int fromOrder ); - - /* Clear all priorities from the fsm to so they won't affcet minimization - * of the final fsm. */ - void clearAllPriorities(); - - /* Zero out all the function keys. */ - void nullActionKeys(); - - /* Walk the list of states and verify state properties. */ - void verifyStates(); - - /* Misfit Accounting. Are misfits put on a separate list. */ - void setMisfitAccounting( bool val ) - { misfitAccounting = val; } - - /* Set and Unset a state as final. */ - void setFinState( FsmState *state ); - void unsetFinState( FsmState *state ); - - void setStartState( FsmState *state ); - void unsetStartState( ); - - /* Set and unset a state as an entry point. 
*/ - void setEntry( int id, FsmState *state ); - void changeEntry( int id, FsmState *to, FsmState *from ); - void unsetEntry( int id, FsmState *state ); - void unsetEntry( int id ); - void unsetAllEntryPoints(); - - /* Epsilon transitions. */ - void epsilonTrans( int id ); - void shadowReadWriteStates( MergeData &md ); - - /* - * Basic attaching and detaching. - */ - - /* Common to attaching/detaching list and default. */ - void attachToInList( FsmState *from, FsmState *to, FsmTrans *&head, FsmTrans *trans ); - void detachFromInList( FsmState *from, FsmState *to, FsmTrans *&head, FsmTrans *trans ); - - /* Attach with a new transition. */ - FsmTrans *attachNewTrans( FsmState *from, FsmState *to, - Key onChar1, Key onChar2 ); - - /* Attach with an existing transition that already in an out list. */ - void attachTrans( FsmState *from, FsmState *to, FsmTrans *trans ); - - /* Redirect a transition away from error and towards some state. */ - void redirectErrorTrans( FsmState *from, FsmState *to, FsmTrans *trans ); - - /* Detach a transition from a target state. */ - void detachTrans( FsmState *from, FsmState *to, FsmTrans *trans ); - - /* Detach a state from the graph. */ - void detachState( FsmState *state ); - - /* - * NFA to DFA conversion routines. - */ - - /* Duplicate a transition that will dropin to a free spot. */ - FsmTrans *dupTrans( FsmState *from, FsmTrans *srcTrans ); - - /* In crossing, two transitions both go to real states. */ - FsmTrans *fsmAttachStates( MergeData &md, FsmState *from, - FsmTrans *destTrans, FsmTrans *srcTrans ); - - /* Two transitions are to be crossed, handle the possibility of either - * going to the error state. */ - FsmTrans *mergeTrans( MergeData &md, FsmState *from, - FsmTrans *destTrans, FsmTrans *srcTrans ); - - /* Compare deterimne relative priorities of two transition tables. 
*/ - int comparePrior( const PriorTable &priorTable1, const PriorTable &priorTable2 ); - - /* Cross a src transition with one that is already occupying a spot. */ - FsmTrans *crossTransitions( MergeData &md, FsmState *from, - FsmTrans *destTrans, FsmTrans *srcTrans ); - - void outTransCopy( MergeData &md, FsmState *dest, FsmTrans *srcList ); - - void doRemove( MergeData &md, FsmState *destState, ExpansionList &expList1 ); - void doExpand( MergeData &md, FsmState *destState, ExpansionList &expList1 ); - void findCondExpInTrans( ExpansionList &expansionList, FsmState *state, - Key lowKey, Key highKey, CondSpace *fromCondSpace, CondSpace *toCondSpace, - long destVals, LongVect &toValsList ); - void findTransExpansions( ExpansionList &expansionList, - FsmState *destState, FsmState *srcState ); - void findCondExpansions( ExpansionList &expansionList, - FsmState *destState, FsmState *srcState ); - void mergeStateConds( FsmState *destState, FsmState *srcState ); - - /* Merge a set of states into newState. */ - void mergeStates( MergeData &md, FsmState *destState, - FsmState **srcStates, int numSrc ); - void mergeStatesLeaving( MergeData &md, FsmState *destState, FsmState *srcState ); - void mergeStates( MergeData &md, FsmState *destState, FsmState *srcState ); - - /* Make all states that are combinations of other states and that - * have not yet had their out transitions filled in. This will - * empty out stateDict and stFil. */ - void fillInStates( MergeData &md ); - - /* - * Transition Comparison. - */ - - /* Compare transition data. Either of the pointers may be null. */ - static inline int compareDataPtr( FsmTrans *trans1, FsmTrans *trans2 ); - - /* Compare target state and transition data. Either pointer may be null. */ - static inline int compareFullPtr( FsmTrans *trans1, FsmTrans *trans2 ); - - /* Compare target partitions. Either pointer may be null. 
*/ - static inline int comparePartPtr( FsmTrans *trans1, FsmTrans *trans2 ); - - /* Check marked status of target states. Either pointer may be null. */ - static inline bool shouldMarkPtr( MarkIndex &markIndex, - FsmTrans *trans1, FsmTrans *trans2 ); - - /* - * Callbacks. - */ - - /* Compare priority and function table of transitions. */ - static int compareTransData( FsmTrans *trans1, FsmTrans *trans2 ); - - /* Add in the properties of srcTrans into this. */ - void addInTrans( FsmTrans *destTrans, FsmTrans *srcTrans ); - - /* Compare states on data stored in the states. */ - static int compareStateData( const FsmState *state1, const FsmState *state2 ); - - /* Out transition data. */ - void clearOutData( FsmState *state ); - bool hasOutData( FsmState *state ); - void transferOutData( FsmState *destState, FsmState *srcState ); - - /* - * Allocation. - */ - - /* New up a state and add it to the graph. */ - FsmState *addState(); - - /* - * Building basic machines - */ - - void concatFsm( Key c ); - void concatFsm( Key *str, int len ); - void concatFsmCI( Key *str, int len ); - void orFsm( Key *set, int len ); - void rangeFsm( Key low, Key high ); - void rangeStarFsm( Key low, Key high ); - void emptyFsm( ); - void lambdaFsm( ); - - /* - * Fsm operators. - */ - - void starOp( ); - void repeatOp( int times ); - void optionalRepeatOp( int times ); - void concatOp( FsmGraph *other ); - void unionOp( FsmGraph *other ); - void intersectOp( FsmGraph *other ); - void subtractOp( FsmGraph *other ); - void epsilonOp(); - void joinOp( int startId, int finalId, FsmGraph **others, int numOthers ); - void globOp( FsmGraph **others, int numOthers ); - void deterministicEntry(); - - /* - * Operator workers - */ - - /* Determine if there are any entry points into a start state other than - * the start state. */ - bool isStartStateIsolated(); - - /* Make a new start state that has no entry points. Will not change the - * identity of the fsm. 
*/ - void isolateStartState(); - - /* Workers for resolving epsilon transitions. */ - bool inEptVect( EptVect *eptVect, FsmState *targ ); - void epsilonFillEptVectFrom( FsmState *root, FsmState *from, bool parentLeaving ); - void resolveEpsilonTrans( MergeData &md ); - - /* Workers for concatenation and union. */ - void doConcat( FsmGraph *other, StateSet *fromStates, bool optional ); - void doOr( FsmGraph *other ); - - /* - * Final states - */ - - /* Unset any final states that are no longer to be final - * due to final bits. */ - void unsetIncompleteFinals(); - void unsetKilledFinals(); - - /* Bring in other's entry points. Assumes others states are going to be - * copied into this machine. */ - void copyInEntryPoints( FsmGraph *other ); - - /* Ordering states. */ - void depthFirstOrdering( FsmState *state ); - void depthFirstOrdering(); - void sortStatesByFinal(); - - /* Set sqequential state numbers starting at 0. */ - void setStateNumbers( int base ); - - /* Unset all final states. */ - void unsetAllFinStates(); - - /* Set the bits of final states and clear the bits of non final states. */ - void setFinBits( int finStateBits ); - - /* - * Self-consistency checks. - */ - - /* Run a sanity check on the machine. */ - void verifyIntegrity(); - - /* Verify that there are no unreachable states, or dead end states. */ - void verifyReachability(); - void verifyNoDeadEndStates(); - - /* - * Path pruning - */ - - /* Mark all states reachable from state. */ - void markReachableFromHereReverse( FsmState *state ); - - /* Mark all states reachable from state. */ - void markReachableFromHere( FsmState *state ); - void markReachableFromHereStopFinal( FsmState *state ); - - /* Removes states that cannot be reached by any path in the fsm and are - * thus wasted silicon. */ - void removeDeadEndStates(); - - /* Removes states that cannot be reached by any path in the fsm and are - * thus wasted silicon. 
*/ - void removeUnreachableStates(); - - /* Remove error actions from states on which the error transition will - * never be taken. */ - bool outListCovers( FsmState *state ); - bool anyErrorRange( FsmState *state ); - - /* Remove states that are on the misfit list. */ - void removeMisfits(); - - /* - * FSM Minimization - */ - - /* Minimization by partitioning. */ - void minimizePartition1(); - void minimizePartition2(); - - /* Minimize the final state Machine. The result is the minimal fsm. Slow - * but stable, correct minimization. Uses n^2 space (lookout) and average - * n^2 time. Worst case n^3 time, but a that is a very rare case. */ - void minimizeStable(); - - /* Minimize the final state machine. Does not find the minimal fsm, but a - * pretty good approximation. Does not use any extra space. Average n^2 - * time. Worst case n^3 time, but a that is a very rare case. */ - void minimizeApproximate(); - - /* This is the worker for the minimize approximate solution. It merges - * states that have identical out transitions. */ - bool minimizeRound( ); - - /* Given an intial partioning of states, split partitions that have out trans - * to differing partitions. */ - int partitionRound( FsmState **statePtrs, MinPartition *parts, int numParts ); - - /* Split partitions that have a transition to a previously split partition, until - * there are no more partitions to split. */ - int splitCandidates( FsmState **statePtrs, MinPartition *parts, int numParts ); - - /* Fuse together states in the same partition. */ - void fusePartitions( MinPartition *parts, int numParts ); - - /* Mark pairs where out final stateness differs, out trans data differs, - * trans pairs go to a marked pair or trans data differs. Should get - * alot of pairs. */ - void initialMarkRound( MarkIndex &markIndex ); - - /* One marking round on all state pairs. Considers if trans pairs go - * to a marked state only. Returns whether or not a pair was marked. 
*/ - bool markRound( MarkIndex &markIndex ); - - /* Move the in trans into src into dest. */ - void inTransMove(FsmState *dest, FsmState *src); - - /* Make state src and dest the same state. */ - void fuseEquivStates(FsmState *dest, FsmState *src); - - /* Find any states that didn't get marked by the marking algorithm and - * merge them into the primary states of their equivalence class. */ - void fuseUnmarkedPairs( MarkIndex &markIndex ); - - /* Merge neighboring transitions go to the same state and have the same - * transitions data. */ - void compressTransitions(); - - /* Returns true if there is a transtion (either explicit or by a gap) to - * the error state. */ - bool checkErrTrans( FsmState *state, FsmTrans *trans ); - bool checkErrTransFinish( FsmState *state ); - bool hasErrorTrans(); -}; - - -#endif /* _FSMGRAPH_H */ diff --git a/colm/fsmmin.cc b/colm/fsmmin.cc deleted file mode 100644 index cbb2b99f..00000000 --- a/colm/fsmmin.cc +++ /dev/null @@ -1,732 +0,0 @@ -/* - * Copyright 2006-2012 Adrian Thurston <thurston@complang.org> - */ - -/* This file is part of Colm. - * - * Colm is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * Colm is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with Colm; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include "fsmgraph.h" -#include "mergesort.h" - -int FsmGraph::partitionRound( FsmState **statePtrs, MinPartition *parts, int numParts ) -{ - /* Need a mergesort object and a single partition compare. */ - MergeSort<FsmState*, PartitionCompare> mergeSort; - PartitionCompare partCompare; - - /* For each partition. */ - for ( int p = 0; p < numParts; p++ ) { - /* Fill the pointer array with the states in the partition. */ - StateList::Iter state = parts[p].list; - for ( int s = 0; state.lte(); state++, s++ ) - statePtrs[s] = state; - - /* Sort the states using the partitioning compare. */ - int numStates = parts[p].list.length(); - mergeSort.sort( statePtrs, numStates ); - - /* Assign the states into partitions based on the results of the sort. */ - int destPart = p, firstNewPart = numParts; - for ( int s = 1; s < numStates; s++ ) { - /* If this state differs from the last then move to the next partition. */ - if ( partCompare.compare( statePtrs[s-1], statePtrs[s] ) < 0 ) { - /* The new partition is the next avail spot. */ - destPart = numParts; - numParts += 1; - } - - /* If the state is not staying in the first partition, then - * transfer it to its destination partition. */ - if ( destPart != p ) { - FsmState *state = parts[p].list.detach( statePtrs[s] ); - parts[destPart].list.append( state ); - } - } - - /* Fix the partition pointer for all the states that got moved to a new - * partition. This must be done after the states are transfered so the - * result of the sort is not altered. 
*/ - for ( int newPart = firstNewPart; newPart < numParts; newPart++ ) { - StateList::Iter state = parts[newPart].list; - for ( ; state.lte(); state++ ) - state->alg.partition = &parts[newPart]; - } - } - - return numParts; -} - -/** - * \brief Minimize by partitioning version 1. - * - * Repeatedly tries to split partitions until all partitions are unsplittable. - * Produces the most minimal FSM possible. - */ -void FsmGraph::minimizePartition1() -{ - /* Need one mergesort object and partition compares. */ - MergeSort<FsmState*, InitPartitionCompare> mergeSort; - InitPartitionCompare initPartCompare; - - /* Nothing to do if there are no states. */ - if ( stateList.length() == 0 ) - return; - - /* - * First thing is to partition the states by final state status and - * transition functions. This gives us an initial partitioning to work - * with. - */ - - /* Make a array of pointers to states. */ - int numStates = stateList.length(); - FsmState** statePtrs = new FsmState*[numStates]; - - /* Fill up an array of pointers to the states for easy sorting. */ - StateList::Iter state = stateList; - for ( int s = 0; state.lte(); state++, s++ ) - statePtrs[s] = state; - - /* Sort the states using the array of states. */ - mergeSort.sort( statePtrs, numStates ); - - /* An array of lists of states is used to partition the states. */ - MinPartition *parts = new MinPartition[numStates]; - - /* Assign the states into partitions. */ - int destPart = 0; - for ( int s = 0; s < numStates; s++ ) { - /* If this state differs from the last then move to the next partition. */ - if ( s > 0 && initPartCompare.compare( statePtrs[s-1], statePtrs[s] ) < 0 ) { - /* Move to the next partition. */ - destPart += 1; - } - - /* Put the state into its partition. */ - statePtrs[s]->alg.partition = &parts[destPart]; - parts[destPart].list.append( statePtrs[s] ); - } - - /* We just moved all the states from the main list into partitions without - * taking them off the main list. 
So clean up the main list now. */ - stateList.abandon(); - - /* Split partitions. */ - int numParts = destPart + 1; - while ( true ) { - /* Test all partitions for splitting. */ - int newNum = partitionRound( statePtrs, parts, numParts ); - - /* When no partitions can be split, stop. */ - if ( newNum == numParts ) - break; - - numParts = newNum; - } - - /* Fuse states in the same partition. The states will end up back on the - * main list. */ - fusePartitions( parts, numParts ); - - /* Cleanup. */ - delete[] statePtrs; - delete[] parts; -} - -/* Split partitions that need splittting, decide which partitions might need - * to be split as a result, continue until there are no more that might need - * to be split. */ -int FsmGraph::splitCandidates( FsmState **statePtrs, MinPartition *parts, int numParts ) -{ - /* Need a mergesort and a partition compare. */ - MergeSort<FsmState*, PartitionCompare> mergeSort; - PartitionCompare partCompare; - - /* The lists of unsplitable (partList) and splitable partitions. - * Only partitions in the splitable list are check for needing splitting. */ - PartitionList partList, splittable; - - /* Initially, all partitions are born from a split (the initial - * partitioning) and can cause other partitions to be split. So any - * partition with a state with a transition out to another partition is a - * candidate for splitting. This will make every partition except possibly - * partitions of final states split candidates. */ - for ( int p = 0; p < numParts; p++ ) { - /* Assume not active. */ - parts[p].active = false; - - /* Look for a trans out of any state in the partition. */ - for ( StateList::Iter state = parts[p].list; state.lte(); state++ ) { - /* If there is at least one transition out to another state then - * the partition becomes splittable. */ - if ( state->outList.length() > 0 ) { - parts[p].active = true; - break; - } - } - - /* If it was found active then it goes on the splittable list. 
*/ - if ( parts[p].active ) - splittable.append( &parts[p] ); - else - partList.append( &parts[p] ); - } - - /* While there are partitions that are splittable, pull one off and try - * to split it. If it splits, determine which partitions may now be split - * as a result of the newly split partition. */ - while ( splittable.length() > 0 ) { - MinPartition *partition = splittable.detachFirst(); - - /* Fill the pointer array with the states in the partition. */ - StateList::Iter state = partition->list; - for ( int s = 0; state.lte(); state++, s++ ) - statePtrs[s] = state; - - /* Sort the states using the partitioning compare. */ - int numStates = partition->list.length(); - mergeSort.sort( statePtrs, numStates ); - - /* Assign the states into partitions based on the results of the sort. */ - MinPartition *destPart = partition; - int firstNewPart = numParts; - for ( int s = 1; s < numStates; s++ ) { - /* If this state differs from the last then move to the next partition. */ - if ( partCompare.compare( statePtrs[s-1], statePtrs[s] ) < 0 ) { - /* The new partition is the next avail spot. */ - destPart = &parts[numParts]; - numParts += 1; - } - - /* If the state is not staying in the first partition, then - * transfer it to its destination partition. */ - if ( destPart != partition ) { - FsmState *state = partition->list.detach( statePtrs[s] ); - destPart->list.append( state ); - } - } - - /* Fix the partition pointer for all the states that got moved to a new - * partition. This must be done after the states are transfered so the - * result of the sort is not altered. */ - int newPart; - for ( newPart = firstNewPart; newPart < numParts; newPart++ ) { - StateList::Iter state = parts[newPart].list; - for ( ; state.lte(); state++ ) - state->alg.partition = &parts[newPart]; - } - - /* Put the partition we just split and any new partitions that came out - * of the split onto the inactive list. 
*/ - partition->active = false; - partList.append( partition ); - for ( newPart = firstNewPart; newPart < numParts; newPart++ ) { - parts[newPart].active = false; - partList.append( &parts[newPart] ); - } - - if ( destPart == partition ) - continue; - - /* Now determine which partitions are splittable as a result of - * splitting partition by walking the in lists of the states in - * partitions that got split. Partition is the faked first item in the - * loop. */ - MinPartition *causalPart = partition; - newPart = firstNewPart - 1; - while ( newPart < numParts ) { - /* Loop all states in the causal partition. */ - StateList::Iter state = causalPart->list; - for ( ; state.lte(); state++ ) { - /* Walk all transition into the state and put the partition - * that the from state is in onto the splittable list. */ - for ( TransInList::Iter trans = state->inList; trans.lte(); trans++ ) { - MinPartition *fromPart = trans->fromState->alg.partition; - if ( ! fromPart->active ) { - fromPart->active = true; - partList.detach( fromPart ); - splittable.append( fromPart ); - } - } - } - - newPart += 1; - causalPart = &parts[newPart]; - } - } - return numParts; -} - - -/** - * \brief Minimize by partitioning version 2 (best alg). - * - * Repeatedly tries to split partitions that may splittable until there are no - * more partitions that might possibly need splitting. Runs faster than - * version 1. Produces the most minimal fsm possible. - */ -void FsmGraph::minimizePartition2() -{ - /* Need a mergesort and an initial partition compare. */ - MergeSort<FsmState*, InitPartitionCompare> mergeSort; - InitPartitionCompare initPartCompare; - - /* Nothing to do if there are no states. */ - if ( stateList.length() == 0 ) - return; - - /* - * First thing is to partition the states by final state status and - * transition functions. This gives us an initial partitioning to work - * with. - */ - - /* Make a array of pointers to states. 
*/ - int numStates = stateList.length(); - FsmState** statePtrs = new FsmState*[numStates]; - - /* Fill up an array of pointers to the states for easy sorting. */ - StateList::Iter state = stateList; - for ( int s = 0; state.lte(); state++, s++ ) - statePtrs[s] = state; - - /* Sort the states using the array of states. */ - mergeSort.sort( statePtrs, numStates ); - - /* An array of lists of states is used to partition the states. */ - MinPartition *parts = new MinPartition[numStates]; - - /* Assign the states into partitions. */ - int destPart = 0; - for ( int s = 0; s < numStates; s++ ) { - /* If this state differs from the last then move to the next partition. */ - if ( s > 0 && initPartCompare.compare( statePtrs[s-1], statePtrs[s] ) < 0 ) { - /* Move to the next partition. */ - destPart += 1; - } - - /* Put the state into its partition. */ - statePtrs[s]->alg.partition = &parts[destPart]; - parts[destPart].list.append( statePtrs[s] ); - } - - /* We just moved all the states from the main list into partitions without - * taking them off the main list. So clean up the main list now. */ - stateList.abandon(); - - /* Split partitions. */ - int numParts = splitCandidates( statePtrs, parts, destPart+1 ); - - /* Fuse states in the same partition. The states will end up back on the - * main list. */ - fusePartitions( parts, numParts ); - - /* Cleanup. */ - delete[] statePtrs; - delete[] parts; -} - -void FsmGraph::initialMarkRound( MarkIndex &markIndex ) -{ - /* P and q for walking pairs. */ - FsmState *p = stateList.head, *q; - - /* Need an initial partition compare. */ - InitPartitionCompare initPartCompare; - - /* Walk all unordered pairs of (p, q) where p != q. - * The second depth of the walk stops before reaching p. This - * gives us all unordered pairs of states (p, q) where p != q. 
*/ - while ( p != 0 ) { - q = stateList.head; - while ( q != p ) { - /* If the states differ on final state status, out transitions or - * any transition data then they should be separated on the initial - * round. */ - if ( initPartCompare.compare( p, q ) != 0 ) - markIndex.markPair( p->alg.stateNum, q->alg.stateNum ); - - q = q->next; - } - p = p->next; - } -} - -bool FsmGraph::markRound( MarkIndex &markIndex ) -{ - /* P an q for walking pairs. Take note if any pair gets marked. */ - FsmState *p = stateList.head, *q; - bool pairWasMarked = false; - - /* Need a mark comparison. */ - MarkCompare markCompare; - - /* Walk all unordered pairs of (p, q) where p != q. - * The second depth of the walk stops before reaching p. This - * gives us all unordered pairs of states (p, q) where p != q. */ - while ( p != 0 ) { - q = stateList.head; - while ( q != p ) { - /* Should we mark the pair? */ - if ( !markIndex.isPairMarked( p->alg.stateNum, q->alg.stateNum ) ) { - if ( markCompare.shouldMark( markIndex, p, q ) ) { - markIndex.markPair( p->alg.stateNum, q->alg.stateNum ); - pairWasMarked = true; - } - } - q = q->next; - } - p = p->next; - } - - return pairWasMarked; -} - - -/** - * \brief Minimize by pair marking. - * - * Decides if each pair of states is distinct or not. Uses O(n^2) memory and - * should only be used on small graphs. Produces the most minmimal FSM - * possible. - */ -void FsmGraph::minimizeStable() -{ - /* Set the state numbers. */ - setStateNumbers( 0 ); - - /* This keeps track of which pairs have been marked. */ - MarkIndex markIndex( stateList.length() ); - - /* Mark pairs where final stateness, out trans, or trans data differ. */ - initialMarkRound( markIndex ); - - /* While the last round of marking succeeded in marking a state - * continue to do another round. */ - int modified = markRound( markIndex ); - while (modified) - modified = markRound( markIndex ); - - /* Merge pairs that are unmarked. 
*/ - fuseUnmarkedPairs( markIndex ); -} - -bool FsmGraph::minimizeRound() -{ - /* Nothing to do if there are no states. */ - if ( stateList.length() == 0 ) - return false; - - /* Need a mergesort on approx compare and an approx compare. */ - MergeSort<FsmState*, ApproxCompare> mergeSort; - ApproxCompare approxCompare; - - /* Fill up an array of pointers to the states. */ - FsmState **statePtrs = new FsmState*[stateList.length()]; - StateList::Iter state = stateList; - for ( int s = 0; state.lte(); state++, s++ ) - statePtrs[s] = state; - - bool modified = false; - - /* Sort The list. */ - mergeSort.sort( statePtrs, stateList.length() ); - - /* Walk the list looking for duplicates next to each other, - * merge in any duplicates. */ - FsmState **pLast = statePtrs; - FsmState **pState = statePtrs + 1; - for ( int i = 1; i < stateList.length(); i++, pState++ ) { - if ( approxCompare.compare( *pLast, *pState ) == 0 ) { - /* Last and pState are the same, so fuse together. Move forward - * with pState but not with pLast. If any more are identical, we - * must */ - fuseEquivStates( *pLast, *pState ); - modified = true; - } - else { - /* Last and this are different, do not set to merge them. Move - * pLast to the current (it may be way behind from merging many - * states) and pState forward one to consider the next pair. */ - pLast = pState; - } - } - delete[] statePtrs; - return modified; -} - -/** - * \brief Minmimize by an approximation. - * - * Repeatedly tries to find states with transitions out to the same set of - * states on the same set of keys until no more identical states can be found. - * Does not produce the most minimial FSM possible. - */ -void FsmGraph::minimizeApproximate() -{ - /* While the last minimization round succeeded in compacting states, - * continue to try to compact states. */ - while ( true ) { - bool modified = minimizeRound(); - if ( ! modified ) - break; - } -} - - -/* Remove states that have no path to them from the start state. 
Recursively - * traverses the graph marking states that have paths into them. Then removes - * all states that did not get marked. */ -void FsmGraph::removeUnreachableStates() -{ - /* Misfit accounting should be off and there should be no states on the - * misfit list. */ - assert( !misfitAccounting && misfitList.length() == 0 ); - - /* Mark all the states that can be reached - * through the existing set of entry points. */ - markReachableFromHere( startState ); - for ( EntryMap::Iter en = entryPoints; en.lte(); en++ ) - markReachableFromHere( en->value ); - - /* Delete all states that are not marked - * and unmark the ones that are marked. */ - FsmState *state = stateList.head; - while ( state ) { - FsmState *next = state->next; - - if ( state->stateBits & SB_ISMARKED ) - state->stateBits &= ~ SB_ISMARKED; - else { - detachState( state ); - stateList.detach( state ); - delete state; - } - - state = next; - } -} - -bool FsmGraph::outListCovers( FsmState *state ) -{ - /* Must be at least one range to cover. */ - if ( state->outList.length() == 0 ) - return false; - - /* The first must start at the lower bound. */ - TransList::Iter trans = state->outList.first(); - if ( keyOps->minKey < trans->lowKey ) - return false; - - /* Loop starts at second el. */ - trans.increment(); - - /* Loop checks lower against prev upper. */ - for ( ; trans.lte(); trans++ ) { - /* Lower end of the trans must be one greater than the - * previous' high end. */ - Key lowKey = trans->lowKey; - lowKey.decrement(); - if ( trans->prev->highKey < lowKey ) - return false; - } - - /* Require that the last range extends to the upper bound. */ - trans = state->outList.last(); - if ( trans->highKey < keyOps->maxKey ) - return false; - - return true; -} - -/* Remove states that that do not lead to a final states. Works recursivly traversing - * the graph in reverse (starting from all final states) and marking seen states. Then - * removes states that did not get marked. 
*/ -void FsmGraph::removeDeadEndStates() -{ - /* Misfit accounting should be off and there should be no states on the - * misfit list. */ - assert( !misfitAccounting && misfitList.length() == 0 ); - - /* Mark all states that have paths to the final states. */ - FsmState **st = finStateSet.data; - int nst = finStateSet.length(); - for ( int i = 0; i < nst; i++, st++ ) - markReachableFromHereReverse( *st ); - - /* Start state gets honorary marking. If the machine accepts nothing we - * still want the start state to hang around. This must be done after the - * recursive call on all the final states so that it does not cause the - * start state in transitions to be skipped when the start state is - * visited by the traversal. */ - startState->stateBits |= SB_ISMARKED; - - /* Delete all states that are not marked - * and unmark the ones that are marked. */ - FsmState *state = stateList.head; - while ( state != 0 ) { - FsmState *next = state->next; - - if ( state->stateBits & SB_ISMARKED ) - state->stateBits &= ~ SB_ISMARKED; - else { - detachState( state ); - stateList.detach( state ); - delete state; - } - - state = next; - } -} - -/* Remove states on the misfit list. To work properly misfit accounting should - * be on when this is called. The detaching of a state will likely cause - * another misfit to be collected and it can then be removed. */ -void FsmGraph::removeMisfits() -{ - while ( misfitList.length() > 0 ) { - /* Get the first state. */ - FsmState *state = misfitList.head; - - /* Detach and delete. */ - detachState( state ); - - /* The state was previously on the misfit list and detaching can only - * remove in transitions so the state must still be on the misfit - * list. */ - misfitList.detach( state ); - delete state; - } -} - -/* Fuse src into dest because they have been deemed equivalent states. - * Involves moving transitions into src to go into dest and invoking - * callbacks. Src is deleted detached from the graph and deleted. 
*/ -void FsmGraph::fuseEquivStates( FsmState *dest, FsmState *src ) -{ - /* This would get ugly. */ - assert( dest != src ); - - /* Cur is a duplicate. We can merge it with trail. */ - inTransMove( dest, src ); - - detachState( src ); - stateList.detach( src ); - delete src; -} - -void FsmGraph::fuseUnmarkedPairs( MarkIndex &markIndex ) -{ - FsmState *p = stateList.head, *nextP, *q; - - /* Definition: The primary state of an equivalence class is the first state - * encounterd that belongs to the equivalence class. All equivalence - * classes have primary state including equivalence classes with one state - * in it. */ - - /* For each unmarked pair merge p into q and delete p. q is always the - * primary state of it's equivalence class. We wouldn't have landed on it - * here if it were not, because it would have been deleted. - * - * Proof that q is the primaray state of it's equivalence class: Assume q - * is not the primary state of it's equivalence class, then it would be - * merged into some state that came before it and thus p would be - * equivalent to that state. But q is the first state that p is equivalent - * to so we have a contradiction. */ - - /* Walk all unordered pairs of (p, q) where p != q. - * The second depth of the walk stops before reaching p. This - * gives us all unordered pairs of states (p, q) where p != q. */ - while ( p != 0 ) { - nextP = p->next; - - q = stateList.head; - while ( q != p ) { - /* If one of p or q is a final state then mark. */ - if ( ! markIndex.isPairMarked( p->alg.stateNum, q->alg.stateNum ) ) { - fuseEquivStates( q, p ); - break; - } - q = q->next; - } - p = nextP; - } -} - -void FsmGraph::fusePartitions( MinPartition *parts, int numParts ) -{ - /* For each partition, fuse state 2, 3, ... into state 1. */ - for ( int p = 0; p < numParts; p++ ) { - /* Assume that there will always be at least one state. 
*/ - FsmState *first = parts[p].list.head, *toFuse = first->next; - - /* Put the first state back onto the main state list. Don't bother - * removing it from the partition list first. */ - stateList.append( first ); - - /* Fuse the rest of the state into the first. */ - while ( toFuse != 0 ) { - /* Save the next. We will trash it before it is needed. */ - FsmState *next = toFuse->next; - - /* Put the state to be fused in to the first back onto the main - * list before it is fuse. the graph. The state needs to be on - * the main list for the detach from the graph to work. Don't - * bother removing the state from the partition list first. We - * need not maintain it. */ - stateList.append( toFuse ); - - /* Now fuse to the first. */ - fuseEquivStates( first, toFuse ); - - /* Go to the next that we saved before trashing the next pointer. */ - toFuse = next; - } - - /* We transfered the states from the partition list into the main list without - * removing the states from the partition list first. Clean it up. */ - parts[p].list.abandon(); - } -} - - -/* Merge neighboring transitions go to the same state and have the same - * transitions data. 
*/ -void FsmGraph::compressTransitions() -{ - for ( StateList::Iter st = stateList; st.lte(); st++ ) { - if ( st->outList.length() > 1 ) { - for ( TransList::Iter trans = st->outList, next = trans.next(); next.lte(); ) { - Key nextLow = next->lowKey; - nextLow.decrement(); - if ( trans->highKey == nextLow && trans->toState == next->toState && - CmpActionTable::compare( trans->actionTable, next->actionTable ) == 0 ) - { - trans->highKey = next->highKey; - st->outList.detach( next ); - detachTrans( next->fromState, next->toState, next ); - delete next; - next = trans.next(); - } - else { - trans.increment(); - next.increment(); - } - } - } - } -} diff --git a/colm/fsmrun.h b/colm/fsmrun.h deleted file mode 100644 index 821b3ccf..00000000 --- a/colm/fsmrun.h +++ /dev/null @@ -1,36 +0,0 @@ -/* - * Copyright 2007-2012 Adrian Thurston <thurston@complang.org> - */ - -/* This file is part of Colm. - * - * Colm is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * Colm is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with Colm; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#ifndef _FSMRUN2_H -#define _FSMRUN2_H - -#include <colm/input.h> - -#ifdef __cplusplus -extern "C" { -#endif - - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/colm/fsmstate.cc b/colm/fsmstate.cc deleted file mode 100644 index dae1479b..00000000 --- a/colm/fsmstate.cc +++ /dev/null @@ -1,467 +0,0 @@ -/* - * Copyright 2006-2012 Adrian Thurston <thurston@complang.org> - */ - -/* This file is part of Colm. - * - * Colm is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * Colm is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Colm; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include <string.h> -#include <assert.h> -#include "fsmgraph.h" - -#include <iostream> -using namespace std; - -/* Construct a mark index for a specified number of states. Must new up - * an array that is states^2 in size. */ -MarkIndex::MarkIndex( int states ) : numStates(states) -{ - /* Total pairs is states^2. Actually only use half of these, but we allocate - * them all to make indexing into the array easier. */ - int total = states * states; - - /* New up chars so that individual DListEl constructors are - * not called. Zero out the mem manually. 
*/ - array = new bool[total]; - memset( array, 0, sizeof(bool) * total ); -} - -/* Free the array used to store state pairs. */ -MarkIndex::~MarkIndex() -{ - delete[] array; -} - -/* Mark a pair of states. States are specified by their number. The - * marked states are moved from the unmarked list to the marked list. */ -void MarkIndex::markPair(int state1, int state2) -{ - int pos = ( state1 >= state2 ) ? - ( state1 * numStates ) + state2 : - ( state2 * numStates ) + state1; - - array[pos] = true; -} - -/* Returns true if the pair of states are marked. Returns false otherwise. - * Ordering of states given does not matter. */ -bool MarkIndex::isPairMarked(int state1, int state2) -{ - int pos = ( state1 >= state2 ) ? - ( state1 * numStates ) + state2 : - ( state2 * numStates ) + state1; - - return array[pos]; -} - -/* Create a new fsm state. State has not out transitions or in transitions, not - * out out transition data and not number. */ -FsmState::FsmState() -: - /* No out or in transitions. */ - outList(), - inList(), - - /* No entry points, or epsilon trans. */ - entryIds(), - epsilonTrans(), - - /* Conditions. */ - stateCondList(), - - /* No transitions in from other states. */ - foreignInTrans(0), - - /* Only used during merging. Normally null. */ - stateDictEl(0), - eptVect(0), - - /* No state identification bits. */ - stateBits(0), - - /* No Priority data. */ - outPriorTable(), - - /* No Action data. */ - toStateActionTable(), - fromStateActionTable(), - outActionTable(), - outCondSet(), - errActionTable(), - eofActionTable(), - - eofTarget(0) -{ -} - -/* Copy everything except actual the transitions. That is left up to the - * FsmGraph copy constructor. */ -FsmState::FsmState(const FsmState &other) -: - /* All lists are cleared. They will be filled in when the - * individual transitions are duplicated and attached. */ - outList(), - inList(), - - /* Duplicate the entry id set and epsilon transitions. 
These - * are sets of integers and as such need no fixing. */ - entryIds(other.entryIds), - epsilonTrans(other.epsilonTrans), - - /* Copy in the elements of the conditions. */ - stateCondList( other.stateCondList ), - - /* No transitions in from other states. */ - foreignInTrans(0), - - /* This is only used during merging. Normally null. */ - stateDictEl(0), - eptVect(0), - - /* Fsm state data. */ - stateBits(other.stateBits), - - /* Copy in priority data. */ - outPriorTable(other.outPriorTable), - - /* Copy in action data. */ - toStateActionTable(other.toStateActionTable), - fromStateActionTable(other.fromStateActionTable), - outActionTable(other.outActionTable), - outCondSet(other.outCondSet), - errActionTable(other.errActionTable), - eofActionTable(other.eofActionTable), - - eofTarget(0) -{ - /* Duplicate all the transitions. */ - for ( TransList::Iter trans = other.outList; trans.lte(); trans++ ) { - /* Dupicate and store the orginal target in the transition. This will - * be corrected once all the states have been created. */ - FsmTrans *newTrans = new FsmTrans(*trans); - newTrans->toState = trans->toState; - outList.append( newTrans ); - } -} - -/* If there is a state dict element, then delete it. Everything else is left - * up to the FsmGraph destructor. */ -FsmState::~FsmState() -{ - if ( stateDictEl != 0 ) - delete stateDictEl; -} - -/* Compare two states using pointers to the states. With the approximate - * compare the idea is that if the compare finds them the same, they can - * immediately be merged. */ -int ApproxCompare::compare( const FsmState *state1 , const FsmState *state2 ) -{ - int compareRes; - - /* Test final state status. */ - if ( (state1->stateBits & SB_ISFINAL) && !(state2->stateBits & SB_ISFINAL) ) - return -1; - else if ( !(state1->stateBits & SB_ISFINAL) && (state2->stateBits & SB_ISFINAL) ) - return 1; - - /* Test epsilon transition sets. 
*/ - compareRes = CmpEpsilonTrans::compare( state1->epsilonTrans, - state2->epsilonTrans ); - if ( compareRes != 0 ) - return compareRes; - - /* Compare the out transitions. */ - compareRes = FsmGraph::compareStateData( state1, state2 ); - if ( compareRes != 0 ) - return compareRes; - - /* Use a pair iterator to get the transition pairs. */ - PairIter<FsmTrans> outPair( state1->outList.head, state2->outList.head ); - for ( ; !outPair.end(); outPair++ ) { - switch ( outPair.userState ) { - - case RangeInS1: - compareRes = FsmGraph::compareFullPtr( outPair.s1Tel.trans, 0 ); - if ( compareRes != 0 ) - return compareRes; - break; - - case RangeInS2: - compareRes = FsmGraph::compareFullPtr( 0, outPair.s2Tel.trans ); - if ( compareRes != 0 ) - return compareRes; - break; - - case RangeOverlap: - compareRes = FsmGraph::compareFullPtr( - outPair.s1Tel.trans, outPair.s2Tel.trans ); - if ( compareRes != 0 ) - return compareRes; - break; - - case BreakS1: - case BreakS2: - break; - } - } - - /* Got through the entire state comparison, deem them equal. */ - return 0; -} - -/* Compare class for the sort that does the intial partition of compaction. */ -int InitPartitionCompare::compare( const FsmState *state1 , const FsmState *state2 ) -{ - int compareRes; - - /* Test final state status. */ - if ( (state1->stateBits & SB_ISFINAL) && !(state2->stateBits & SB_ISFINAL) ) - return -1; - else if ( !(state1->stateBits & SB_ISFINAL) && (state2->stateBits & SB_ISFINAL) ) - return 1; - - /* Test epsilon transition sets. */ - compareRes = CmpEpsilonTrans::compare( state1->epsilonTrans, - state2->epsilonTrans ); - if ( compareRes != 0 ) - return compareRes; - - /* Compare the out transitions. */ - compareRes = FsmGraph::compareStateData( state1, state2 ); - if ( compareRes != 0 ) - return compareRes; - - /* Use a pair iterator to test the condition pairs. 
*/ - PairIter<StateCond> condPair( state1->stateCondList.head, state2->stateCondList.head ); - for ( ; !condPair.end(); condPair++ ) { - switch ( condPair.userState ) { - case RangeInS1: - return 1; - case RangeInS2: - return -1; - - case RangeOverlap: { - CondSpace *condSpace1 = condPair.s1Tel.trans->condSpace; - CondSpace *condSpace2 = condPair.s2Tel.trans->condSpace; - if ( condSpace1 < condSpace2 ) - return -1; - else if ( condSpace1 > condSpace2 ) - return 1; - break; - } - case BreakS1: - case BreakS2: - break; - } - } - - /* Use a pair iterator to test the transition pairs. */ - PairIter<FsmTrans> outPair( state1->outList.head, state2->outList.head ); - for ( ; !outPair.end(); outPair++ ) { - switch ( outPair.userState ) { - - case RangeInS1: - compareRes = FsmGraph::compareDataPtr( outPair.s1Tel.trans, 0 ); - if ( compareRes != 0 ) - return compareRes; - break; - - case RangeInS2: - compareRes = FsmGraph::compareDataPtr( 0, outPair.s2Tel.trans ); - if ( compareRes != 0 ) - return compareRes; - break; - - case RangeOverlap: - compareRes = FsmGraph::compareDataPtr( - outPair.s1Tel.trans, outPair.s2Tel.trans ); - if ( compareRes != 0 ) - return compareRes; - break; - - case BreakS1: - case BreakS2: - break; - } - } - - return 0; -} - -/* Compare class for the sort that does the partitioning. */ -int PartitionCompare::compare( const FsmState *state1, const FsmState *state2 ) -{ - int compareRes; - - /* Use a pair iterator to get the transition pairs. 
*/ - PairIter<FsmTrans> outPair( state1->outList.head, state2->outList.head ); - for ( ; !outPair.end(); outPair++ ) { - switch ( outPair.userState ) { - - case RangeInS1: - compareRes = FsmGraph::comparePartPtr( outPair.s1Tel.trans, 0 ); - if ( compareRes != 0 ) - return compareRes; - break; - - case RangeInS2: - compareRes = FsmGraph::comparePartPtr( 0, outPair.s2Tel.trans ); - if ( compareRes != 0 ) - return compareRes; - break; - - case RangeOverlap: - compareRes = FsmGraph::comparePartPtr( - outPair.s1Tel.trans, outPair.s2Tel.trans ); - if ( compareRes != 0 ) - return compareRes; - break; - - case BreakS1: - case BreakS2: - break; - } - } - - return 0; -} - -/* Compare class for the sort that does the partitioning. */ -bool MarkCompare::shouldMark( MarkIndex &markIndex, const FsmState *state1, - const FsmState *state2 ) -{ - /* Use a pair iterator to get the transition pairs. */ - PairIter<FsmTrans> outPair( state1->outList.head, state2->outList.head ); - for ( ; !outPair.end(); outPair++ ) { - switch ( outPair.userState ) { - - case RangeInS1: - if ( FsmGraph::shouldMarkPtr( markIndex, outPair.s1Tel.trans, 0 ) ) - return true; - break; - - case RangeInS2: - if ( FsmGraph::shouldMarkPtr( markIndex, 0, outPair.s2Tel.trans ) ) - return true; - break; - - case RangeOverlap: - if ( FsmGraph::shouldMarkPtr( markIndex, - outPair.s1Tel.trans, outPair.s2Tel.trans ) ) - return true; - break; - - case BreakS1: - case BreakS2: - break; - } - } - - return false; -} - -/* - * Transition Comparison. - */ - -/* Compare target partitions. Either pointer may be null. */ -int FsmGraph::comparePartPtr( FsmTrans *trans1, FsmTrans *trans2 ) -{ - if ( trans1 != 0 ) { - /* If trans1 is set then so should trans2. The initial partitioning - * guarantees this for us. */ - if ( trans1->toState == 0 && trans2->toState != 0 ) - return -1; - else if ( trans1->toState != 0 && trans2->toState == 0 ) - return 1; - else if ( trans1->toState != 0 ) { - /* Both of targets are set. 
*/ - return CmpOrd< MinPartition* >::compare( - trans1->toState->alg.partition, trans2->toState->alg.partition ); - } - } - return 0; -} - - -/* Compares two transition pointers according to priority and functions. - * Either pointer may be null. Does not consider to state or from state. */ -int FsmGraph::compareDataPtr( FsmTrans *trans1, FsmTrans *trans2 ) -{ - if ( trans1 == 0 && trans2 != 0 ) - return -1; - else if ( trans1 != 0 && trans2 == 0 ) - return 1; - else if ( trans1 != 0 ) { - /* Both of the transition pointers are set. */ - int compareRes = compareTransData( trans1, trans2 ); - if ( compareRes != 0 ) - return compareRes; - } - return 0; -} - -/* Compares two transitions according to target state, priority and functions. - * Does not consider from state. Either of the pointers may be null. */ -int FsmGraph::compareFullPtr( FsmTrans *trans1, FsmTrans *trans2 ) -{ - if ( (trans1 != 0) ^ (trans2 != 0) ) { - /* Exactly one of the transitions is set. */ - if ( trans1 != 0 ) - return -1; - else - return 1; - } - else if ( trans1 != 0 ) { - /* Both of the transition pointers are set. Test target state, - * priority and funcs. */ - if ( trans1->toState < trans2->toState ) - return -1; - else if ( trans1->toState > trans2->toState ) - return 1; - else if ( trans1->toState != 0 ) { - /* Test transition data. */ - int compareRes = compareTransData( trans1, trans2 ); - if ( compareRes != 0 ) - return compareRes; - } - } - return 0; -} - - -bool FsmGraph::shouldMarkPtr( MarkIndex &markIndex, FsmTrans *trans1, - FsmTrans *trans2 ) -{ - if ( (trans1 != 0) ^ (trans2 != 0) ) { - /* Exactly one of the transitions is set. The initial mark round - * should rule out this case. */ - assert( false ); - } - else if ( trans1 != 0 ) { - /* Both of the transitions are set. If the target pair is marked, then - * the pair we are considering gets marked. 
*/ - return markIndex.isPairMarked( trans1->toState->alg.stateNum, - trans2->toState->alg.stateNum ); - } - - /* Neither of the transitiosn are set. */ - return false; -} - - diff --git a/colm/global.h b/colm/global.h deleted file mode 100644 index d67c55e4..00000000 --- a/colm/global.h +++ /dev/null @@ -1,90 +0,0 @@ -/* - * Copyright 2006-2012 Adrian Thurston <thurston@complang.org> - */ - -/* This file is part of Colm. - * - * Colm is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * Colm is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Colm; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#ifndef __COLM_GLOBAL_H -#define __COLM_GLOBAL_H - -#include <stdio.h> -#include <iostream> -#include <fstream> -#include <fstream> -#include <string> - -#include "config.h" -#include "defs.h" -#include "avltree.h" -#include "keyops.h" - -#define PROGNAME "colm" - -/* IO filenames and stream. */ -extern bool genGraphviz; -extern int gblErrorCount; - -std::ostream &error(); - -/* IO filenames and stream. */ -extern const char *outputFileName; -extern std::ostream *outStream; -extern bool generateGraphviz; -extern bool branchPointInfo; -extern bool verbose, logging; -extern bool addUniqueEmptyProductions; - -extern int gblErrorCount; -extern char startDefName[]; - -/* Error reporting. 
*/ -std::ostream &error(); -std::ostream &error( int first_line, int first_column ); -std::ostream &warning( ); -std::ostream &warning( int first_line, int first_column ); - -extern std::ostream *outStream; -extern bool printStatistics; - -extern int gblErrorCount; -extern char machineMain[]; -extern bool gblLibrary; -extern const char *gblExportTo; - -/* Location in an input file. */ -struct InputLoc -{ - const char *fileName; - int line; - int col; -}; - -/* Error reporting. */ -std::ostream &error(); -std::ostream &error( const InputLoc &loc ); -std::ostream &warning( const InputLoc &loc ); - -void scan( char *fileName, std::istream &input, std::ostream &output ); -void terminateAllParsers( ); -void checkMachines( ); - -void xmlEscapeHost( std::ostream &out, char *data, int len ); -void openOutput(); -void escapeLiteralString( std::ostream &out, const char *data ); - -#endif diff --git a/colm/input.c b/colm/input.c deleted file mode 100644 index b6c96369..00000000 --- a/colm/input.c +++ /dev/null @@ -1,847 +0,0 @@ -/* - * Copyright 2007-2012 Adrian Thurston <thurston@complang.org> - */ - -/* This file is part of Colm. - * - * Colm is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * Colm is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with Colm; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include <colm/input.h> -#include <colm/fsmrun.h> -#include <colm/pdarun.h> -#include <colm/debug.h> - -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <assert.h> -#include <unistd.h> - -#define true 1 -#define false 0 - -RunBuf *newRunBuf() -{ - RunBuf *rb = (RunBuf*)malloc(sizeof(RunBuf)); - memset( rb, 0, sizeof(RunBuf) ); - return rb; -} - -void initFdFuncs(); -void initFileFuncs(); -void initPatternFuncs(); -void initReplFuncs(); - -struct SourceFuncs dynamicFuncs; -struct SourceFuncs fileFuncs; -struct SourceFuncs fdFuncs; - -void initSourceStream( SourceStream *inputStream ) -{ - /* FIXME: correct values here. */ - inputStream->line = 1; - inputStream->column = 1; - inputStream->byte = 0; -} - -void clearSourceStream( struct ColmProgram *prg, Tree **sp, SourceStream *sourceStream ) -{ - RunBuf *buf = sourceStream->queue; - while ( buf != 0 ) { - switch ( buf->type ) { - case RunBufDataType: - break; - - case RunBufTokenType: - case RunBufIgnoreType: - case RunBufSourceType: - treeDownref( prg, sp, buf->tree ); - break; - } - - RunBuf *next = buf->next; - free( buf ); - buf = next; - } - - sourceStream->queue = 0; -} - -SourceStream *newSourceStreamFile( FILE *file ) -{ - SourceStream *is = (SourceStream*)malloc(sizeof(SourceStream)); - memset( is, 0, sizeof(SourceStream) ); - is->line = 1; - is->column = 1; - is->file = file; - is->funcs = &fileFuncs; - return is; -} - -SourceStream *newSourceStreamFd( long fd ) -{ - SourceStream *is = (SourceStream*)malloc(sizeof(SourceStream)); - memset( is, 0, sizeof(SourceStream) ); - is->line = 1; - is->column = 1; - is->fd = fd; - is->funcs = &fdFuncs; - return is; -} - -static RunBuf *sourceStreamPopHead( SourceStream *is ) -{ - RunBuf *ret = is->queue; - is->queue = is->queue->next; - 
if ( is->queue == 0 ) - is->queueTail = 0; - else - is->queue->prev = 0; - return ret; -} - -static void sourceStreamAppend( SourceStream *is, RunBuf *runBuf ) -{ - if ( is->queue == 0 ) { - runBuf->prev = runBuf->next = 0; - is->queue = is->queueTail = runBuf; - } - else { - is->queueTail->next = runBuf; - runBuf->prev = is->queueTail; - runBuf->next = 0; - is->queueTail = runBuf; - } -} - -static void sourceStreamPrepend( SourceStream *is, RunBuf *runBuf ) -{ - if ( is->queue == 0 ) { - runBuf->prev = runBuf->next = 0; - is->queue = is->queueTail = runBuf; - } - else { - is->queue->prev = runBuf; - runBuf->prev = 0; - runBuf->next = is->queue; - is->queue = runBuf; - } -} - -void initInputFuncs() -{ - initFdFuncs(); - initFileFuncs(); - initPatternFuncs(); - initReplFuncs(); -} - -/* - * Base run-time input streams. - */ - -int fdGetData( SourceStream *is, int skip, char *dest, int length, int *copied ) -{ - int ret = 0; - *copied = 0; - - if ( skip == 9 && length == 6 ) { - debug( REALM_INPUT, "foo\n" ); - } - - /* Move over skip bytes. */ - RunBuf *buf = is->queue; - while ( true ) { - if ( buf == 0 ) { - /* Got through the in-mem buffers without copying anything. */ - RunBuf *runBuf = newRunBuf(); - sourceStreamAppend( is, runBuf ); - int received = is->funcs->getDataImpl( is, runBuf->data, FSM_BUFSIZE ); - if ( received == 0 ) { - ret = INPUT_EOD; - break; - } - runBuf->length = received; - - int slen = received < length ? received : length; - memcpy( dest, runBuf->data, slen ); - *copied = slen; - ret = INPUT_DATA; - break; - } - - int avail = buf->length - buf->offset; - - /* Anything available in the current buffer. */ - if ( avail > 0 ) { - /* The source data from the current buffer. */ - char *src = &buf->data[buf->offset]; - - /* Need to skip? */ - if ( skip > 0 && skip >= avail ) { - /* Skipping the the whole source. */ - skip -= avail; - } - else { - /* Either skip is zero, or less than slen. Skip goes to zero. - * Some data left over, copy it. 
*/ - src += skip; - avail -= skip; - skip = 0; - - int slen = avail < length ? avail : length; - memcpy( dest, src, slen ) ; - *copied += slen; - ret = INPUT_DATA; - break; - } - } - - buf = buf->next; - } - - return ret; -} - -int fdConsumeData( SourceStream *is, int length ) -{ - debug( REALM_INPUT, "source consuming %ld bytes\n", length ); - - int consumed = 0; - - /* Move over skip bytes. */ - while ( true ) { - RunBuf *buf = is->queue; - - if ( buf == 0 ) - break; - - if ( buf->type == RunBufTokenType ) - break; - else if ( buf->type == RunBufIgnoreType ) - break; - else { - /* Anything available in the current buffer. */ - int avail = buf->length - buf->offset; - if ( avail > 0 ) { - /* The source data from the current buffer. */ - int slen = avail <= length ? avail : length; - debug( REALM_INPUT, "consumed: %.*s\n", slen, buf->data + buf->offset ); - consumed += slen; - length -= slen; - buf->offset += slen; - } - } - - if ( length == 0 ) - break; - - RunBuf *runBuf = sourceStreamPopHead( is ); - free( runBuf ); - } - - return consumed; -} - -int fdUndoConsumeData( SourceStream *is, const char *data, int length ) -{ - debug( REALM_INPUT, "undoing consume of %ld bytes\n", length ); - - RunBuf *newBuf = newRunBuf(); - newBuf->length = length; - memcpy( newBuf->data, data, length ); - sourceStreamPrepend( is, newBuf ); - - return length; -} - -/* - * File - */ - -int fileGetDataImpl( SourceStream *is, char *dest, int length ) -{ - debug( REALM_INPUT, "inputStreamFileGetDataImpl length = %ld\n", length ); - size_t res = fread( dest, 1, length, is->file ); - return res; -} - -void initFileFuncs() -{ - memset( &fileFuncs, 0, sizeof(struct SourceFuncs) ); - fileFuncs.getData = &fdGetData; - fileFuncs.consumeData = &fdConsumeData; - fileFuncs.undoConsumeData = &fdUndoConsumeData; - fileFuncs.getDataImpl = &fileGetDataImpl; -} - -/* - * FD - */ - -int fdGetDataImpl( SourceStream *is, char *dest, int length ) -{ - long got = read( is->fd, dest, length ); - return got; 
-} - -void initFdFuncs() -{ - memset( &fdFuncs, 0, sizeof(struct SourceFuncs) ); - fdFuncs.getData = &fdGetData; - fdFuncs.consumeData = &fdConsumeData; - fdFuncs.undoConsumeData = &fdUndoConsumeData; - fdFuncs.getDataImpl = &fdGetDataImpl; -} - -/* - * InputStream struct, this wraps the list of input streams. - */ - -void initInputStream( InputStream *inputStream ) -{ - memset( inputStream, 0, sizeof(InputStream) ); - - /* FIXME: correct values here. */ - inputStream->line = 1; - inputStream->column = 1; - inputStream->byte = 0; -} - -void clearInputStream( struct ColmProgram *prg, Tree **sp, InputStream *inputStream ) -{ - RunBuf *buf = inputStream->queue; - while ( buf != 0 ) { - switch ( buf->type ) { - case RunBufDataType: - break; - - case RunBufTokenType: - case RunBufIgnoreType: - case RunBufSourceType: - treeDownref( prg, sp, buf->tree ); - break; - } - - RunBuf *next = buf->next; - free( buf ); - buf = next; - } - - inputStream->queue = 0; -} - -static void inputStreamPrepend( InputStream *is, RunBuf *runBuf ) -{ - if ( is->queue == 0 ) { - runBuf->prev = runBuf->next = 0; - is->queue = is->queueTail = runBuf; - } - else { - is->queue->prev = runBuf; - runBuf->prev = 0; - runBuf->next = is->queue; - is->queue = runBuf; - } -} - -static RunBuf *inputStreamPopHead( InputStream *is ) -{ - RunBuf *ret = is->queue; - is->queue = is->queue->next; - if ( is->queue == 0 ) - is->queueTail = 0; - else - is->queue->prev = 0; - return ret; -} - -static void inputStreamAppend( InputStream *is, RunBuf *runBuf ) -{ - if ( is->queue == 0 ) { - runBuf->prev = runBuf->next = 0; - is->queue = is->queueTail = runBuf; - } - else { - is->queueTail->next = runBuf; - runBuf->prev = is->queueTail; - runBuf->next = 0; - is->queueTail = runBuf; - } -} - -static RunBuf *inputStreamPopTail( InputStream *is ) -{ - RunBuf *ret = is->queueTail; - is->queueTail = is->queueTail->prev; - if ( is->queueTail == 0 ) - is->queue = 0; - else - is->queueTail->next = 0; - return ret; -} - -static 
int isSourceStream( InputStream *is ) -{ - if ( is->queue != 0 && is->queue->type == RunBufSourceType ) - return true; - return false; -} - -void setEof( InputStream *is ) -{ - debug( REALM_INPUT, "setting EOF in input stream\n" ); - is->eof = true; -} - -void unsetEof( InputStream *is ) -{ - if ( isSourceStream( is ) ) { - Stream *stream = (Stream*)is->queue->tree; - stream->in->eof = false; - } - else { - is->eof = false; - } -} - -int getData( FsmRun *fsmRun, InputStream *is, int skip, char *dest, int length, int *copied ) -{ - int ret = 0; - *copied = 0; - - /* Move over skip bytes. */ - RunBuf *buf = is->queue; - while ( true ) { - if ( buf == 0 ) { - /* Got through the in-mem buffers without copying anything. */ - ret = is->eof ? INPUT_EOF : INPUT_EOD; - break; - } - - if ( buf->type == RunBufSourceType ) { - Stream *stream = (Stream*)buf->tree; - int type = stream->in->funcs->getData( stream->in, skip, dest, length, copied ); - - attachSource( fsmRun, stream->in ); - - if ( type == INPUT_EOD && is->eof ) { - ret = INPUT_EOF; - break; - } - - ret = type; - break; - } - - if ( buf->type == RunBufTokenType ) { - ret = INPUT_TREE; - break; - } - - if ( buf->type == RunBufIgnoreType ) { - ret = INPUT_IGNORE; - break; - } - - int avail = buf->length - buf->offset; - - /* Anything available in the current buffer. */ - if ( avail > 0 ) { - /* The source data from the current buffer. */ - char *src = &buf->data[buf->offset]; - - /* Need to skip? */ - if ( skip > 0 && skip >= avail ) { - /* Skipping the the whole source. */ - skip -= avail; - } - else { - /* Either skip is zero, or less than slen. Skip goes to zero. - * Some data left over, copy it. */ - src += skip; - avail -= skip; - skip = 0; - - int slen = avail <= length ? 
avail : length; - memcpy( dest, src, slen ) ; - *copied += slen; - ret = INPUT_DATA; - break; - } - } - - buf = buf->next; - } - - attachInput( fsmRun, is ); - -#if DEBUG - switch ( ret ) { - case INPUT_DATA: - debug( REALM_INPUT, "get data: DATA copied: %d: %.*s\n", *copied, (int)*copied, dest ); - break; - case INPUT_EOD: - debug( REALM_INPUT, "get data: EOD\n" ); - break; - case INPUT_EOF: - debug( REALM_INPUT, "get data: EOF\n" ); - break; - case INPUT_TREE: - debug( REALM_INPUT, "get data: TREE\n" ); - break; - case INPUT_IGNORE: - debug( REALM_INPUT, "get data: IGNORE\n" ); - break; - case INPUT_LANG_EL: - debug( REALM_INPUT, "get data: LANG_EL\n" ); - break; - } -#endif - - return ret; -} - -int consumeData( InputStream *is, int length ) -{ - debug( REALM_INPUT, "consuming %d bytes\n", length ); - - int consumed = 0; - - /* Move over skip bytes. */ - while ( true ) { - RunBuf *buf = is->queue; - - if ( buf == 0 ) - break; - - if ( buf->type == RunBufSourceType ) { - Stream *stream = (Stream*)buf->tree; - int slen = stream->in->funcs->consumeData( stream->in, length ); - - consumed += slen; - length -= slen; - } - else if ( buf->type == RunBufTokenType ) - break; - else if ( buf->type == RunBufIgnoreType ) - break; - else { - /* Anything available in the current buffer. */ - int avail = buf->length - buf->offset; - if ( avail > 0 ) { - /* The source data from the current buffer. */ - int slen = avail <= length ? 
avail : length; - consumed += slen; - length -= slen; - buf->offset += slen; - } - } - - if ( length == 0 ) - break; - - RunBuf *runBuf = inputStreamPopHead( is ); - free( runBuf ); - } - - return consumed; -} - -int undoConsumeData( FsmRun *fsmRun, InputStream *is, const char *data, int length ) -{ - debug( REALM_INPUT, "undoing consume of %ld bytes\n", length ); - - if ( isSourceStream( is ) ) { - Stream *stream = (Stream*)is->queue->tree; - int len = stream->in->funcs->undoConsumeData( stream->in, data, length ); - - if ( stream->in->attached != 0 ) - detachSource( stream->in->attached, stream->in ); - - return len; - } - else { - RunBuf *newBuf = newRunBuf(); - newBuf->length = length; - memcpy( newBuf->data, data, length ); - inputStreamPrepend( is, newBuf ); - - if ( is->attached != 0 ) - detachInput( is->attached, is ); - - return length; - } -} - -Tree *consumeTree( InputStream *is ) -{ - while ( is->queue != 0 && is->queue->type == RunBufDataType && is->queue->offset == is->queue->length ) { - RunBuf *runBuf = inputStreamPopHead( is ); - free( runBuf ); - } - - if ( is->queue != 0 && (is->queue->type == RunBufTokenType || is->queue->type == RunBufIgnoreType) ) { - RunBuf *runBuf = inputStreamPopHead( is ); - - /* FIXME: using runbufs here for this is a poor use of memory. */ - Tree *tree = runBuf->tree; - free(runBuf); - return tree; - } - - return 0; -} - -void undoConsumeTree( InputStream *is, Tree *tree, int ignore ) -{ - if ( is->attached != 0 ) - detachInput( is->attached, is ); - - /* Create a new buffer for the data. This is the easy implementation. - * Something better is needed here. It puts a max on the amount of - * data that can be pushed back to the inputStream. */ - RunBuf *newBuf = newRunBuf(); - newBuf->type = ignore ? 
RunBufIgnoreType : RunBufTokenType; - newBuf->tree = tree; - inputStreamPrepend( is, newBuf ); -} - -struct LangEl *consumeLangEl( InputStream *is, long *bindId, char **data, long *length ) -{ - if ( isSourceStream( is ) ) { - Stream *stream = (Stream*)is->queue->tree; - return stream->in->funcs->consumeLangEl( stream->in, bindId, data, length ); - } - else { - assert( false ); - } -} - -void undoConsumeLangEl( InputStream *is ) -{ - if ( isSourceStream( is ) ) { - Stream *stream = (Stream*)is->queue->tree; - return stream->in->funcs->undoConsumeLangEl( stream->in ); - } - else { - assert( false ); - } -} - -void prependData( InputStream *is, const char *data, long length ) -{ - if ( is->attached != 0 ) - detachInput( is->attached, is ); - - /* Create a new buffer for the data. This is the easy implementation. - * Something better is needed here. It puts a max on the amount of - * data that can be pushed back to the inputStream. */ - assert( length < FSM_BUFSIZE ); - - RunBuf *newBuf = newRunBuf(); - newBuf->length = length; - memcpy( newBuf->data, data, length ); - - inputStreamPrepend( is, newBuf ); -} - -int undoPrependData( InputStream *is, int length ) -{ - if ( is->attached != 0 ) - detachInput( is->attached, is ); - - debug( REALM_INPUT, "consuming %d bytes\n", length ); - - int consumed = 0; - - /* Move over skip bytes. */ - while ( true ) { - RunBuf *buf = is->queue; - - if ( buf == 0 ) - break; - - if ( buf->type == RunBufSourceType ) { - Stream *stream = (Stream*)buf->tree; - int slen = stream->in->funcs->consumeData( stream->in, length ); - - consumed += slen; - length -= slen; - } - else if ( buf->type == RunBufTokenType ) - break; - else if ( buf->type == RunBufIgnoreType ) - break; - else { - /* Anything available in the current buffer. */ - int avail = buf->length - buf->offset; - if ( avail > 0 ) { - /* The source data from the current buffer. */ - int slen = avail <= length ? 
avail : length; - consumed += slen; - length -= slen; - buf->offset += slen; - } - } - - if ( length == 0 ) - break; - - RunBuf *runBuf = inputStreamPopHead( is ); - free( runBuf ); - } - - return consumed; -} - -void prependTree( InputStream *is, Tree *tree, int ignore ) -{ - if ( is->attached != 0 ) - detachInput( is->attached, is ); - - /* Create a new buffer for the data. This is the easy implementation. - * Something better is needed here. It puts a max on the amount of - * data that can be pushed back to the inputStream. */ - RunBuf *newBuf = newRunBuf(); - newBuf->type = ignore ? RunBufIgnoreType : RunBufTokenType; - newBuf->tree = tree; - inputStreamPrepend( is, newBuf ); -} - -Tree *undoPrependTree( InputStream *is ) -{ - if ( is->attached != 0 ) - detachInput( is->attached, is ); - - while ( is->queue != 0 && is->queue->type == RunBufDataType && is->queue->offset == is->queue->length ) { - RunBuf *runBuf = inputStreamPopHead( is ); - free( runBuf ); - } - - if ( is->queue != 0 && (is->queue->type == RunBufTokenType || is->queue->type == RunBufIgnoreType) ) { - RunBuf *runBuf = inputStreamPopHead( is ); - - /* FIXME: using runbufs here for this is a poor use of memory. */ - Tree *tree = runBuf->tree; - free(runBuf); - return tree; - } - - return 0; -} - -void appendData( InputStream *is, const char *data, long len ) -{ - while ( len > 0 ) { - RunBuf *ad = newRunBuf(); - inputStreamAppend( is, ad ); - - long consume = - len <= (long)sizeof(ad->data) ? - len : (long)sizeof(ad->data); - - memcpy( ad->data, data, consume ); - ad->length = consume; - - len -= consume; - data += consume; - } -} - -Tree *undoAppendData( InputStream *is, int length ) -{ - if ( is->attached != 0 ) - detachInput( is->attached, is ); - - int consumed = 0; - - /* Move over skip bytes. 
*/ - while ( true ) { - RunBuf *buf = is->queueTail; - - if ( buf == 0 ) - break; - - if ( buf->type == RunBufTokenType ) - break; - else if ( buf->type == RunBufIgnoreType ) - break; - else { - /* Anything available in the current buffer. */ - int avail = buf->length - buf->offset; - if ( avail > 0 ) { - /* The source data from the current buffer. */ - int slen = avail <= length ? avail : length; - consumed += slen; - length -= slen; - buf->length -= slen; - } - } - - if ( length == 0 ) - break; - - RunBuf *runBuf = inputStreamPopTail( is ); - free( runBuf ); - } - - return 0; -} - -void appendTree( InputStream *is, Tree *tree ) -{ - RunBuf *ad = newRunBuf(); - - inputStreamAppend( is, ad ); - - ad->type = RunBufTokenType; - ad->tree = tree; - ad->length = 0; -} - -void appendStream( InputStream *in, struct ColmTree *tree ) -{ - RunBuf *ad = newRunBuf(); - - inputStreamAppend( in, ad ); - - ad->type = RunBufSourceType; - ad->tree = tree; - ad->length = 0; -} - -Tree *undoAppendStream( InputStream *is ) -{ - if ( is->attached != 0 ) - detachInput( is->attached, is ); - - RunBuf *runBuf = inputStreamPopTail( is ); - Tree *tree = runBuf->tree; - free( runBuf ); - return tree; -} - -Tree *undoAppendTree( InputStream *is ) -{ - if ( is->attached != 0 ) - detachInput( is->attached, is ); - - RunBuf *runBuf = inputStreamPopTail( is ); - Tree *tree = runBuf->tree; - free( runBuf ); - return tree; -} diff --git a/colm/input.h b/colm/input.h deleted file mode 100644 index 882c6b31..00000000 --- a/colm/input.h +++ /dev/null @@ -1,214 +0,0 @@ -/* - * Copyright 2007-2012 Adrian Thurston <thurston@complang.org> - */ - -/* This file is part of Colm. - * - * Colm is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. 
- * - * Colm is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Colm; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#ifndef _INPUT_H -#define _INPUT_H - -#include <stdio.h> - -#ifdef __cplusplus -extern "C" { -#endif - -#define FSM_BUFSIZE 8192 -//#define FSM_BUFSIZE 8 - -#define INPUT_DATA 1 -/* This is for data sources to return, not for the wrapper. */ -#define INPUT_EOD 2 -#define INPUT_EOF 3 -#define INPUT_LANG_EL 4 -#define INPUT_TREE 5 -#define INPUT_IGNORE 6 - -/* - * pdaRun <- fsmRun <- stream - * - * Activities we need to support: - * - * 1. Stuff data into an input stream each time we << - * 2. Detach an input stream, and attach another when we include - * 3. Send data back to an input stream when the parser backtracks - * 4. Temporarily stop parsing due to a lack of input. - * - * At any given time, the fsmRun struct may have a prefix of the stream's - * input. If getting data we first get what we can out of the fsmRun, then - * consult the stream. If sending data back, we first shift pointers in the - * fsmRun, then ship to the stream. If changing streams the old stream needs to - * take back unprocessed data from the fsmRun. 
- */ - -struct LangEl; -struct Pattern; -struct PatternItem; -struct Replacement; -struct ReplItem; -struct _FsmRun; -struct ColmTree; - -enum RunBufType { - RunBufDataType = 0, - RunBufTokenType, - RunBufIgnoreType, - RunBufSourceType -}; - -typedef struct _RunBuf -{ - enum RunBufType type; - char data[FSM_BUFSIZE]; - long length; - struct ColmTree *tree; - long offset; - struct _RunBuf *next, *prev; -} RunBuf; - -RunBuf *newRunBuf(); - -typedef struct _SourceStream SourceStream; - -struct SourceFuncs -{ - /* Data. */ - int (*getData)( SourceStream *is, int offset, char *dest, int length, int *copied ); - int (*consumeData)( SourceStream *is, int length ); - int (*undoConsumeData)( SourceStream *is, const char *data, int length ); - - /* Language elments (compile-time). */ - struct LangEl *(*consumeLangEl)( SourceStream *is, long *bindId, char **data, long *length ); - void (*undoConsumeLangEl)( SourceStream *is ); - - /* Private implmentation for some shared get data functions. */ - int (*getDataImpl)( SourceStream *is, char *dest, int length ); -}; - -struct _SourceStream -{ - struct SourceFuncs *funcs; - - struct _FsmRun *hasData; - - char eofSent; - char eof; - - long line; - long column; - long byte; - - /* This is set true for input streams that do their own line counting. - * Causes FsmRun to ignore NLs. */ - int handlesLine; - - RunBuf *queue; - RunBuf *queueTail; - - const char *data; - long dlen; - int offset; - - FILE *file; - long fd; - - struct Pattern *pattern; - struct PatternItem *patItem; - struct Replacement *replacement; - struct ReplItem *replItem; - - struct _FsmRun *attached; -}; - -SourceStream *newSourceStreamPattern( struct Pattern *pattern ); -SourceStream *newSourceStreamRepl( struct Replacement *replacement ); -SourceStream *newSourceStreamFile( FILE *file ); -SourceStream *newSourceStreamFd( long fd ); - -void initInputFuncs(); -void initStaticFuncs(); -void initPatternFuncs(); -void initReplFuncs(); - -/* List of input streams. 
Enables streams to be pushed/popped. */ -struct _InputStream -{ - char eofSent; - char eof; - - long line; - long column; - long byte; - - /* This is set true for input streams that do their own line counting. - * Causes FsmRun to ignore NLs. */ - int handlesLine; - - RunBuf *queue; - RunBuf *queueTail; - - const char *data; - long dlen; - int offset; - - FILE *file; - long fd; - - struct Pattern *pattern; - struct PatternItem *patItem; - struct Replacement *replacement; - struct ReplItem *replItem; - - struct _FsmRun *attached; -}; - -typedef struct _InputStream InputStream; - -/* The input stream interface. */ - -int getData( struct _FsmRun *fsmRun, InputStream *in, int offset, char *dest, int length, int *copied ); -int consumeData( InputStream *in, int length ); -int undoConsumeData( struct _FsmRun *fsmRun, InputStream *is, const char *data, int length ); - -struct ColmTree *consumeTree( InputStream *in ); -void undoConsumeTree( InputStream *in, struct ColmTree *tree, int ignore ); - -struct LangEl *consumeLangEl( InputStream *in, long *bindId, char **data, long *length ); -void undoConsumeLangEl( InputStream *in ); - -void setEof( InputStream *is ); -void unsetEof( InputStream *is ); - -void prependData( InputStream *in, const char *data, long len ); -int undoPrependData( InputStream *is, int length ); - -void prependTree( InputStream *is, struct ColmTree *tree, int ignore ); -struct ColmTree *undoPrependTree( InputStream *is ); - -void appendData( InputStream *in, const char *data, long len ); -void appendTree( InputStream *in, struct ColmTree *tree ); -void appendStream( InputStream *in, struct ColmTree *tree ); -struct ColmTree *undoAppendData( InputStream *in, int length ); -struct ColmTree *undoAppendStream( InputStream *in ); -struct ColmTree *undoAppendTree( InputStream *in ); - -#ifdef __cplusplus -} -#endif - -#endif /* _INPUT_H */ diff --git a/colm/keyops.h b/colm/keyops.h deleted file mode 100644 index 1808c6a6..00000000 --- a/colm/keyops.h +++ 
/dev/null @@ -1,283 +0,0 @@ -/* - * Copyright 2001-2012 Adrian Thurston <thurston@complang.org> - */ - -/* This file is part of Colm. - * - * Colm is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * Colm is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Colm; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#ifndef _KEYOPS_H -#define _KEYOPS_H - -#include <fstream> -#include <climits> - -enum MarkType -{ - MarkNone = 0, - MarkMark -}; - -typedef unsigned long long Size; - -struct Key -{ -private: - long key; - -public: - friend inline Key operator+(const Key key1, const Key key2); - friend inline Key operator-(const Key key1, const Key key2); - friend inline Key operator/(const Key key1, const Key key2); - friend inline long operator&(const Key key1, const Key key2); - - friend inline bool operator<( const Key key1, const Key key2 ); - friend inline bool operator<=( const Key key1, const Key key2 ); - friend inline bool operator>( const Key key1, const Key key2 ); - friend inline bool operator>=( const Key key1, const Key key2 ); - friend inline bool operator==( const Key key1, const Key key2 ); - friend inline bool operator!=( const Key key1, const Key key2 ); - - friend struct KeyOps; - - Key( ) {} - Key( const Key &key ) : key(key.key) {} - Key( long key ) : key(key) {} - - /* Returns the value used to represent the key. This value must be - * interpreted based on signedness. 
*/ - long getVal() const { return key; }; - - /* Returns the key casted to a long long. This form of the key does not - * require and signedness interpretation. */ - long long getLongLong() const; - - bool isUpper() const { return ( 'A' <= key && key <= 'Z' ); } - bool isLower() const { return ( 'a' <= key && key <= 'z' ); } - bool isPrintable() const { return ( 32 <= key && key < 127 ); } - - Key toUpper() const - { return Key( 'A' + ( key - 'a' ) ); } - Key toLower() const - { return Key( 'a' + ( key - 'A' ) ); } - - void operator+=( const Key other ) - { - /* FIXME: must be made aware of isSigned. */ - key += other.key; - } - - void operator-=( const Key other ) - { - /* FIXME: must be made aware of isSigned. */ - key -= other.key; - } - - void operator|=( const Key other ) - { - /* FIXME: must be made aware of isSigned. */ - key |= other.key; - } - - /* Decrement. Needed only for ranges. */ - inline void decrement(); - inline void increment(); -}; - -struct HostType -{ - const char *data1; - const char *data2; - bool isSigned; - long long minVal; - long long maxVal; - unsigned int size; -}; - -struct HostLang -{ - HostType *hostTypes; - int numHostTypes; - HostType *defaultAlphType; - bool explicitUnsigned; -}; - - -/* Target language. */ -enum HostLangType -{ - CCode, - DCode, - JavaCode, - RubyCode -}; - -extern HostLang *hostLang; -extern HostLangType hostLangType; - -extern HostLang hostLangC; -extern HostLang hostLangD; -extern HostLang hostLangJava; -extern HostLang hostLangRuby; - -/* An abstraction of the key operators that manages key operations such as - * comparison and increment according the signedness of the key. */ -struct KeyOps -{ - /* Default to signed alphabet. */ - KeyOps() : - isSigned(true), - alphType(0) - {} - - /* Default to signed alphabet. 
*/ - KeyOps( bool isSigned ) - :isSigned(isSigned) {} - - bool isSigned; - Key minKey, maxKey; - HostType *alphType; - - void setAlphType( HostType *alphType ) - { - this->alphType = alphType; - isSigned = alphType->isSigned; - if ( isSigned ) { - minKey = (long) alphType->minVal; - maxKey = (long) alphType->maxVal; - } - else { - minKey = (long) (unsigned long) alphType->minVal; - maxKey = (long) (unsigned long) alphType->maxVal; - } - } - - /* Compute the distance between two keys. */ - Size span( Key key1, Key key2 ) - { - return isSigned ? - (unsigned long long)( - (long long)key2.key - - (long long)key1.key + 1) : - (unsigned long long)( - (unsigned long)key2.key) - - (unsigned long long)((unsigned long)key1.key) + 1; - } - - Size alphSize() - { return span( minKey, maxKey ); } - - HostType *typeSubsumes( long long maxVal ) - { - for ( int i = 0; i < hostLang->numHostTypes; i++ ) { - if ( maxVal <= hostLang->hostTypes[i].maxVal ) - return hostLang->hostTypes + i; - } - return 0; - } - - HostType *typeSubsumes( bool isSigned, long long maxVal ) - { - for ( int i = 0; i < hostLang->numHostTypes; i++ ) { - if ( ( (isSigned && hostLang->hostTypes[i].isSigned) || !isSigned ) && - maxVal <= hostLang->hostTypes[i].maxVal ) - return hostLang->hostTypes + i; - } - return 0; - } -}; - -extern KeyOps *keyOps; - -inline bool operator<( const Key key1, const Key key2 ) -{ - return keyOps->isSigned ? key1.key < key2.key : - (unsigned long)key1.key < (unsigned long)key2.key; -} - -inline bool operator<=( const Key key1, const Key key2 ) -{ - return keyOps->isSigned ? key1.key <= key2.key : - (unsigned long)key1.key <= (unsigned long)key2.key; -} - -inline bool operator>( const Key key1, const Key key2 ) -{ - return keyOps->isSigned ? key1.key > key2.key : - (unsigned long)key1.key > (unsigned long)key2.key; -} - -inline bool operator>=( const Key key1, const Key key2 ) -{ - return keyOps->isSigned ? 
key1.key >= key2.key : - (unsigned long)key1.key >= (unsigned long)key2.key; -} - -inline bool operator==( const Key key1, const Key key2 ) -{ - return key1.key == key2.key; -} - -inline bool operator!=( const Key key1, const Key key2 ) -{ - return key1.key != key2.key; -} - -/* Decrement. Needed only for ranges. */ -inline void Key::decrement() -{ - key = keyOps->isSigned ? key - 1 : ((unsigned long)key)-1; -} - -/* Increment. Needed only for ranges. */ -inline void Key::increment() -{ - key = keyOps->isSigned ? key+1 : ((unsigned long)key)+1; -} - -inline long long Key::getLongLong() const -{ - return keyOps->isSigned ? (long long)key : (long long)(unsigned long)key; -} - -inline Key operator+(const Key key1, const Key key2) -{ - /* FIXME: must be made aware of isSigned. */ - return Key( key1.key + key2.key ); -} - -inline Key operator-(const Key key1, const Key key2) -{ - /* FIXME: must be made aware of isSigned. */ - return Key( key1.key - key2.key ); -} - -inline long operator&(const Key key1, const Key key2) -{ - /* FIXME: must be made aware of isSigned. */ - return key1.key & key2.key; -} - -inline Key operator/(const Key key1, const Key key2) -{ - /* FIXME: must be made aware of isSigned. */ - return key1.key / key2.key; -} - -const char *findFileExtension( const char *stemFile ); -char *fileNameFromStem( const char *stemFile, const char *suffix ); - -#endif /* _KEYOPS_H */ diff --git a/colm/list.c b/colm/list.c deleted file mode 100644 index d9180b73..00000000 --- a/colm/list.c +++ /dev/null @@ -1,105 +0,0 @@ -/* - * Copyright 2007-2012 Adrian Thurston <thurston@complang.org> - */ - -/* This file is part of Colm. - * - * Colm is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. 
- * - * Colm is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Colm; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include <colm/pdarun.h> - -void listAddAfter( List *list, ListEl *prev_el, ListEl *new_el ) -{ - /* Set the previous pointer of new_el to prev_el. We do - * this regardless of the state of the list. */ - new_el->prev = prev_el; - - /* Set forward pointers. */ - if (prev_el == 0) { - /* There was no prev_el, we are inserting at the head. */ - new_el->next = list->head; - list->head = new_el; - } - else { - /* There was a prev_el, we can access previous next. */ - new_el->next = prev_el->next; - prev_el->next = new_el; - } - - /* Set reverse pointers. */ - if (new_el->next == 0) { - /* There is no next element. Set the tail pointer. */ - list->tail = new_el; - } - else { - /* There is a next element. Set it's prev pointer. */ - new_el->next->prev = new_el; - } - - /* Update list length. */ - list->listLen++; -} - -void listAddBefore( List *list, ListEl *next_el, ListEl *new_el) -{ - /* Set the next pointer of the new element to next_el. We do - * this regardless of the state of the list. */ - new_el->next = next_el; - - /* Set reverse pointers. */ - if (next_el == 0) { - /* There is no next elememnt. We are inserting at the tail. */ - new_el->prev = list->tail; - list->tail = new_el; - } - else { - /* There is a next element and we can access next's previous. */ - new_el->prev = next_el->prev; - next_el->prev = new_el; - } - - /* Set forward pointers. */ - if (new_el->prev == 0) { - /* There is no previous element. 
Set the head pointer.*/ - list->head = new_el; - } - else { - /* There is a previous element, set it's next pointer to new_el. */ - new_el->prev->next = new_el; - } - - list->listLen++; -} - -ListEl *listDetach( List *list, ListEl *el ) -{ - /* Set forward pointers to skip over el. */ - if (el->prev == 0) - list->head = el->next; - else - el->prev->next = el->next; - - /* Set reverse pointers to skip over el. */ - if (el->next == 0) - list->tail = el->prev; - else - el->next->prev = el->prev; - - /* Update List length and return element we detached. */ - list->listLen--; - return el; -} - diff --git a/colm/lmparse.kh b/colm/lmparse.kh deleted file mode 100644 index 86b70b6f..00000000 --- a/colm/lmparse.kh +++ /dev/null @@ -1,120 +0,0 @@ -/* - * Copyright 2001-2007 Adrian Thurston <thurston@complang.org> - */ - -/* This file is part of Colm. - * - * Colm is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * Colm is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Colm; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#ifndef RLPARSE_H -#define RLPARSE_H - -#include <iostream> -#include "avltree.h" -#include "parsedata.h" - -#define PROPERTY_REDUCE_FIRST 0x1 - -struct ColmParser -{ - ColmParser( Compiler *pd, const char *fileName, const char *sectionName, const InputLoc §ionLoc ) - : pd(pd), sectionName(sectionName), enterRl(false) - {} - - %%{ - parser ColmParser; - - # Use a class for tokens. - token uses class Token; - - # Atoms. 
- token TK_Word, TK_Literal, TK_SingleLit, TK_DoubleLit, TK_Number, TK_UInt, - TK_Hex, KW_Nil, KW_True, KW_False; - - # General tokens. - token TK_DotDot, TK_ColonGt, TK_ColonGtGt, TK_LtColon, - TK_DoubleArrow, TK_StarStar, TK_NameSep, TK_DashDash, TK_DoubleEql, - TK_NotEql, TK_DoubleColon, TK_LessEql, TK_GrtrEql, TK_RightArrow, - TK_LitPat, TK_AmpAmp, TK_BarBar, TK_SqOpen, TK_SqOpenNeg, TK_SqClose, - TK_Dash, TK_ReChar, TK_LtLt; - - # Defining things. - token KW_Rl, KW_Def, KW_Lex, KW_Context, KW_Ignore, KW_Token, KW_Commit, KW_Namespace, - KW_Literal, KW_ReduceFirst, KW_Map, KW_List, KW_Vector, KW_Accum, KW_Global, KW_Export, - KW_Iter, KW_Reject, KW_Ptr, KW_Ref, KW_Deref; - - # Language. - token KW_If, KW_While, KW_Else, KW_Elsif, KW_For, KW_Return, KW_Yield, KW_In, - KW_Break, KW_PrintXMLAC, KW_PrintXML, KW_Print, KW_PrintStream, KW_Require; - - # Patterns. - token KW_Match, KW_Construct, KW_Parse, KW_ParseStop, KW_New, KW_MakeToken, - KW_MakeTree, KW_TypeId, KW_Alias, KW_Send, KW_Ni, KW_Ci; - - token KW_Include, KW_Preeof; - - token KW_Left, KW_Right, KW_Nonassoc, KW_Prec; - - }%% - - %% write instance_data; - - - void init(); - int parseLangEl( int type, const Token *token ); - - int token( InputLoc &loc, int tokId, char *tokstart, int toklen ); - void addRegularDef( const InputLoc &loc, Namespace *nspace, - const String &name, Join *join ); - TokenRegion *createRegion( String &name ); - void addRegionDef( const InputLoc &loc, Namespace *nspace, - const String &name, TokenRegion *join ); - void addProduction( const InputLoc &loc, const String &name, - ProdElList *prodElList, bool commit, CodeBlock *redBlock, LangEl *predOf ); - void addArgvList(); - - /* Report an error encountered by the parser. */ - ostream &parse_error( int tokId, Token &token ); - - Compiler *pd; - - /* The name of the root section, this does not change during an include. 
*/ - const char *sectionName; - - NameRef nameRef; - NameRefList nameRefList; - - LangElVect langElVect; - - PatternItemList *patternItemList; - ReplItemList *replItemList; - RegionVect regionStack; - NamespaceVect namespaceStack; - ContextVect contextStack; - - String curDefineId; - LelDefList *curDefList; - ProdElList *curProdElList; - - PredType predType; - ReCaptureVect reCaptureVect; - - bool enterRl; -}; - -%% write token_defs; - -#endif diff --git a/colm/lmparse.kl b/colm/lmparse.kl deleted file mode 100644 index 3ead7c98..00000000 --- a/colm/lmparse.kl +++ /dev/null @@ -1,2677 +0,0 @@ -/* - * Copyright 2006-2012 Adrian Thurston <thurston@complang.org> - */ - -/* This file is part of Colm. - * - * Colm is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * Colm is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with Colm; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include <iostream> -#include <errno.h> - -#include "config.h" -#include "lmparse.h" -#include "global.h" -#include "input.h" -#include "fsmrun.h" - -using std::cout; -using std::cerr; -using std::endl; - -ParserDict parserDict; - -%%{ - -parser ColmParser; - -include "lmparse.kh"; - -start: root_item_list - final { - if ( colm_log_compile ) { - cerr << "parsing complete" << endl; - } - - pd->rootCodeBlock = new CodeBlock( $1->stmtList ); - }; - -nonterm root_item_list uses lang_stmt_list; - -root_item_list: root_item_list root_item - final { - $$->stmtList = $1->stmtList; - - /* Maybe a statement. */ - if ( $2->stmt != 0 ) - $$->stmtList->append( $2->stmt ); - }; - -root_item_list: - final { - $$->stmtList = new StmtList; - }; - -nonterm root_item uses statement; - -root_item: literal_def commit final { $$->stmt = 0; }; -root_item: rl_def commit final { $$->stmt = 0; }; -root_item: token_def commit final { $$->stmt = 0; }; -root_item: cfl_def commit final { $$->stmt = 0; }; -root_item: region_def commit final { $$->stmt = 0; }; -root_item: context_def commit final { $$->stmt = 0; }; -root_item: namespace_def commit final { $$->stmt = 0; }; -root_item: function_def commit final { $$->stmt = 0; }; -root_item: iter_def commit final { $$->stmt = 0; }; -root_item: global_def commit final { $$->stmt = $1->stmt; }; -root_item: statement commit final { $$->stmt = $1->stmt; }; -root_item: pre_eof commit final { $$->stmt = 0; }; -root_item: precedence commit final { $$->stmt = 0; }; -root_item: typedef commit final { $$->stmt = 0; }; - -nonterm block_open -{ - ObjectDef *localFrame; -}; - -block_open: '{' - final { - /* Init the object representing the local frame. 
*/ - $$->localFrame = new ObjectDef( ObjectDef::FrameType, - "local", pd->nextObjectId++ ); - - pd->curLocalFrame = $$->localFrame; - - /* Add captures to the local frame. We Depend on these becoming the - * first local variables so we can compute their location. */ - - /* Make local variables corresponding to the local capture vector. */ - for ( ReCaptureVect::Iter c = reCaptureVect; c.lte(); c++ ) - { - ObjField *objField = new ObjField( c->objField->loc, - c->objField->typeRef, c->objField->name ); - - /* Insert it into the field map. */ - pd->curLocalFrame->insertField( objField->name, objField ); - } - }; - -block_close: '}' - final { - /* Pop the cur local frame, back to the root. */ - pd->curLocalFrame = pd->rootLocalFrame; - }; - - -iter_def: - KW_Iter TK_Word '(' opt_param_list ')' block_open lang_stmt_list block_close - final { - CodeBlock *codeBlock = new CodeBlock( $7->stmtList ); - codeBlock->localFrame = $6->localFrame; - Function *newFunction = new Function( 0, $2->data, - $4->paramList, codeBlock, pd->nextFuncId++, true ); - pd->functionList.append( newFunction ); - }; - -function_def: - type_ref TK_Word '(' opt_param_list ')' block_open lang_stmt_list block_close - final { - CodeBlock *codeBlock = new CodeBlock( $7->stmtList ); - codeBlock->localFrame = $6->localFrame; - Function *newFunction = new Function( $1->typeRef, $2->data, - $4->paramList, codeBlock, pd->nextFuncId++, false ); - pd->functionList.append( newFunction ); - - if ( contextStack.length() > 0 ) - newFunction->inContext = contextStack.top(); - }; - -nonterm opt_param_list uses param_list; - -opt_param_list: param_list - final { - $$->paramList = $1->paramList; - }; - -opt_param_list: - final { - $$->paramList = new ParameterList; - }; - -nonterm param_list -{ - ParameterList *paramList; -}; - -param_list: param_list param_var_def - final { - $$->paramList = $1->paramList; - $$->paramList->append( $2->objField ); - }; - -param_list: param_var_def - final { - /* Create the map and 
insert the first item. */ - $$->paramList = new ParameterList; - $$->paramList->append( $1->objField ); - }; - -nonterm param_var_def uses var_def; - -param_var_def: TK_Word ':' type_ref - final { - $$->objField = new ObjField( $1->loc, $3->typeRef, $1->data ); - $$->objField->isParam = true; - }; -param_var_def: TK_Word ':' reference_type_ref - final { - $$->objField = new ObjField( $1->loc, $3->typeRef, $1->data ); - $$->objField->isParam = true; - }; - -nonterm reference_type_ref uses type_ref; - -reference_type_ref: KW_Ref type_ref - final { - $$->typeRef = new TypeRef( TypeRef::Ref, $1->loc, $2->typeRef ); - }; - -nonterm global_def uses statement; - -global_def: KW_Export var_def opt_def_init - final { - $$->stmt = 0; - - if ( contextStack.length() != 0 ) - error($2->objField->loc) << "cannot export parser context variables" << endp; - - ObjectDef *object = pd->globalObjectDef; - - if ( object->checkRedecl( $2->objField->name ) != 0 ) - error($2->objField->loc) << "object field renamed" << endp; - - object->insertField( $2->objField->name, $2->objField ); - $2->objField->isExport = true; - - if ( $3->expr != 0 ) { - LangVarRef *varRef = new LangVarRef( $2->objField->loc, - new QualItemVect, $2->objField->name ); - - $$->stmt = new LangStmt( $2->objField->loc, - $3->assignType, varRef, $3->expr ); - } - }; - -global_def: KW_Global var_def opt_def_init - final { - $$->stmt = 0; - - ObjectDef *object; - if ( contextStack.length() == 0 ) - object = pd->globalObjectDef; - else { - Context *context = contextStack.top(); - $2->objField->context = context; - object = context->contextObjDef; - } - - if ( object->checkRedecl( $2->objField->name ) != 0 ) - error($2->objField->loc) << "object field renamed" << endp; - - object->insertField( $2->objField->name, $2->objField ); - - if ( $3->expr != 0 ) { - LangVarRef *varRef = new LangVarRef( $2->objField->loc, - new QualItemVect, $2->objField->name ); - - $$->stmt = new LangStmt( $2->objField->loc, - $3->assignType, 
varRef, $3->expr ); - } - }; - -precedence: pred_type pred_token_list final { pd->predValue++; }; - -pred_type: KW_Left final { predType = PredLeft; }; -pred_type: KW_Right final { predType = PredRight; }; -pred_type: KW_Nonassoc final { predType = PredNonassoc; }; - -pred_token_list: pred_token_list ',' pred_token - final { - }; - -pred_token_list: pred_token; - -nonterm pred_token -{ - ProdEl *factor; - TypeRef *typeRef; -}; - -pred_token: - region_qual TK_Word - final { - TypeRef *typeRef = new TypeRef( $2->loc, $1->nspaceQual, $2->data ); - - PredDecl *predDecl = new PredDecl( typeRef, predType, pd->predValue ); - pd->predDeclList.append( predDecl ); - }; - -pred_token: - region_qual TK_Literal - final { - PdaLiteral *literal = new PdaLiteral( $2->loc, *$2 ); - TypeRef *typeRef = new TypeRef( $2->loc, $1->nspaceQual, literal ); - - PredDecl *predDecl = new PredDecl( typeRef, predType, pd->predValue ); - pd->predDeclList.append( predDecl ); - }; - -typedef: - KW_Alias TK_Word type_ref - final { - Namespace *nspace = namespaceStack.top(); - TypeAlias *typeAlias = new TypeAlias( - $1->loc, nspace, $2->data, $3->typeRef ); - nspace->typeAliasList.append( typeAlias ); - }; - -cfl_def: cfl_def_head obj_var_list properties_list cfl_prod_list - final { - Namespace *nspace = namespaceStack.top(); - NtDef *ntDef = new NtDef( - curDefineId, - nspace, - $4->defList, - pd->objectDef, - contextStack.length() > 0 ? 
contextStack.top() : 0, - $3->property & PROPERTY_REDUCE_FIRST ); - - nspace->ntDefList.append( ntDef ); - }; - -cfl_def_head: KW_Def TK_Word - final { - curDefineId = $2->data; - curDefList = new LelDefList; - }; - -nonterm cfl_prod_list -{ - LelDefList *defList; -}; - -cfl_prod_list: cfl_prod_list '|' define_prod - final { - $$->defList = $1->defList; - $3->definition->prodNum = $$->defList->length(); - $$->defList->append( $3->definition ); - }; -cfl_prod_list: define_prod - final { - $$->defList = curDefList; - $1->definition->prodNum = $$->defList->length(); - $$->defList->append( $1->definition ); - }; - -nonterm property -{ - long property; -}; - -nonterm properties_list uses property; - -properties_list: properties_list property - final { - $$->property = $1->property | $2->property; - }; -properties_list: - final { - $$->property = 0; - }; - -property: - KW_ReduceFirst - final { - $$->property = PROPERTY_REDUCE_FIRST; - }; - -nonterm opt_prec -{ - LangEl *predOf; -}; - -opt_prec: - final { - $$->predOf = 0; - }; - -opt_prec: - KW_Prec pred_token - final { - $$->predOf = $2->factor->langEl; - }; - -nonterm define_prod -{ - Definition *definition; -}; - -define_prod: '[' prod_el_list ']' opt_commit opt_reduce_code opt_prec - final { - const InputLoc &loc = $1->loc; - //const String &name = curDefineId; - ProdElList *prodElList = curProdElList; - bool commit = $4->commit; - CodeBlock *redBlock = $5->codeBlock; - LangEl *predOf = $6->predOf; - - //Namespace *nspace = namespaceStack.top(); - - Definition *newDef = new Definition( loc, 0/*prodName*/, - prodElList, commit, redBlock, - pd->prodList.length(), 0, Definition::Production ); - newDef->predOf = predOf; - - pd->prodList.append( newDef ); - - $$->definition = newDef; - }; - -obj_var_list: obj_var_list var_def - final { - if ( pd->objectDef->checkRedecl( $2->objField->name ) != 0 ) - error() << "object field renamed" << endp; - - pd->objectDef->insertField( $2->objField->name, $2->objField ); - }; - 
-obj_var_list: - final { - pd->objectDef = new ObjectDef( ObjectDef::UserType, - curDefineId, pd->nextObjectId++ ); - }; - - -nonterm type_ref -{ - TypeRef *typeRef; -}; - -type_ref: basic_type_ref - final { - $$->typeRef = $1->typeRef; - }; - -type_ref: KW_Map '<' type_ref type_ref '>' - final { - NamespaceQual *nspaceQual = new NamespaceQual( - namespaceStack.top(), regionStack.top() ); - $$->typeRef = new TypeRef( TypeRef::Map, InputLoc(), nspaceQual, - $3->typeRef, $4->typeRef ); - }; - -type_ref: KW_List '<' type_ref '>' - final { - NamespaceQual *nspaceQual = new NamespaceQual( - namespaceStack.top(), regionStack.top() ); - $$->typeRef = new TypeRef( TypeRef::List, InputLoc(), nspaceQual, $3->typeRef, 0 ); - }; -type_ref: KW_Vector '<' type_ref '>' - final { - NamespaceQual *nspaceQual = new NamespaceQual( - namespaceStack.top(), regionStack.top() ); - $$->typeRef = new TypeRef( TypeRef::Vector, InputLoc(), nspaceQual, $3->typeRef, 0 ); - }; -type_ref: KW_Accum '<' type_ref '>' - final { - NamespaceQual *nspaceQual = new NamespaceQual( - namespaceStack.top(), regionStack.top() ); - $$->typeRef = new TypeRef( TypeRef::Parser, InputLoc(), nspaceQual, $3->typeRef, 0 ); - }; - -nonterm basic_type_ref uses type_ref; - -basic_type_ref: region_qual TK_Word opt_repeat - final { - $$->typeRef = new TypeRef( $2->loc, $1->nspaceQual, $2->data ); - $$->typeRef->repeatType = $3->repeatType; - }; - -basic_type_ref: KW_Ptr region_qual TK_Word opt_repeat - final { - $$->typeRef = new TypeRef( $1->loc, $2->nspaceQual, $3->data ); - $$->typeRef->repeatType = $4->repeatType; - $$->typeRef = new TypeRef( TypeRef::Ptr, $1->loc, $$->typeRef ); - }; - - -nonterm var_def -{ - InputLoc loc; - ObjField *objField; -}; - -var_def: TK_Word ':' type_ref - final { - /* Return an object field object. The user of this nonterminal must - * load it into the approrpriate map and do error checking. 
*/ - $$->objField = new ObjField( $1->loc, $3->typeRef, $1->data ); - }; - -region_def: - region_head '{' root_item_list '}' - final { - /* Pop the top of the stack. */ - regionStack.pop(); - }; - -region_head: - KW_Lex TK_Word - final { - /* Just for ignores. */ - String scannerNameIgn( $2->data.length() + 2, "<%s>-ign", $2->data.data ); - TokenRegion *tokenRegionIgn = createRegion( scannerNameIgn ); - - /* Just for collect ignores. Will use the ignore-only start state. */ - String scannerNameCi( $2->data.length() + 2, "<%s>-ci", $2->data.data ); - TokenRegion *tokenRegionCi = createRegion( scannerNameCi ); - - /* Just for tokens. */ - String scannerNameTok( $2->data.length() + 2, "<%s>-tok", $2->data.data ); - TokenRegion *tokenRegionTok = createRegion( scannerNameTok ); - - /* Make the new token region. */ - String scannerName( $2->data.length() + 2, "<%s>", $2->data.data ); - TokenRegion *tokenRegion = createRegion( scannerName ); - - regionStack.push( tokenRegion ); - - tokenRegion->ignoreOnlyRegion = tokenRegionIgn; - tokenRegion->tokenOnlyRegion = tokenRegionTok; - tokenRegion->ciRegion = tokenRegionCi; - - tokenRegion->isFullRegion = true; - tokenRegionIgn->isIgnoreOnly = true; - tokenRegionCi->isCiOnly = true; - tokenRegionTok->isTokenOnly = true; - - tokenRegionIgn->derivedFrom = tokenRegion; - tokenRegionCi->derivedFrom = tokenRegion; - tokenRegionTok->derivedFrom = tokenRegion; - }; - -namespace_def: - namespace_head '{' root_item_list '}' - final { - namespaceStack.pop(); - }; - -namespace_head: - KW_Namespace TK_Word - final { - /* Make the new namespace. 
*/ - Namespace *nspace = new Namespace( InputLoc(), $2->data, - pd->namespaceList.length(), namespaceStack.top() ); - namespaceStack.top()->childNamespaces.append( nspace ); - pd->namespaceList.append( nspace ); - namespaceStack.push( nspace ); - }; - -context_var_def: - var_def - final { - ObjectDef *object; - if ( contextStack.length() == 0 ) - error($1->loc) << "internal error: no context stack items found" << endp; - - Context *context = contextStack.top(); - $1->objField->context = context; - object = context->contextObjDef; - - if ( object->checkRedecl( $1->objField->name ) != 0 ) - error($1->objField->loc) << "object field renamed" << endp; - - object->insertField( $1->objField->name, $1->objField ); - }; - - -context_item: context_var_def commit; -context_item: literal_def commit; -context_item: rl_def commit; -context_item: token_def commit; -context_item: cfl_def commit; -context_item: region_def commit; -context_item: context_def commit; -context_item: function_def commit; -context_item: iter_def commit; -context_item: pre_eof commit; -context_item: precedence commit; - -context_item_list: - context_item_list context_item; -context_item_list: - ; - -context_def: - context_head '{' context_item_list '}' - final { - contextStack.pop(); - namespaceStack.pop(); - }; - -context_head: - KW_Context TK_Word - final { - /* Make the new namespace. 
*/ - Namespace *nspace = new Namespace( InputLoc(), $2->data, - pd->namespaceList.length(), namespaceStack.top() ); - namespaceStack.top()->childNamespaces.append( nspace ); - pd->namespaceList.append( nspace ); - namespaceStack.push( nspace ); - - Context *context = new Context( $1->loc, 0 ); - contextStack.push( context ); - - ContextDef *contextDef = new ContextDef( $2->data, context, nspace ); - nspace->contextDefList.append( contextDef ); - - context->contextObjDef = new ObjectDef( ObjectDef::UserType, - $2->data, pd->nextObjectId++ ); - }; - -pattern_list: pattern_list pattern; -pattern_list: init_pattern_list pattern; - -init_pattern_list: - final { - patternItemList = new PatternItemList; - }; - -pattern: '"' litpat_el_list '"'; -pattern: '[' pattern_el_list ']'; - -litpat_el_list: litpat_el_list litpat_el; -litpat_el_list: ; - -litpat_el: TK_LitPat - final { - PatternItem *patternItem = new PatternItem( $1->loc, $1->data, - PatternItem::InputText ); - patternItemList->append( patternItem ); - }; - -litpat_el: '[' pattern_el_list ']'; - -pattern_el_list: pattern_el_list pattern_el; -pattern_el_list: ; - -pattern_el: opt_label pattern_el_type_or_lit - final { - /* Store the variable reference in the pattern itemm. */ - $2->patternItem->varRef = $1->varRef; - - if ( $1->varRef != 0 ) { - if ( pd->curLocalFrame->checkRedecl( $1->varRef->name ) != 0 ) { - error( $1->varRef->loc ) << "variable " << $1->varRef->name << - " redeclared" << endp; - } - - TypeRef *typeRef = $2->patternItem->factor->typeRef; - ObjField *objField = new ObjField( InputLoc(), typeRef, $1->varRef->name ); - - /* Insert it into the field map. */ - pd->curLocalFrame->insertField( $1->varRef->name, objField ); - } - }; - -pattern_el: '"' litpat_el_list '"'; -pattern_el: '?' 
TK_Word - final { - /* FIXME: Implement */ - assert(false); - }; - -nonterm pattern_el_type_or_lit -{ - PatternItem *patternItem; -}; - -pattern_el_type_or_lit: region_qual TK_Word opt_repeat - final { - TypeRef *typeRef = new TypeRef( $2->loc, $1->nspaceQual, $2->data ); - typeRef->repeatType = $3->repeatType; - ProdEl *factor = new ProdEl( ProdEl::ReferenceType, $2->loc, 0, false, typeRef, 0 ); - $$->patternItem = new PatternItem( $2->loc, factor, PatternItem::FactorType ); - patternItemList->append( $$->patternItem ); - }; - -pattern_el_type_or_lit: region_qual TK_Literal opt_repeat - final { - PdaLiteral *literal = new PdaLiteral( $2->loc, *$2 ); - TypeRef *typeRef = new TypeRef( $2->loc, $1->nspaceQual, literal ); - typeRef->repeatType = $3->repeatType; - - ProdEl *factor = new ProdEl( ProdEl::ReferenceType, $2->loc, 0, false, typeRef, 0 ); - $$->patternItem = new PatternItem( $2->loc, factor, PatternItem::FactorType ); - patternItemList->append( $$->patternItem ); - }; - -nonterm opt_label -{ - /* Variable reference. 
*/ - LangVarRef *varRef; -}; - -opt_label: TK_Word ':' - final { - $$->varRef = new LangVarRef( $1->loc, new QualItemVect, $1->data ); - }; -opt_label: - final { - $$->varRef = 0; - }; - -# -# Replacement -# - -repl_list: repl_list replacement; -repl_list: init_repl_list replacement; - -init_repl_list: - final { - replItemList = new ReplItemList; - }; - -replacement: '"' lit_repl_el_list '"'; -replacement: '[' repl_el_list ']'; - -lit_repl_el_list: lit_repl_el_list lit_repl_el; -lit_repl_el_list: ; - -lit_repl_el: TK_LitPat - final { - ReplItem *replItem = new ReplItem( $1->loc, ReplItem::InputText, $1->data ); - replItemList->append( replItem ); - }; - -lit_repl_el: '[' repl_el_list ']'; - -repl_el_list: repl_el_list repl_el; -repl_el_list: ; - -repl_el: region_qual TK_Literal - final { - PdaLiteral *literal = new PdaLiteral( $2->loc, *$2 ); - TypeRef *typeRef = new TypeRef( $2->loc, $1->nspaceQual, literal ); - typeRef->repeatType = RepeatNone; - ProdEl *factor = new ProdEl( ProdEl::LiteralType, $2->loc, 0, false, typeRef, 0 ); - ReplItem *replItem = new ReplItem( $2->loc, ReplItem::FactorType, factor ); - replItemList->append( replItem ); - }; -repl_el: '"' lit_repl_el_list '"'; - -repl_el: code_expr - final { - ReplItem *replItem = new ReplItem( $1->expr->loc, ReplItem::ExprType, $1->expr ); - replItemList->append( replItem ); - }; - -# -# Accum -# -accumulate: init_repl_list accum_list; -accumulate: init_repl_list code_expr - final { - ReplItem *replItem = new ReplItem( $2->expr->loc, ReplItem::ExprType, $2->expr ); - replItemList->append( replItem ); - }; - -accum_list: accum_list accum; -accum_list: accum; - -init_accum_list: - final { - replItemList = new ReplItemList; - }; - -accum: '"' lit_accum_el_list '"'; -accum: '[' accum_el_list ']'; - -lit_accum_el_list: lit_accum_el_list lit_accum_el; -lit_accum_el_list: ; - -lit_accum_el: TK_LitPat - final { - ReplItem *replItem = new ReplItem( $1->loc, ReplItem::InputText, $1->data ); - replItemList->append( 
replItem ); - }; - -lit_accum_el: '[' accum_el_list ']'; - -accum_el_list: accum_el_list accum_el; -accum_el_list: ; - -#accum_el: region_qual TK_Literal -# final { -# PdaLiteral *literal = new PdaLiteral( $2->loc, *$2 ); -# ProdEl *factor = new ProdEl( $2->loc, false, $1->nspaceQual, -# literal, 0 ); -# ReplItem *replItem = new ReplItem( $2->loc, ReplItem::FactorType, factor ); -# replItemList->append( replItem ); -# }; -accum_el: code_expr - final { - ReplItem *replItem = new ReplItem( $1->expr->loc, ReplItem::ExprType, $1->expr ); - replItemList->append( replItem ); - }; - -accum_el: '"' lit_accum_el_list '"'; - - -# -# String -# - -string_list: string_list string; -string_list: init_string_list string; - -init_string_list: - final { - replItemList = new ReplItemList; - }; - -string: '"' lit_string_el_list '"'; -string: '[' string_el_list ']'; - -lit_string_el_list: lit_string_el_list lit_string_el; -lit_string_el_list: ; - -lit_string_el: TK_LitPat - final { - ReplItem *replItem = new ReplItem( $1->loc, ReplItem::InputText, $1->data ); - replItemList->append( replItem ); - }; - -lit_string_el: '[' string_el_list ']'; - -string_el_list: string_el_list string_el; -string_el_list: ; - -#accum_el: region_qual TK_Literal -# final { -# PdaLiteral *literal = new PdaLiteral( $2->loc, *$2 ); -# ProdEl *factor = new ProdEl( $2->loc, false, $1->nspaceQual, -# literal, 0 ); -# ReplItem *replItem = new ReplItem( $2->loc, ReplItem::FactorType, factor ); -# replItemList->append( replItem ); -# }; -string_el: code_expr - final { - ReplItem *replItem = new ReplItem( $1->expr->loc, ReplItem::ExprType, $1->expr ); - replItemList->append( replItem ); - }; - -string_el: '"' lit_string_el_list '"'; - -prod_el_list: - prod_el_list prod_el - final { - curProdElList->append( $2->factor ); - }; - -prod_el_list: - final { curProdElList = new ProdElList; }; - -nonterm opt_no_ignore { bool value; }; - -opt_no_ignore: KW_Ni final { $$->value = true; }; -opt_no_ignore: final { $$->value = 
false; }; - -nonterm prod_el -{ - ProdEl *factor; -}; - -prod_el: - opt_capture opt_commit region_qual TK_Word opt_repeat - final { - TypeRef *typeRef = new TypeRef( $4->loc, $3->nspaceQual, $4->data ); - typeRef->repeatType = $5->repeatType; - $$->factor = new ProdEl( ProdEl::ReferenceType, $4->loc, $1->objField, $2->commit, typeRef, 0 ); - - /* If there is a capture, create the field. */ - if ( $1->objField != 0 ) { - /* Might already exist. */ - ObjField *objField = pd->objectDef->checkRedecl( $1->objField->name ); - if ( objField == 0 ) { - objField = $1->objField; - objField->typeRef = typeRef; - pd->objectDef->insertField( objField->name, objField ); - } - else { - /* FIXME: check the types are the same. */ - //error() << "object field renamed" << endp; - } - - objField->isRhsGet = true; - RhsVal rhsVal( curDefList->length(), curProdElList->length() ); - objField->rhsVal.append( RhsVal( curDefList->length(), curProdElList->length() ) ); - } - }; - -prod_el: - opt_capture opt_commit region_qual TK_Literal opt_repeat - final { - /* Create a new factor node going to a concat literal. */ - PdaLiteral *literal = new PdaLiteral( $4->loc, *$4 ); - TypeRef *typeRef = new TypeRef( $4->loc, $3->nspaceQual, literal ); - typeRef->repeatType = $5->repeatType; - $$->factor = new ProdEl( ProdEl::LiteralType, $4->loc, $1->objField, $2->commit, typeRef, 0 ); - - /* If there is a capture, create the field. */ - if ( $1->objField != 0 ) { - $1->objField->typeRef = typeRef; - if ( pd->objectDef->checkRedecl( $1->objField->name ) != 0 ) - error() << "object field renamed" << endp; - - pd->objectDef->insertField( $1->objField->name, $1->objField ); - } - }; - -nonterm opt_repeat -{ - bool opt; - bool repeat; - RepeatType repeatType; -}; - -opt_repeat: '*' final { $$->opt = false; $$->repeat = true; $$->repeatType = RepeatRepeat; }; -opt_repeat: '+' final { $$->opt = false; $$->repeat = false; $$->repeatType = RepeatList; }; -opt_repeat: '?' 
final { $$->opt = true; $$->repeat = false; $$->repeatType = RepeatOpt; }; -opt_repeat: final { $$->opt = false; $$->repeat = false; $$->repeatType = RepeatNone; }; - -nonterm region_qual -{ - NamespaceQual *nspaceQual; -}; - -region_qual: region_qual TK_Word TK_DoubleColon - final { - $$->nspaceQual = $1->nspaceQual; - $$->nspaceQual->qualNames.append( $2->data ); - }; - -region_qual: - final { - $$->nspaceQual = new NamespaceQual( namespaceStack.top(), regionStack.top() ); - }; - -literal_def: KW_Literal literal_list; - -literal_list: literal_list ',' literal_item; -literal_list: literal_item; - -literal_item: opt_no_ignore TK_Literal opt_no_ignore - final { - /* Create a name for the literal. */ - String name( 32, "_literal_%.4x", pd->nextTokenId ); - - bool insideRegion = regionStack.top() != pd->rootRegion; - if ( !insideRegion ) { - /* Just for ignores. */ - String scannerNameIgn( name.length() + 2, "<%s>-ign", name.data ); - TokenRegion *tokenRegionIgn = createRegion( scannerNameIgn ); - - /* Just for collect ignores. Will use the ignore-only start state. */ - String scannerNameCi( name.length() + 2, "<%s>-ci", name.data ); - TokenRegion *tokenRegionCi = createRegion( scannerNameCi ); - - /* Just for tokens. */ - String scannerNameTok( name.length() + 2, "<%s>-tok", name.data ); - TokenRegion *tokenRegionTok = createRegion( scannerNameTok ); - - /* Make a new token region just for the token. 
*/ - String scannerName( name.length() + 2, "<%s>", name.data ); - TokenRegion *tokenRegion = createRegion( scannerName ); - - regionStack.push( tokenRegion ); - - tokenRegion->ignoreOnlyRegion = tokenRegionIgn; - tokenRegion->tokenOnlyRegion = tokenRegionTok; - tokenRegion->ciRegion = tokenRegionCi; - - tokenRegion->isFullRegion = true; - tokenRegionIgn->isIgnoreOnly = true; - tokenRegionCi->isCiOnly = true; - tokenRegionTok->isTokenOnly = true; - - tokenRegionIgn->derivedFrom = tokenRegion; - tokenRegionCi->derivedFrom = tokenRegion; - tokenRegionTok->derivedFrom = tokenRegion; - } - - bool unusedCI; - String interp; - prepareLitString( interp, unusedCI, $2->data, $2->loc ); - - /* Look for the production's associated region. */ - Namespace *nspace = namespaceStack.top(); - TokenRegion *region = regionStack.top(); - - - LiteralDictEl *ldel = nspace->literalDict.find( interp ); - if ( ldel != 0 ) - error( $2->loc ) << "literal already defined in this namespace" << endp; - else { - Join *join = new Join( new Expression( new Term( new FactorWithAug( - new FactorWithRep( $2->loc, new FactorWithNeg( $2->loc, new Factor( - new Literal( $2->loc, $2->data, - Literal::LitString ) ) ) ) ) ) ) ); - - if ( strcmp( interp.data, "" ) == 0 ) { - TokenDef *tokenDef = new TokenDef( name, $2->data, true, false, join, - 0, $2->loc, pd->nextTokenId++, nspace, region, 0, 0, 0 ); - - //region->tokenDefList.append( tokenDef ); - - ldel = nspace->literalDict.insert( interp, tokenDef ); - nspace->tokenDefList.append( tokenDef ); - - tokenDef->isZero = true; - } - else { - TokenDef *tokenDef = new TokenDef( name, $2->data, true, false, join, - 0, $2->loc, pd->nextTokenId++, nspace, region, 0, 0, 0 ); - region->tokenDefList.append( tokenDef ); - ldel = nspace->literalDict.insert( interp, tokenDef ); - nspace->tokenDefList.append( tokenDef ); - - if ( $1->value ) - tokenDef->noPreIgnore = true; - if ( $3->value ) - tokenDef->noPostIgnore = true; - - TokenDef *tokenDefTok = new TokenDef( 
name + "_tok", $2->data, true, false, join, - 0, $2->loc, pd->nextTokenId++, nspace, region->tokenOnlyRegion, 0, 0, 0 ); - tokenDefTok->dupOf = tokenDef; - region->tokenOnlyRegion->tokenDefList.append( tokenDefTok ); - ldel = nspace->literalDict.insert( "|" + interp + "_tok", tokenDefTok ); - nspace->tokenDefList.append( tokenDefTok ); - } - } - - if ( !insideRegion ) { - /* Leave the region just for this token. */ - regionStack.pop(); - } - }; - - -# These two productions are responsible for setting and unsetting the Regular -# language scanning context. -enter_rl: - try { - enterRl = true; - } - undo { - enterRl = false; - }; -leave_rl: - try { - enterRl = false; - } - undo { - enterRl = true; - }; - -token_def: - token_or_ignore token_def_name obj_var_list - enter_rl opt_no_ignore '/' opt_rl_join leave_rl '/' opt_no_ignore - opt_translate - final { - bool ignore = $1->ignore; - String name = $2->name; - Join *join = $7->join; - CodeBlock *transBlock = $11->transBlock; - - /* Check the region if this is for an ignore. */ - if ( ignore && !pd->insideRegion ) - error($1->loc) << "ignore tokens can only appear inside scanners" << endp; - - /* Check the name if this is a token. */ - if ( !ignore && name == 0 ) - error($1->loc) << "tokens must have a name" << endp; - - /* Give a default name to ignores. */ - if ( name == 0 ) - name.setAs( 32, "_ignore_%.4x", pd->nextTokenId ); - - Namespace *nspace = namespaceStack.top(); - TokenRegion *region = regionStack.top(); - - TokenDef *tokenDef = new TokenDef( name, String(), false, ignore, join, - transBlock, $1->loc, pd->nextTokenId++, nspace, region, - &reCaptureVect, pd->objectDef, - contextStack.length() > 0 ? contextStack.top() : 0 ); - - region->tokenDefList.append( tokenDef ); - nspace->tokenDefList.append( tokenDef ); - - if ( $5->value ) - tokenDef->noPreIgnore = true; - if ( $10->value ) - tokenDef->noPostIgnore = true; - - /* All again for the ignore. 
*/ - if ( ignore ) { - TokenDef *tokenDefIgn = new TokenDef( name + "_ign", String(), false, ignore, join, - 0, $1->loc, pd->nextTokenId++, nspace, region->ignoreOnlyRegion, - &reCaptureVect, pd->objectDef, - contextStack.length() > 0 ? contextStack.top() : 0 ); - - tokenDefIgn->dupOf = tokenDef; - - region->ignoreOnlyRegion->tokenDefList.append( tokenDefIgn ); - nspace->tokenDefList.append( tokenDefIgn ); - } - else { - TokenDef *tokenDefTok = new TokenDef( name + "_tok", String(), false, ignore, join, - 0, $1->loc, pd->nextTokenId++, nspace, region->tokenOnlyRegion, - &reCaptureVect, pd->objectDef, - contextStack.length() > 0 ? contextStack.top() : 0 ); - - tokenDefTok->dupOf = tokenDef; - - region->tokenOnlyRegion->tokenDefList.append( tokenDefTok ); - nspace->tokenDefList.append( tokenDefTok ); - } - - /* This is created and pushed in the name. */ - if ( !pd->insideRegion ) { - /* Leave the region that we made just for this token. */ - regionStack.pop(); - } - - if ( join != 0 ) { - /* Create a regular language definition so the token can be used to - * make other tokens */ - addRegularDef( $1->loc, namespaceStack.top(), name, join ); - } - - - reCaptureVect.empty(); - }; - -nonterm token_or_ignore -{ - InputLoc loc; - bool ignore; -}; - -token_or_ignore: KW_Token - final { $$->loc = $1->loc; $$->ignore = false; }; - -token_or_ignore: KW_Ignore - final { $$->loc = $1->loc; $$->ignore = true; }; - -nonterm class token_def_name -{ - String name; -}; - -token_def_name: - opt_name - final { - String name = $1->name; - - $$->name = name; - pd->insideRegion = regionStack.top() != pd->rootRegion; - curDefineId = name; - - if ( !pd->insideRegion ) { - /* For just ignores. */ - String scannerNameIgn( name.length() + 2, "<%s>-ign", name.data ); - TokenRegion *tokenRegionIgn = createRegion( scannerNameIgn ); - - /* Just for explicitly collecting ignores. 
*/ - String scannerNameCi( name.length() + 2, "<%s>-ci", name.data ); - TokenRegion *tokenRegionCi = createRegion( scannerNameCi ); - - /* Just for tokens. */ - String scannerNameTok( name.length() + 2, "<%s>-tok", name.data ); - TokenRegion *tokenRegionTok = createRegion( scannerNameTok ); - - /* If not inside a region, make one for the token. */ - String scannerName( name.length() + 2, "<%s>", name.data ); - TokenRegion *tokenRegion = createRegion( scannerName ); - - regionStack.push( tokenRegion ); - - tokenRegion->ignoreOnlyRegion = tokenRegionIgn; - tokenRegion->tokenOnlyRegion = tokenRegionTok; - tokenRegion->ciRegion = tokenRegionCi; - - tokenRegion->isFullRegion = true; - tokenRegionIgn->isIgnoreOnly = true; - tokenRegionCi->isCiOnly = true; - tokenRegionTok->isTokenOnly = true; - - tokenRegionIgn->derivedFrom = tokenRegion; - tokenRegionCi->derivedFrom = tokenRegion; - tokenRegionTok->derivedFrom = tokenRegion; - } - - /* Reset the lable id counter. */ - pd->nextLabelId = 0; - }; - -nonterm class opt_name -{ - String name; -}; - -opt_name: TK_Word final { $$->name = $1->data; }; -opt_name: ; - -nonterm opt_translate -{ - CodeBlock *transBlock; -}; - -opt_translate: - block_open lang_stmt_list block_close - final { - $$->transBlock = new CodeBlock( $2->stmtList ); - $$->transBlock->localFrame = $1->localFrame; - $$->transBlock->context = contextStack.length() == 0 ? 0 : contextStack.top(); - }; - -opt_translate: - final { - $$->transBlock = 0; - }; - -pre_eof: - KW_Preeof block_open lang_stmt_list block_close - final { - bool insideRegion = regionStack.top() != pd->rootRegion; - if ( !insideRegion ) - error($1->loc) << "preeof must be used inside an existing region" << endl; - - CodeBlock *codeBlock = new CodeBlock( $3->stmtList ); - codeBlock->localFrame = $2->localFrame; - codeBlock->context = contextStack.length() == 0 ? 
0 : contextStack.top(); - - TokenRegion *region = regionStack.top(); - region->preEofBlock = codeBlock; - }; - -rl_def: - KW_Rl machine_name enter_rl '/' rl_join leave_rl '/' - final { - /* Generic creation of machine for instantiation and assignment. */ - addRegularDef( $2->loc, namespaceStack.top(), $2->data, $5->join ); - - if ( reCaptureVect.length() > 0 ) - error($1->loc) << "rl definitions cannot capture vars" << endl; - }; - -type class token_data -{ - InputLoc loc; - String data; -}; - -nonterm machine_name uses token_data; - -machine_name: - TK_Word - final { - /* Make/get the priority key. The name may have already been referenced - * and therefore exist. */ - PriorDictEl *priorDictEl; - if ( pd->priorDict.insert( $1->data, pd->nextPriorKey, &priorDictEl ) ) - pd->nextPriorKey += 1; - pd->curDefPriorKey = priorDictEl->value; - - /* Make/get the local error key. */ - LocalErrDictEl *localErrDictEl; - if ( pd->localErrDict.insert( $1->data, pd->nextLocalErrKey, &localErrDictEl ) ) - pd->nextLocalErrKey += 1; - pd->curDefLocalErrKey = localErrDictEl->value; - - $$->loc = $1->loc; - $$->data = $1->data; - }; - -# -# Reduce statements -# - -nonterm opt_reduce_code -{ - CodeBlock *codeBlock; -}; - -opt_reduce_code: - final { $$->codeBlock = 0; }; - -opt_reduce_code: - start_reduce lang_stmt_list block_close - final { - $$->codeBlock = new CodeBlock( $2->stmtList ); - $$->codeBlock->localFrame = $1->localFrame; - $$->codeBlock->context = contextStack.length() == 0 ? 
0 : contextStack.top(); - }; - -nonterm start_reduce uses block_open; - -start_reduce: - block_open - final { - $$->localFrame = $1->localFrame; - }; - -nonterm lang_stmt_list -{ - StmtList *stmtList; -}; - -lang_stmt_list: rec_stmt_list opt_require_stmt - final { - $$->stmtList = $1->stmtList; - if ( $2->stmt != 0 ) - $$->stmtList->append( $2->stmt ); - }; - -nonterm rec_stmt_list uses lang_stmt_list; - -rec_stmt_list: rec_stmt_list statement - final { - $$->stmtList = $1->stmtList; - - /* Maybe a statement was generated. */ - if ( $2->stmt != 0 ) - $$->stmtList->append( $2->stmt ); - }; - -rec_stmt_list: - final { - $$->stmtList = new StmtList; - }; - -nonterm opt_def_init -{ - LangExpr *expr; - LangStmt::Type assignType; -}; - -opt_def_init: '=' code_expr - final { - $$->expr = $2->expr; - $$->assignType = LangStmt::AssignType; - }; -opt_def_init: - final { - $$->expr = 0; - }; - -scope_push: - final { - pd->curLocalFrame->pushScope(); - //cout << "push scope" << endl; - }; - -scope_pop: - final { - pd->curLocalFrame->popScope(); - //cout << "pop scope" << endl; - }; - -nonterm statement -{ - LangStmt *stmt; -}; -nonterm for_scope uses statement; - -statement: var_def opt_def_init - final { - /* By default no statement here. Maybe will add an initialization. */ - $$->stmt = 0; - - /* Check for redeclaration. */ - if ( pd->curLocalFrame->checkRedecl( $1->objField->name ) != 0 ) { - error( $1->objField->loc ) << "variable " << $1->objField->name << - " redeclared" << endp; - } - - /* Insert it into the field map. 
*/ - pd->curLocalFrame->insertField( $1->objField->name, $1->objField ); - - //cout << "var def " << $1->objField->name << endl; - - if ( $2->expr != 0 ) { - LangVarRef *varRef = new LangVarRef( $1->objField->loc, - new QualItemVect, $1->objField->name ); - - $$->stmt = new LangStmt( $1->objField->loc, - $2->assignType, varRef, $2->expr ); - } - }; -statement: var_ref '=' code_expr - final { - $$->stmt = new LangStmt( $2->loc, LangStmt::AssignType, $1->varRef, $3->expr ); - }; -statement: KW_Print '(' code_expr_list ')' - final { - $$->stmt = new LangStmt( $1->loc, LangStmt::PrintType, $3->exprVect ); - }; -statement: KW_PrintXMLAC '(' code_expr_list ')' - final { - $$->stmt = new LangStmt( $1->loc, LangStmt::PrintXMLACType, $3->exprVect ); - }; -statement: KW_PrintXML '(' code_expr_list ')' - final { - $$->stmt = new LangStmt( $1->loc, LangStmt::PrintXMLType, $3->exprVect ); - }; -statement: KW_PrintStream '(' code_expr_list ')' - final { - $$->stmt = new LangStmt( $1->loc, LangStmt::PrintStreamType, $3->exprVect ); - }; -statement: code_expr - final { - $$->stmt = new LangStmt( InputLoc(), LangStmt::ExprType, $1->expr ); - }; -statement: if_stmt - final { - $$->stmt = $1->stmt; - }; -statement: KW_Reject - final { - $$->stmt = new LangStmt( $1->loc, LangStmt::RejectType ); - }; -statement: KW_While scope_push code_expr block_or_single scope_pop - final { - $$->stmt = new LangStmt( LangStmt::WhileType, $3->expr, $4->stmtList ); - }; - -for_scope: TK_Word ':' type_ref KW_In iter_call block_or_single - final { - /* Check for redeclaration. */ - if ( pd->curLocalFrame->checkRedecl( $1->data ) != 0 ) - error( $1->loc ) << "variable " << $1->data << " redeclared" << endp; - - /* Note that we pass in a null type reference. This type is dependent - * on the result of the iter_call lookup since it must contain a reference - * to the iterator that is called. This lookup is done at compile time. 
*/ - ObjField *iterField = new ObjField( $1->loc, (TypeRef*)0, $1->data ); - pd->curLocalFrame->insertField( $1->data, iterField ); - - $$->stmt = new LangStmt( $1->loc, LangStmt::ForIterType, - iterField, $3->typeRef, $5->langTerm, $6->stmtList ); - }; - -statement: KW_For scope_push for_scope scope_pop - final { - $$->stmt = $3->stmt; - }; - -statement: KW_Return code_expr - final { - $$->stmt = new LangStmt( $1->loc, LangStmt::ReturnType, $2->expr ); - }; -statement: KW_Break - final { - $$->stmt = new LangStmt( LangStmt::BreakType ); - }; -statement: KW_Yield var_ref - final { - $$->stmt = new LangStmt( LangStmt::YieldType, $2->varRef ); - }; -statement: var_ref TK_LtLt accumulate - final { - Namespace *nspace = namespaceStack.top(); - TokenRegion *region = regionStack.top(); - ParserText *parserText = new ParserText( $2->loc, nspace, region, replItemList ); - pd->parserTextList.append( parserText ); - - $$->stmt = new LangStmt( LangStmt::ParserType, $1->varRef, parserText ); - }; -statement: KW_Send var_ref accumulate - final { - Namespace *nspace = namespaceStack.top(); - TokenRegion *region = regionStack.top(); - ParserText *parserText = new ParserText( $1->loc, nspace, region, replItemList ); - pd->parserTextList.append( parserText ); - - $$->stmt = new LangStmt( LangStmt::ParserType, $2->varRef, parserText ); - }; - -nonterm opt_require_stmt uses statement; - -opt_require_stmt: - scope_push require_pattern lang_stmt_list scope_pop - final { - $$->stmt = new LangStmt( LangStmt::IfType, $2->expr, $3->stmtList, 0 ); - }; -opt_require_stmt: - final { - $$->stmt = 0; - }; - -nonterm require_pattern uses code_expr; - -require_pattern: - KW_Require var_ref pattern_list - final { - Namespace *nspace = namespaceStack.top(); - TokenRegion *region = regionStack.top(); - Pattern *pattern = new Pattern( $1->loc, nspace, region, - patternItemList, pd->nextPatReplId++ ); - pd->patternList.append( pattern ); - - $$->expr = new LangExpr( - new LangTerm( 
LangTerm::MatchType, $2->varRef, pattern ) ); - }; - -nonterm block_or_single uses lang_stmt_list; - -block_or_single: '{' lang_stmt_list '}' - final { - $$->stmtList = $2->stmtList; - }; -block_or_single: statement - final { - $$->stmtList = new StmtList; - $$->stmtList->append( $1->stmt ); - }; - -nonterm iter_call -{ - LangTerm *langTerm; -}; - -iter_call: var_ref '(' opt_code_expr_list ')' - final { - $$->langTerm = new LangTerm( $1->varRef, $3->exprVect ); - }; -iter_call: TK_Word - final { - $$->langTerm = new LangTerm( LangTerm::VarRefType, - new LangVarRef( $1->loc, new QualItemVect, $1->data ) ); - }; - -# -# If Statements -# - -nonterm if_stmt uses statement; - -if_stmt: KW_If scope_push code_expr block_or_single scope_pop elsif_list - final { - $$->stmt = new LangStmt( LangStmt::IfType, $3->expr, $4->stmtList, $6->stmt ); - }; - -nonterm elsif_list -{ - LangStmt *stmt; -}; - -elsif_list: - elsif_clause elsif_list - final { - /* Put any of the followng elseif part, an else, or null into the elsePart. */ - $$->stmt = $1->stmt; - $$->stmt->elsePart = $2->stmt; - }; -elsif_list: - optional_else - final { - $$->stmt = $1->stmt; - }; - -nonterm elsif_clause -{ - LangStmt *stmt; -}; - -elsif_clause: - KW_Elsif scope_push code_expr block_or_single scope_pop - final { - $$->stmt = new LangStmt( LangStmt::IfType, $3->expr, $4->stmtList, 0 ); - }; - -nonterm optional_else -{ - LangStmt *stmt; -}; - -optional_else: - KW_Else scope_push block_or_single scope_pop - final { - $$->stmt = new LangStmt( LangStmt::ElseType, $3->stmtList ); - }; - -optional_else: - final { - $$->stmt = 0; - }; - -# -# Code Expression Lists. 
-# -nonterm code_expr_list -{ - ExprVect *exprVect; -}; - -code_expr_list: code_expr_list code_expr - final { - $$->exprVect = $1->exprVect; - $$->exprVect->append( $2->expr ); - }; -code_expr_list: code_expr - final { - $$->exprVect = new ExprVect; - $$->exprVect->append( $1->expr ); - }; - -nonterm opt_code_expr_list uses code_expr_list; - -opt_code_expr_list: code_expr_list - final { - $$->exprVect = $1->exprVect; - }; - -opt_code_expr_list: - final { - $$->exprVect = 0; - }; - -# -# Type list -# - -nonterm type_list -{ - TypeRefVect *typeRefVect; -}; - -type_list: type_list ',' type_ref - final { - $$->typeRefVect = $1->typeRefVect; - $$->typeRefVect->append( $3->typeRef ); - }; -type_list: type_ref - final { - $$->typeRefVect = new TypeRefVect; - $$->typeRefVect->append( $1->typeRef ); - }; - -nonterm opt_type_list uses type_list; - -opt_type_list: type_list - final { - $$->typeRefVect = $1->typeRefVect; - }; - -opt_type_list: - final { - $$->typeRefVect = 0; - }; - - -# -# Variable reference -# - -nonterm var_ref -{ - LangVarRef *varRef; -}; - -var_ref: qual TK_Word - final { - $$->varRef = new LangVarRef( $2->loc, $1->qual, $2->data ); - }; - -nonterm qual -{ - QualItemVect *qual; -}; - -qual: qual TK_Word '.' 
- final { - $$->qual = $1->qual; - $$->qual->append( QualItem( $2->loc, $2->data, QualItem::Dot ) ); - }; -qual: qual TK_Word TK_RightArrow - final { - $$->qual = $1->qual; - $$->qual->append( QualItem( $2->loc, $2->data, QualItem::Arrow ) ); - }; -qual: - final { - $$->qual = new QualItemVect; - }; - -# -# Code expression -# - -nonterm code_expr -{ - LangExpr *expr; -}; - -code_expr: code_expr TK_AmpAmp code_relational - final { - $$->expr = new LangExpr( $2->loc, $1->expr, OP_LogicalAnd, $3->expr ); - }; - -code_expr: code_expr TK_BarBar code_relational - final { - $$->expr = new LangExpr( $2->loc, $1->expr, OP_LogicalOr, $3->expr ); - }; - -code_expr: code_relational - final { - $$->expr = $1->expr; - }; - -nonterm code_relational uses code_expr; - -code_relational: code_relational TK_DoubleEql code_additive - final { - $$->expr = new LangExpr( $2->loc, $1->expr, OP_DoubleEql, $3->expr ); - }; - -code_relational: code_relational TK_NotEql code_additive - final { - $$->expr = new LangExpr( $2->loc, $1->expr, OP_NotEql, $3->expr ); - }; - -code_relational: code_relational '<' code_additive - final { - $$->expr = new LangExpr( $2->loc, $1->expr, '<', $3->expr ); - }; - -code_relational: code_relational '>' code_additive - final { - $$->expr = new LangExpr( $2->loc, $1->expr, '>', $3->expr ); - }; - -code_relational: code_relational TK_LessEql code_additive - final { - $$->expr = new LangExpr( $2->loc, $1->expr, OP_LessEql, $3->expr ); - }; - -code_relational: code_relational TK_GrtrEql code_additive - final { - $$->expr = new LangExpr( $2->loc, $1->expr, OP_GrtrEql, $3->expr ); - }; - - -code_relational: code_additive - final { - $$->expr = $1->expr; - }; - -nonterm code_additive uses code_expr; - -code_additive: code_additive '+' code_multiplicitive - final { - $$->expr = new LangExpr( $2->loc, $1->expr, '+', $3->expr ); - }; - -code_additive: code_additive '-' code_multiplicitive - final { - $$->expr = new LangExpr( $2->loc, $1->expr, '-', $3->expr ); - }; - 
-code_additive: code_multiplicitive - final { - $$->expr = $1->expr; - }; - -nonterm code_multiplicitive uses code_expr; - -code_multiplicitive: code_multiplicitive '*' code_unary - final { - $$->expr = new LangExpr( $2->loc, $1->expr, '*', $3->expr ); - }; - -code_multiplicitive: code_multiplicitive '/' code_unary - final { - $$->expr = new LangExpr( $2->loc, $1->expr, '/', $3->expr ); - }; - -code_multiplicitive: code_unary - final { - $$->expr = $1->expr; - }; - -nonterm code_unary uses code_expr; -code_unary: '!' code_factor - final { - $$->expr = new LangExpr( $1->loc, '!', $2->expr ); - }; -code_unary: '$' code_factor - final { - $$->expr = new LangExpr( $1->loc, '$', $2->expr ); - }; -code_unary: '^' code_factor - final { - $$->expr = new LangExpr( $1->loc, '^', $2->expr ); - }; -code_unary: '%' code_factor - final { - $$->expr = new LangExpr( $1->loc, '%', $2->expr ); - }; -code_unary: code_factor - final { - $$->expr = $1->expr; - }; - -nonterm opt_capture uses var_def; - -opt_capture: TK_Word ':' - final { - $$->objField = new ObjField( $1->loc, 0, $1->data ); - }; -opt_capture: - final { - $$->objField = 0; - }; - -nonterm code_factor uses code_expr; - -code_factor: TK_Number - final { - $$->expr = new LangExpr( new LangTerm( LangTerm::NumberType, $1->data ) ); - }; -code_factor: TK_Literal - final { - $$->expr = new LangExpr( new LangTerm( LangTerm::StringType, $1->data ) ); - }; -code_factor: var_ref '(' opt_code_expr_list ')' - final { - $$->expr = new LangExpr( new LangTerm( $1->varRef, $3->exprVect ) ); - }; -code_factor: var_ref - final { - $$->expr = new LangExpr( new LangTerm( LangTerm::VarRefType, $1->varRef ) ); - }; -code_factor: KW_Match var_ref pattern_list - final { - Namespace *nspace = namespaceStack.top(); - TokenRegion *region = regionStack.top(); - Pattern *pattern = new Pattern( $1->loc, nspace, region, - patternItemList, pd->nextPatReplId++ ); - pd->patternList.append( pattern ); - - $$->expr = new LangExpr( new LangTerm( 
LangTerm::MatchType, $2->varRef, pattern ) ); - }; -code_factor: KW_New code_factor - final { - $$->expr = new LangExpr( new LangTerm( LangTerm::NewType, $2->expr ) ); - }; -code_factor: - KW_Construct opt_capture type_ref opt_field_init repl_list - final { - Namespace *nspace = namespaceStack.top(); - TokenRegion *region = regionStack.top(); - Replacement *replacement = new Replacement( $1->loc, nspace, region, - replItemList, pd->nextPatReplId++ ); - pd->replList.append( replacement ); - - LangVarRef *varRef = 0; - if ( $2->objField != 0 ) - varRef = new LangVarRef( $2->objField->loc, new QualItemVect, $2->objField->name ); - - $$->expr = new LangExpr( new LangTerm( $1->loc, LangTerm::ConstructType, - varRef, $2->objField, $3->typeRef, $4->fieldInitVect, replacement ) ); - - /* Check for redeclaration. */ - if ( $2->objField != 0 ) { - if ( pd->curLocalFrame->checkRedecl( $2->objField->name ) != 0 ) { - error( $2->objField->loc ) << "variable " << $2->objField->name << - " redeclared" << endp; - } - - /* Insert it into the field map. */ - $2->objField->typeRef = $3->typeRef; - pd->curLocalFrame->insertField( $2->objField->name, $2->objField ); - } - }; -code_factor: KW_Parse opt_capture type_ref '(' opt_code_expr_list ')' - final { - String parserName = $3->typeRef->typeName + "_parser"; - - /* Get the language element. 
*/ - Namespace *nspace = namespaceStack.top(); - - GenericType *generic = 0; - - NamespaceQual *nspaceQual = new NamespaceQual( - namespaceStack.top(), regionStack.top() ); - TypeRef *parserTypeRef = new TypeRef( TypeRef::Parser, - InputLoc(), nspaceQual, $3->typeRef, 0 ); - - Replacement *replacement = new Replacement( $1->loc, nspace, pd->rootRegion, - new ReplItemList, pd->nextPatReplId++ ); - pd->replList.append( replacement ); - - LangVarRef *varRef = 0; - if ( $2->objField != 0 ) - varRef = new LangVarRef( $2->objField->loc, new QualItemVect, $2->objField->name ); - - $$->expr = new LangExpr( new LangTerm( $1->loc, LangTerm::ParseType, - varRef, $2->objField, $3->typeRef, generic, parserTypeRef, replacement ) ); - $$->expr->term->args = $5->exprVect; - - /* Check for redeclaration. */ - if ( $2->objField != 0 ) { - if ( pd->curLocalFrame->checkRedecl( $2->objField->name ) != 0 ) { - error( $2->objField->loc ) << "variable " << $2->objField->name << - " redeclared" << endp; - } - - /* Insert it into the field map. */ - $2->objField->typeRef = $3->typeRef; - pd->curLocalFrame->insertField( $2->objField->name, $2->objField ); - } - }; -code_factor: KW_ParseStop opt_capture type_ref '(' opt_code_expr_list ')' - final { - /* This is a silly clone. To be fixed later. */ - String parserName = $3->typeRef->typeName + "_parser"; - - /* Get the language element. 
*/ - Namespace *nspace = namespaceStack.top(); - - GenericType *generic = 0; - - NamespaceQual *nspaceQual = new NamespaceQual( - namespaceStack.top(), regionStack.top() ); - TypeRef *parserTypeRef = new TypeRef( TypeRef::Parser, - InputLoc(), nspaceQual, $3->typeRef, 0 ); - - Replacement *replacement = new Replacement( $1->loc, nspace, pd->rootRegion, - new ReplItemList, pd->nextPatReplId++ ); - pd->replList.append( replacement ); - - LangVarRef *varRef = 0; - if ( $2->objField != 0 ) - varRef = new LangVarRef( $2->objField->loc, new QualItemVect, $2->objField->name ); - - $$->expr = new LangExpr( new LangTerm( $1->loc, LangTerm::ParseStopType, - varRef, $2->objField, $3->typeRef, generic, parserTypeRef, replacement ) ); - $$->expr->term->args = $5->exprVect; - - /* Check for redeclaration. */ - if ( $2->objField != 0 ) { - if ( pd->curLocalFrame->checkRedecl( $2->objField->name ) != 0 ) { - error( $2->objField->loc ) << "variable " << $2->objField->name << - " redeclared" << endp; - } - - /* Insert it into the field map. 
*/ - $2->objField->typeRef = $3->typeRef; - pd->curLocalFrame->insertField( $2->objField->name, $2->objField ); - } - - }; -code_factor: KW_TypeId '<' type_ref '>' - final { - $$->expr = new LangExpr( new LangTerm( $1->loc, - LangTerm::TypeIdType, $3->typeRef ) ); - }; -code_factor: type_ref KW_In var_ref - final { - $$->expr = new LangExpr( new LangTerm( $2->loc, - LangTerm::SearchType, $1->typeRef, $3->varRef ) ); - }; -code_factor: KW_Nil - final { - $$->expr = new LangExpr( new LangTerm( $1->loc, - LangTerm::NilType ) ); - }; -code_factor: KW_True - final { - $$->expr = new LangExpr( new LangTerm( $1->loc, - LangTerm::TrueType ) ); - }; -code_factor: KW_False - final { - $$->expr = new LangExpr( new LangTerm( $1->loc, - LangTerm::FalseType ) ); - }; -code_factor: '(' code_expr ')' - final { - $$->expr = $2->expr; - }; -code_factor: KW_MakeTree '(' opt_code_expr_list ')' - final { - $$->expr = new LangExpr( new LangTerm( $1->loc, - LangTerm::MakeTreeType, $3->exprVect ) ); - }; -code_factor: KW_MakeToken '(' opt_code_expr_list ')' - final { - $$->expr = new LangExpr( new LangTerm( $1->loc, - LangTerm::MakeTokenType, $3->exprVect ) ); - }; -code_factor: KW_Deref code_expr - final { - $$->expr = new LangExpr( $1->loc, OP_Deref, $2->expr ); - }; -code_factor: string_list - final { - $$->expr = new LangExpr( new LangTerm( replItemList ) ); - }; - -nonterm opt_field_init uses field_init_list; - -opt_field_init: '(' opt_field_init_list ')' - final { - $$->fieldInitVect = $2->fieldInitVect; - }; -opt_field_init: - final { - $$->fieldInitVect = 0; - }; - -nonterm opt_field_init_list uses field_init_list; - -opt_field_init_list: field_init_list - final { - $$->fieldInitVect = $1->fieldInitVect; - }; -opt_field_init_list: - final { - $$->fieldInitVect = 0; - }; - -nonterm field_init_list -{ - FieldInitVect *fieldInitVect; -}; - -field_init_list: field_init_list field_init - final { - $$->fieldInitVect = $1->fieldInitVect; - $$->fieldInitVect->append( $2->fieldInit ); - }; 
-field_init_list: field_init - final { - $$->fieldInitVect = new FieldInitVect; - $$->fieldInitVect->append( $1->fieldInit ); - }; - -nonterm field_init -{ - FieldInit *fieldInit; -}; - -field_init: code_expr - final { - $$->fieldInit = new FieldInit( InputLoc(), "_name", $1->expr ); - }; - -# -# Regular Expressions -# - -nonterm opt_rl_join uses rl_join; - -opt_rl_join: rl_join opt_context - final { - $$->join = $1->join; - $$->context = $2->context; - - if ( $2->context != 0 ) { - /* Create the enter and leaving actions that will mark the substring. */ - Action *mark = new Action( MarkMark, pd->nextMatchEndNum++ ); - pd->actionList.append( mark ); - - $$->join->context = $2->context; - $$->join->mark = mark; - } - }; - -opt_rl_join: - final { - $$->join = 0; - $$->context = 0; - }; - -nonterm rl_join -{ - Join *join; - Join *context; -}; - -rl_join: - rl_join ',' rl_expr - final { - /* Append the expression to the list and return it. */ - $1->join->exprList.append( $3->expression ); - $$->join = $1->join; - }; -rl_join: - rl_expr - final { - $$->join = new Join( $1->expression ); - }; - -# Context at the end of a pattern that is not included in the match -nonterm opt_context uses rl_join; - -opt_context: '@' rl_join final { $$->context = $2->join; }; -opt_context: final { $$->context = 0; }; - -nonterm rl_expr -{ - Expression *expression; -}; - -rl_expr: - rl_expr '|' rl_term_short final { - $$->expression = new Expression( $1->expression, - $3->term, Expression::OrType ); - }; -rl_expr: - rl_expr '&' rl_term_short final { - $$->expression = new Expression( $1->expression, - $3->term, Expression::IntersectType ); - }; -# This priority specification overrides the innermost parsing strategy which -# results ordered choice interpretation of the grammar. 
-rl_expr: - rl_expr '-' rl_term_short final { - $$->expression = new Expression( $1->expression, - $3->term, Expression::SubtractType ); - }; -rl_expr: - rl_expr TK_DashDash rl_term_short final { - $$->expression = new Expression( $1->expression, - $3->term, Expression::StrongSubtractType ); - }; -rl_expr: - rl_term_short final { - $$->expression = new Expression( $1->term ); - }; - -nonterm rl_term_short -{ - Term *term; -}; - -shortest rl_term_short; - -rl_term_short: rl_term - final { $$->term = $1->term; }; - -nonterm rl_term -{ - Term *term; -}; - -rl_term: - rl_term factor_with_label final { - $$->term = new Term( $1->term, $2->factorWithAug ); - }; -rl_term: - rl_term '.' factor_with_label final { - $$->term = new Term( $1->term, $3->factorWithAug ); - }; -rl_term: - rl_term TK_ColonGt factor_with_label final { - $$->term = new Term( $1->term, $3->factorWithAug, Term::RightStartType ); - }; -rl_term: - rl_term TK_ColonGtGt factor_with_label final { - $$->term = new Term( $1->term, $3->factorWithAug, Term::RightFinishType ); - }; -rl_term: - rl_term TK_LtColon factor_with_label final { - $$->term = new Term( $1->term, - $3->factorWithAug, Term::LeftType ); - }; -rl_term: - factor_with_label final { - $$->term = new Term( $1->factorWithAug ); - }; - -nonterm factor_with_label -{ - FactorWithAug *factorWithAug; -}; - -factor_with_label: - factor_with_ep final { - $$->factorWithAug = $1->factorWithAug; - }; - -factor_with_label: - TK_Word ':' factor_with_label final { - $$->factorWithAug = $3->factorWithAug; - - if ( pd->objectDef->checkRedecl( $1->data ) != 0 ) - error($1->loc) << "label name \"" << $1->data << "\" already in use" << endp; - - /* Create the object field. */ - NamespaceQual *qual = new NamespaceQual( namespaceStack.top(), regionStack.top() ); - TypeRef *typeRef = new TypeRef( $1->loc, qual, "str" ); - ObjField *objField = new ObjField( $1->loc, typeRef, $1->data ); - - /* Insert it into the map. 
*/ - pd->objectDef->insertField( $1->data, objField ); - - /* Create the enter and leaving actions that will mark the substring. */ - Action *enter = new Action( MarkMark, pd->nextMatchEndNum++ ); - Action *leave = new Action( MarkMark, pd->nextMatchEndNum++ ); - pd->actionList.append( enter ); - pd->actionList.append( leave ); - - /* Add entering and leaving actions. */ - $$->factorWithAug->actions.append( ParserAction( $1->loc, at_start, 0, enter ) ); - $$->factorWithAug->actions.append( ParserAction( $1->loc, at_leave, 0, leave ) ); - - reCaptureVect.append( ReCapture( enter, leave, objField ) ); - }; - -nonterm factor_with_ep -{ - FactorWithAug *factorWithAug; -}; - -factor_with_ep: - factor_with_aug final { - $$->factorWithAug = $1->factorWithAug; - }; - -nonterm factor_with_aug -{ - FactorWithAug *factorWithAug; -}; - -factor_with_aug: - factor_with_rep final { - $$->factorWithAug = new FactorWithAug( $1->factorWithRep ); - }; - - -# The fourth level of precedence. These are the trailing unary operators that -# allow for repetition. - -nonterm factor_with_rep -{ - FactorWithRep *factorWithRep; -}; - -factor_with_rep: - factor_with_rep '*' final { - $$->factorWithRep = new FactorWithRep( $2->loc, $1->factorWithRep, - 0, 0, FactorWithRep::StarType ); - }; -factor_with_rep: - factor_with_rep TK_StarStar final { - $$->factorWithRep = new FactorWithRep( $2->loc, $1->factorWithRep, - 0, 0, FactorWithRep::StarStarType ); - }; -factor_with_rep: - factor_with_rep '?' 
final { - $$->factorWithRep = new FactorWithRep( $2->loc, $1->factorWithRep, - 0, 0, FactorWithRep::OptionalType ); - }; -factor_with_rep: - factor_with_rep '+' final { - $$->factorWithRep = new FactorWithRep( $2->loc, $1->factorWithRep, - 0, 0, FactorWithRep::PlusType ); - }; -factor_with_rep: - factor_with_rep '{' factor_rep_num '}' final { - $$->factorWithRep = new FactorWithRep( $2->loc, $1->factorWithRep, - $3->rep, 0, FactorWithRep::ExactType ); - }; -factor_with_rep: - factor_with_rep '{' ',' factor_rep_num '}' final { - $$->factorWithRep = new FactorWithRep( $2->loc, $1->factorWithRep, - 0, $4->rep, FactorWithRep::MaxType ); - }; -factor_with_rep: - factor_with_rep '{' factor_rep_num ',' '}' final { - $$->factorWithRep = new FactorWithRep( $2->loc, $1->factorWithRep, - $3->rep, 0, FactorWithRep::MinType ); - }; -factor_with_rep: - factor_with_rep '{' factor_rep_num ',' factor_rep_num '}' final { - $$->factorWithRep = new FactorWithRep( $2->loc, $1->factorWithRep, - $3->rep, $5->rep, FactorWithRep::RangeType ); - }; -factor_with_rep: - factor_with_neg final { - $$->factorWithRep = new FactorWithRep( - $1->factorWithNeg->loc, $1->factorWithNeg ); - }; - -nonterm factor_rep_num -{ - int rep; -}; - -factor_rep_num: - TK_UInt final { - // Convert the priority number to a long. Check for overflow. - errno = 0; - int rep = strtol( $1->data, 0, 10 ); - if ( errno == ERANGE && rep == LONG_MAX ) { - // Repetition too large. Recover by returing repetition 1. */ - error($1->loc) << "repetition number " << $1->data << " overflows" << endl; - $$->rep = 1; - } - else { - // Cannot be negative, so no overflow. - $$->rep = rep; - } - }; - - -# -# The fifth level up in precedence. Negation. -# - -nonterm factor_with_neg -{ - FactorWithNeg *factorWithNeg; -}; - -factor_with_neg: - '!' 
factor_with_neg final { - $$->factorWithNeg = new FactorWithNeg( $1->loc, - $2->factorWithNeg, FactorWithNeg::NegateType ); - }; -factor_with_neg: - '^' factor_with_neg final { - $$->factorWithNeg = new FactorWithNeg( $1->loc, - $2->factorWithNeg, FactorWithNeg::CharNegateType ); - }; -factor_with_neg: - rl_factor final { - $$->factorWithNeg = new FactorWithNeg( $1->factor->loc, $1->factor ); - }; - -nonterm rl_factor -{ - Factor *factor; -}; - -rl_factor: - TK_Literal final { - /* Create a new factor node going to a concat literal. */ - $$->factor = new Factor( new Literal( $1->loc, $1->data, Literal::LitString ) ); - }; -rl_factor: - alphabet_num final { - /* Create a new factor node going to a literal number. */ - $$->factor = new Factor( new Literal( $1->loc, - $1->data, Literal::Number ) ); - }; -rl_factor: - TK_Word final { - /* Find the named graph. */ - Namespace *nspace = namespaceStack.top(); - - while ( nspace != 0 ) { - GraphDictEl *gdNode = nspace->rlMap.find( $1->data ); - if ( gdNode != 0 ) { - if ( gdNode->isInstance ) { - /* Recover by retuning null as the factor node. */ - error($1->loc) << "references to graph instantiations not allowed " - "in expressions" << endl; - $$->factor = 0; - } - else { - /* Create a factor node that is a lookup of an expression. */ - $$->factor = new Factor( $1->loc, gdNode->value ); - } - break; - } - - nspace = nspace->parentNamespace; - } - - if ( nspace == 0 ) { - /* Recover by returning null as the factor node. */ - error($1->loc) << "graph lookup of \"" << $1->data << "\" failed" << endl; - $$->factor = 0; - } - }; -rl_factor: - TK_SqOpen regular_expr_or_data TK_SqClose final { - /* Create a new factor node going to an OR expression. */ - $$->factor = new Factor( new ReItem( $1->loc, $2->reOrBlock, ReItem::OrBlock ) ); - }; -rl_factor: - TK_SqOpenNeg regular_expr_or_data TK_SqClose final { - /* Create a new factor node going to a negated OR expression. 
*/ - $$->factor = new Factor( new ReItem( $1->loc, $2->reOrBlock, ReItem::NegOrBlock ) ); - }; -rl_factor: - range_lit TK_DotDot range_lit final { - /* Create a new factor node going to a range. */ - $$->factor = new Factor( new Range( $1->literal, $3->literal ) ); - }; -rl_factor: - '(' rl_join ')' final { - /* Create a new factor going to a parenthesized join. */ - $$->factor = new Factor( $2->join ); - }; - -nonterm range_lit -{ - Literal *literal; -}; - -# Literals which can be the end points of ranges. -range_lit: - TK_Literal final { - /* Range literals must have only one char. We restrict this in the parse tree. */ - $$->literal = new Literal( $1->loc, $1->data, Literal::LitString ); - }; -range_lit: - alphabet_num final { - /* Create a new literal number. */ - $$->literal = new Literal( $1->loc, $1->data, Literal::Number ); - }; - -nonterm alphabet_num uses token_data; - -# Any form of a number that can be used as a basic machine. -alphabet_num: - TK_UInt final { - $$->loc = $1->loc; - $$->data = $1->data; - }; -alphabet_num: - '-' TK_UInt final { - $$->loc = $1->loc; - $$->data = '-'; /* keep the minus sign so the literal text stays negative */ - $$->data += $2->data; - }; -alphabet_num: - TK_Hex final { - $$->loc = $1->loc; - $$->data = $1->data; - }; - -# -# Regular Expressions. -# - - -# The data inside of a [] expression in a regular expression. Accepts any -# number of characters or ranges. -nonterm regular_expr_or_data -{ - ReOrBlock *reOrBlock; -}; - -regular_expr_or_data: - regular_expr_or_data regular_expr_or_char final { - /* An optimization to lessen the tree size. If an or char is directly - * under the left side on the right and the right side is another or - * char then paste them together and return the left side. Otherwise - * just put the two under a new or data node.
*/ - if ( $2->reOrItem->type == ReOrItem::Data && - $1->reOrBlock->type == ReOrBlock::RecurseItem && - $1->reOrBlock->item->type == ReOrItem::Data ) - { - /* Append the right side to right side of the left and toss the - * right side. */ - $1->reOrBlock->item->data += $2->reOrItem->data; - delete $2->reOrItem; - $$->reOrBlock = $1->reOrBlock; - } - else { - /* Can't optimize, put the left and right under a new node. */ - $$->reOrBlock = new ReOrBlock( $1->reOrBlock, $2->reOrItem ); - } - }; -regular_expr_or_data: - final { - $$->reOrBlock = new ReOrBlock(); - }; - -# A single character inside of an or expression. Can either be a character or a -# set of characters. -nonterm regular_expr_or_char -{ - ReOrItem *reOrItem; -}; - -regular_expr_or_char: - TK_ReChar final { - $$->reOrItem = new ReOrItem( $1->loc, $1->data ); - }; -regular_expr_or_char: - TK_ReChar TK_Dash TK_ReChar final { - $$->reOrItem = new ReOrItem( $2->loc, $1->data[0], $3->data[0] ); - }; - -# A local state reference. Cannot have :: prefix. -local_state_ref: - no_name_sep state_ref_names; - -# Clear the name ref structure. -no_name_sep: - final { - nameRef.empty(); - }; - -# A qualified state reference. -state_ref: opt_name_sep state_ref_names; - -# Optional leading name separator. -opt_name_sep: - TK_NameSep - final { - /* Insert an initial null pointer val to indicate the existence of the - * initial name seperator. */ - nameRef.setAs( 0 ); - }; -opt_name_sep: - final { - nameRef.empty(); - }; - -# List of names separated by :: -state_ref_names: - state_ref_names TK_NameSep TK_Word - final { - nameRef.append( $3->data ); - }; -state_ref_names: - TK_Word - final { - nameRef.append( $1->data ); - }; - -nonterm opt_commit -{ - bool commit; -}; - -opt_commit: final { $$->commit = false; }; -opt_commit: KW_Commit final { $$->commit = true; }; - -# -# Grammar Finished -# - - write types; - write data; -}%% - -void ColmParser::init() -{ - /* Set up the root namespace. 
*/ - const char *rootNamespaceName = "___ROOT_NAMESPACE"; - Namespace *rootNamespace = new Namespace( InputLoc(), - rootNamespaceName, pd->namespaceList.length(), 0 ); - pd->namespaceList.append( rootNamespace ); - namespaceStack.push( rootNamespace ); - pd->rootNamespace = rootNamespace; - - /* Set up the root token region. */ - const char *rootRegionName = "___ROOT_REGION"; - - TokenRegion *rootRegion = new TokenRegion( InputLoc(), rootRegionName, - pd->regionList.length(), 0 ); - pd->regionList.append( rootRegion ); - addRegionDef( InputLoc(), namespaceStack.top(), rootRegionName, rootRegion ); - - regionStack.push( rootRegion ); - - pd->rootRegion = rootRegion; - - /* Set up the global object. */ - String global = "global"; - pd->globalObjectDef = new ObjectDef( ObjectDef::UserType, - global, pd->nextObjectId++ ); - - /* The eofTokenRegion defaults to the root region. */ - pd->eofTokenRegion = rootRegion; - - /* Initialize the dictionary of graphs. This is our symbol table. The - * initialization needs to be done on construction which happens at the - * beginning of a machine spec so any assignment operators can reference - * the builtins. */ - pd->initGraphDict(); - - pd->rootLocalFrame = new ObjectDef( ObjectDef::FrameType, - "local", pd->nextObjectId++ ); - pd->curLocalFrame = pd->rootLocalFrame; - - %% write init; - - addArgvList(); -} - -void ColmParser::addArgvList() -{ - NamespaceQual *nspaceQual1 = new NamespaceQual( - namespaceStack.top(), regionStack.top() ); - TypeRef *typeRef = new TypeRef( InputLoc(), nspaceQual1, "str" ); - - NamespaceQual *nspaceQual2 = new NamespaceQual( - namespaceStack.top(), regionStack.top() ); - - pd->argvTypeRef = new TypeRef( TypeRef::List, InputLoc(), - nspaceQual2, typeRef, 0 ); -} - -int ColmParser::parseLangEl( int type, const Token *token ) -{ - %% write exec; - return errCount == 0 ? 
0 : -1; -} - -void ColmParser::addRegularDef( const InputLoc &loc, Namespace *nspace, - const String &name, Join *join ) -{ - GraphDictEl *newEl = nspace->rlMap.insert( name ); - if ( newEl != 0 ) { - /* New element in the dict, all good. */ - newEl->value = new VarDef( name, join ); - newEl->isInstance = false; - newEl->loc = loc; - } - else { - // Recover by ignoring the duplicate. - error(loc) << "regular definition \"" << name << "\" already exists" << endl; - } -} - -TokenRegion *ColmParser::createRegion( String &scannerName ) -{ - TokenRegion *tokenRegion = new TokenRegion( InputLoc(), scannerName, - pd->regionList.length(), regionStack.top() ); - - regionStack.top()->childRegions.append( tokenRegion ); - - pd->regionList.append( tokenRegion ); - - addRegionDef( InputLoc(), namespaceStack.top(), scannerName, tokenRegion ); - - return tokenRegion; -} - - -void ColmParser::addRegionDef( const InputLoc &loc, Namespace *nspace, - const String &name, TokenRegion *tokenRegion ) -{ - RegionGraphDictEl *newEl = nspace->graphDict.insert( name ); - if ( newEl != 0 ) { - /* New element in the dict, all good. */ - newEl->value = new RegionDef( name, tokenRegion ); - newEl->isInstance = true; - newEl->loc = loc; - - /* It it is an instance, put on the instance list. */ - pd->instanceList.append( newEl ); - } - else { - // Recover by ignoring the duplicate. - error(loc) << "regular definition \"" << name << "\" already exists" << endl; - } -} - -ostream &ColmParser::parse_error( int tokId, Token &token ) -{ - /* Maintain the error count. 
*/ - gblErrorCount += 1; - - cerr << token.loc.fileName << ":" << token.loc.line << ":" << token.loc.col << ": "; - cerr << "at token "; - if ( tokId < 128 ) - cerr << "\"" << ColmParser_lelNames[tokId] << "\""; - else - cerr << ColmParser_lelNames[tokId]; - if ( token.data != 0 ) - cerr << " with data \"" << token.data << "\""; - cerr << ": "; - - return cerr; -} - -int ColmParser::token( InputLoc &loc, int tokId, char *tokstart, int toklen ) -{ - Token token; - - if ( toklen > 0 ) - token.data.setAs( tokstart, toklen ); - - token.loc = loc; - int res = parseLangEl( tokId, &token ); - if ( res < 0 ) { - parse_error(tokId, token) << "parse error" << endl; - exit(1); - } - return res; -} diff --git a/colm/lmscan.h b/colm/lmscan.h deleted file mode 100644 index 5badaed5..00000000 --- a/colm/lmscan.h +++ /dev/null @@ -1,118 +0,0 @@ -/* - * Copyright 2007-2012 Adrian Thurston <thurston@complang.org> - */ - -/* This file is part of Colm. - * - * Colm is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * Colm is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with Colm; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#ifndef _RLSCAN_H -#define _RLSCAN_H - -#include <iostream> -#include <fstream> -#include <string.h> - -#include "global.h" -#include "lmparse.h" -#include "parsedata.h" -#include "avltree.h" -#include "vector.h" -#include "buffer.h" - -using std::ifstream; -using std::istream; -using std::ostream; -using std::cout; -using std::cerr; -using std::endl; - -extern char *Parser_lelNames[]; - -/* This is used for tracking the current stack of include file/machine pairs. It is - * is used to detect and recursive include structure. */ -struct IncludeStackItem -{ - IncludeStackItem( const char *fileName ) - : fileName(fileName) {} - - const char *fileName; -}; - -typedef Vector<IncludeStackItem> IncludeStack; -typedef Vector<const char *> ArgsVector; - -extern ArgsVector includePaths; - -struct ColmScanner -{ - ColmScanner( const char *fileName, istream &input, - ostream &output, ColmParser *parser, int includeDepth ) - : - fileName(fileName), input(input), output(output), - includeDepth(includeDepth), - line(1), column(1), lastnl(0), - parser(parser), - parserExistsError(false), - whitespaceOn(true) - { - } - - ifstream *tryOpenInclude( char **pathChecks, long &found ); - char **makeIncludePathChecks( const char *thisFileName, const char *fileName ); - bool recursiveInclude( const char *inclFileName ); - - void sectionParseInit(); - void token( int type, char *start, char *end ); - void token( int type, char c ); - void token( int type ); - void updateCol(); - void endSection(); - void scan(); - void eof(); - ostream &scan_error(); - - const char *fileName; - istream &input; - ostream &output; - int includeDepth; - - int cs; - int line; - char *word, *lit; - int word_len, lit_len; - InputLoc sectionLoc; - char *ts, *te; - int column; - char *lastnl; - - /* Set 
by machine statements, these persist from section to section - * allowing for unnamed sections. */ - ColmParser *parser; - IncludeStack includeStack; - - /* This is set if ragel has already emitted an error stating that - * no section name has been seen and thus no parser exists. */ - bool parserExistsError; - - /* This is for inline code. By default it is on. It goes off for - * statements and values in inline blocks which are parsed. */ - bool whitespaceOn; - - Buffer litBuf; -}; - -#endif /* _RLSCAN_H */ diff --git a/colm/lmscan.rl b/colm/lmscan.rl deleted file mode 100644 index 070a1e66..00000000 --- a/colm/lmscan.rl +++ /dev/null @@ -1,636 +0,0 @@ -/* - * Copyright 2006-2012 Adrian Thurston <thurston@complang.org> - */ - -/* This file is part of Colm. - * - * Colm is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * Colm is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with Colm; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include <iostream> -#include <fstream> -#include <string.h> - -#include "global.h" -#include "lmscan.h" -#include "lmparse.h" -#include "parsedata.h" -#include "avltree.h" -#include "vector.h" - -//#define PRINT_TOKENS - -using std::ifstream; -using std::istream; -using std::ostream; -using std::cout; -using std::cerr; -using std::endl; - -%%{ - machine section_parse; - alphtype int; - write data; -}%% - -void ColmScanner::sectionParseInit() -{ - %% write init; -} - -ostream &ColmScanner::scan_error() -{ - /* Maintain the error count. */ - gblErrorCount += 1; - cerr << fileName << ":" << line << ":" << column << ": "; - return cerr; -} - -bool ColmScanner::recursiveInclude( const char *inclFileName ) -{ - for ( IncludeStack::Iter si = includeStack; si.lte(); si++ ) { - if ( strcmp( si->fileName, inclFileName ) == 0 ) - return true; - } - return false; -} - -void ColmScanner::updateCol() -{ - char *from = lastnl; - if ( from == 0 ) - from = ts; - //cerr << "adding " << te - from << " to column" << endl; - column += te - from; - lastnl = 0; -} - -void ColmScanner::token( int type, char c ) -{ - token( type, &c, &c + 1 ); -} - -void ColmScanner::token( int type ) -{ - token( type, 0, 0 ); -} - -bool isAbsolutePath( const char *path ) -{ - return path[0] == '/'; -} - -ifstream *ColmScanner::tryOpenInclude( char **pathChecks, long &found ) -{ - char **check = pathChecks; - ifstream *inFile = new ifstream; - - while ( *check != 0 ) { - inFile->open( *check ); - if ( inFile->is_open() ) { - found = check - pathChecks; - return inFile; - } - check += 1; - } - - found = -1; - delete inFile; - return 0; -} - -char **ColmScanner::makeIncludePathChecks( const char *thisFileName, const char *fileName ) -{ - char **checks = 0; - long nextCheck = 0; - char *data = 
strdup(fileName); - long length = strlen(fileName); - - /* Absolute path? */ - if ( isAbsolutePath( data ) ) { - checks = new char*[2]; - checks[nextCheck++] = data; - } - else { - /* Search from the the location of the current file. */ - checks = new char *[2 + includePaths.length()]; - const char *lastSlash = strrchr( thisFileName, '/' ); - if ( lastSlash == 0 ) - checks[nextCheck++] = data; - else { - long givenPathLen = (lastSlash - thisFileName) + 1; - long checklen = givenPathLen + length; - char *check = new char[checklen+1]; - memcpy( check, thisFileName, givenPathLen ); - memcpy( check+givenPathLen, data, length ); - check[checklen] = 0; - checks[nextCheck++] = check; - } - - /* Search from the include paths given on the command line. */ - for ( ArgsVector::Iter incp = includePaths; incp.lte(); incp++ ) { - long pathLen = strlen( *incp ); - long checkLen = pathLen + 1 + length; - char *check = new char[checkLen+1]; - memcpy( check, *incp, pathLen ); - check[pathLen] = '/'; - memcpy( check+pathLen+1, data, length ); - check[checkLen] = 0; - checks[nextCheck++] = check; - } - } - - checks[nextCheck] = 0; - return checks; -} - - -%%{ - machine section_parse; - import "lmparse.h"; - - action clear_words { word = lit = 0; word_len = lit_len = 0; } - action store_lit { lit = tokdata; lit_len = toklen; } - - action mach_err { scan_error() << "bad machine statement" << endl; } - action incl_err { scan_error() << "bad include statement" << endl; } - action write_err { scan_error() << "bad write statement" << endl; } - - action handle_include - { - String src( lit, lit_len ); - String fileName; - bool unused; - - /* Need a location. */ - InputLoc here; - here.fileName = fileName; - here.line = line; - here.col = column; - - prepareLitString( fileName, unused, src, here ); - char **checks = makeIncludePathChecks( this->fileName, fileName ); - - /* Open the input file for reading. 
*/ - long found = 0; - ifstream *inFile = tryOpenInclude( checks, found ); - if ( inFile == 0 ) { - scan_error() << "include: could not open " << - fileName << " for reading" << endl; - } - else { - /* Only proceed with the include if it was found. */ - if ( recursiveInclude( checks[found] ) ) - scan_error() << "include: this is a recursive include operation" << endl; - - /* Check for a recursive include structure. Add the current file/section - * name then check if what we are including is already in the stack. */ - includeStack.append( IncludeStackItem( checks[found] ) ); - - ColmScanner *scanner = new ColmScanner( fileName, *inFile, output, parser, includeDepth+1 ); - scanner->scan(); - delete inFile; - - /* Remove the last element (len-1) */ - includeStack.remove( -1 ); - - delete scanner; - } - } - - include_target = - TK_Literal >clear_words @store_lit; - - include_stmt = - ( KW_Include include_target ) @handle_include - <>err incl_err <>eof incl_err; - - action handle_token - { -// cout << Parser_lelNames[type] << " "; -// if ( start != 0 ) { -// cout.write( start, end-start ); -// } -// cout << endl; - - InputLoc loc; - - #ifdef PRINT_TOKENS - cerr << "scanner:" << line << ":" << column << - ": sending token to the parser " << Parser_lelNames[*p]; - cerr << " " << toklen; - if ( tokdata != 0 ) - cerr << " " << tokdata; - cerr << endl; - #endif - - loc.fileName = fileName; - loc.line = line; - loc.col = column; - - if ( tokdata != 0 && tokdata[toklen-1] == '\n' ) - loc.line -= 1; - - parser->token( loc, type, tokdata, toklen ); - } - - # Catch everything else. 
- everything_else = ^( KW_Include ) @handle_token; - - main := ( - include_stmt | - everything_else - )*; -}%% - -void ColmScanner::token( int type, char *start, char *end ) -{ - char *tokdata = 0; - int toklen = 0; - int *p = &type; - int *pe = &type + 1; - int *eof = 0; - - if ( start != 0 ) { - toklen = end-start; - tokdata = new char[toklen+1]; - memcpy( tokdata, start, toklen ); - tokdata[toklen] = 0; - } - - %%{ - machine section_parse; - write exec; - }%% - - updateCol(); -} - -void ColmScanner::endSection( ) -{ - /* Execute the eof actions for the section parser. */ - /* Probably use: token( -1 ); */ -} - -%%{ - machine rlscan; - - # This is sent by the driver code. - EOF = 0; - - action inc_nl { - lastnl = p; - column = 0; - line++; - } - NL = '\n' @inc_nl; - - # Identifiers, numbers, commetns, and other common things. - ident = ( alpha | '_' ) ( alpha |digit |'_' )*; - number = digit+; - hex_number = '0x' [0-9a-fA-F]+; - - # These literal forms are common to C-like host code and ragel. - s_literal = "'" ([^'\\] | NL | '\\' (any | NL))* "'"; - d_literal = '"' ([^"\\] | NL | '\\' (any | NL))* '"'; - - whitespace = [ \t] | NL; - pound_comment = '#' [^\n]* NL; - - or_literal := |* - # Escape sequences in OR expressions. - '\\0' => { token( TK_ReChar, '\0' ); }; - '\\a' => { token( TK_ReChar, '\a' ); }; - '\\b' => { token( TK_ReChar, '\b' ); }; - '\\t' => { token( TK_ReChar, '\t' ); }; - '\\n' => { token( TK_ReChar, '\n' ); }; - '\\v' => { token( TK_ReChar, '\v' ); }; - '\\f' => { token( TK_ReChar, '\f' ); }; - '\\r' => { token( TK_ReChar, '\r' ); }; - '\\\n' => { updateCol(); }; - '\\' any => { token( TK_ReChar, ts+1, te ); }; - - # Range dash in an OR expression. - '-' => { token( TK_Dash, 0, 0 ); }; - - # Terminate an OR expression. - ']' => { token( TK_SqClose ); fret; }; - - EOF => { - scan_error() << "unterminated OR literal" << endl; - }; - - # Characters in an OR expression. 
- [^\]] => { token( TK_ReChar, ts, te ); }; - - *|; - - regular_type := |* - # Identifiers. - ident => { token( TK_Word, ts, te ); } ; - - # Numbers - number => { token( TK_UInt, ts, te ); }; - hex_number => { token( TK_Hex, ts, te ); }; - - # Literals, with optionals. - ( s_literal | d_literal ) [i]? - => { token( TK_Literal, ts, te ); }; - - '[' => { token( TK_SqOpen ); fcall or_literal; }; - '[^' => { token( TK_SqOpenNeg ); fcall or_literal; }; - - '/' => { token( '/'); fret; }; - - # Ignore. - pound_comment => { updateCol(); }; - - '..' => { token( TK_DotDot ); }; - '**' => { token( TK_StarStar ); }; - '--' => { token( TK_DashDash ); }; - - ':>' => { token( TK_ColonGt ); }; - ':>>' => { token( TK_ColonGtGt ); }; - '<:' => { token( TK_LtColon ); }; - - # Whitespace other than newline. - [ \t\r]+ => { updateCol(); }; - - # If we are in a single line machine then newline may end the spec. - NL => { updateCol(); }; - - # Consume eof. - EOF; - - any => { token( *ts ); } ; - *|; - - literal_pattern := |* - '\\' '0' { litBuf.append( '\0' ); }; - '\\' 'a' { litBuf.append( '\a' ); }; - '\\' 'b' { litBuf.append( '\b' ); }; - '\\' 't' { litBuf.append( '\t' ); }; - '\\' 'n' { litBuf.append( '\n' ); }; - '\\' 'v' { litBuf.append( '\v' ); }; - '\\' 'f' { litBuf.append( '\f' ); }; - '\\' 'r' { litBuf.append( '\r' ); }; - - '\\' any { - litBuf.append( ts[1] ); - }; - '"' => { - if ( litBuf.length > 0 ) { - token( TK_LitPat, litBuf.data, litBuf.data+litBuf.length ); - litBuf.clear(); - } - token( '"' ); - fret; - }; - NL => { - litBuf.append( '\n' ); - token( TK_LitPat, litBuf.data, litBuf.data+litBuf.length ); - litBuf.clear(); - token( '"' ); - fret; - }; - '[' => { - if ( litBuf.length > 0 ) { - token( TK_LitPat, litBuf.data, litBuf.data+litBuf.length ); - litBuf.clear(); - } - token( '[' ); - fcall main; - }; - any => { - litBuf.append( *ts ); - }; - *|; - - # Parser definitions. 
- main := |* - 'lex' => { token( KW_Lex ); }; - 'commit' => { token( KW_Commit ); }; - 'token' => { token( KW_Token ); }; - 'literal' => { token( KW_Literal ); }; - 'rl' => { token( KW_Rl ); }; - 'def' => { token( KW_Def ); }; - 'ignore' => { token( KW_Ignore ); }; - 'construct' => { token( KW_Construct ); }; - 'cons' => { token( KW_Construct ); }; - 'new' => { token( KW_New ); }; - 'if' => { token( KW_If ); }; - 'reject' => { token( KW_Reject ); }; - 'while' => { token( KW_While ); }; - 'else' => { token( KW_Else ); }; - 'elsif' => { token( KW_Elsif ); }; - 'match' => { token( KW_Match ); }; - 'for' => { token( KW_For ); }; - 'iter' => { token( KW_Iter ); }; - 'prints' => { token( KW_PrintStream ); }; - 'print' => { token( KW_Print ); }; - 'print_xml_ac' => { token( KW_PrintXMLAC ); }; - 'print_xml' => { token( KW_PrintXML ); }; - 'namespace' => { token( KW_Namespace ); }; - 'lex' => { token( KW_Lex ); }; - 'map' => { token( KW_Map ); }; - 'list' => { token( KW_List ); }; - 'vector' => { token( KW_Vector ); }; - 'accum' => { token( KW_Accum ); }; - 'parser' => { token( KW_Accum ); }; - 'return' => { token( KW_Return ); }; - 'break' => { token( KW_Break ); }; - 'yield' => { token( KW_Yield ); }; - 'typeid' => { token( KW_TypeId ); }; - 'make_token' => { token( KW_MakeToken ); }; - 'make_tree' => { token( KW_MakeTree ); }; - 'reducefirst' => { token( KW_ReduceFirst ); }; - 'for' => { token( KW_For ); }; - 'in' => { token( KW_In ); }; - 'nil' => { token( KW_Nil ); }; - 'true' => { token( KW_True ); }; - 'false' => { token( KW_False ); }; - 'parse' => { token( KW_Parse ); }; - 'parse_stop' => { token( KW_ParseStop ); }; - 'global' => { token( KW_Global ); }; - 'export' => { token( KW_Export ); }; - 'ptr' => { token( KW_Ptr ); }; - 'ref' => { token( KW_Ref ); }; - 'deref' => { token( KW_Deref ); }; - 'require' => { token( KW_Require ); }; - 'preeof' => { token( KW_Preeof ); }; - 'left' => { token( KW_Left ); }; - 'right' => { token( KW_Right ); }; - 'nonassoc' => { 
token( KW_Nonassoc ); }; - 'prec' => { token( KW_Prec ); }; - 'include' => { token( KW_Include ); }; - 'context' => { token( KW_Context ); }; - 'alias' => { token( KW_Alias ); }; - 'send' => { token( KW_Send ); }; - 'ni' => { token( KW_Ni ); }; - 'ci' => { token( KW_Ci ); }; - - # Identifiers. - ident => { token( TK_Word, ts, te ); } ; - - number => { token( TK_Number, ts, te ); }; - - '/' => { - token( '/' ); - if ( parser->enterRl ) - fcall regular_type; - }; - - "~" [^\n]* NL => { - token( '"' ); - token( TK_LitPat, ts+1, te ); - token( '"' ); - }; - - "'" ([^'\\\n] | '\\' (any | NL))* ( "'" | NL ) => { - token( TK_Literal, ts, te ); - }; - - '"' => { - token( '"' ); - litBuf.clear(); - fcall literal_pattern; - }; - '[' => { - token( '[' ); - fcall main; - }; - - ']' => { - token( ']' ); - if ( top > 0 ) - fret; - }; - - # Ignore. - pound_comment => { updateCol(); }; - - '=>' => { token( TK_DoubleArrow ); }; - '==' => { token( TK_DoubleEql ); }; - '!=' => { token( TK_NotEql ); }; - '::' => { token( TK_DoubleColon ); }; - '<=' => { token( TK_LessEql ); }; - '>=' => { token( TK_GrtrEql ); }; - '->' => { token( TK_RightArrow ); }; - '&&' => { token( TK_AmpAmp ); }; - '||' => { token( TK_BarBar ); }; - '<<' => { token( TK_LtLt ); }; - - ('+' | '-' | '*' | '/' | '(' | ')' | '@' | '$' | '^' ) => { token( *ts ); }; - - - # Whitespace other than newline. - [ \t\r]+ => { updateCol(); }; - NL => { updateCol(); }; - - # Consume eof. - EOF; - - any => { token( *ts ); } ; - *|; -}%% - -%% write data; - -void ColmScanner::scan() -{ - int bufsize = 8; - char *buf = new char[bufsize]; - const char last_char = 0; - int cs, act, have = 0; - int top, stack[32]; - bool execute = true; - - sectionParseInit(); - %% write init; - - while ( execute ) { - char *p = buf + have; - int space = bufsize - have; - - if ( space == 0 ) { - /* We filled up the buffer trying to scan a token. Grow it. */ - bufsize = bufsize * 2; - char *newbuf = new char[bufsize]; - - /* Recompute p and space. 
*/ - p = newbuf + have; - space = bufsize - have; - - /* Patch up pointers possibly in use. */ - if ( ts != 0 ) - ts = newbuf + ( ts - buf ); - te = newbuf + ( te - buf ); - - /* Copy the new buffer in. */ - memcpy( newbuf, buf, have ); - delete[] buf; - buf = newbuf; - } - - input.read( p, space ); - int len = input.gcount(); - - /* If we see eof then append the EOF char. */ - if ( len == 0 ) { - p[0] = last_char, len = 1; - execute = false; - } - - char *pe = p + len; - char *eof = 0; - %% write exec; - - /* Check if we failed. */ - if ( cs == rlscan_error ) { - /* Machine failed before finding a token. I'm not yet sure if this - * is reachable. */ - scan_error() << "colm scanner error (metalanguage)" << endl; - exit(1); - } - - /* Decide if we need to preserve anything. */ - char *preserve = ts; - - /* Now set up the prefix. */ - if ( preserve == 0 ) - have = 0; - else { - /* There is data that needs to be shifted over. */ - have = pe - preserve; - memmove( buf, preserve, have ); - unsigned int shiftback = preserve - buf; - if ( ts != 0 ) - ts -= shiftback; - te -= shiftback; - - preserve = buf; - } - } - delete[] buf; -} - -void ColmScanner::eof() -{ - InputLoc loc; - loc.fileName = "<EOF>"; - loc.line = line; - loc.col = 1; - parser->token( loc, ColmParser_tk_eof, 0, 0 ); -} diff --git a/colm/main.cc b/colm/main.cc deleted file mode 100644 index 435bb697..00000000 --- a/colm/main.cc +++ /dev/null @@ -1,623 +0,0 @@ -/* - * Copyright 2006-2012 Adrian Thurston <thurston@complang.org> - */ - -/* This file is part of Colm. - * - * Colm is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * Colm is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Colm; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include <stdlib.h> -#include <string.h> -#include <stdio.h> -#include <iostream> -#include <fstream> -#include <unistd.h> -#include <sstream> -#include <sys/types.h> -#include <sys/stat.h> -#include <unistd.h> - -#include "global.h" -#include "debug.h" -#include "lmscan.h" -#include "pcheck.h" -#include "vector.h" -#include "version.h" -#include "keyops.h" -#include "parsedata.h" -#include "vector.h" -#include "version.h" -#include "fsmcodegen.h" - -using std::istream; -using std::ifstream; -using std::ostream; -using std::ios; -using std::cin; -using std::cout; -using std::cerr; -using std::endl; - -/* Graphviz dot file generation. */ -bool genGraphviz = false; - -using std::ostream; -using std::istream; -using std::ifstream; -using std::ofstream; -using std::ios; -using std::cout; -using std::cerr; -using std::cin; -using std::endl; - -/* Io globals. */ -istream *inStream = 0; -ostream *outStream = 0; -const char *inputFileName = 0; -const char *outputFileName = 0; -const char *gblExportTo = 0; -const char *gblExpImplTo = 0; -bool exportCode = false; - -bool generateGraphviz = false; -bool verbose = false; -bool logging = false; -bool branchPointInfo = false; -bool addUniqueEmptyProductions = false; -bool gblLibrary = false; - -ArgsVector includePaths; - -/* Print version information. */ -void version(); - -/* Total error count. */ -int gblErrorCount = 0; - -HostType hostTypesC[] = -{ - { "char", 0, true, CHAR_MIN, CHAR_MAX, sizeof(char) }, -}; - -HostLang hostLangC = { hostTypesC, 8, hostTypesC+0, true }; - -HostLang *hostLang = &hostLangC; -HostLangType hostLangType = CCode; - -/* Print the opening to an error in the input, then return the error ostream. 
*/ -ostream &error( const InputLoc &loc ) -{ - /* Keep the error count. */ - gblErrorCount += 1; - - cerr << "error: " << inputFileName << ":" << - loc.line << ":" << loc.col << ": "; - return cerr; -} - -/* Print the opening to a program error, then return the error stream. */ -ostream &error() -{ - gblErrorCount += 1; - cerr << "error: " PROGNAME ": "; - return cerr; -} - - -/* Print the opening to a warning, then return the error ostream. */ -ostream &warning( ) -{ - cerr << "warning: " << inputFileName << ": "; - return cerr; -} - -/* Print the opening to a warning in the input, then return the error ostream. */ -ostream &warning( const InputLoc &loc ) -{ - assert( inputFileName != 0 ); - cerr << "warning: " << inputFileName << ":" << - loc.line << ":" << loc.col << ": "; - return cerr; -} - -void escapeLineDirectivePath( std::ostream &out, char *path ) -{ - for ( char *pc = path; *pc != 0; pc++ ) { - if ( *pc == '\\' ) - out << "\\\\"; - else - out << *pc; - } -} - -void escapeLineDirectivePath( std::ostream &out, char *path ); -void scan( char *fileName, istream &input ); - -bool printStatistics = false; - -/* Print a summary of the options. */ -void usage() -{ - cout << -"usage: colm [options] file\n" -"general:\n" -" -h, -H, -?, --help print this usage and exit\n" -" -v --version print version information and exit\n" -" -o <file> write output to <file>\n" -" -i show conflict information\n" -" -d make colm verbose\n" -" -l compile logging into the output executable\n" - ; -} - -/* Print version information. */ -void version() -{ - cout << "Colm version " VERSION << " " PUBDATE << endl << - "Copyright (c) 2007-2012 by Adrian D. Thurston" << endl; -} - -/* Scans a string looking for the file extension. If there is a file - * extension then pointer returned points to inside the string - * passed in. Otherwise returns null. 
*/ -const char *findFileExtension( const char *stemFile ) -{ - const char *ppos = stemFile + strlen(stemFile) - 1; - - /* Scan backwards from the end looking for the first dot. - * If we encounter a '/' before the first dot, then stop the scan. */ - while ( 1 ) { - /* If we found a dot or got to the beginning of the string then - * we are done. */ - if ( ppos == stemFile || *ppos == '.' ) - break; - - /* If we hit a / then there is no extension. Done. */ - if ( *ppos == '/' ) { - ppos = stemFile; - break; - } - ppos--; - } - - /* If we got to the front of the string then bail we - * did not find an extension */ - if ( ppos == stemFile ) - ppos = 0; - - return ppos; -} - -/* Make a file name from a stem. Removes the old filename suffix and - * replaces it with a new one. Returns a newed up string. */ -char *fileNameFromStem( const char *stemFile, const char *suffix ) -{ - int len = strlen( stemFile ); - assert( len > 0 ); - - /* Get the extension. */ - const char *ppos = findFileExtension( stemFile ); - - /* If an extension was found, then shorten what we think the len is. */ - if ( ppos != 0 ) - len = ppos - stemFile; - - /* Make the return string from the stem and the suffix. */ - char *retVal = new char[ len + strlen( suffix ) + 1 ]; - strncpy( retVal, stemFile, len ); - strcpy( retVal + len, suffix ); - - return retVal; -} - - -/* Invoked by the parser when the root element is opened. */ -void openOutput( ) -{ - /* If the output format is code and no output file name is given, then - * make a default. 
*/ - if ( outputFileName == 0 ) { - const char *ext = findFileExtension( inputFileName ); - if ( ext != 0 && strcmp( ext, ".rh" ) == 0 ) - outputFileName = fileNameFromStem( inputFileName, ".h" ); - else { - const char *defExtension = ".c"; - outputFileName = fileNameFromStem( inputFileName, defExtension ); - } - } - - if ( colm_log_compile ) { - cerr << "opening output file: " << outputFileName << endl; - } - - /* Make sure we are not writing to the same file as the input file. */ - if ( outputFileName != 0 && strcmp( inputFileName, outputFileName ) == 0 ) { - error() << "output file \"" << outputFileName << - "\" is the same as the input file" << endl; - } - - if ( outputFileName != 0 ) { - /* Open the output stream, attaching it to the filter. */ - ofstream *outFStream = new ofstream( outputFileName ); - - if ( !outFStream->is_open() ) { - error() << "error opening " << outputFileName << " for writing" << endl; - exit(1); - } - - outStream = outFStream; - } - else { - /* Writing out ot std out. */ - outStream = &cout; - } -} - -void openExports( ) -{ - /* Make sure we are not writing to the same file as the input file. */ - if ( gblExportTo != 0 && strcmp( inputFileName, gblExportTo ) == 0 ) { - error() << "output file \"" << gblExportTo << - "\" is the same as the input file" << endl; - } - - if ( gblExportTo != 0 ) { - /* Open the output stream, attaching it to the filter. */ - ofstream *outFStream = new ofstream( gblExportTo ); - - if ( !outFStream->is_open() ) { - error() << "error opening " << outputFileName << " for writing" << endl; - exit(1); - } - - outStream = outFStream; - } - else { - /* Writing out ot std out. */ - outStream = &cout; - } -} - -void openExportsImpl( ) -{ - /* Make sure we are not writing to the same file as the input file. 
*/ - if ( gblExpImplTo != 0 && strcmp( inputFileName, gblExpImplTo ) == 0 ) { - error() << "output file \"" << gblExpImplTo << - "\" is the same as the input file" << endl; - } - - if ( gblExpImplTo != 0 ) { - /* Open the output stream, attaching it to the filter. */ - ofstream *outFStream = new ofstream( gblExpImplTo ); - - if ( !outFStream->is_open() ) { - error() << "error opening " << outputFileName << " for writing" << endl; - exit(1); - } - - outStream = outFStream; - } - else { - /* Writing out ot std out. */ - outStream = &cout; - } -} - -void compileOutputCommand( const char *command ) -{ - if ( colm_log_compile ) - cout << "compiling with: " << command << endl; - int res = system( command ); - if ( res != 0 ) - cout << "there was a problem compiling the output" << endl; -} - -void compileOutputInstalled( const char *argv0 ) -{ - /* Find the location of the colm program that is executing. */ - char *location = strdup( argv0 ); - char *last = location + strlen(location) - 1; - while ( true ) { - if ( last == location ) { - last[0] = '.'; - last[1] = 0; - break; - } - if ( *last == '/' ) { - last[0] = 0; - break; - } - last -= 1; - } - - char *exec = fileNameFromStem( outputFileName, ".bin" ); - - int length = 1024 + 3*strlen(location) + strlen(outputFileName) + strlen(exec); - char command[length]; - sprintf( command, - "gcc -Wall -Wwrite-strings" - " -I" PREFIX "/include" - " -g" - " -o %s" - " %s" - " -L" PREFIX "/lib" - " -lcolm%c", - exec, outputFileName, logging ? 'd' : 'p' ); - - compileOutputCommand( command ); -} - -void compileOutputInSource( const char *argv0 ) -{ - /* Find the location of the colm program that is executing. 
*/ - char *location = strdup( argv0 ); - char *last = strrchr( location, '/' ); - assert( last != 0 ); - last[1] = 0; - - char *exec = fileNameFromStem( outputFileName, ".bin" ); - - int length = 1024 + 3*strlen(location) + strlen(outputFileName) + strlen(exec); - char command[length]; - sprintf( command, - "gcc -Wall -Wwrite-strings" - " -I%s.." - " -I%s../aapl" - " -g" - " -o %s" - " %s" - " -L%s" - " -lcolm%c", - location, location, - exec, outputFileName, location, logging ? 'd' : 'p' ); - - compileOutputCommand( command ); -} - -bool inSourceTree( const char *argv0 ) -{ - const char *lastSlash = strrchr( argv0, '/' ); - if ( lastSlash != 0 ) { - int rootLen = lastSlash - argv0 + 1; - char *mainPath = new char[rootLen + 16]; - memcpy( mainPath, argv0, rootLen ); - strcpy( mainPath + rootLen, "main.cc" ); - - struct stat sb; - int res = stat( mainPath, &sb ); - delete[] mainPath; - - if ( res == 0 && S_ISREG( sb.st_mode ) ) - return true; - } - - return false; -} - -void processArgs( int argc, const char **argv ) -{ - ParamCheck pc( "D:e:c:LI:vdlio:S:M:vHh?-:sV", argc, argv ); - - while ( pc.check() ) { - switch ( pc.state ) { - case ParamCheck::match: - switch ( pc.parameter ) { - case 'I': - includePaths.append( pc.parameterArg ); - break; - case 'v': - version(); - exit(0); - break; - case 'd': - verbose = true; - break; - case 'l': - logging = true; - break; - case 'i': - branchPointInfo = true; - break; - /* Output. */ - case 'o': - if ( *pc.parameterArg == 0 ) - error() << "a zero length output file name was given" << endl; - else if ( outputFileName != 0 ) - error() << "more than one output file name was given" << endl; - else { - /* Ok, remember the output file name. 
*/ - outputFileName = pc.parameterArg; - } - break; - - case 'H': case 'h': case '?': - usage(); - exit(0); - case 's': - printStatistics = true; - break; - case 'V': - generateGraphviz = true; - break; - case '-': - if ( strcasecmp(pc.parameterArg, "help") == 0 ) { - usage(); - exit(0); - } - else if ( strcasecmp(pc.parameterArg, "version") == 0 ) { - version(); - exit(0); - } - else { - error() << "--" << pc.parameterArg << - " is an invalid argument" << endl; - } - break; - case 'L': - gblLibrary = true; - break; - case 'e': - gblExportTo = pc.parameterArg; - break; - case 'c': - gblExpImplTo = pc.parameterArg; - break; - case 'D': -#if DEBUG - if ( strcmp( pc.parameterArg, "BYTECODE" ) == 0 ) - colmActiveRealm |= REALM_BYTECODE; - else if ( strcmp( pc.parameterArg, "PARSE" ) == 0 ) - colmActiveRealm |= REALM_PARSE; - else if ( strcmp( pc.parameterArg, "MATCH" ) == 0 ) - colmActiveRealm |= REALM_MATCH; - else if ( strcmp( pc.parameterArg, "COMPILE" ) == 0 ) - colmActiveRealm |= REALM_COMPILE; - else if ( strcmp( pc.parameterArg, "POOL" ) == 0 ) - colmActiveRealm |= REALM_POOL; - else if ( strcmp( pc.parameterArg, "PRINT" ) == 0 ) - colmActiveRealm |= REALM_PRINT; - else if ( strcmp( pc.parameterArg, "INPUT" ) == 0 ) - colmActiveRealm |= REALM_INPUT; - else if ( strcmp( pc.parameterArg, "SCAN" ) == 0 ) - colmActiveRealm |= REALM_SCAN; - else - fatal( "unknown argument to -D %s\n", pc.parameterArg ); -#else - fatal("-D option specified but debugging messsages not compiled in"); -#endif - - } - break; - - case ParamCheck::invalid: - error() << "-" << pc.parameter << " is an invalid argument" << endl; - break; - - case ParamCheck::noparam: - /* It is interpreted as an input file. */ - if ( *pc.curArg == 0 ) - error() << "a zero length input file name was given" << endl; - else if ( inputFileName != 0 ) - error() << "more than one input file name was given" << endl; - else { - /* OK, Remember the filename. 
*/ - inputFileName = pc.curArg; - } - break; - } - } -} - -/* Main, process args and call yyparse to start scanning input. */ -int main(int argc, const char **argv) -{ - processArgs( argc, argv ); - - if ( verbose ) { - colm_log_bytecode = 1; - colm_log_parse = 1; - colm_log_match = 1; - colm_log_compile = 1; - colm_log_conds = 1; - colmActiveRealm = 0xffffffff; - } - initInputFuncs(); - - /* Bail on above errors. */ - if ( gblErrorCount > 0 ) - exit(1); - - /* Make sure we are not writing to the same file as the input file. */ - if ( inputFileName != 0 && outputFileName != 0 && - strcmp( inputFileName, outputFileName ) == 0 ) - { - error() << "output file \"" << outputFileName << - "\" is the same as the input file" << endl; - } - - /* Open the input file for reading. */ - istream *inStream; - if ( inputFileName != 0 ) { - /* Open the input file for reading. */ - ifstream *inFile = new ifstream( inputFileName ); - inStream = inFile; - if ( ! inFile->is_open() ) - error() << "could not open " << inputFileName << " for reading" << endl; - } - else { - inputFileName = "<stdin>"; - inStream = &cin; - } - - /* Bail on above errors. */ - if ( gblErrorCount > 0 ) - exit(1); - - Compiler *pd = new Compiler( inputFileName, "machine", InputLoc(), std::cout ); - ColmParser *parser = new ColmParser( pd, inputFileName, "machine", InputLoc() ); - ColmScanner *scanner = new ColmScanner( inputFileName, *inStream, cout, parser, 0 ); - - parser->init(); - scanner->scan(); - scanner->eof(); - - /* Parsing complete, check for errors.. */ - if ( gblErrorCount > 0 ) - return 1; - - /* Initiate a compile following a parse. */ - pd->compile(); - - /* - * Write output. 
- */ - if ( generateGraphviz ) { - outStream = &cout; - pd->writeDotFile(); - } - else { - openOutput(); - pd->generateOutput(); - - if ( outStream != 0 ) - delete outStream; - - if ( !gblLibrary ) { - if ( inSourceTree( argv[0] ) ) - compileOutputInSource( argv[0] ); - else - compileOutputInstalled( argv[0] ); - } - - if ( gblExportTo != 0 ) { - openExports(); - pd->generateExports(); - delete outStream; - } - if ( gblExpImplTo != 0 ) { - openExportsImpl(); - scanner->parser->pd->generateExportsImpl(); - delete outStream; - } - } - - delete scanner; - delete parser; - delete pd; - - return 0; -} diff --git a/colm/map.c b/colm/map.c deleted file mode 100644 index 4609db58..00000000 --- a/colm/map.c +++ /dev/null @@ -1,763 +0,0 @@ -/* - * Copyright 2010-2012 Adrian Thurston <thurston@complang.org> - */ - -/* This file is part of Colm. - * - * Colm is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * Colm is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Colm; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include <assert.h> -#include <colm/pdarun.h> -#include <colm/map.h> -#include <colm/pool.h> - -#define true 1 -#define false 0 - -void mapListAbandon( Map *map ) -{ - map->head = map->tail = 0; -} - -void mapListAddBefore( Map *map, MapEl *next_el, MapEl *new_el ) -{ - /* Set the next pointer of the new element to next_el. We do - * this regardless of the state of the list. */ - new_el->next = next_el; - - /* Set reverse pointers. 
*/ - if ( next_el == 0 ) { - /* There is no next elememnt. We are inserting at the tail. */ - new_el->prev = map->tail; - map->tail = new_el; - } - else { - /* There is a next element and we can access next's previous. */ - new_el->prev = next_el->prev; - next_el->prev = new_el; - } - - /* Set forward pointers. */ - if ( new_el->prev == 0 ) { - /* There is no previous element. Set the head pointer.*/ - map->head = new_el; - } - else { - /* There is a previous element, set it's next pointer to new_el. */ - new_el->prev->next = new_el; - } -} - -void mapListAddAfter( Map *map, MapEl *prev_el, MapEl *new_el ) -{ - /* Set the previous pointer of new_el to prev_el. We do - * this regardless of the state of the list. */ - new_el->prev = prev_el; - - /* Set forward pointers. */ - if (prev_el == 0) { - /* There was no prev_el, we are inserting at the head. */ - new_el->next = map->head; - map->head = new_el; - } - else { - /* There was a prev_el, we can access previous next. */ - new_el->next = prev_el->next; - prev_el->next = new_el; - } - - /* Set reverse pointers. */ - if (new_el->next == 0) { - /* There is no next element. Set the tail pointer. */ - map->tail = new_el; - } - else { - /* There is a next element. Set it's prev pointer. */ - new_el->next->prev = new_el; - } -} - - -MapEl *mapListDetach( Map *map, MapEl *el ) -{ - /* Set forward pointers to skip over el. */ - if ( el->prev == 0 ) - map->head = el->next; - else - el->prev->next = el->next; - - /* Set reverse pointers to skip over el. */ - if ( el->next == 0 ) - map->tail = el->prev; - else - el->next->prev = el->prev; - - /* Update List length and return element we detached. */ - return el; -} - - -/* Once an insertion position is found, attach a element to the tree. */ -void mapAttachRebal( Map *map, MapEl *element, MapEl *parentEl, MapEl *lastLess ) -{ - /* Increment the number of element in the tree. */ - map->treeSize += 1; - - /* Set element's parent. 
*/ - element->parent = parentEl; - - /* New element always starts as a leaf with height 1. */ - element->left = 0; - element->right = 0; - element->height = 1; - - /* Are we inserting in the tree somewhere? */ - if ( parentEl != 0 ) { - /* We have a parent so we are somewhere in the tree. If the parent - * equals lastLess, then the last traversal in the insertion went - * left, otherwise it went right. */ - if ( lastLess == parentEl ) { - parentEl->left = element; - - mapListAddBefore( map, parentEl, element ); - } - else { - parentEl->right = element; - - mapListAddAfter( map, parentEl, element ); - } - } - else { - /* No parent element so we are inserting the root. */ - map->root = element; - - mapListAddAfter( map, map->tail, element ); - } - - /* Recalculate the heights. */ - mapRecalcHeights( map, parentEl ); - - /* Find the first unbalance. */ - MapEl *ub = mapFindFirstUnbalGP( map, element ); - - /* rebalance. */ - if ( ub != 0 ) - { - /* We assert that after this single rotation the - * tree is now properly balanced. */ - mapRebalance( map, ub ); - } -} - -#if 0 -/* Recursively delete all the children of a element. */ -void mapDeleteChildrenOf( Map *map, MapEl *element ) -{ - /* Recurse left. */ - if ( element->left ) { - mapDeleteChildrenOf( map, element->left ); - - /* Delete left element. */ - delete element->left; - element->left = 0; - } - - /* Recurse right. */ - if ( element->right ) { - mapDeleteChildrenOf( map, element->right ); - - /* Delete right element. */ - delete element->right; - element->left = 0; - } -} - -void mapEmpty( Map *map ) -{ - if ( map->root ) { - /* Recursively delete from the tree structure. */ - mapDeleteChildrenOf( map, map->root ); - delete map->root; - map->root = 0; - map->treeSize = 0; - - mapListAbandon( map ); - } -} -#endif - -/* rebalance from a element whose gradparent is unbalanced. Only - * call on a element that has a grandparent. 
*/ -MapEl *mapRebalance( Map *map, MapEl *n ) -{ - long lheight, rheight; - MapEl *a, *b, *c; - MapEl *t1, *t2, *t3, *t4; - - MapEl *p = n->parent; /* parent (Non-NUL). L*/ - MapEl *gp = p->parent; /* Grand-parent (Non-NULL). */ - MapEl *ggp = gp->parent; /* Great grand-parent (may be NULL). */ - - if (gp->right == p) - { - /* gp - * * p - p - */ - if (p->right == n) - { - /* gp - * * p - p - * * n - n - */ - a = gp; - b = p; - c = n; - t1 = gp->left; - t2 = p->left; - t3 = n->left; - t4 = n->right; - } - else - { - /* gp - * * p - p - * / - * n - */ - a = gp; - b = n; - c = p; - t1 = gp->left; - t2 = n->left; - t3 = n->right; - t4 = p->right; - } - } - else - { - /* gp - * / - * p - */ - if (p->right == n) - { - /* gp - * / - * p - * * n - n - */ - a = p; - b = n; - c = gp; - t1 = p->left; - t2 = n->left; - t3 = n->right; - t4 = gp->right; - } - else - { - /* gp - * / - * p - * / - * n - */ - a = n; - b = p; - c = gp; - t1 = n->left; - t2 = n->right; - t3 = p->right; - t4 = gp->right; - } - } - - /* Perform rotation. - */ - - /* Tie b to the great grandparent. */ - if ( ggp == 0 ) - map->root = b; - else if ( ggp->left == gp ) - ggp->left = b; - else - ggp->right = b; - b->parent = ggp; - - /* Tie a as a leftchild of b. */ - b->left = a; - a->parent = b; - - /* Tie c as a rightchild of b. */ - b->right = c; - c->parent = b; - - /* Tie t1 as a leftchild of a. */ - a->left = t1; - if ( t1 != 0 ) t1->parent = a; - - /* Tie t2 as a rightchild of a. */ - a->right = t2; - if ( t2 != 0 ) t2->parent = a; - - /* Tie t3 as a leftchild of c. */ - c->left = t3; - if ( t3 != 0 ) t3->parent = c; - - /* Tie t4 as a rightchild of c. */ - c->right = t4; - if ( t4 != 0 ) t4->parent = c; - - /* The heights are all recalculated manualy and the great - * grand-parent is passed to recalcHeights() to ensure - * the heights are correct up the tree. - * - * Note that recalcHeights() cuts out when it comes across - * a height that hasn't changed. - */ - - /* Fix height of a. 
*/ - lheight = a->left ? a->left->height : 0; - rheight = a->right ? a->right->height : 0; - a->height = (lheight > rheight ? lheight : rheight) + 1; - - /* Fix height of c. */ - lheight = c->left ? c->left->height : 0; - rheight = c->right ? c->right->height : 0; - c->height = (lheight > rheight ? lheight : rheight) + 1; - - /* Fix height of b. */ - lheight = a->height; - rheight = c->height; - b->height = (lheight > rheight ? lheight : rheight) + 1; - - /* Fix height of b's parents. */ - mapRecalcHeights( map, ggp ); - return ggp; -} - -/* Recalculates the heights of all the ancestors of element. */ -void mapRecalcHeights( Map *map, MapEl *element ) -{ - while ( element != 0 ) - { - long lheight = element->left ? element->left->height : 0; - long rheight = element->right ? element->right->height : 0; - - long new_height = (lheight > rheight ? lheight : rheight) + 1; - - /* If there is no chage in the height, then there will be no - * change in any of the ancestor's height. We can stop going up. - * If there was a change, continue upward. */ - if (new_height == element->height) - return; - else - element->height = new_height; - - element = element->parent; - } -} - -/* Finds the first element whose grandparent is unbalanced. */ -MapEl *mapFindFirstUnbalGP( Map *map, MapEl *element ) -{ - long lheight, rheight, balanceProp; - MapEl *gp; - - if ( element == 0 || element->parent == 0 || - element->parent->parent == 0 ) - return 0; - - /* Don't do anything if we we have no grandparent. */ - gp = element->parent->parent; - while ( gp != 0 ) - { - lheight = gp->left ? gp->left->height : 0; - rheight = gp->right ? gp->right->height : 0; - balanceProp = lheight - rheight; - - if ( balanceProp < -1 || balanceProp > 1 ) - return element; - - element = element->parent; - gp = gp->parent; - } - return 0; -} - - - -/* Finds the first element that is unbalanced. 
*/ -MapEl *mapFindFirstUnbalEl( Map *map, MapEl *element ) -{ - if ( element == 0 ) - return 0; - - while ( element != 0 ) - { - long lheight = element->left ? - element->left->height : 0; - long rheight = element->right ? - element->right->height : 0; - long balanceProp = lheight - rheight; - - if ( balanceProp < -1 || balanceProp > 1 ) - return element; - - element = element->parent; - } - return 0; -} - -/* Replace a element in the tree with another element not in the tree. */ -void mapReplaceEl( Map *map, MapEl *element, MapEl *replacement ) -{ - MapEl *parent = element->parent, - *left = element->left, - *right = element->right; - - replacement->left = left; - if (left) - left->parent = replacement; - replacement->right = right; - if (right) - right->parent = replacement; - - replacement->parent = parent; - if (parent) - { - if (parent->left == element) - parent->left = replacement; - else - parent->right = replacement; - } - else { - map->root = replacement; - } - - replacement->height = element->height; -} - - -/* Removes a element from a tree and puts filler in it's place. - * Filler should be null or a child of element. */ -void mapRemoveEl( Map *map, MapEl *element, MapEl *filler ) -{ - MapEl *parent = element->parent; - - if ( parent ) - { - if ( parent->left == element ) - parent->left = filler; - else - parent->right = filler; - } - else { - map->root = filler; - } - - if ( filler ) - filler->parent = parent; - - return; -} - -/* Recursive worker for tree copying. */ -MapEl *mapCopyBranch( Program *prg, Map *map, MapEl *el, Kid *oldNextDown, Kid **newNextDown ) -{ - /* Duplicate element. Either the base element's copy constructor or defaul - * constructor will get called. Both will suffice for initting the - * pointers to null when they need to be. */ - MapEl *newEl = mapElAllocate( prg ); - - if ( (Kid*)el == oldNextDown ) - *newNextDown = (Kid*)newEl; - - /* If the left tree is there, copy it. 
*/ - if ( newEl->left ) { - newEl->left = mapCopyBranch( prg, map, newEl->left, oldNextDown, newNextDown ); - newEl->left->parent = newEl; - } - - mapListAddAfter( map, map->tail, newEl ); - - /* If the right tree is there, copy it. */ - if ( newEl->right ) { - newEl->right = mapCopyBranch( prg, map, newEl->right, oldNextDown, newNextDown ); - newEl->right->parent = newEl; - } - - return newEl; -} - -MapEl *mapInsertEl( Program *prg, Map *map, MapEl *element, MapEl **lastFound ) -{ - long keyRelation; - MapEl *curEl = map->root, *parentEl = 0; - MapEl *lastLess = 0; - - while ( true ) { - if ( curEl == 0 ) { - /* We are at an external element and did not find the key we were - * looking for. Attach underneath the leaf and rebalance. */ - mapAttachRebal( map, element, parentEl, lastLess ); - - if ( lastFound != 0 ) - *lastFound = element; - return element; - } - - keyRelation = cmpTree( prg, - element->key, curEl->key ); - - /* Do we go left? */ - if ( keyRelation < 0 ) { - parentEl = lastLess = curEl; - curEl = curEl->left; - } - /* Do we go right? */ - else if ( keyRelation > 0 ) { - parentEl = curEl; - curEl = curEl->right; - } - /* We have hit the target. */ - else { - if ( lastFound != 0 ) - *lastFound = curEl; - return 0; - } - } -} - -MapEl *mapInsertKey( Program *prg, Map *map, Tree *key, MapEl **lastFound ) -{ - long keyRelation; - MapEl *curEl = map->root, *parentEl = 0; - MapEl *lastLess = 0; - - while ( true ) { - if ( curEl == 0 ) { - /* We are at an external element and did not find the key we were - * looking for. Create the new element, attach it underneath the leaf - * and rebalance. */ - MapEl *element = mapElAllocate( prg ); - element->key = key; - element->tree = 0; - mapAttachRebal( map, element, parentEl, lastLess ); - - if ( lastFound != 0 ) - *lastFound = element; - return element; - } - - keyRelation = cmpTree( prg, key, curEl->key ); - - /* Do we go left? 
*/ - if ( keyRelation < 0 ) { - parentEl = lastLess = curEl; - curEl = curEl->left; - } - /* Do we go right? */ - else if ( keyRelation > 0 ) { - parentEl = curEl; - curEl = curEl->right; - } - /* We have hit the target. */ - else { - if ( lastFound != 0 ) - *lastFound = curEl; - return 0; - } - } -} - - -/** - * \brief Find a element in the tree with the given key. - * - * \returns The element if key exists, null if the key does not exist. - */ -MapEl *mapImplFind( Program *prg, Map *map, Tree *key ) -{ - MapEl *curEl = map->root; - long keyRelation; - - while ( curEl != 0 ) { - keyRelation = cmpTree( prg, key, curEl->key ); - - /* Do we go left? */ - if ( keyRelation < 0 ) - curEl = curEl->left; - /* Do we go right? */ - else if ( keyRelation > 0 ) - curEl = curEl->right; - /* We have hit the target. */ - else { - return curEl; - } - } - return 0; -} - - -/** - * \brief Find a element, then detach it from the tree. - * - * The element is not deleted. - * - * \returns The element detached if the key is found, othewise returns null. - */ -MapEl *mapDetachByKey( Program *prg, Map *map, Tree *key ) -{ - MapEl *element = mapImplFind( prg, map, key ); - if ( element ) - mapDetach( prg, map, element ); - - return element; -} - -/** - * \brief Detach a element from the tree. - * - * If the element is not in the tree then undefined behaviour results. - * - * \returns The element given. - */ -MapEl *mapDetach( Program *prg, Map *map, MapEl *element ) -{ - MapEl *replacement, *fixfrom; - long lheight, rheight; - - /* Remove the element from the ordered list. */ - mapListDetach( map, element ); - - /* Update treeSize. */ - map->treeSize--; - - /* Find a replacement element. */ - if (element->right) - { - /* Find the leftmost element of the right subtree. 
*/ - replacement = element->right; - while (replacement->left) - replacement = replacement->left; - - /* If replacing the element the with its child then we need to start - * fixing at the replacement, otherwise we start fixing at the - * parent of the replacement. */ - if (replacement->parent == element) - fixfrom = replacement; - else - fixfrom = replacement->parent; - - mapRemoveEl( map, replacement, replacement->right ); - mapReplaceEl( map, element, replacement ); - } - else if (element->left) - { - /* Find the rightmost element of the left subtree. */ - replacement = element->left; - while (replacement->right) - replacement = replacement->right; - - /* If replacing the element the with its child then we need to start - * fixing at the replacement, otherwise we start fixing at the - * parent of the replacement. */ - if (replacement->parent == element) - fixfrom = replacement; - else - fixfrom = replacement->parent; - - mapRemoveEl( map, replacement, replacement->left ); - mapReplaceEl( map, element, replacement ); - } - else - { - /* We need to start fixing at the parent of the element. */ - fixfrom = element->parent; - - /* The element we are deleting is a leaf element. */ - mapRemoveEl( map, element, 0 ); - } - - /* If fixfrom is null it means we just deleted - * the root of the tree. */ - if ( fixfrom == 0 ) - return element; - - /* Fix the heights after the deletion. */ - mapRecalcHeights( map, fixfrom ); - - /* Fix every unbalanced element going up in the tree. */ - MapEl *ub = mapFindFirstUnbalEl( map, fixfrom ); - while ( ub ) - { - /* Find the element to rebalance by moving down from the first unbalanced - * element 2 levels in the direction of the greatest heights. On the - * second move down, the heights may be equal ( but not on the first ). - * In which case go in the direction of the first move. */ - lheight = ub->left ? ub->left->height : 0; - rheight = ub->right ? 
ub->right->height : 0; - assert( lheight != rheight ); - if (rheight > lheight) - { - ub = ub->right; - lheight = ub->left ? - ub->left->height : 0; - rheight = ub->right ? - ub->right->height : 0; - if (rheight > lheight) - ub = ub->right; - else if (rheight < lheight) - ub = ub->left; - else - ub = ub->right; - } - else - { - ub = ub->left; - lheight = ub->left ? - ub->left->height : 0; - rheight = ub->right ? - ub->right->height : 0; - if (rheight > lheight) - ub = ub->right; - else if (rheight < lheight) - ub = ub->left; - else - ub = ub->left; - } - - - /* rebalance returns the grandparant of the subtree formed - * by the element that were rebalanced. - * We must continue upward from there rebalancing. */ - fixfrom = mapRebalance( map, ub ); - - /* Find the next unbalaced element. */ - ub = mapFindFirstUnbalEl( map, fixfrom ); - } - - return element; -} - - - diff --git a/colm/map.cc b/colm/map.cc deleted file mode 100644 index 52dd2697..00000000 --- a/colm/map.cc +++ /dev/null @@ -1,26 +0,0 @@ -/* - * Copyright 2008-2012 Adrian Thurston <thurston@complang.org> - */ - -/* This file is part of Colm. - * - * Colm is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * Colm is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with Colm; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include "pdarun.h" -#include <assert.h> - - - diff --git a/colm/map.h b/colm/map.h deleted file mode 100644 index 993ca86e..00000000 --- a/colm/map.h +++ /dev/null @@ -1,108 +0,0 @@ -/* - * Copyright 2010-2012 Adrian Thurston <thurston@complang.org> - */ - -/* This file is part of Colm. - * - * Colm is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * Colm is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Colm; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#ifndef _MAP_H -#define _MAP_H - -#if defined(__cplusplus) -extern "C" { -#endif - -#include <colm/program.h> - -typedef struct _MapEl -{ - /* Must overlay Kid. */ - Tree *tree; - struct _MapEl *next; - struct _MapEl *prev; - - struct _MapEl *left, *right, *parent; - long height; - Tree *key; -} MapEl; - -typedef struct _Map -{ - /* Must overlay Tree. 
*/ - short id; - unsigned short flags; - long refs; - MapEl *head; - - MapEl *tail; - MapEl *root; - long treeSize; - GenericInfo *genericInfo; -} Map; - -void mapListAbandon( Map *map ); - -void mapListAddBefore( Map *map, MapEl *next_el, MapEl *new_el ); -void mapListAddAfter( Map *map, MapEl *prev_el, MapEl *new_el ); -MapEl *mapListDetach( Map *map, MapEl *el ); -void mapAttachRebal( Map *map, MapEl *element, MapEl *parentEl, MapEl *lastLess ); -void mapDeleteChildrenOf( Map *map, MapEl *element ); -void mapEmpty( Map *map ); -MapEl *mapRebalance( Map *map, MapEl *n ); -void mapRecalcHeights( Map *map, MapEl *element ); -MapEl *mapFindFirstUnbalGP( Map *map, MapEl *element ); -MapEl *mapFindFirstUnbalEl( Map *map, MapEl *element ); -void mapRemoveEl( Map *map, MapEl *element, MapEl *filler ); -void mapReplaceEl( Map *map, MapEl *element, MapEl *replacement ); -MapEl *mapInsertEl( Program *prg, Map *map, MapEl *element, MapEl **lastFound ); -MapEl *mapInsertKey( Program *prg, Map *map, Tree *key, MapEl **lastFound ); -MapEl *mapImplFind( Program *prg, Map *map, Tree *key ); -MapEl *mapDetachByKey( Program *prg, Map *map, Tree *key ); -MapEl *mapDetach( Program *prg, Map *map, MapEl *element ); -MapEl *mapCopyBranch( Program *prg, Map *map, MapEl *el, Kid *oldNextDown, Kid **newNextDown ); - -long cmpTree( Program *prg, const Tree *tree1, const Tree *tree2 ); - -void mapImplRemoveEl( Program *prg, Map *map, MapEl *element ); -int mapImplRemoveKey( Program *prg, Map *map, Tree *key ); - -/* - * Iterators. 
- */ - -void initTreeIter( TreeIter *treeIter, const Ref *rootRef, int searchId, Tree **stackRoot ); -void initRevTreeIter( RevTreeIter *revTriter, const Ref *rootRef, - int searchId, Tree **stackRoot, int children ); - - -void initUserIter( UserIter *userIter, Tree **stackRoot, long argSize, long searchId ); - -Tree *mapFind( Program *prg, Map *map, Tree *key ); -long mapLength( Map *map ); -Tree *mapUnstore( Program *prg, Map *map, Tree *key, Tree *existing ); -int mapInsert( Program *prg, Map *map, Tree *key, Tree *element ); -void mapUnremove( Program *prg, Map *map, Tree *key, Tree *element ); -Tree *mapUninsert( Program *prg, Map *map, Tree *key ); -Tree *mapStore( Program *prg, Map *map, Tree *key, Tree *element ); - - -#if defined(__cplusplus) -} -#endif - -#endif - diff --git a/colm/parsedata.h b/colm/parsedata.h deleted file mode 100644 index 79ba08c1..00000000 --- a/colm/parsedata.h +++ /dev/null @@ -1,1063 +0,0 @@ -/* - * Copyright 2001-2012 Adrian Thurston <thurston@complang.org> - */ - -/* This file is part of Colm. - * - * Colm is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * Colm is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with Colm; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#ifndef _PARSEDATA_H -#define _PARSEDATA_H - -#include <iostream> -#include <limits.h> -#include "bstset.h" -#include "global.h" -#include "avlmap.h" -#include "avlset.h" -#include "bstmap.h" -#include "vector.h" -#include "dlist.h" -#include "dlistmel.h" -#include "fsmgraph.h" -#include "compare.h" -#include "vector.h" -#include "keyops.h" -#include "parsetree.h" -#include "astring.h" -#include "pdagraph.h" -#include "compare.h" -#include "pdarun.h" -#include "bytecode.h" -#include "program.h" - -using std::ostream; - -struct exit_object { }; -extern exit_object endp; -void operator<<( std::ostream &out, exit_object & ); - -/* Forwards. */ -struct RedFsm; -struct LangEl; -struct Compiler; -struct PdaCodeGen; -struct FsmCodeGen; - -#define SHIFT_CODE 0x1 -#define REDUCE_CODE 0x2 -#define SHIFT_REDUCE_CODE 0x3 - -inline long makeReduceCode( long reduction, bool isShiftReduce ) -{ - return ( isShiftReduce ? SHIFT_REDUCE_CODE : REDUCE_CODE ) | - ( reduction << 2 ); -} - -struct ProdEl; -struct ProdElList; -struct PdaLiteral; -struct Definition; - -/* A pointer to this is in PdaRun, but it's specification is not known by the - * runtime code. 
The runtime functions that access it are defined in - * ctinput.cpp and stubbed in fsmcodegen.cpp */ -struct Bindings - : public Vector<ParseTree*> -{}; - -struct DefListEl { Definition *prev, *next; }; -struct LelDefListEl { Definition *prev, *next; }; -typedef Vector< LangEl* > LangElVect; -typedef Vector< ProdEl* > FactorVect; - -typedef AvlMap<String, long, CmpStr> StringMap; -typedef AvlMapEl<String, long> StringMapEl; - -enum PredType { - PredLeft, - PredRight, - PredNonassoc, - PredNone -}; - -struct PredDecl -{ - PredDecl( TypeRef *typeRef, PredType predType, long predValue ) - : typeRef(typeRef), predType(predType), predValue(predValue) - {} - - TypeRef *typeRef; - PredType predType; - long predValue; - - PredDecl *prev, *next; -}; - -typedef DList<PredDecl> PredDeclList; - -/* Graph dictionary. */ -struct Definition -: - public DefListEl, public LelDefListEl -{ - enum Type { Production }; - - Definition( const InputLoc &loc, LangEl *prodName, ProdElList *prodElList, - bool prodCommit, CodeBlock *redBlock, int prodId, int prodNum, Type type ) - : - loc(loc), prodName(prodName), prodElList(prodElList), - prodCommit(prodCommit), redBlock(redBlock), prodId(prodId), prodNum(prodNum), - type(type), fsm(0), fsmLength(0), uniqueEmptyLeader(0), - isLeftRec(false), localFrame(0), lhsField(0), predOf(0), - collectIgnoreRegion(0) {} - - InputLoc loc; - LangEl *prodName; - ProdElList *prodElList; - bool prodCommit; - - CodeBlock *redBlock; - - int prodId; - int prodNum; - Type type; - - PdaGraph *fsm; - int fsmLength; - String data; - LongSet reducesTo; - - LangEl *uniqueEmptyLeader; - - ProdIdSet nonTermFirstSet; - AlphSet firstSet; - bool isLeftRec; - - ObjectDef *localFrame; - ObjField *lhsField; - - LangEl *predOf; - - UnsignedCharVect copy; - - TokenRegion *collectIgnoreRegion; -}; - -struct CmpDefById -{ - static int compare( Definition *d1, Definition *d2 ) - { - if ( d1->prodId < d2->prodId ) - return -1; - else if ( d1->prodId > d2->prodId ) - return 1; - 
else - return 0; - } -}; - - -/* Map dotItems to productions. */ -typedef BstMap< int, Definition*, CmpOrd<int> > DotItemIndex; -typedef BstMapEl< int, Definition*> DotItemIndexEl; - -struct DefList -: - public DListMel<Definition, DefListEl> -{}; - -/* A vector of production vectors. Each non terminal can have many productions. */ -struct LelDefList -: - public DListMel<Definition, LelDefListEl> -{}; - -/* A set of machines made during a closure round. */ -typedef Vector< PdaGraph* > Machines; - -/* List of language elements. */ -typedef DList<LangEl> LelList; - -typedef Vector< TokenDef* > TokenDefVect; - -struct UniqueType; - -typedef Vector<LangEl*> LangElVect; -typedef BstSet<LangEl*> LangElSet; - -/* A language element class. Can be a nonTerm or a term. */ -struct LangEl : public DListEl<LangEl> -{ - enum Type { Unknown, Term, NonTerm }; - - LangEl( Namespace *nspace, const String &name, Type type ); - ~LangEl(); - - /* The region the language element was defined in. */ - Namespace *nspace; - - String name; - String lit; - - String fullName; - String fullLit; - - /* For referencing the type. */ - String refName; - - /* For declaring things inside the type. */ - String declName; - - String xmlTag; - - Type type; - long id; - bool isUserTerm; - bool isContext; - String displayString; - long numAppearances; - bool commit; - bool ignore; - bool reduceFirst; - bool isLiteral; - bool isRepeat; - bool isList; - bool isOpt; - bool parseStop; - bool isEOF; - - LangEl *repeatOf; - - /* Productions from the language element if it is a non-terminal. 
*/ - LelDefList defList; - - TokenDef *tokenDef; - Definition *rootDef; - LangEl *termDup; - LangEl *eofLel; - - PdaGraph *pdaGraph; - PdaTables *pdaTables; - - PdaState *startState; - - CodeBlock *transBlock; - - ObjectDef *objectDef; - NamespaceQual *objectDefUsesQual; - String objectDefUses; - - long thisSize; - long ofiOffset; - - GenericType *generic; - - long parserId; - - PredType predType; - long predValue; - - Context *contextDef; - Context *contextIn; - bool noPreIgnore; - bool noPostIgnore; - bool isCI; - TokenRegion *ciRegion; -}; - -struct ProdEl -{ - /* Language elements a factor node can be. */ - enum Type { - LiteralType, - ReferenceType - }; - - /* Construct with a reference to a var def. */ - ProdEl( Type type, const InputLoc &loc, ObjField *captureField, bool commit, TypeRef *typeRef, int priorVal ) - : - captureField(captureField), - commit(commit), - typeRef(typeRef), - langEl(0), - priorVal(priorVal), - type(type), - objField(0) - {} - - ProdEl( const InputLoc &loc, TypeRef *typeRef ) - : - captureField(0), - commit(false), - typeRef(typeRef), - langEl(0), - priorVal(0), - type(ReferenceType), - objField(0) - {} - - ObjField *captureField; - bool commit; - - TypeRef *typeRef; - - LangEl *langEl; - int priorVal; - Type type; - ObjField *objField; - ProdEl *prev, *next; -}; - -struct ProdElList : public DList<ProdEl> -{ - PdaGraph *walk( Compiler *pd, Definition *prod ); -}; - -/* This should be renamed. It is a literal string in a type reference. */ -struct PdaLiteral -{ - PdaLiteral( const InputLoc &loc, const Token &token ) - : loc(loc), token(token), value(0) { } - - InputLoc loc; - Token token; - long value; -}; - -/* Nodes in the tree that use this action. */ -typedef Vector<NameInst*> ActionRefs; - -/* Element in list of actions. Contains the string for the code to exectute. 
*/ -struct Action -: - public DListEl<Action>, - public AvlTreeEl<Action> -{ -public: - - Action( const InputLoc &loc, const String &name, InlineList *inlineList ) - : - loc(loc), - name(name), - markType(MarkNone), - objField(0), - markId(-1), - inlineList(inlineList), - actionId(-1), - numTransRefs(0), - numToStateRefs(0), - numFromStateRefs(0), - numEofRefs(0), - numCondRefs(0), - anyCall(false), - isLmAction(false) - { - } - - Action( MarkType markType, long markId ) - : - name("mark"), - markType(markType), - objField(0), - markId(markId), - inlineList(new InlineList), - actionId(-1), - numTransRefs(0), - numToStateRefs(0), - numFromStateRefs(0), - numEofRefs(0), - numCondRefs(0), - anyCall(false), - isLmAction(false) - { - } - - /* Key for action dictionary. */ - const String &getKey() const { return name; } - - /* Data collected during parse. */ - InputLoc loc; - String name; - - MarkType markType; - ObjField *objField; - long markId; - - InlineList *inlineList; - int actionId; - - void actionName( ostream &out ) - { - if ( name != 0 ) - out << name; - else - out << loc.line << ":" << loc.col; - } - - /* Places in the input text that reference the action. */ - ActionRefs actionRefs; - - /* Number of references in the final machine. */ - bool numRefs() - { return numTransRefs + numToStateRefs + numFromStateRefs + numEofRefs; } - int numTransRefs; - int numToStateRefs; - int numFromStateRefs; - int numEofRefs; - int numCondRefs; - bool anyCall; - - bool isLmAction; -}; - -/* A list of actions. */ -typedef DList<Action> ActionList; -typedef AvlTree<Action, String, CmpStr> ActionDict; - -struct VarDef; -struct Join; -struct Expression; -struct Term; -struct FactorWithAug; -struct FactorWithLabel; -struct FactorWithRep; -struct FactorWithNeg; -struct Factor; -struct Literal; -struct Range; -struct RegExpr; -struct ReItem; -struct ReOrBlock; -struct ReOrItem; -struct TokenRegion; - -/* Priority name dictionary. 
*/ -typedef AvlMapEl<String, int> PriorDictEl; -typedef AvlMap<String, int, CmpStr> PriorDict; - -/* Local error name dictionary. */ -typedef AvlMapEl<String, int> LocalErrDictEl; -typedef AvlMap<String, int, CmpStr> LocalErrDict; - -/* Tree of instantiated names. */ -typedef BstMapEl<String, NameInst*> NameMapEl; -typedef BstMap<String, NameInst*, CmpStr> NameMap; -typedef Vector<NameInst*> NameVect; -typedef BstSet<NameInst*> NameSet; - -/* Node in the tree of instantiated names. */ -struct NameInst -{ - NameInst( const InputLoc &loc, NameInst *parent, const String &name, - int id, bool isLabel ) : - loc(loc), parent(parent), name(name), id(id), isLabel(isLabel), - isLongestMatch(false), numRefs(0), numUses(0), start(0), final(0) {} - - InputLoc loc; - - /* Keep parent pointers in the name tree to retrieve - * fully qulified names. */ - NameInst *parent; - - String name; - int id; - bool isLabel; - bool isLongestMatch; - - int numRefs; - int numUses; - - /* Names underneath us, excludes anonymous names. */ - NameMap children; - - /* All names underneath us in order of appearance. */ - NameVect childVect; - - /* Join scopes need an implicit "final" target. */ - NameInst *start, *final; - - /* During a fsm generation walk, lists the names that are referenced by - * epsilon operations in the current scope. After the link is made by the - * epsilon reference and the join operation is complete, the label can - * have its refcount decremented. Once there are no more references the - * entry point can be removed from the fsm returned. */ - NameVect referencedNames; - - /* Pointers for the name search queue. */ - NameInst *prev, *next; - - /* Check if this name inst or any name inst below is referenced. */ - bool anyRefsRec(); -}; - -typedef DList<NameInst> NameInstList; - -/* Stack frame used in walking the name tree. 
*/ -struct NameFrame -{ - NameInst *prevNameInst; - int prevNameChild; - NameInst *prevLocalScope; -}; - -/* Class to collect information about the machine during the - * parse of input. */ -struct Compiler -{ - /* Create a new parse data object. This is done at the beginning of every - * fsm specification. */ - Compiler( const String &fileName, const String §ionName, - const InputLoc §ionLoc, ostream &out ); - ~Compiler(); - - /* - * Setting up the graph dict. - */ - - void compileLiteralTokens(); - void initEmptyScanners(); - void initUniqueTypes(); - - /* Initialize a graph dict with the basic fsms. */ - void initGraphDict(); - void createBuiltin( const char *name, BuiltinMachine builtin ); - - /* Make a name id in the current name instantiation scope if it is not - * already there. */ - NameInst *addNameInst( const InputLoc &loc, char *data, bool isLabel ); - NameInst *makeJoinNameTree( Join *join ); - NameInst *makeNameTree( ); - void fillNameIndex( NameInst **nameIndex, NameInst *from ); - NameInst **makeNameIndex( NameInst *rootName ); - - - void printNameTree( NameInst *rootName ); - void printNameIndex( NameInst **nameIndex ); - - /* Increments the usage count on entry names. Names that are no longer - * needed will have their entry points unset. */ - void unsetObsoleteEntries( FsmGraph *graph ); - - /* Resove name references in action code and epsilon transitions. */ - NameSet resolvePart( NameInst *refFrom, const char *data, bool recLabelsOnly ); - void resolveFrom( NameSet &result, NameInst *refFrom, - const NameRef &nameRef, int namePos ); - void referenceRegions( NameInst *root ); - - /* Set the alphabet type. If type types are not valid returns false. */ - bool setAlphType( char *s1, char *s2 ); - bool setAlphType( char *s1 ); - - /* Unique actions. */ - void removeDups( ActionTable &actionTable ); - void removeActionDups( FsmGraph *graph ); - - /* Dumping the name instantiation tree. 
*/ - void printNameInst( NameInst *nameInst, int level ); - - /* Make the graph from a graph dict node. Does minimization. */ - void finishGraphBuild( FsmGraph *graph ); - FsmGraph *makeAllRegions(); - FsmGraph *makeScanner(); - - void analyzeAction( Action *action, InlineList *inlineList ); - void analyzeGraph( FsmGraph *graph ); - void resolvePrecedence( PdaGraph *pdaGraph ); - LangEl *predOf( PdaTrans *trans, long action ); - bool precedenceSwap( long action1, long action2, LangEl *l1, LangEl *l2 ); - bool precedenceRemoveBoth( LangEl *l1, LangEl *l2 ); - - void initKeyOps(); - - /* - * Data collected during the parse. - */ - - /* The list of instances. */ - RegionGraphList instanceList; - - /* Dictionary of actions. Lets actions be defined and then referenced. */ - ActionDict actionDict; - - /* Dictionary of named priorities. */ - PriorDict priorDict; - - /* Dictionary of named local errors. */ - LocalErrDict localErrDict; - - /* List of actions. Will be pasted into a switch statement. */ - ActionList actionList; - - /* The id of the next priority name and label. */ - int nextPriorKey, nextLocalErrKey, nextNameId; - - /* The default priority number key for a machine. This is active during - * the parse of the rhs of a machine assignment. */ - int curDefPriorKey; - - int curDefLocalErrKey; - - /* Alphabet type. */ - HostType *userAlphType; - bool alphTypeSet; - - /* Element type and get key expression. */ - InlineList *getKeyExpr; - InlineList *accessExpr; - InlineList *curStateExpr; - - /* The alphabet range. */ - char *lowerNum, *upperNum; - Key lowKey, highKey; - InputLoc rangeLowLoc, rangeHighLoc; - - /* The name of the file the fsm is from, and the spec name. */ - String fileName; - String sectionName; - InputLoc sectionLoc; - - /* Number of errors encountered parsing the fsm spec. */ - int errorCount; - - /* Counting the action and priority ordering. */ - int curActionOrd; - int curPriorOrd; - - /* Root of the name tree. 
*/ - NameInst *curNameInst; - int curNameChild; - - /* The place where resolved epsilon transitions go. These cannot go into - * the parse tree because a single epsilon op can resolve more than once - * to different nameInsts if the machine it's in is used more than once. */ - NameVect epsilonResolvedLinks; - int nextEpsilonResolvedLink; - - /* Root of the name tree used for doing local name searches. */ - NameInst *localNameScope; - - void setLmInRetLoc( InlineList *inlineList ); - void initLongestMatchData(); - void initNameWalk( NameInst *rootName ); - NameInst *nextNameScope() { return curNameInst->childVect[curNameChild]; } - NameFrame enterNameScope( bool isLocal, int numScopes ); - void popNameScope( const NameFrame &frame ); - void resetNameScope( const NameFrame &frame ); - - /* Counter for assigning ids to longest match items. */ - int nextTokenId; - - /* List of all longest match parse tree items. */ - RegionList regionList; - - NamespaceList namespaceList; - - Action *newAction( const String &name, InlineList *inlineList ); - - Action *setTokStart; - int setTokStartOrd; - - Action *initActId; - int initActIdOrd; - - Action *setTokEnd; - int setTokEndOrd; - - CodeBlock *rootCodeBlock; - - void beginProcessing() - { - ::condData = &thisCondData; - ::keyOps = &thisKeyOps; - } - - CondData thisCondData; - KeyOps thisKeyOps; - - UniqueType *mainReturnUT; - - /* CONTEXT FREE */ - ProdElList *makeProdElList( LangEl *langEl ); - void wrapNonTerminals(); - void makeDefinitionNames(); - void noUndefindLangEls(); - void declareBaseLangEls(); - void makeLangElIds(); - void makeLangElNames(); - void makeTerminalWrappers(); - void makeEofElements(); - void makeIgnoreCollectors(); - void setPrecedence(); - - void typeDeclaration(); - void typeResolve(); - - /* Parser generation. 
*/ - void advanceReductions( PdaGraph *pdaGraph ); - void sortActions( PdaGraph *pdaGraph ); - void addDupTerms( PdaGraph *pdaGraph ); - void linkExpansions( PdaGraph *pdaGraph ); - void lalr1FollowEpsilonOp( PdaGraph *pdaGraph ); - - void transferCommits( PdaGraph *pdaGraph, PdaTrans *trans, PdaState *state, long prodId ); - - void lalr1AddFollow2( PdaGraph *pdaGraph, PdaTrans *trans, FollowToAdd &followKeys ); - void lalr1AddFollow1( PdaGraph *pdaGraph, PdaState *state ); - - void lalr1AddFollow2( PdaGraph *pdaGraph, PdaTrans *trans, long followKey, long prior ); - void lalr1AddFollow1( PdaGraph *pdaGraph, PdaTrans *trans ); - - void lalr1AddFollowSets( PdaGraph *pdaGraph, LangElSet &parserEls ); - - void lr0BringInItem( PdaGraph *pdaGraph, PdaState *dest, PdaState *prodState, - PdaTrans *expandFrom, Definition *prod ); - void lr0InvokeClosure( PdaGraph *pdaGraph, PdaState *state ); - void lr0CloseAllStates( PdaGraph *pdaGraph ); - - void lalr1GenerateParser( PdaGraph *pdaGraph, LangElSet &parserEls ); - - void reduceActions( PdaGraph *pdaGraph ); - - bool makeNonTermFirstSetProd( Definition *prod, PdaState *state ); - void makeNonTermFirstSets(); - - bool makeFirstSetProd( Definition *prod, PdaState *state ); - void makeFirstSets(); - - int findIndexOff( PdaTables *pdaTables, PdaGraph *pdaGraph, PdaState *state, int &currLen ); - void trySetTime( PdaTrans *trans, long code, long &time ); - void addRegion( PdaState *tabState, PdaTrans *pdaTrans, long pdaKey, - bool noPreIgnore, bool noPostIgnore ); - PdaState *followProd( PdaState *tabState, PdaState *prodState ); - void findFollow( AlphSet &result, PdaState *overTab, - PdaState *overSrc, Definition *parentDef ); - void pdaActionOrder( PdaGraph *pdaGraph, LangElSet &parserEls ); - void pdaOrderFollow( LangEl *rootEl, PdaState *tabState, - PdaTrans *tabTrans, PdaTrans *srcTrans, - Definition *parentDef, Definition *definition, long &time ); - void pdaOrderProd( LangEl *rootEl, PdaState *tabState, - PdaState 
*srcState, Definition *parentDef, long &time ); - void analyzeMachine( PdaGraph *pdaGraph, LangElSet &parserEls ); - - void makeProdFsms(); - void insertUniqueEmptyProductions(); - void printNonTermFirstSets(); - void printFirstSets(); - - LangEl *makeRepeatProd( Namespace *nspace, const String &repeatName, - NamespaceQual *nspaceQual, const String &name ); - LangEl *makeListProd( Namespace *nspace, const String &listName, - NamespaceQual *nspaceQual, const String &name ); - LangEl *makeOptProd( Namespace *nspace, const String &optName, - NamespaceQual *nspaceQual, const String &name ); - void resolveFactor( ProdEl *fact ); - void resolveProductionEls(); - void resolvePatternEls(); - void resolveReplacementEls(); - void resolveParserEls(); - - void addMatchText( ObjectDef *frame, LangEl *lel ); - void addMatchLength( ObjectDef *frame, LangEl *lel ); - void addInput( ObjectDef *frame ); - void addCtx( ObjectDef *frame ); - void addTransTokVar( ObjectDef *frame, LangEl *lel ); - void addProdRHSVars( ObjectDef *localFrame, ProdElList *prodElList ); - void addProdRedObjectVar( ObjectDef *localFrame, LangEl *langEl ); - void addProdObjects(); - - void addProdRHSLoads( Definition *prod, CodeVect &code, long &insertPos ); - void addProdLHSLoad( Definition *prod, CodeVect &code, long &insertPos ); - void addPushBackLHS( Definition *prod, CodeVect &code, long &insertPos ); - - void prepGrammar(); - void parsePatterns(); - - void collectParserEls( LangElSet &parserEls ); - void makeParser( LangElSet &parserEls ); - PdaGraph *makePdaGraph( BstSet<LangEl*> &parserEls ); - PdaTables *makePdaTables( PdaGraph *pdaGraph ); - - void fillInPatterns( Program *prg ); - void makeRuntimeData(); - - /* Generate and write out the fsm. 
*/ - void generateGraphviz(); - - void verifyParseStopGrammar( LangEl *langEl, PdaGraph *pdaGraph ); - void computeAdvanceReductions( LangEl *langEl, PdaGraph *pdaGraph ); - - void initFieldInstructions( ObjField *el ); - void initLocalInstructions( ObjField *el ); - void initLocalRefInstructions( ObjField *el ); - - void initMapFunctions( GenericType *gen ); - void initListField( GenericType *gen, const char *name, int offset ); - void initListFields( GenericType *gen ); - void initListFunctions( GenericType *gen ); - void initVectorFunctions( GenericType *gen ); - void initParserFunctions( GenericType *gen ); - void initParserFields( GenericType *gen ); - void initCtxField( GenericType *gen ); - - void addStdin(); - void addStdout(); - void addStderr(); - void addArgv(); - int argvOffset(); - void initGlobalFunctions(); - void makeDefaultIterators(); - void addLengthField( ObjectDef *objDef, Code getLength ); - ObjectDef *findObject( const String &name ); - void initAllLanguageObjects(); - void resolveListElementOf( ObjectDef *container, ObjectDef *obj, ElementOf *elof ); - void resolveMapElementOf( ObjectDef *container, ObjectDef *obj, ElementOf *elof ); - void resolveElementOf( ObjectDef *obj ); - void makeFuncVisible( Function *func, bool isUserIter ); - - void resolveFunction( Function *func ); - void resolveUserIter( Function *func ); - void resolvePreEof( TokenRegion *region ); - void resolveRootBlock(); - void resolveTranslateBlock( LangEl *langEl ); - void resolveReductionCode( Definition *prod ); - void resolveParseTree(); - void resolveGenericTypes(); - - void compileFunction( Function *func, CodeVect &code ); - void compileFunction( Function *func ); - void compileUserIter( Function *func, CodeVect &code ); - void compileUserIter( Function *func ); - void compilePreEof( TokenRegion *region ); - void compileRootBlock(); - void compileTranslateBlock( LangEl *langEl ); - void findLocalTrees( CharSet &trees ); - void makeProdCopies( Definition *prod ); - 
void compileReductionCode( Definition *prod ); - void initGenericTypes(); - void removeNonUnparsableRepls(); - void compileByteCode(); - - void resolveUses(); - void createDefaultScanner(); - void generateOutput(); - void compile(); - - void openNameSpace( ostream &out, Namespace *nspace ); - void closeNameSpace( ostream &out, Namespace *nspace ); - void refNameSpace( LangEl *lel, Namespace *nspace ); - void generateExports(); - void generateExportsImpl(); - - /* - * Graphviz Generation - */ - void writeTransList( PdaState *state ); - void writeDotFile( PdaGraph *graph ); - void writeDotFile( ); - - - /* - * Data collected during the parse. - */ - - LelList langEls; - DefList prodList; - - /* Dumping. */ - DotItemIndex dotItemIndex; - - PredDeclList predDeclList; - - /* The name of the file the fsm is from, and the spec name. */ - // EXISTS IN RL: char *fileName; - String parserName; - ostream &out; - // EXISTS IN RL: InputLoc sectionLoc; - - /* How to access the instance data. */ - String access; - - /* The name of the token structure. 
*/ - String tokenStruct; - - GenericType *anyList; - GenericType *anyMap; - GenericType *anyVector; - - LangEl *ptrLangEl; - LangEl *boolLangEl; - LangEl *intLangEl; - LangEl *strLangEl; - LangEl *streamLangEl; - LangEl *inputLangEl; - LangEl *anyLangEl; - LangEl *rootLangEl; - LangEl *noTokenLangEl; - LangEl *eofLangEl; - LangEl *errorLangEl; - LangEl *defaultCharLangEl; - LangEl *ignoreLangEl; - - TokenRegion *rootRegion; - TokenRegion *defaultRegion; - TokenRegion *eofTokenRegion; - - Namespace *defaultNamespace; - Namespace *rootNamespace; - - int nextSymbolId; - int firstNonTermId; - - LangEl **langElIndex; - PdaState *actionDestState; - DefSetSet prodSetSet; - - Definition **prodIdIndex; - AlphSet literalSet; - - PatternList patternList; - ReplList replList; - ParserTextList parserTextList; - - ObjectDef *globalObjectDef; - - VectorTypeIdMap vectorTypeIdMap; - ObjectDef *curLocalFrame; - - UniqueType *findUniqueType( int typeId ); - UniqueType *findUniqueType( int typeId, LangEl *langEl ); - UniqueType *findUniqueType( int typeId, IterDef *iterDef ); - - UniqueType *uniqueTypeNil; - UniqueType *uniqueTypePtr; - UniqueType *uniqueTypeBool; - UniqueType *uniqueTypeInt; - UniqueType *uniqueTypeStr; - UniqueType *uniqueTypeStream; - UniqueType *uniqueTypeInput; - UniqueType *uniqueTypeIgnore; - UniqueType *uniqueTypeAny; - - UniqueTypeMap uniqeTypeMap; - UniqueRepeatMap uniqeRepeatMap; - UniqueMapMap uniqueMapMap; - UniqueListMap uniqueListMap; - UniqueVectorMap uniqueVectorMap; - UniqueParserMap uniqueParserMap; - - void initStrObject(); - void initStreamObject(); - void initInputObject(); - void initIntObject(); - void initTokenObjects(); - - ObjectDef *intObj; - ObjectDef *strObj; - ObjectDef *streamObj; - ObjectDef *inputObj; - ObjectDef *tokenObj; - - FsmTables *fsmTables; - RuntimeData *runtimeData; - - int nextPatReplId; - int nextGenericId; - - FunctionList functionList; - int nextFuncId; - - enum CompileContext { - CompileTranslation, - CompileReduction, 
- CompileFunction, - CompileRoot - }; - - CompileContext compileContext; - LongVect returnJumps; - LongVect breakJumps; - Function *curFunction; - - /* Loops fill this in for return statements to use. */ - CodeVect *loopCleanup; - - ObjField *makeDataEl(); - ObjField *makePosEl(); - ObjField *makeLineEl(); - - IterDef *findIterDef( IterDef::Type type, GenericType *generic ); - IterDef *findIterDef( IterDef::Type type, Function *func ); - IterDef *findIterDef( IterDef::Type type ); - IterDefSet iterDefSet; - - enum GeneratesType { GenToken, GenIgnore, GenCfl }; - - int nextObjectId; - GeneratesType generatesType; - bool generatesIgnore; - bool insideRegion; - - StringMap literalStrings; - - long nextFrameId; - long nextParserId; - - ObjectDef *rootLocalFrame; - - long nextLabelId; - ObjectDef *objectDef; - - bool revertOn; - - RedFsm *redFsm; - - PdaGraph *pdaGraph; - PdaTables *pdaTables; - - long predValue; - long nextMatchEndNum; - - TypeRef *argvTypeRef; - - Context *context; -}; - -void afterOpMinimize( FsmGraph *fsm, bool lastInSeq = true ); -Key makeFsmKeyHex( char *str, const InputLoc &loc, Compiler *pd ); -Key makeFsmKeyDec( char *str, const InputLoc &loc, Compiler *pd ); -Key makeFsmKeyNum( char *str, const InputLoc &loc, Compiler *pd ); -Key makeFsmKeyChar( char c, Compiler *pd ); -void makeFsmKeyArray( Key *result, char *data, int len, Compiler *pd ); -void makeFsmUniqueKeyArray( KeySet &result, char *data, int len, - bool caseInsensitive, Compiler *pd ); -FsmGraph *makeBuiltin( BuiltinMachine builtin, Compiler *pd ); -FsmGraph *dotFsm( Compiler *pd ); -FsmGraph *dotStarFsm( Compiler *pd ); - -void errorStateLabels( const NameSet &locations ); - -struct ColmParser; - -typedef AvlMap<String, ColmParser *, CmpStr> ParserDict; -typedef AvlMapEl<String, ColmParser *> ParserDictEl; - -LangEl *declareLangEl( Compiler *pd, Namespace *nspace, const String &data, LangEl::Type type ); -LangEl *addLangEl( Compiler *pd, Namespace *nspace, const String &data, 
LangEl::Type type ); -void declareTypeAlias( Compiler *pd, Namespace *nspace, const String &data, TypeRef *typeRef ); -LangEl *findType( Compiler *pd, Namespace *nspace, const String &data ); - -#endif /* _PARSEDATA_H */ diff --git a/colm/parsetree.cc b/colm/parsetree.cc deleted file mode 100644 index 084ffbb8..00000000 --- a/colm/parsetree.cc +++ /dev/null @@ -1,1776 +0,0 @@ -/* - * Copyright 2006-2012 Adrian Thurston <thurston@complang.org> - */ - -/* This file is part of Colm. - * - * Colm is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * Colm is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Colm; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include "lmparse.h" -#include "parsetree.h" -#include "input.h" -#include "fsmrun.h" - -#include <iostream> -#include <iomanip> -#include <errno.h> -#include <limits.h> -#include <stdlib.h> - - -using namespace std; -ostream &operator<<( ostream &out, const NameRef &nameRef ); -ostream &operator<<( ostream &out, const NameInst &nameInst ); -ostream &operator<<( ostream &out, const Token &token ); - -/* Convert the literal string which comes in from the scanner into an array of - * characters with escapes and options interpreted. Also null terminates the - * string. 
Though this null termination should not be relied on for - * interpreting literals in the parser because the string may contain a - * literal string with \0 */ -void prepareLitString( String &result, bool &caseInsensitive, - const String &srcString, const InputLoc &loc ) -{ - result.setAs( String::Fresh(), srcString.length() ); - caseInsensitive = false; - - char *src = srcString.data + 1; - char *end = srcString.data + srcString.length() - 1; - - while ( *end != '\'' && *end != '\"' && *end != '\n' ) { - if ( *end == 'i' ) - caseInsensitive = true; - else { - error( loc ) << "literal string '" << *end << - "' option not supported" << endl; - } - end -= 1; - } - - if ( *end == '\n' ) - end++; - - char *dest = result.data; - int len = 0; - while ( src != end ) { - if ( *src == '\\' ) { - switch ( src[1] ) { - case '0': dest[len++] = '\0'; break; - case 'a': dest[len++] = '\a'; break; - case 'b': dest[len++] = '\b'; break; - case 't': dest[len++] = '\t'; break; - case 'n': dest[len++] = '\n'; break; - case 'v': dest[len++] = '\v'; break; - case 'f': dest[len++] = '\f'; break; - case 'r': dest[len++] = '\r'; break; - case '\n': break; - default: dest[len++] = src[1]; break; - } - src += 2; - } - else { - dest[len++] = *src++; - } - } - - result.chop( len ); -} - -int CmpUniqueType::compare( const UniqueType &ut1, const UniqueType &ut2 ) -{ - if ( ut1.typeId < ut2.typeId ) - return -1; - else if ( ut1.typeId > ut2.typeId ) - return 1; - else if ( ut1.typeId == TYPE_TREE || - ut1.typeId == TYPE_PTR || - ut1.typeId == TYPE_REF ) - { - if ( ut1.langEl < ut2.langEl ) - return -1; - else if ( ut1.langEl > ut2.langEl ) - return 1; - } - else if ( ut1.typeId == TYPE_ITER ) { - if ( ut1.iterDef < ut2.iterDef ) - return -1; - else if ( ut1.iterDef > ut2.iterDef ) - return 1; - } - else { - /* Fail on anything unimplemented. 
*/ - assert( false ); - } - - return 0; -} - -int CmpUniqueRepeat::compare( const UniqueRepeat &ut1, const UniqueRepeat &ut2 ) -{ - if ( ut1.repeatType < ut2.repeatType ) - return -1; - else if ( ut1.repeatType > ut2.repeatType ) - return 1; - else { - if ( ut1.langEl < ut2.langEl ) - return -1; - else if ( ut1.langEl > ut2.langEl ) - return 1; - } - - return 0; -} - -int CmpUniqueMap::compare( const UniqueMap &ut1, const UniqueMap &ut2 ) -{ - if ( ut1.key < ut2.key ) - return -1; - else if ( ut1.key > ut2.key ) - return 1; - else { - if ( ut1.value < ut2.value ) - return -1; - else if ( ut1.value > ut2.value ) - return 1; - } - - return 0; -} - -int CmpUniqueList::compare( const UniqueList &ut1, const UniqueList &ut2 ) -{ - if ( ut1.value < ut2.value ) - return -1; - else if ( ut1.value > ut2.value ) - return 1; - - return 0; -} - -int CmpUniqueVector::compare( const UniqueVector &ut1, const UniqueVector &ut2 ) -{ - if ( ut1.value < ut2.value ) - return -1; - else if ( ut1.value > ut2.value ) - return 1; - - return 0; -} - -int CmpUniqueParser::compare( const UniqueParser &ut1, const UniqueParser &ut2 ) -{ - if ( ut1.parseType < ut2.parseType ) - return -1; - else if ( ut1.parseType > ut2.parseType ) - return 1; - - return 0; -} - -FsmGraph *VarDef::walk( Compiler *pd ) -{ - /* Recurse on the expression. */ - FsmGraph *rtnVal = join->walk( pd ); - - /* Do the tranfer of local error actions. */ - LocalErrDictEl *localErrDictEl = pd->localErrDict.find( name ); - if ( localErrDictEl != 0 ) { - for ( StateList::Iter state = rtnVal->stateList; state.lte(); state++ ) - rtnVal->transferErrorActions( state, localErrDictEl->value ); - } - - /* If the expression below is a join operation with multiple expressions - * then it just had epsilon transisions resolved. If it is a join - * with only a single expression then run the epsilon op now. */ - if ( join->exprList.length() == 1 ) - rtnVal->epsilonOp(); - - /* We can now unset entry points that are not longer used. 
*/ - pd->unsetObsoleteEntries( rtnVal ); - - return rtnVal; -} - - -FsmGraph *RegionDef::walk( Compiler *pd ) -{ - /* We enter into a new name scope. */ - NameFrame nameFrame = pd->enterNameScope( true, 1 ); - - /* Recurse on the expression. */ - FsmGraph *rtnVal = tokenRegion->walk( pd ); - - /* Do the tranfer of local error actions. */ - LocalErrDictEl *localErrDictEl = pd->localErrDict.find( name ); - if ( localErrDictEl != 0 ) { - for ( StateList::Iter state = rtnVal->stateList; state.lte(); state++ ) - rtnVal->transferErrorActions( state, localErrDictEl->value ); - } - - /* We can now unset entry points that are not longer used. */ - pd->unsetObsoleteEntries( rtnVal ); - - /* If the name of the variable is referenced then add the entry point to - * the graph. */ - if ( pd->curNameInst->numRefs > 0 ) - rtnVal->setEntry( pd->curNameInst->id, rtnVal->startState ); - - /* Pop the name scope. */ - pd->popNameScope( nameFrame ); - return rtnVal; -} - -void RegionDef::makeNameTree( const InputLoc &loc, Compiler *pd ) -{ - /* The variable definition enters a new scope. */ - NameInst *prevNameInst = pd->curNameInst; - pd->curNameInst = pd->addNameInst( loc, name, false ); - - /* Guess we do this now. */ - tokenRegion->makeActions( pd ); - - /* Save off the name inst into the token region. This is only legal for - * token regions because they are only ever referenced once (near the root - * of the name tree). They cannot have more than one corresponding name - * inst. */ - assert( tokenRegion->regionNameInst == 0 ); - tokenRegion->regionNameInst = pd->curNameInst; - - /* The name scope ends, pop the name instantiation. */ - pd->curNameInst = prevNameInst; -} - -InputLoc TokenDef::getLoc() -{ - return action != 0 ? action->loc : semiLoc; -} - -/* - * If there are any LMs then all of the following entry points must reset - * tokstart: - * - * 1. fentry(StateRef) - * 2. ftoto(StateRef), fcall(StateRef), fnext(StateRef) - * 3. 
targt of any transition that has an fcall (the return loc). - * 4. start state of all longest match routines. - */ - -Action *TokenRegion::newAction( Compiler *pd, const InputLoc &loc, - const String &name, InlineList *inlineList ) -{ - Action *action = new Action( loc, name, inlineList ); - pd->actionList.append( action ); - action->isLmAction = true; - return action; -} - -void TokenRegion::makeActions( Compiler *pd ) -{ - /* Make actions that set the action id. */ - for ( TokenDefListReg::Iter lmi = tokenDefList; lmi.lte(); lmi++ ) { - /* For each part create actions for setting the match type. We need - * to do this so that the actions will go into the actionIndex. */ - InlineList *inlineList = new InlineList; - inlineList->append( new InlineItem( lmi->getLoc(), this, lmi, - InlineItem::LmSetActId ) ); - char *actName = new char[50]; - sprintf( actName, "store%i", lmi->longestMatchId ); - lmi->setActId = newAction( pd, lmi->getLoc(), actName, inlineList ); - } - - /* Make actions that execute the user action and restart on the last character. */ - for ( TokenDefListReg::Iter lmi = tokenDefList; lmi.lte(); lmi++ ) { - /* For each part create actions for setting the match type. We need - * to do this so that the actions will go into the actionIndex. */ - InlineList *inlineList = new InlineList; - inlineList->append( new InlineItem( lmi->getLoc(), this, lmi, - InlineItem::LmOnLast ) ); - char *actName = new char[50]; - sprintf( actName, "imm%i", lmi->longestMatchId ); - lmi->actOnLast = newAction( pd, lmi->getLoc(), actName, inlineList ); - } - - /* Make actions that execute the user action and restart on the next - * character. These actions will set tokend themselves (it is the current - * char). */ - for ( TokenDefListReg::Iter lmi = tokenDefList; lmi.lte(); lmi++ ) { - /* For each part create actions for setting the match type. We need - * to do this so that the actions will go into the actionIndex. 
*/ - InlineList *inlineList = new InlineList; - inlineList->append( new InlineItem( lmi->getLoc(), this, lmi, - InlineItem::LmOnNext ) ); - char *actName = new char[50]; - sprintf( actName, "lagh%i", lmi->longestMatchId ); - lmi->actOnNext = newAction( pd, lmi->getLoc(), actName, inlineList ); - } - - /* Make actions that execute the user action and restart at tokend. These - * actions execute some time after matching the last char. */ - for ( TokenDefListReg::Iter lmi = tokenDefList; lmi.lte(); lmi++ ) { - /* For each part create actions for setting the match type. We need - * to do this so that the actions will go into the actionIndex. */ - InlineList *inlineList = new InlineList; - inlineList->append( new InlineItem( lmi->getLoc(), this, lmi, - InlineItem::LmOnLagBehind ) ); - char *actName = new char[50]; - sprintf( actName, "lag%i", lmi->longestMatchId ); - lmi->actLagBehind = newAction( pd, lmi->getLoc(), actName, inlineList ); - } - - InputLoc loc; - loc.line = 1; - loc.col = 1; - - /* Create the error action. */ - InlineList *il6 = new InlineList; - il6->append( new InlineItem( loc, this, 0, InlineItem::LmSwitch ) ); - lmActSelect = newAction( pd, loc, "lagsel", il6 ); -} - -void TokenRegion::restart( FsmGraph *graph, FsmTrans *trans ) -{ - FsmState *fromState = trans->fromState; - graph->detachTrans( fromState, trans->toState, trans ); - graph->attachTrans( fromState, graph->startState, trans ); -} - -void TokenRegion::runLongestMatch( Compiler *pd, FsmGraph *graph ) -{ - graph->markReachableFromHereStopFinal( graph->startState ); - for ( StateList::Iter ms = graph->stateList; ms.lte(); ms++ ) { - if ( ms->stateBits & SB_ISMARKED ) { - ms->lmItemSet.insert( 0 ); - ms->stateBits &= ~ SB_ISMARKED; - } - } - - /* Transfer the first item of non-empty lmAction tables to the item sets - * of the states that follow. Exclude states that have no transitions out. 
- * This must happen on a separate pass so that on each iteration of the - * next pass we have the item set entries from all lmAction tables. */ - for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) { - for ( TransList::Iter trans = st->outList; trans.lte(); trans++ ) { - if ( trans->lmActionTable.length() > 0 ) { - LmActionTableEl *lmAct = trans->lmActionTable.data; - FsmState *toState = trans->toState; - assert( toState ); - - /* Check if there are transitions out, this may be a very - * close approximation? Out transitions going nowhere? - * FIXME: Check. */ - if ( toState->outList.length() > 0 ) { - /* Fill the item sets. */ - graph->markReachableFromHereStopFinal( toState ); - for ( StateList::Iter ms = graph->stateList; ms.lte(); ms++ ) { - if ( ms->stateBits & SB_ISMARKED ) { - ms->lmItemSet.insert( lmAct->value ); - ms->stateBits &= ~ SB_ISMARKED; - } - } - } - } - } - } - - /* The lmItem sets are now filled, telling us which longest match rules - * can succeed in which states. First determine if we need to make sure - * act is defaulted to zero. */ - int maxItemSetLength = 0; - graph->markReachableFromHereStopFinal( graph->startState ); - for ( StateList::Iter ms = graph->stateList; ms.lte(); ms++ ) { - if ( ms->stateBits & SB_ISMARKED ) { - if ( ms->lmItemSet.length() > maxItemSetLength ) - maxItemSetLength = ms->lmItemSet.length(); - ms->stateBits &= ~ SB_ISMARKED; - } - } - - /* The actions executed on starting to match a token. */ - graph->isolateStartState(); - graph->startState->fromStateActionTable.setAction( pd->setTokStartOrd, pd->setTokStart ); - if ( maxItemSetLength > 1 ) { - /* The longest match action switch may be called when tokens are - * matched, in which case act must be initialized, there must be a - * case to handle the error, and the generated machine will require an - * error state. 
*/ - lmSwitchHandlesError = true; - graph->startState->toStateActionTable.setAction( pd->initActIdOrd, pd->initActId ); - } - - /* The place to store transitions to restart. It maybe possible for the - * restarting to affect the searching through the graph that follows. For - * now take the safe route and save the list of transitions to restart - * until after all searching is done. */ - Vector<FsmTrans*> restartTrans; - - /* Set actions that do immediate token recognition, set the longest match part - * id and set the token ending. */ - for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) { - for ( TransList::Iter trans = st->outList; trans.lte(); trans++ ) { - if ( trans->lmActionTable.length() > 0 ) { - LmActionTableEl *lmAct = trans->lmActionTable.data; - FsmState *toState = trans->toState; - assert( toState ); - - /* Check if there are transitions out, this may be a very - * close approximation? Out transitions going nowhere? - * FIXME: Check. */ - if ( toState->outList.length() == 0 ) { - /* Can execute the immediate action for the longest match - * part. Redirect the action to the start state. */ - trans->actionTable.setAction( lmAct->key, - lmAct->value->actOnLast ); - restartTrans.append( trans ); - } - else { - /* Look for non final states that have a non-empty item - * set. If these are present then we need to record the - * end of the token. Also Find the highest item set - * length reachable from here (excluding at transtions to - * final states). 
*/ - bool nonFinalNonEmptyItemSet = false; - maxItemSetLength = 0; - graph->markReachableFromHereStopFinal( toState ); - for ( StateList::Iter ms = graph->stateList; ms.lte(); ms++ ) { - if ( ms->stateBits & SB_ISMARKED ) { - if ( ms->lmItemSet.length() > 0 && !ms->isFinState() ) - nonFinalNonEmptyItemSet = true; - if ( ms->lmItemSet.length() > maxItemSetLength ) - maxItemSetLength = ms->lmItemSet.length(); - ms->stateBits &= ~ SB_ISMARKED; - } - } - - /* If there are reachable states that are not final and - * have non empty item sets or that have an item set - * length greater than one then we need to set tokend - * because the error action that matches the token will - * require it. */ - if ( nonFinalNonEmptyItemSet || maxItemSetLength > 1 ) - trans->actionTable.setAction( pd->setTokEndOrd, pd->setTokEnd ); - - /* Some states may not know which longest match item to - * execute, must set it. */ - if ( maxItemSetLength > 1 ) { - /* There are transitions out, another match may come. */ - trans->actionTable.setAction( lmAct->key, - lmAct->value->setActId ); - } - } - } - } - } - - /* Now that all graph searching is done it certainly safe set the - * restarting. It may be safe above, however this must be verified. */ - for ( Vector<FsmTrans*>::Iter rs = restartTrans; rs.lte(); rs++ ) - restart( graph, *rs ); - - int lmErrActionOrd = pd->curActionOrd++; - - /* Embed the error for recognizing a char. */ - for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) { - if ( st->lmItemSet.length() == 1 && st->lmItemSet[0] != 0 ) { - if ( st->isFinState() ) { - /* On error execute the onActNext action, which knows that - * the last character of the token was one back and restart. 
*/ - graph->setErrorTarget( st, graph->startState, &lmErrActionOrd, - &st->lmItemSet[0]->actOnNext, 1 ); - st->eofActionTable.setAction( lmErrActionOrd, - st->lmItemSet[0]->actOnNext ); - st->eofTarget = graph->startState; - } - else { - graph->setErrorTarget( st, graph->startState, &lmErrActionOrd, - &st->lmItemSet[0]->actLagBehind, 1 ); - st->eofActionTable.setAction( lmErrActionOrd, - st->lmItemSet[0]->actLagBehind ); - st->eofTarget = graph->startState; - } - } - else if ( st->lmItemSet.length() > 1 ) { - /* Need to use the select. Take note of the which items the select - * is needed for so only the necessary actions are included. */ - for ( LmItemSet::Iter plmi = st->lmItemSet; plmi.lte(); plmi++ ) { - if ( *plmi != 0 ) - (*plmi)->inLmSelect = true; - } - /* On error, execute the action select and go to the start state. */ - graph->setErrorTarget( st, graph->startState, &lmErrActionOrd, - &lmActSelect, 1 ); - st->eofActionTable.setAction( lmErrActionOrd, lmActSelect ); - st->eofTarget = graph->startState; - } - } - - /* Finally, the start state should be made final. */ - graph->setFinState( graph->startState ); -} - -void TokenRegion::transferScannerLeavingActions( FsmGraph *graph ) -{ - for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) { - if ( st->outActionTable.length() > 0 ) - graph->setErrorActions( st, st->outActionTable ); - } -} - -FsmGraph *TokenRegion::walk( Compiler *pd ) -{ - /* Make each part of the longest match. */ - int numParts = 0; - FsmGraph **parts = new FsmGraph*[tokenDefList.length()]; - for ( TokenDefListReg::Iter lmi = tokenDefList; lmi.lte(); lmi++ ) { - /* Watch out for patternless tokens. */ - if ( lmi->join != 0 ) { - /* Create the machine and embed the setting of the longest match id. */ - parts[numParts] = lmi->join->walk( pd ); - parts[numParts]->longMatchAction( pd->curActionOrd++, lmi ); - - /* Look for tokens that accept the zero length-word. The first one found - * will be used as the default token. 
*/ - if ( defaultTokenDef == 0 && parts[numParts]->startState->isFinState() ) - defaultTokenDef = lmi; - - numParts += 1; - } - } - FsmGraph *retFsm = parts[0]; - - if ( defaultTokenDef != 0 && defaultTokenDef->tdLangEl->ignore ) - error() << "ignore token cannot be a scanner's zero-length token" << endp; - - /* The region is empty. Return the empty set. */ - if ( numParts == 0 ) { - retFsm = new FsmGraph(); - retFsm->lambdaFsm(); - } - else { - /* Before we union the patterns we need to deal with leaving actions. They - * are transfered to error transitions out of the final states (like local - * error actions) and to eof actions. In the scanner we need to forbid - * on_last for any final state that has an leaving action. */ - for ( int i = 0; i < numParts; i++ ) - transferScannerLeavingActions( parts[i] ); - - /* Union machines one and up with machine zero. */ - FsmGraph *retFsm = parts[0]; - for ( int i = 1; i < numParts; i++ ) { - retFsm->unionOp( parts[i] ); - afterOpMinimize( retFsm ); - } - - runLongestMatch( pd, retFsm ); - delete[] parts; - } - - return retFsm; -} - -/* Construct with a location and the first expression. */ -Join::Join( Expression *expr ) -: - context(0), - mark(0) -{ - exprList.append( expr ); -} - -/* Walk an expression node. */ -FsmGraph *Join::walk( Compiler *pd ) -{ - assert( exprList.length() == 1 ); - - FsmGraph *retFsm = exprList.head->walk( pd ); - - /* Maybe the the context. */ - if ( context != 0 ) { - retFsm->leaveFsmAction( pd->curActionOrd++, mark ); - FsmGraph *contextGraph = context->walk( pd ); - retFsm->concatOp( contextGraph ); - } - - return retFsm; -} - -/* Clean up after an expression node. */ -Expression::~Expression() -{ - switch ( type ) { - case OrType: case IntersectType: case SubtractType: - case StrongSubtractType: - delete expression; - delete term; - break; - case TermType: - delete term; - break; - case BuiltinType: - break; - } -} - -/* Evaluate a single expression node. 
*/ -FsmGraph *Expression::walk( Compiler *pd, bool lastInSeq ) -{ - FsmGraph *rtnVal = 0; - switch ( type ) { - case OrType: { - /* Evaluate the expression. */ - rtnVal = expression->walk( pd, false ); - /* Evaluate the term. */ - FsmGraph *rhs = term->walk( pd ); - /* Perform union. */ - rtnVal->unionOp( rhs ); - afterOpMinimize( rtnVal, lastInSeq ); - break; - } - case IntersectType: { - /* Evaluate the expression. */ - rtnVal = expression->walk( pd ); - /* Evaluate the term. */ - FsmGraph *rhs = term->walk( pd ); - /* Perform intersection. */ - rtnVal->intersectOp( rhs ); - afterOpMinimize( rtnVal, lastInSeq ); - break; - } - case SubtractType: { - /* Evaluate the expression. */ - rtnVal = expression->walk( pd ); - /* Evaluate the term. */ - FsmGraph *rhs = term->walk( pd ); - /* Perform subtraction. */ - rtnVal->subtractOp( rhs ); - afterOpMinimize( rtnVal, lastInSeq ); - break; - } - case StrongSubtractType: { - /* Evaluate the expression. */ - rtnVal = expression->walk( pd ); - - /* Evaluate the term and pad it with any* machines. */ - FsmGraph *rhs = dotStarFsm( pd ); - FsmGraph *termFsm = term->walk( pd ); - FsmGraph *trailAnyStar = dotStarFsm( pd ); - rhs->concatOp( termFsm ); - rhs->concatOp( trailAnyStar ); - - /* Perform subtraction. */ - rtnVal->subtractOp( rhs ); - afterOpMinimize( rtnVal, lastInSeq ); - break; - } - case TermType: { - /* Return result of the term. */ - rtnVal = term->walk( pd ); - break; - } - case BuiltinType: { - /* Duplicate the builtin. */ - rtnVal = makeBuiltin( builtin, pd ); - break; - } - } - - return rtnVal; -} - -/* Clean up after a term node. */ -Term::~Term() -{ - switch ( type ) { - case ConcatType: - case RightStartType: - case RightFinishType: - case LeftType: - delete term; - delete factorWithAug; - break; - case FactorWithAugType: - delete factorWithAug; - break; - } -} - -/* Evaluate a term node. 
*/ -FsmGraph *Term::walk( Compiler *pd, bool lastInSeq ) -{ - FsmGraph *rtnVal = 0; - switch ( type ) { - case ConcatType: { - /* Evaluate the Term. */ - rtnVal = term->walk( pd, false ); - /* Evaluate the FactorWithRep. */ - FsmGraph *rhs = factorWithAug->walk( pd ); - /* Perform concatenation. */ - rtnVal->concatOp( rhs ); - afterOpMinimize( rtnVal, lastInSeq ); - break; - } - case RightStartType: { - /* Evaluate the Term. */ - rtnVal = term->walk( pd ); - - /* Evaluate the FactorWithRep. */ - FsmGraph *rhs = factorWithAug->walk( pd ); - - /* Set up the priority descriptors. The left machine gets the - * lower priority where as the right get the higher start priority. */ - priorDescs[0].key = pd->nextPriorKey++; - priorDescs[0].priority = 0; - rtnVal->allTransPrior( pd->curPriorOrd++, &priorDescs[0] ); - - /* The start transitions right machine get the higher priority. - * Use the same unique key. */ - priorDescs[1].key = priorDescs[0].key; - priorDescs[1].priority = 1; - rhs->startFsmPrior( pd->curPriorOrd++, &priorDescs[1] ); - - /* Perform concatenation. */ - rtnVal->concatOp( rhs ); - afterOpMinimize( rtnVal, lastInSeq ); - break; - } - case RightFinishType: { - /* Evaluate the Term. */ - rtnVal = term->walk( pd ); - - /* Evaluate the FactorWithRep. */ - FsmGraph *rhs = factorWithAug->walk( pd ); - - /* Set up the priority descriptors. The left machine gets the - * lower priority where as the finishing transitions to the right - * get the higher priority. */ - priorDescs[0].key = pd->nextPriorKey++; - priorDescs[0].priority = 0; - rtnVal->allTransPrior( pd->curPriorOrd++, &priorDescs[0] ); - - /* The finishing transitions of the right machine get the higher - * priority. Use the same unique key. */ - priorDescs[1].key = priorDescs[0].key; - priorDescs[1].priority = 1; - rhs->finishFsmPrior( pd->curPriorOrd++, &priorDescs[1] ); - - /* Perform concatenation. 
*/ - rtnVal->concatOp( rhs ); - afterOpMinimize( rtnVal, lastInSeq ); - break; - } - case LeftType: { - /* Evaluate the Term. */ - rtnVal = term->walk( pd ); - - /* Evaluate the FactorWithRep. */ - FsmGraph *rhs = factorWithAug->walk( pd ); - - /* Set up the priority descriptors. The left machine gets the - * higher priority. */ - priorDescs[0].key = pd->nextPriorKey++; - priorDescs[0].priority = 1; - rtnVal->allTransPrior( pd->curPriorOrd++, &priorDescs[0] ); - - /* The right machine gets the lower priority. Since - * startTransPrior might unnecessarily increase the number of - * states during the state machine construction process (due to - * isolation), we use allTransPrior instead, which has the same - * effect. */ - priorDescs[1].key = priorDescs[0].key; - priorDescs[1].priority = 0; - rhs->allTransPrior( pd->curPriorOrd++, &priorDescs[1] ); - - /* Perform concatenation. */ - rtnVal->concatOp( rhs ); - afterOpMinimize( rtnVal, lastInSeq ); - break; - } - case FactorWithAugType: { - rtnVal = factorWithAug->walk( pd ); - break; - } - } - return rtnVal; -} - -/* Clean up after a factor with augmentation node. */ -FactorWithAug::~FactorWithAug() -{ - delete factorWithRep; - - /* Walk the vector of parser actions, deleting function names. */ - - /* Clean up priority descriptors. */ - if ( priorDescs != 0 ) - delete[] priorDescs; -} - -void FactorWithAug::assignActions( Compiler *pd, FsmGraph *graph, int *actionOrd ) -{ - /* Assign actions. */ - for ( int i = 0; i < actions.length(); i++ ) { - switch ( actions[i].type ) { - /* Transition actions. */ - case at_start: - graph->startFsmAction( actionOrd[i], actions[i].action ); - afterOpMinimize( graph ); - break; - case at_all: - graph->allTransAction( actionOrd[i], actions[i].action ); - break; - case at_finish: - graph->finishFsmAction( actionOrd[i], actions[i].action ); - break; - case at_leave: - graph->leaveFsmAction( actionOrd[i], actions[i].action ); - break; - - /* Global error actions. 
*/ - case at_start_gbl_error: - graph->startErrorAction( actionOrd[i], actions[i].action, 0 ); - afterOpMinimize( graph ); - break; - case at_all_gbl_error: - graph->allErrorAction( actionOrd[i], actions[i].action, 0 ); - break; - case at_final_gbl_error: - graph->finalErrorAction( actionOrd[i], actions[i].action, 0 ); - break; - case at_not_start_gbl_error: - graph->notStartErrorAction( actionOrd[i], actions[i].action, 0 ); - break; - case at_not_final_gbl_error: - graph->notFinalErrorAction( actionOrd[i], actions[i].action, 0 ); - break; - case at_middle_gbl_error: - graph->middleErrorAction( actionOrd[i], actions[i].action, 0 ); - break; - - /* Local error actions. */ - case at_start_local_error: - graph->startErrorAction( actionOrd[i], actions[i].action, - actions[i].localErrKey ); - afterOpMinimize( graph ); - break; - case at_all_local_error: - graph->allErrorAction( actionOrd[i], actions[i].action, - actions[i].localErrKey ); - break; - case at_final_local_error: - graph->finalErrorAction( actionOrd[i], actions[i].action, - actions[i].localErrKey ); - break; - case at_not_start_local_error: - graph->notStartErrorAction( actionOrd[i], actions[i].action, - actions[i].localErrKey ); - break; - case at_not_final_local_error: - graph->notFinalErrorAction( actionOrd[i], actions[i].action, - actions[i].localErrKey ); - break; - case at_middle_local_error: - graph->middleErrorAction( actionOrd[i], actions[i].action, - actions[i].localErrKey ); - break; - - /* EOF actions. 
*/ - case at_start_eof: - graph->startEOFAction( actionOrd[i], actions[i].action ); - afterOpMinimize( graph ); - break; - case at_all_eof: - graph->allEOFAction( actionOrd[i], actions[i].action ); - break; - case at_final_eof: - graph->finalEOFAction( actionOrd[i], actions[i].action ); - break; - case at_not_start_eof: - graph->notStartEOFAction( actionOrd[i], actions[i].action ); - break; - case at_not_final_eof: - graph->notFinalEOFAction( actionOrd[i], actions[i].action ); - break; - case at_middle_eof: - graph->middleEOFAction( actionOrd[i], actions[i].action ); - break; - - /* To State Actions. */ - case at_start_to_state: - graph->startToStateAction( actionOrd[i], actions[i].action ); - afterOpMinimize( graph ); - break; - case at_all_to_state: - graph->allToStateAction( actionOrd[i], actions[i].action ); - break; - case at_final_to_state: - graph->finalToStateAction( actionOrd[i], actions[i].action ); - break; - case at_not_start_to_state: - graph->notStartToStateAction( actionOrd[i], actions[i].action ); - break; - case at_not_final_to_state: - graph->notFinalToStateAction( actionOrd[i], actions[i].action ); - break; - case at_middle_to_state: - graph->middleToStateAction( actionOrd[i], actions[i].action ); - break; - - /* From State Actions. */ - case at_start_from_state: - graph->startFromStateAction( actionOrd[i], actions[i].action ); - afterOpMinimize( graph ); - break; - case at_all_from_state: - graph->allFromStateAction( actionOrd[i], actions[i].action ); - break; - case at_final_from_state: - graph->finalFromStateAction( actionOrd[i], actions[i].action ); - break; - case at_not_start_from_state: - graph->notStartFromStateAction( actionOrd[i], actions[i].action ); - break; - case at_not_final_from_state: - graph->notFinalFromStateAction( actionOrd[i], actions[i].action ); - break; - case at_middle_from_state: - graph->middleFromStateAction( actionOrd[i], actions[i].action ); - break; - - /* Remaining cases, prevented by the parser. 
*/ - default: - assert( false ); - break; - } - } -} - -void FactorWithAug::assignPriorities( FsmGraph *graph, int *priorOrd ) -{ - /* Assign priorities. */ - for ( int i = 0; i < priorityAugs.length(); i++ ) { - switch ( priorityAugs[i].type ) { - case at_start: - graph->startFsmPrior( priorOrd[i], &priorDescs[i]); - /* Start fsm priorities are a special case that may require - * minimization afterwards. */ - afterOpMinimize( graph ); - break; - case at_all: - graph->allTransPrior( priorOrd[i], &priorDescs[i] ); - break; - case at_finish: - graph->finishFsmPrior( priorOrd[i], &priorDescs[i] ); - break; - case at_leave: - graph->leaveFsmPrior( priorOrd[i], &priorDescs[i] ); - break; - - default: - /* Parser Prevents this case. */ - break; - } - } -} - -void FactorWithAug::assignConditions( FsmGraph *graph ) -{ - for ( int i = 0; i < conditions.length(); i++ ) { - switch ( conditions[i].type ) { - /* Transition actions. */ - case at_start: - graph->startFsmCondition( conditions[i].action ); - afterOpMinimize( graph ); - break; - case at_all: - graph->allTransCondition( conditions[i].action ); - break; - case at_leave: - graph->leaveFsmCondition( conditions[i].action ); - break; - default: - break; - } - } -} - - -/* Evaluate a factor with augmentation node. */ -FsmGraph *FactorWithAug::walk( Compiler *pd ) -{ - /* Make the array of function orderings. */ - int *actionOrd = 0; - if ( actions.length() > 0 ) - actionOrd = new int[actions.length()]; - - /* First walk the list of actions, assigning order to all starting - * actions. */ - for ( int i = 0; i < actions.length(); i++ ) { - if ( actions[i].type == at_start || - actions[i].type == at_start_gbl_error || - actions[i].type == at_start_local_error || - actions[i].type == at_start_to_state || - actions[i].type == at_start_from_state || - actions[i].type == at_start_eof ) - actionOrd[i] = pd->curActionOrd++; - } - - /* Evaluate the factor with repetition. 
*/ - FsmGraph *rtnVal = factorWithRep->walk( pd ); - - /* Compute the remaining action orderings. */ - for ( int i = 0; i < actions.length(); i++ ) { - if ( actions[i].type != at_start && - actions[i].type != at_start_gbl_error && - actions[i].type != at_start_local_error && - actions[i].type != at_start_to_state && - actions[i].type != at_start_from_state && - actions[i].type != at_start_eof ) - actionOrd[i] = pd->curActionOrd++; - } - - assignConditions( rtnVal ); - - assignActions( pd, rtnVal , actionOrd ); - - /* Make the array of priority orderings. Orderings are local to this walk - * of the factor with augmentation. */ - int *priorOrd = 0; - if ( priorityAugs.length() > 0 ) - priorOrd = new int[priorityAugs.length()]; - - /* Walk all priorities, assigning the priority ordering. */ - for ( int i = 0; i < priorityAugs.length(); i++ ) - priorOrd[i] = pd->curPriorOrd++; - - /* If the priority descriptors have not been made, make them now. Make - * priority descriptors for each priority asignment that will be passed to - * the fsm. Used to keep track of the key, value and used bit. */ - if ( priorDescs == 0 && priorityAugs.length() > 0 ) { - priorDescs = new PriorDesc[priorityAugs.length()]; - for ( int i = 0; i < priorityAugs.length(); i++ ) { - /* Init the prior descriptor for the priority setting. */ - priorDescs[i].key = priorityAugs[i].priorKey; - priorDescs[i].priority = priorityAugs[i].priorValue; - } - } - - /* Assign priorities into the machine. */ - assignPriorities( rtnVal, priorOrd ); - - /* Assign epsilon transitions. */ - for ( int e = 0; e < epsilonLinks.length(); e++ ) { - /* Get the name, which may not exist. If it doesn't then silently - * ignore it because an error has already been reported. */ - NameInst *epTarg = pd->epsilonResolvedLinks[pd->nextEpsilonResolvedLink++]; - if ( epTarg != 0 ) { - /* Make the epsilon transitions. */ - rtnVal->epsilonTrans( epTarg->id ); - - /* Note that we have made a link to the name. 
*/ - pd->localNameScope->referencedNames.append( epTarg ); - } - } - - if ( priorOrd != 0 ) - delete[] priorOrd; - if ( actionOrd != 0 ) - delete[] actionOrd; - return rtnVal; -} - - -/* Clean up after a factor with repetition node. */ -FactorWithRep::~FactorWithRep() -{ - switch ( type ) { - case StarType: case StarStarType: case OptionalType: case PlusType: - case ExactType: case MaxType: case MinType: case RangeType: - delete factorWithRep; - break; - case FactorWithNegType: - delete factorWithNeg; - break; - } -} - -/* Evaluate a factor with repetition node. */ -FsmGraph *FactorWithRep::walk( Compiler *pd ) -{ - FsmGraph *retFsm = 0; - - switch ( type ) { - case StarType: { - /* Evaluate the FactorWithRep. */ - retFsm = factorWithRep->walk( pd ); - if ( retFsm->startState->isFinState() ) { - warning(loc) << "applying kleene star to a machine that " - "accepts zero length word" << endl; - } - - /* Shift over the start action orders then do the kleene star. */ - pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd ); - retFsm->starOp( ); - afterOpMinimize( retFsm ); - break; - } - case StarStarType: { - /* Evaluate the FactorWithRep. */ - retFsm = factorWithRep->walk( pd ); - if ( retFsm->startState->isFinState() ) { - warning(loc) << "applying kleene star to a machine that " - "accepts zero length word" << endl; - } - - /* Set up the prior descs. All gets priority one, whereas leaving gets - * priority zero. Make a unique key so that these priorities don't - * interfere with any priorities set by the user. */ - priorDescs[0].key = pd->nextPriorKey++; - priorDescs[0].priority = 1; - retFsm->allTransPrior( pd->curPriorOrd++, &priorDescs[0] ); - - /* Leaveing gets priority 0. Use same unique key. */ - priorDescs[1].key = priorDescs[0].key; - priorDescs[1].priority = 0; - retFsm->leaveFsmPrior( pd->curPriorOrd++, &priorDescs[1] ); - - /* Shift over the start action orders then do the kleene star. 
*/ - pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd ); - retFsm->starOp( ); - afterOpMinimize( retFsm ); - break; - } - case OptionalType: { - /* Make the null fsm. */ - FsmGraph *nu = new FsmGraph(); - nu->lambdaFsm( ); - - /* Evaluate the FactorWithRep. */ - retFsm = factorWithRep->walk( pd ); - - /* Perform the question operator. */ - retFsm->unionOp( nu ); - afterOpMinimize( retFsm ); - break; - } - case PlusType: { - /* Evaluate the FactorWithRep. */ - retFsm = factorWithRep->walk( pd ); - if ( retFsm->startState->isFinState() ) { - warning(loc) << "applying plus operator to a machine that " - "accpets zero length word" << endl; - } - - /* Need a duplicated for the star end. */ - FsmGraph *dup = new FsmGraph( *retFsm ); - - /* The start func orders need to be shifted before doing the star. */ - pd->curActionOrd += dup->shiftStartActionOrder( pd->curActionOrd ); - - /* Star the duplicate. */ - dup->starOp( ); - afterOpMinimize( dup ); - - retFsm->concatOp( dup ); - afterOpMinimize( retFsm ); - break; - } - case ExactType: { - /* Get an int from the repetition amount. */ - if ( lowerRep == 0 ) { - /* No copies. Don't need to evaluate the factorWithRep. - * This Defeats the purpose so give a warning. */ - warning(loc) << "exactly zero repetitions results " - "in the null machine" << endl; - - retFsm = new FsmGraph(); - retFsm->lambdaFsm(); - } - else { - /* Evaluate the first FactorWithRep. */ - retFsm = factorWithRep->walk( pd ); - if ( retFsm->startState->isFinState() ) { - warning(loc) << "applying repetition to a machine that " - "accepts zero length word" << endl; - } - - /* The start func orders need to be shifted before doing the - * repetition. */ - pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd ); - - /* Do the repetition on the machine. Already guarded against n == 0 */ - retFsm->repeatOp( lowerRep ); - afterOpMinimize( retFsm ); - } - break; - } - case MaxType: { - /* Get an int from the repetition amount. 
*/ - if ( upperRep == 0 ) { - /* No copies. Don't need to evaluate the factorWithRep. - * This Defeats the purpose so give a warning. */ - warning(loc) << "max zero repetitions results " - "in the null machine" << endl; - - retFsm = new FsmGraph(); - retFsm->lambdaFsm(); - } - else { - /* Evaluate the first FactorWithRep. */ - retFsm = factorWithRep->walk( pd ); - if ( retFsm->startState->isFinState() ) { - warning(loc) << "applying max repetition to a machine that " - "accepts zero length word" << endl; - } - - /* The start func orders need to be shifted before doing the - * repetition. */ - pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd ); - - /* Do the repetition on the machine. Already guarded against n == 0 */ - retFsm->optionalRepeatOp( upperRep ); - afterOpMinimize( retFsm ); - } - break; - } - case MinType: { - /* Evaluate the repeated machine. */ - retFsm = factorWithRep->walk( pd ); - if ( retFsm->startState->isFinState() ) { - warning(loc) << "applying min repetition to a machine that " - "accepts zero length word" << endl; - } - - /* The start func orders need to be shifted before doing the repetition - * and the kleene star. */ - pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd ); - - if ( lowerRep == 0 ) { - /* Acts just like a star op on the machine to return. */ - retFsm->starOp( ); - afterOpMinimize( retFsm ); - } - else { - /* Take a duplicate for the plus. */ - FsmGraph *dup = new FsmGraph( *retFsm ); - - /* Do repetition on the first half. */ - retFsm->repeatOp( lowerRep ); - afterOpMinimize( retFsm ); - - /* Star the duplicate. */ - dup->starOp( ); - afterOpMinimize( dup ); - - /* Tak on the kleene star. */ - retFsm->concatOp( dup ); - afterOpMinimize( retFsm ); - } - break; - } - case RangeType: { - /* Check for bogus range. */ - if ( upperRep - lowerRep < 0 ) { - error(loc) << "invalid range repetition" << endl; - - /* Return null machine as recovery. 
*/ - retFsm = new FsmGraph(); - retFsm->lambdaFsm(); - } - else if ( lowerRep == 0 && upperRep == 0 ) { - /* No copies. Don't need to evaluate the factorWithRep. This - * defeats the purpose so give a warning. */ - warning(loc) << "zero to zero repetitions results " - "in the null machine" << endl; - - retFsm = new FsmGraph(); - retFsm->lambdaFsm(); - } - else { - /* Now need to evaluate the repeated machine. */ - retFsm = factorWithRep->walk( pd ); - if ( retFsm->startState->isFinState() ) { - warning(loc) << "applying range repetition to a machine that " - "accepts zero length word" << endl; - } - - /* The start func orders need to be shifted before doing both kinds - * of repetition. */ - pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd ); - - if ( lowerRep == 0 ) { - /* Just doing max repetition. Already guarded against n == 0. */ - retFsm->optionalRepeatOp( upperRep ); - afterOpMinimize( retFsm ); - } - else if ( lowerRep == upperRep ) { - /* Just doing exact repetition. Already guarded against n == 0. */ - retFsm->repeatOp( lowerRep ); - afterOpMinimize( retFsm ); - } - else { - /* This is the case that 0 < lowerRep < upperRep. Take a - * duplicate for the optional repeat. */ - FsmGraph *dup = new FsmGraph( *retFsm ); - - /* Do repetition on the first half. */ - retFsm->repeatOp( lowerRep ); - afterOpMinimize( retFsm ); - - /* Do optional repetition on the second half. */ - dup->optionalRepeatOp( upperRep - lowerRep ); - afterOpMinimize( dup ); - - /* Tak on the duplicate machine. */ - retFsm->concatOp( dup ); - afterOpMinimize( retFsm ); - } - } - break; - } - case FactorWithNegType: { - /* Evaluate the Factor. Pass it up. */ - retFsm = factorWithNeg->walk( pd ); - break; - }} - return retFsm; -} - - -/* Clean up after a factor with negation node. 
*/ -FactorWithNeg::~FactorWithNeg() -{ - switch ( type ) { - case NegateType: - case CharNegateType: - delete factorWithNeg; - break; - case FactorType: - delete factor; - break; - } -} - -/* Evaluate a factor with negation node. */ -FsmGraph *FactorWithNeg::walk( Compiler *pd ) -{ - FsmGraph *retFsm = 0; - - switch ( type ) { - case NegateType: { - /* Evaluate the factorWithNeg. */ - FsmGraph *toNegate = factorWithNeg->walk( pd ); - - /* Negation is subtract from dot-star. */ - retFsm = dotStarFsm( pd ); - retFsm->subtractOp( toNegate ); - afterOpMinimize( retFsm ); - break; - } - case CharNegateType: { - /* Evaluate the factorWithNeg. */ - FsmGraph *toNegate = factorWithNeg->walk( pd ); - - /* CharNegation is subtract from dot. */ - retFsm = dotFsm( pd ); - retFsm->subtractOp( toNegate ); - afterOpMinimize( retFsm ); - break; - } - case FactorType: { - /* Evaluate the Factor. Pass it up. */ - retFsm = factor->walk( pd ); - break; - }} - return retFsm; -} - -/* Clean up after a factor node. */ -Factor::~Factor() -{ - switch ( type ) { - case LiteralType: - delete literal; - break; - case RangeType: - delete range; - break; - case OrExprType: - delete reItem; - break; - case RegExprType: - delete regExp; - break; - case ReferenceType: - break; - case ParenType: - delete join; - break; - } -} - -/* Evaluate a factor node. */ -FsmGraph *Factor::walk( Compiler *pd ) -{ - FsmGraph *rtnVal = 0; - switch ( type ) { - case LiteralType: - rtnVal = literal->walk( pd ); - break; - case RangeType: - rtnVal = range->walk( pd ); - break; - case OrExprType: - rtnVal = reItem->walk( pd, 0 ); - break; - case RegExprType: - rtnVal = regExp->walk( pd, 0 ); - break; - case ReferenceType: - rtnVal = varDef->walk( pd ); - break; - case ParenType: - rtnVal = join->walk( pd ); - break; - } - - return rtnVal; -} - - -/* Clean up a range object. Must delete the two literals. 
*/ -Range::~Range() -{ - delete lowerLit; - delete upperLit; -} - -bool Range::verifyRangeFsm( FsmGraph *rangeEnd ) -{ - /* Must have two states. */ - if ( rangeEnd->stateList.length() != 2 ) - return false; - /* The start state cannot be final. */ - if ( rangeEnd->startState->isFinState() ) - return false; - /* There should be only one final state. */ - if ( rangeEnd->finStateSet.length() != 1 ) - return false; - /* The final state cannot have any transitions out. */ - if ( rangeEnd->finStateSet[0]->outList.length() != 0 ) - return false; - /* The start state should have only one transition out. */ - if ( rangeEnd->startState->outList.length() != 1 ) - return false; - /* The singe transition out of the start state should not be a range. */ - FsmTrans *startTrans = rangeEnd->startState->outList.head; - if ( startTrans->lowKey != startTrans->highKey ) - return false; - return true; -} - -/* Evaluate a range. Gets the lower an upper key and makes an fsm range. */ -FsmGraph *Range::walk( Compiler *pd ) -{ - /* Construct and verify the suitability of the lower end of the range. */ - FsmGraph *lowerFsm = lowerLit->walk( pd ); - if ( !verifyRangeFsm( lowerFsm ) ) { - error(lowerLit->loc) << - "bad range lower end, must be a single character" << endl; - } - - /* Construct and verify the upper end. */ - FsmGraph *upperFsm = upperLit->walk( pd ); - if ( !verifyRangeFsm( upperFsm ) ) { - error(upperLit->loc) << - "bad range upper end, must be a single character" << endl; - } - - /* Grab the keys from the machines, then delete them. */ - Key lowKey = lowerFsm->startState->outList.head->lowKey; - Key highKey = upperFsm->startState->outList.head->lowKey; - delete lowerFsm; - delete upperFsm; - - /* Validate the range. */ - if ( lowKey > highKey ) { - /* Recover by setting upper to lower; */ - error(lowerLit->loc) << "lower end of range is greater then upper end" << endl; - highKey = lowKey; - } - - /* Return the range now that it is validated. 
*/ - FsmGraph *retFsm = new FsmGraph(); - retFsm->rangeFsm( lowKey, highKey ); - return retFsm; -} - -/* Evaluate a literal object. */ -FsmGraph *Literal::walk( Compiler *pd ) -{ - /* FsmGraph to return, is the alphabet signed. */ - FsmGraph *rtnVal = 0; - - switch ( type ) { - case Number: { - /* Make the fsm key in int format. */ - Key fsmKey = makeFsmKeyNum( literal.data, loc, pd ); - /* Make the new machine. */ - rtnVal = new FsmGraph(); - rtnVal->concatFsm( fsmKey ); - break; - } - case LitString: { - /* Make the array of keys in int format. */ - String interp; - bool caseInsensitive; - prepareLitString( interp, caseInsensitive, literal, loc ); - Key *arr = new Key[interp.length()]; - makeFsmKeyArray( arr, interp.data, interp.length(), pd ); - - /* Make the new machine. */ - rtnVal = new FsmGraph(); - if ( caseInsensitive ) - rtnVal->concatFsmCI( arr, interp.length() ); - else - rtnVal->concatFsm( arr, interp.length() ); - delete[] arr; - break; - }} - return rtnVal; -} - -/* Clean up after a regular expression object. */ -RegExpr::~RegExpr() -{ - switch ( type ) { - case RecurseItem: - delete regExp; - delete item; - break; - case Empty: - break; - } -} - -/* Evaluate a regular expression object. */ -FsmGraph *RegExpr::walk( Compiler *pd, RegExpr *rootRegex ) -{ - /* This is the root regex, pass down a pointer to this. */ - if ( rootRegex == 0 ) - rootRegex = this; - - FsmGraph *rtnVal = 0; - switch ( type ) { - case RecurseItem: { - /* Walk both items. */ - FsmGraph *fsm1 = regExp->walk( pd, rootRegex ); - FsmGraph *fsm2 = item->walk( pd, rootRegex ); - if ( fsm1 == 0 ) - rtnVal = fsm2; - else { - fsm1->concatOp( fsm2 ); - rtnVal = fsm1; - } - break; - } - case Empty: { - /* FIXME: Return something here. */ - rtnVal = 0; - break; - } - } - return rtnVal; -} - -/* Clean up after an item in a regular expression. 
*/ -ReItem::~ReItem() -{ - switch ( type ) { - case Data: - case Dot: - break; - case OrBlock: - case NegOrBlock: - delete orBlock; - break; - } -} - -/* Evaluate a regular expression object. */ -FsmGraph *ReItem::walk( Compiler *pd, RegExpr *rootRegex ) -{ - /* The fsm to return, is the alphabet signed? */ - FsmGraph *rtnVal = 0; - - switch ( type ) { - case Data: { - /* Move the data into an integer array and make a concat fsm. */ - Key *arr = new Key[data.length()]; - makeFsmKeyArray( arr, data.data, data.length(), pd ); - - /* Make the concat fsm. */ - rtnVal = new FsmGraph(); - if ( rootRegex != 0 && rootRegex->caseInsensitive ) - rtnVal->concatFsmCI( arr, data.length() ); - else - rtnVal->concatFsm( arr, data.length() ); - delete[] arr; - break; - } - case Dot: { - /* Make the dot fsm. */ - rtnVal = dotFsm( pd ); - break; - } - case OrBlock: { - /* Get the or block and minmize it. */ - rtnVal = orBlock->walk( pd, rootRegex ); - rtnVal->minimizePartition2(); - break; - } - case NegOrBlock: { - /* Get the or block and minimize it. */ - FsmGraph *fsm = orBlock->walk( pd, rootRegex ); - fsm->minimizePartition2(); - - /* Make a dot fsm and subtract from it. */ - rtnVal = dotFsm( pd ); - rtnVal->subtractOp( fsm ); - rtnVal->minimizePartition2(); - break; - } - } - - /* If the item is followed by a star, then apply the star op. */ - if ( star ) { - if ( rtnVal->startState->isFinState() ) { - warning(loc) << "applying kleene star to a machine that " - "accpets zero length word" << endl; - } - - rtnVal->starOp(); - rtnVal->minimizePartition2(); - } - return rtnVal; -} - -/* Clean up after an or block of a regular expression. */ -ReOrBlock::~ReOrBlock() -{ - switch ( type ) { - case RecurseItem: - delete orBlock; - delete item; - break; - case Empty: - break; - } -} - - -/* Evaluate an or block of a regular expression. 
*/ -FsmGraph *ReOrBlock::walk( Compiler *pd, RegExpr *rootRegex ) -{ - FsmGraph *rtnVal = 0; - switch ( type ) { - case RecurseItem: { - /* Evaluate the two fsm. */ - FsmGraph *fsm1 = orBlock->walk( pd, rootRegex ); - FsmGraph *fsm2 = item->walk( pd, rootRegex ); - if ( fsm1 == 0 ) - rtnVal = fsm2; - else { - fsm1->unionOp( fsm2 ); - rtnVal = fsm1; - } - break; - } - case Empty: { - rtnVal = 0; - break; - } - } - return rtnVal;; -} - -/* Evaluate an or block item of a regular expression. */ -FsmGraph *ReOrItem::walk( Compiler *pd, RegExpr *rootRegex ) -{ - /* The return value, is the alphabet signed? */ - FsmGraph *rtnVal = 0; - switch ( type ) { - case Data: { - /* Make the or machine. */ - rtnVal = new FsmGraph(); - - /* Put the or data into an array of ints. Note that we find unique - * keys. Duplicates are silently ignored. The alternative would be to - * issue warning or an error but since we can't with [a0-9a] or 'a' | - * 'a' don't bother here. */ - KeySet keySet; - makeFsmUniqueKeyArray( keySet, data.data, data.length(), - rootRegex != 0 ? rootRegex->caseInsensitive : false, pd ); - - /* Run the or operator. */ - rtnVal->orFsm( keySet.data, keySet.length() ); - break; - } - case Range: { - /* Make the upper and lower keys. */ - Key lowKey = makeFsmKeyChar( lower, pd ); - Key highKey = makeFsmKeyChar( upper, pd ); - - /* Validate the range. */ - if ( lowKey > highKey ) { - /* Recover by setting upper to lower; */ - error(loc) << "lower end of range is greater then upper end" << endl; - highKey = lowKey; - } - - /* Make the range machine. */ - rtnVal = new FsmGraph(); - rtnVal->rangeFsm( lowKey, highKey ); - - if ( rootRegex != 0 && rootRegex->caseInsensitive ) { - if ( lowKey <= 'Z' && 'A' <= highKey ) { - Key otherLow = lowKey < 'A' ? Key('A') : lowKey; - Key otherHigh = 'Z' < highKey ? 
Key('Z') : highKey; - - otherLow = 'a' + ( otherLow - 'A' ); - otherHigh = 'a' + ( otherHigh - 'A' ); - - FsmGraph *otherRange = new FsmGraph(); - otherRange->rangeFsm( otherLow, otherHigh ); - rtnVal->unionOp( otherRange ); - rtnVal->minimizePartition2(); - } - else if ( lowKey <= 'z' && 'a' <= highKey ) { - Key otherLow = lowKey < 'a' ? Key('a') : lowKey; - Key otherHigh = 'z' < highKey ? Key('z') : highKey; - - otherLow = 'A' + ( otherLow - 'a' ); - otherHigh = 'A' + ( otherHigh - 'a' ); - - FsmGraph *otherRange = new FsmGraph(); - otherRange->rangeFsm( otherLow, otherHigh ); - rtnVal->unionOp( otherRange ); - rtnVal->minimizePartition2(); - } - } - - break; - }} - return rtnVal; -} diff --git a/colm/parsetree.h b/colm/parsetree.h deleted file mode 100644 index c3a75df5..00000000 --- a/colm/parsetree.h +++ /dev/null @@ -1,2253 +0,0 @@ -/* - * Copyright 2006-2012 Adrian Thurston <thurston@complang.org> - */ - -/* This file is part of Colm. - * - * Colm is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * Colm is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with Colm; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#ifndef _PARSETREE_H -#define _PARSETREE_H - -#include <iostream> -#include <string.h> -#include "global.h" -#include "avlmap.h" -#include "bstmap.h" -#include "bstset.h" -#include "vector.h" -#include "dlist.h" -#include "dlistval.h" -#include "dlistmel.h" -#include "astring.h" -#include "bytecode.h" -#include "avlbasic.h" -#include "fsmrun.h" - -/* Operators that are represented with single symbol characters. */ -#define OP_DoubleEql 'e' -#define OP_NotEql 'q' -#define OP_LessEql 'l' -#define OP_GrtrEql 'g' -#define OP_LogicalAnd 'a' -#define OP_LogicalOr 'o' -#define OP_Deref 'd' - -#if SIZEOF_LONG != 4 && SIZEOF_LONG != 8 - #error "SIZEOF_LONG contained an unexpected value" -#endif - -struct NameInst; -struct FsmGraph; -struct RedFsm; -struct _FsmRun; -struct ObjectDef; -struct ElementOf; -struct UniqueType; -struct ObjField; -struct TransBlock; -struct CodeBlock; -struct PdaLiteral; -struct TypeAlias; -typedef struct _PdaRun PdaRun; - -/* - * Code Vector - */ -struct CodeVect : public Vector<Code> -{ - void appendHalf( Half half ) - { - /* not optimal. */ - append( half & 0xff ); - append( (half>>8) & 0xff ); - } - - void appendWord( Word word ) - { - /* not optimal. */ - append( word & 0xff ); - append( (word>>8) & 0xff ); - append( (word>>16) & 0xff ); - append( (word>>24) & 0xff ); - #if SIZEOF_LONG == 8 - append( (word>>32) & 0xff ); - append( (word>>40) & 0xff ); - append( (word>>48) & 0xff ); - append( (word>>56) & 0xff ); - #endif - } - - void setHalf( long pos, Half half ) - { - /* not optimal. */ - data[pos] = half & 0xff; - data[pos+1] = (half>>8) & 0xff; - } - - void insertHalf( long pos, Half half ) - { - /* not optimal. 
*/ - insert( pos, half & 0xff ); - insert( pos+1, (half>>8) & 0xff ); - } - - void insertWord( long pos, Word word ) - { - /* not at all optimal. */ - insert( pos, word & 0xff ); - insert( pos+1, (word>>8) & 0xff ); - insert( pos+2, (word>>16) & 0xff ); - insert( pos+3, (word>>24) & 0xff ); - #if SIZEOF_LONG == 8 - insert( pos+4, (word>>32) & 0xff ); - insert( pos+5, (word>>40) & 0xff ); - insert( pos+6, (word>>48) & 0xff ); - insert( pos+7, (word>>56) & 0xff ); - #endif - } - - void insertTree( long pos, Tree *tree ) - { insertWord( pos, (Word) tree ); } -}; - - - -/* Types of builtin machines. */ -enum BuiltinMachine -{ - BT_Any, - BT_Ascii, - BT_Extend, - BT_Alpha, - BT_Digit, - BT_Alnum, - BT_Lower, - BT_Upper, - BT_Cntrl, - BT_Graph, - BT_Print, - BT_Punct, - BT_Space, - BT_Xdigit, - BT_Lambda, - BT_Empty -}; - -typedef BstSet<char> CharSet; -typedef Vector<unsigned char> UnsignedCharVect; - - -struct Compiler; -struct TypeRef; - -/* Leaf type. */ -struct Literal; - -/* Tree nodes. */ - -struct Term; -struct FactorWithAug; -struct FactorWithRep; -struct FactorWithNeg; -struct Factor; -struct Expression; -struct Join; -struct JoinOrLm; -struct RegionJoinOrLm; -struct TokenRegion; -struct Namespace; -struct Context; -struct TokenDef; -struct TokenDefListReg; -struct TokenDefListNs; -struct Range; -struct LangEl; - -/* Type of augmentation. Describes locations in the machine. */ -enum AugType -{ - /* Transition actions/priorities. */ - at_start, - at_all, - at_finish, - at_leave, - - /* Global error actions. */ - at_start_gbl_error, - at_all_gbl_error, - at_final_gbl_error, - at_not_start_gbl_error, - at_not_final_gbl_error, - at_middle_gbl_error, - - /* Local error actions. */ - at_start_local_error, - at_all_local_error, - at_final_local_error, - at_not_start_local_error, - at_not_final_local_error, - at_middle_local_error, - - /* To State Action embedding. 
*/ - at_start_to_state, - at_all_to_state, - at_final_to_state, - at_not_start_to_state, - at_not_final_to_state, - at_middle_to_state, - - /* From State Action embedding. */ - at_start_from_state, - at_all_from_state, - at_final_from_state, - at_not_start_from_state, - at_not_final_from_state, - at_middle_from_state, - - /* EOF Action embedding. */ - at_start_eof, - at_all_eof, - at_final_eof, - at_not_start_eof, - at_not_final_eof, - at_middle_eof -}; - -/* IMPORTANT: These must follow the same order as the state augs in AugType - * since we will be using this to compose AugType. */ -enum StateAugType -{ - sat_start = 0, - sat_all, - sat_final, - sat_not_start, - sat_not_final, - sat_middle -}; - -struct Action; -struct PriorDesc; -struct RegExpr; -struct ReItem; -struct ReOrBlock; -struct ReOrItem; -struct ExplicitMachine; -struct InlineItem; -struct InlineList; - -/* Reference to a named state. */ -typedef Vector<String> NameRef; -typedef Vector<NameRef*> NameRefList; -typedef Vector<NameInst*> NameTargList; - -/* Structure for storing location of epsilon transitons. */ -struct EpsilonLink -{ - EpsilonLink( const InputLoc &loc, NameRef &target ) - : loc(loc), target(target) { } - - InputLoc loc; - NameRef target; -}; - -struct Label -{ - Label( const InputLoc &loc, const String &data, ObjField *objField ) - : loc(loc), data(data), objField(objField) { } - - InputLoc loc; - String data; - ObjField *objField; -}; - -/* Structure represents an action assigned to some FactorWithAug node. The - * factor with aug will keep an array of these. 
*/ -struct ParserAction -{ - ParserAction( const InputLoc &loc, AugType type, int localErrKey, Action *action ) - : loc(loc), type(type), localErrKey(localErrKey), action(action) { } - - InputLoc loc; - AugType type; - int localErrKey; - Action *action; -}; - -struct Token -{ - String data; - InputLoc loc; -}; - -void prepareLitString( String &result, bool &caseInsensitive, - const String &srcString, const InputLoc &loc ); - -std::ostream &operator<<(std::ostream &out, const Token &token ); - -typedef AvlMap< String, TokenDef*, CmpStr > LiteralDict; -typedef AvlMapEl< String, TokenDef* > LiteralDictEl; - -/* Store the value and type of a priority augmentation. */ -struct PriorityAug -{ - PriorityAug( AugType type, int priorKey, int priorValue ) : - type(type), priorKey(priorKey), priorValue(priorValue) { } - - AugType type; - int priorKey; - int priorValue; -}; - -/* - * A Variable Definition - */ -struct VarDef -{ - VarDef( const String &name, Join *join ) - : name(name), join(join) { } - - /* Parse tree traversal. */ - FsmGraph *walk( Compiler *pd ); - void makeNameTree( const InputLoc &loc, Compiler *pd ); - - String name; - Join *join; -}; - -/* - * A Variable Definition - */ -struct RegionDef -{ - RegionDef( const String &name, TokenRegion *tokenRegion ) - : name(name), tokenRegion(tokenRegion) { } - - /* Parse tree traversal. 
*/ - FsmGraph *walk( Compiler *pd ); - void makeNameTree( const InputLoc &loc, Compiler *pd ); - - String name; - TokenRegion *tokenRegion; -}; - -typedef Vector<String> StringVect; -typedef CmpTable<String, CmpStr> CmpStrVect; - -struct NamespaceQual -{ - NamespaceQual( Namespace *declInNspace, TokenRegion *declInRegion ) : - cachedNspaceQual(0), declInNspace(declInNspace) {} - - Namespace *cachedNspaceQual; - Namespace *declInNspace; - - StringVect qualNames; - - Namespace *searchFrom( Namespace *from, StringVect::Iter &qualPart ); - Namespace *getQual( Compiler *pd ); -}; - -struct ReCapture -{ - ReCapture( Action *markEnter, Action *markLeave, ObjField *objField ) - : markEnter(markEnter), markLeave(markLeave), objField(objField) {} - - Action *markEnter; - Action *markLeave; - ObjField *objField; -}; - -typedef Vector<Context*> ContextVect; - -struct Context -{ - Context( InputLoc &loc, LangEl *lel ) - : - loc(loc), - lel(lel) - {} - - InputLoc loc; - LangEl *lel; - - ObjectDef *contextObjDef; -}; - -typedef Vector<ReCapture> ReCaptureVect; - -struct TokenDefPtr1 -{ - TokenDef *prev, *next; -}; - -struct TokenDefPtr2 -{ - TokenDef *prev, *next; -}; - -struct TokenDef -: - public TokenDefPtr1, - public TokenDefPtr2 -{ - TokenDef( const String &name, const String &literal, bool isLiteral, bool ignore, - Join *join, CodeBlock *codeBlock, InputLoc &semiLoc, - int longestMatchId, Namespace *nspace, TokenRegion *tokenRegion, - ReCaptureVect *pReCaptureVect, ObjectDef *objectDef, Context *contextIn ) - : - name(name), literal(literal), isLiteral(isLiteral), ignore(ignore), join(join), action(0), - codeBlock(codeBlock), tdLangEl(0), semiLoc(semiLoc), - longestMatchId(longestMatchId), inLmSelect(false), - nspace(nspace), tokenRegion(tokenRegion), objectDef(objectDef), - contextIn(contextIn), - dupOf(0), noPostIgnore(false), noPreIgnore(false), isZero(false) - { - if ( pReCaptureVect != 0 ) - reCaptureVect = *pReCaptureVect; - } - - InputLoc getLoc(); - - String name; - 
String literal; - bool isLiteral; - bool ignore; - Join *join; - Action *action; - CodeBlock *codeBlock; - LangEl *tdLangEl; - InputLoc semiLoc; - - Action *setActId; - Action *actOnLast; - Action *actOnNext; - Action *actLagBehind; - int longestMatchId; - bool inLmSelect; - Namespace *nspace; - TokenRegion *tokenRegion; - ReCaptureVect reCaptureVect; - ObjectDef *objectDef; - Context *contextIn; - - TokenDef *dupOf; - bool noPostIgnore; - bool noPreIgnore; - bool isZero; -}; - -struct LelDefList; - -struct NtDef -{ - NtDef( const String &name, Namespace *nspace, - LelDefList *defList, ObjectDef *objectDef, - Context *contextIn, bool reduceFirst ) - : - name(name), - nspace(nspace), - defList(defList), - objectDef(objectDef), - contextIn(contextIn), - reduceFirst(reduceFirst) - {} - - String name; - Namespace *nspace; - LelDefList *defList; - ObjectDef *objectDef; - Context *contextIn; - bool reduceFirst; - - NtDef *prev, *next; -}; - -struct NtDefList : DList<NtDef> {}; - -/* Declare a new type so that ptreetypes.h need not include dlist.h. */ -struct TokenDefListReg : DListMel<TokenDef, TokenDefPtr1> {}; -struct TokenDefListNs : DListMel<TokenDef, TokenDefPtr2> {}; - -struct ContextDef -{ - ContextDef( const String &name, Context *context, Namespace *nspace ) - : name(name), context(context), nspace(nspace) {} - - String name; - Context *context; - Namespace *nspace; - - ContextDef *prev, *next; -}; - -struct ContextDefList : DList<ContextDef> {}; - -struct TypeMapEl - : public AvlTreeEl<TypeMapEl> -{ - enum Type - { - TypeAliasType = 1, - LangElType - }; - - const String &getKey() { return key; } - - TypeMapEl( const String &key, TypeRef *typeRef ) - : type(TypeAliasType), key(key), value(0), typeRef(typeRef) {} - - TypeMapEl( const String &key, LangEl *value ) - : type(LangElType), key(key), value(value), typeRef(0) {} - - - Type type; - String key; - LangEl *value; - TypeRef *typeRef; - - TypeMapEl *prev, *next; -}; - -/* Symbol Map. 
*/ -typedef AvlTree< TypeMapEl, String, CmpStr > TypeMap; - -typedef Vector<TokenRegion*> RegionVect; - -struct TokenRegion -{ - /* Construct with a list of joins */ - TokenRegion( const InputLoc &loc, const String &name, int id, - TokenRegion *parentRegion ) : - loc(loc), name(name), id(id), - lmSwitchHandlesError(false), regionNameInst(0), - parentRegion(parentRegion), defaultTokenDef(0), - preEofBlock(0), - ignoreOnlyRegion(0), tokenOnlyRegion(0), ciRegion(0), - wasEmpty(false), - isFullRegion(false), - isIgnoreOnly(false), - isTokenOnly(false), - isCiOnly(false), - ciLel(0), - derivedFrom(0) - { } - - /* Tree traversal. */ - FsmGraph *walk( Compiler *pd ); - void makeNameTree( Compiler *pd ); - void runLongestMatch( Compiler *pd, FsmGraph *graph ); - void transferScannerLeavingActions( FsmGraph *graph ); - Action *newAction( Compiler *pd, const InputLoc &loc, const String &name, - InlineList *inlineList ); - void makeActions( Compiler *pd ); - void findName( Compiler *pd ); - void restart( FsmGraph *graph, FsmTrans *trans ); - - InputLoc loc; - TokenDefListReg tokenDefList; - String name; - int id; - - Action *lmActSelect; - bool lmSwitchHandlesError; - - /* This gets saved off during the name walk. Can save it off because token - * regions are referenced once only. */ - NameInst *regionNameInst; - - TokenRegion *parentRegion; - RegionVect childRegions; - - TokenDef *defaultTokenDef; - - CodeBlock *preEofBlock; - - /* Dupe of the region, containing only the ignore tokens. */ - TokenRegion *ignoreOnlyRegion; - TokenRegion *tokenOnlyRegion; - TokenRegion *ciRegion; - - /* We alway init empty scanners with a single token. If we had to do this - * then wasEmpty is true. 
*/ - bool wasEmpty; - - bool isFullRegion; - bool isIgnoreOnly; - bool isTokenOnly; - bool isCiOnly; - - LangEl *ciLel; - TokenRegion *derivedFrom; - - TokenRegion *next, *prev; -}; - -typedef DList<TokenRegion> RegionList; -typedef BstSet< TokenRegion*, CmpOrd<TokenRegion*> > RegionSet; - -typedef Vector<Namespace*> NamespaceVect; - -struct GenericType - : public DListEl<GenericType> -{ - GenericType( const String &name, long typeId, long id, - LangEl *langEl, TypeRef *typeArg ) - : - name(name), typeId(typeId), id(id), langEl(langEl), - typeArg(typeArg), keyTypeArg(0), - utArg(0), keyUT(0), - objDef(0) - {} - - const String &getKey() const - { return name; }; - - void declare( Compiler *pd, Namespace *nspace ); - - String name; - long typeId; - long id; - LangEl *langEl; - TypeRef *typeArg; - TypeRef *keyTypeArg; - UniqueType *utArg; - UniqueType *keyUT; - - ObjectDef *objDef; -}; - -typedef DList<GenericType> GenericList; - -typedef struct _UserIter UserIter; -typedef AvlMap<String, UserIter*, CmpStr> UserIterMap; -typedef AvlMapEl<String, UserIter*> UserIterMapEl; - -/* Graph dictionary. */ -struct GraphDictEl -: - public AvlTreeEl<GraphDictEl>, - public DListEl<GraphDictEl> -{ - GraphDictEl( const String &key ) - : key(key), value(0), isInstance(false) { } - GraphDictEl( const String &key, VarDef *value ) - : key(key), value(value), isInstance(false) { } - - const String &getKey() { return key; } - - String key; - VarDef *value; - bool isInstance; - - /* Location info of graph definition. Points to variable name of assignment. */ - InputLoc loc; -}; - -typedef AvlTree<GraphDictEl, String, CmpStr> GraphDict; -typedef DList<GraphDictEl> GraphList; - -/* Graph dictionary. 
*/ -struct RegionGraphDictEl -: - public AvlTreeEl<RegionGraphDictEl>, - public DListEl<RegionGraphDictEl> -{ - RegionGraphDictEl( const String &key ) - : key(key), value(0), isInstance(false) { } - RegionGraphDictEl( const String &key, RegionDef *value ) - : key(key), value(value), isInstance(false) { } - - const String &getKey() { return key; } - - String key; - RegionDef *value; - bool isInstance; - - /* Location info of graph definition. Points to variable name of assignment. */ - InputLoc loc; -}; - -typedef AvlTree<RegionGraphDictEl, String, CmpStr> RegionGraphDict; -typedef DList<RegionGraphDictEl> RegionGraphList; - -struct TypeAlias -{ - TypeAlias( const InputLoc &loc, Namespace *nspace, - const String &name, TypeRef *typeRef ) - : - loc(loc), - nspace(nspace), - name(name), - typeRef(typeRef) - {} - - InputLoc loc; - Namespace *nspace; - String name; - TypeRef *typeRef; - - TypeAlias *prev, *next; -}; - -typedef DList<TypeAlias> TypeAliasList; - -struct Namespace -{ - /* Construct with a list of joins */ - Namespace( const InputLoc &loc, const String &name, int id, - Namespace *parentNamespace ) : - loc(loc), name(name), id(id), - parentNamespace(parentNamespace) { } - - /* Tree traversal. */ - Namespace *findNamespace( const String &name ); - - InputLoc loc; - String name; - int id; - - /* Literal patterns and the dictionary mapping literals to the underlying - * tokens. */ - LiteralDict literalDict; - - /* List of tokens defs in the namespace. */ - TokenDefListNs tokenDefList; - - /* List of nonterminal defs in the namespace. */ - NtDefList ntDefList; - - /* List of context definitions for encapsulating the data of a parser. */ - ContextDefList contextDefList; - - /* Dictionary of symbols within the region. */ - TypeMap typeMap; - GenericList genericList; - - /* Dictionary of graphs. Both instances and non-instances go here. */ - RegionGraphDict graphDict; - - /* regular language definitions. 
*/ - GraphDict rlMap; - - TypeAliasList typeAliasList; - - Namespace *parentNamespace; - NamespaceVect childNamespaces; - - Namespace *next, *prev; - - void declare( Compiler *pd ); -}; - -typedef DList<Namespace> NamespaceList; -typedef BstSet< Namespace*, CmpOrd<Namespace*> > NamespaceSet; - -/* List of Expressions. */ -typedef DList<Expression> ExprList; - -struct JoinOrLm -{ - JoinOrLm( Join *join ) : - join(join) {} - - FsmGraph *walk( Compiler *pd ); - void makeNameTree( Compiler *pd ); - - Join *join; -}; - -struct RegionJoinOrLm -{ - enum Type { LongestMatchType }; - - RegionJoinOrLm( TokenRegion *tokenRegion ) : - tokenRegion(tokenRegion) {} - - FsmGraph *walk( Compiler *pd ); - void makeNameTree( Compiler *pd ); - - TokenRegion *tokenRegion; -}; - -/* - * Join - */ -struct Join -{ - /* Construct with the first expression. */ - Join( Expression *expr ); - - /* Tree traversal. */ - FsmGraph *walk( Compiler *pd ); - void makeNameTree( Compiler *pd ); - - /* Data. */ - ExprList exprList; - - Join *context; - Action *mark; -}; - -/* - * Expression - */ -struct Expression -{ - enum Type { - OrType, - IntersectType, - SubtractType, - StrongSubtractType, - TermType, - BuiltinType - }; - - /* Construct with an expression on the left and a term on the right. */ - Expression( Expression *expression, Term *term, Type type ) : - expression(expression), term(term), - builtin(builtin), type(type), prev(this), next(this) { } - - /* Construct with only a term. */ - Expression( Term *term ) : - expression(0), term(term), builtin(builtin), - type(TermType) , prev(this), next(this) { } - - /* Construct with a builtin type. */ - Expression( BuiltinMachine builtin ) : - expression(0), term(0), builtin(builtin), - type(BuiltinType), prev(this), next(this) { } - - ~Expression(); - - /* Tree traversal. */ - FsmGraph *walk( Compiler *pd, bool lastInSeq = true ); - void makeNameTree( Compiler *pd ); - - /* Node data. 
*/ - Expression *expression; - Term *term; - BuiltinMachine builtin; - Type type; - - Expression *prev, *next; -}; - -/* - * Term - */ -struct Term -{ - enum Type { - ConcatType, - RightStartType, - RightFinishType, - LeftType, - FactorWithAugType - }; - - Term( Term *term, FactorWithAug *factorWithAug ) : - term(term), factorWithAug(factorWithAug), type(ConcatType) { } - - Term( Term *term, FactorWithAug *factorWithAug, Type type ) : - term(term), factorWithAug(factorWithAug), type(type) { } - - Term( FactorWithAug *factorWithAug ) : - term(0), factorWithAug(factorWithAug), type(FactorWithAugType) { } - - ~Term(); - - FsmGraph *walk( Compiler *pd, bool lastInSeq = true ); - void makeNameTree( Compiler *pd ); - - Term *term; - FactorWithAug *factorWithAug; - Type type; - - /* Priority descriptor for RightFinish type. */ - PriorDesc priorDescs[2]; -}; - - -/* Third level of precedence. Augmenting nodes with actions and priorities. */ -struct FactorWithAug -{ - FactorWithAug( FactorWithRep *factorWithRep ) : - priorDescs(0), factorWithRep(factorWithRep) { } - ~FactorWithAug(); - - /* Tree traversal. */ - FsmGraph *walk( Compiler *pd ); - void makeNameTree( Compiler *pd ); - - void assignActions( Compiler *pd, FsmGraph *graph, int *actionOrd ); - void assignPriorities( FsmGraph *graph, int *priorOrd ); - - void assignConditions( FsmGraph *graph ); - - /* Actions and priorities assigned to the factor node. */ - Vector<ParserAction> actions; - Vector<PriorityAug> priorityAugs; - PriorDesc *priorDescs; - Vector<EpsilonLink> epsilonLinks; - Vector<ParserAction> conditions; - - FactorWithRep *factorWithRep; -}; - -/* Fourth level of precedence. Trailing unary operators. Provide kleen star, - * optional and plus. 
*/ -struct FactorWithRep -{ - enum Type { - StarType, - StarStarType, - OptionalType, - PlusType, - ExactType, - MaxType, - MinType, - RangeType, - FactorWithNegType - }; - - FactorWithRep( const InputLoc &loc, FactorWithRep *factorWithRep, - int lowerRep, int upperRep, Type type ) : - loc(loc), factorWithRep(factorWithRep), - factorWithNeg(0), lowerRep(lowerRep), - upperRep(upperRep), type(type) { } - - FactorWithRep( const InputLoc &loc, FactorWithNeg *factorWithNeg ) - : loc(loc), factorWithNeg(factorWithNeg), type(FactorWithNegType) { } - - ~FactorWithRep(); - - /* Tree traversal. */ - FsmGraph *walk( Compiler *pd ); - void makeNameTree( Compiler *pd ); - - InputLoc loc; - FactorWithRep *factorWithRep; - FactorWithNeg *factorWithNeg; - int lowerRep, upperRep; - Type type; - - /* Priority descriptor for StarStar type. */ - PriorDesc priorDescs[2]; -}; - -/* Fifth level of precedence. Provides Negation. */ -struct FactorWithNeg -{ - enum Type { - NegateType, - CharNegateType, - FactorType - }; - - FactorWithNeg( const InputLoc &loc, FactorWithNeg *factorWithNeg, Type type) : - loc(loc), factorWithNeg(factorWithNeg), factor(0), type(type) { } - - FactorWithNeg( const InputLoc &loc, Factor *factor ) : - loc(loc), factorWithNeg(0), factor(factor), type(FactorType) { } - - ~FactorWithNeg(); - - /* Tree traversal. */ - FsmGraph *walk( Compiler *pd ); - void makeNameTree( Compiler *pd ); - - InputLoc loc; - FactorWithNeg *factorWithNeg; - Factor *factor; - Type type; -}; - -/* - * Factor - */ -struct Factor -{ - /* Language elements a factor node can be. */ - enum Type { - LiteralType, - RangeType, - OrExprType, - RegExprType, - ReferenceType, - ParenType, - }; - - /* Construct with a literal fsm. */ - Factor( Literal *literal ) : - literal(literal), type(LiteralType) { } - - /* Construct with a range. */ - Factor( Range *range ) : - range(range), type(RangeType) { } - - /* Construct with the or part of a regular expression. 
*/ - Factor( ReItem *reItem ) : - reItem(reItem), type(OrExprType) { } - - /* Construct with a regular expression. */ - Factor( RegExpr *regExp ) : - regExp(regExp), type(RegExprType) { } - - /* Construct with a reference to a var def. */ - Factor( const InputLoc &loc, VarDef *varDef ) : - loc(loc), varDef(varDef), type(ReferenceType) {} - - /* Construct with a parenthesized join. */ - Factor( Join *join ) : - join(join), type(ParenType) {} - - /* Cleanup. */ - ~Factor(); - - /* Tree traversal. */ - FsmGraph *walk( Compiler *pd ); - void makeNameTree( Compiler *pd ); - - InputLoc loc; - Literal *literal; - Range *range; - ReItem *reItem; - RegExpr *regExp; - VarDef *varDef; - Join *join; - int lower, upper; - Type type; -}; - -/* A range machine. Only ever composed of two literals. */ -struct Range -{ - Range( Literal *lowerLit, Literal *upperLit ) - : lowerLit(lowerLit), upperLit(upperLit) { } - - ~Range(); - FsmGraph *walk( Compiler *pd ); - bool verifyRangeFsm( FsmGraph *rangeEnd ); - - Literal *lowerLit; - Literal *upperLit; -}; - -/* Some literal machine. Can be a number or literal string. */ -struct Literal -{ - enum LiteralType { Number, LitString }; - - Literal( const InputLoc &loc, const String &literal, LiteralType type ) - : loc(loc), literal(literal), type(type) { } - - FsmGraph *walk( Compiler *pd ); - - InputLoc loc; - String literal; - LiteralType type; -}; - -/* Regular expression. */ -struct RegExpr -{ - enum RegExpType { RecurseItem, Empty }; - - /* Constructors. */ - RegExpr() : - type(Empty), caseInsensitive(false) { } - RegExpr(RegExpr *regExp, ReItem *item) : - regExp(regExp), item(item), - type(RecurseItem), caseInsensitive(false) { } - - ~RegExpr(); - FsmGraph *walk( Compiler *pd, RegExpr *rootRegex ); - - RegExpr *regExp; - ReItem *item; - RegExpType type; - bool caseInsensitive; -}; - -/* An item in a regular expression. 
*/ -struct ReItem -{ - enum ReItemType { Data, Dot, OrBlock, NegOrBlock }; - - ReItem( const InputLoc &loc, const String &data ) - : loc(loc), data(data), star(false), type(Data) { } - ReItem( const InputLoc &loc, ReItemType type ) - : loc(loc), star(false), type(type) { } - ReItem( const InputLoc &loc, ReOrBlock *orBlock, ReItemType type ) - : loc(loc), orBlock(orBlock), star(false), type(type) { } - - ~ReItem(); - FsmGraph *walk( Compiler *pd, RegExpr *rootRegex ); - - InputLoc loc; - String data; - ReOrBlock *orBlock; - bool star; - ReItemType type; -}; - -/* An or block item. */ -struct ReOrBlock -{ - enum ReOrBlockType { RecurseItem, Empty }; - - /* Constructors. */ - ReOrBlock() - : type(Empty) { } - ReOrBlock(ReOrBlock *orBlock, ReOrItem *item) - : orBlock(orBlock), item(item), type(RecurseItem) { } - - ~ReOrBlock(); - FsmGraph *walk( Compiler *pd, RegExpr *rootRegex ); - - ReOrBlock *orBlock; - ReOrItem *item; - ReOrBlockType type; -}; - -/* An item in an or block. */ -struct ReOrItem -{ - enum ReOrItemType { Data, Range }; - - ReOrItem( const InputLoc &loc, const String &data ) - : loc(loc), data(data), type(Data) {} - ReOrItem( const InputLoc &loc, char lower, char upper ) - : loc(loc), lower(lower), upper(upper), type(Range) { } - - FsmGraph *walk( Compiler *pd, RegExpr *rootRegex ); - - InputLoc loc; - String data; - char lower; - char upper; - ReOrItemType type; -}; - - -/* - * Inline code tree - */ -struct InlineList; -struct InlineItem -{ - enum Type - { - Text, - LmSwitch, - LmSetActId, - LmSetTokEnd, - LmOnLast, - LmOnNext, - LmOnLagBehind, - LmInitAct, - LmInitTokStart, - LmSetTokStart - }; - - InlineItem( const InputLoc &loc, const String &data, Type type ) : - loc(loc), data(data), nameRef(0), children(0), type(type) { } - - InlineItem( const InputLoc &loc, NameRef *nameRef, Type type ) : - loc(loc), nameRef(nameRef), children(0), type(type) { } - - InlineItem( const InputLoc &loc, TokenRegion *tokenRegion, - TokenDef *longestMatchPart, Type 
type ) : loc(loc), - nameRef(0), children(0), tokenRegion(tokenRegion), - longestMatchPart(longestMatchPart), type(type) { } - - InlineItem( const InputLoc &loc, NameInst *nameTarg, Type type ) : - loc(loc), nameRef(0), nameTarg(nameTarg), children(0), - type(type) { } - - InlineItem( const InputLoc &loc, Type type ) : - loc(loc), nameRef(0), children(0), type(type) { } - - InputLoc loc; - String data; - NameRef *nameRef; - NameInst *nameTarg; - InlineList *children; - TokenRegion *tokenRegion; - TokenDef *longestMatchPart; - Type type; - - InlineItem *prev, *next; -}; - -/* Normally this would be atypedef, but that would entail including DList from - * ptreetypes, which should be just typedef forwards. */ -struct InlineList : public DList<InlineItem> { }; - -struct ProdEl; -struct LangVarRef; -struct ObjField; - -struct PatternItem -{ - enum Type { - FactorType, - InputText - }; - - PatternItem( const InputLoc &loc, const String &data, Type type ) : - loc(loc), factor(0), data(data), type(type), region(0), - varRef(0), bindId(0) {} - - PatternItem( const InputLoc &loc, ProdEl *factor, Type type ) : - loc(loc), factor(factor), type(type), region(0), - varRef(0), bindId(0) {} - - InputLoc loc; - ProdEl *factor; - String data; - Type type; - TokenRegion *region; - LangVarRef *varRef; - long bindId; - - PatternItem *prev, *next; -}; - -struct LangExpr; -typedef DList<PatternItem> PatternItemList; - -struct ReplItem -{ - enum Type { - InputText, - ExprType, - FactorType - }; - - ReplItem( const InputLoc &loc, Type type, const String &data ) : - loc(loc), type(type), data(data), expr(0), bindId(0) {} - - ReplItem( const InputLoc &loc, Type type, LangExpr *expr ) : - loc(loc), type(type), expr(expr), bindId(0) {} - - ReplItem( const InputLoc &loc, Type type, ProdEl *factor ) : - loc(loc), type(type), expr(expr), factor(factor), bindId(0) {} - - InputLoc loc; - Type type; - String data; - LangExpr *expr; - LangEl *langEl; - ProdEl *factor; - long bindId; - - ReplItem 
*prev, *next; -}; - -typedef DList<ReplItem> ReplItemList; - - -struct Pattern -{ - Pattern( const InputLoc &loc, Namespace *nspace, TokenRegion *region, - PatternItemList *list, int patRepId ) : - loc(loc), nspace(nspace), region(region), list(list), patRepId(patRepId), - langEl(0), pdaRun(0), nextBindId(1) {} - - InputLoc loc; - Namespace *nspace; - TokenRegion *region; - PatternItemList *list; - long patRepId; - LangEl *langEl; - PdaRun *pdaRun; - long nextBindId; - - Pattern *prev, *next; -}; - -typedef DList<Pattern> PatternList; - -struct Replacement -{ - Replacement( const InputLoc &loc, Namespace *nspace, - TokenRegion *region, ReplItemList *list, int patRepId ) : - loc(loc), nspace(nspace), region(region), list(list), - patRepId(patRepId), langEl(0), pdaRun(0), nextBindId(1), parse(true) {} - - InputLoc loc; - Namespace *nspace; - TokenRegion *region; - ReplItemList *list; - int patRepId; - LangEl *langEl; - PdaRun *pdaRun; - long nextBindId; - bool parse; - - Replacement *prev, *next; -}; - -typedef DList<Replacement> ReplList; - -struct ParserText -{ - ParserText( const InputLoc &loc, Namespace *nspace, - TokenRegion *region, ReplItemList *list ) : - loc(loc), nspace(nspace), region(region), list(list), - langEl(0), pdaRun(0), nextBindId(1), parse(true) {} - - InputLoc loc; - Namespace *nspace; - TokenRegion *region; - ReplItemList *list; - LangEl *langEl; - PdaRun *pdaRun; - long nextBindId; - bool parse; - - ParserText *prev, *next; -}; - -typedef DList<ParserText> ParserTextList; - -struct Function; - -struct IterDef -{ - enum Type { Tree, Child, RevChild, Repeat, RevRepeat, User }; - - IterDef( Type type, Function *func ); - IterDef( Type type ); - - Type type; - - Function *func; - bool useFuncId; - bool useSearchUT; - - Code inCreateWV; - Code inCreateWC; - Code inDestroy; - Code inAdvance; - - Code inGetCurR; - Code inGetCurWC; - Code inSetCurWC; - - Code inRefFromCur; -}; - -struct CmpIterDef -{ - static int compare( const IterDef &id1, const 
IterDef &id2 ) - { - if ( id1.type < id2.type ) - return -1; - else if ( id1.type > id2.type ) - return 1; - else if ( id1.type == IterDef::User ) { - if ( id1.func < id2.func ) - return -1; - else if ( id1.func > id2.func ) - return 1; - } - - return 0; - } -}; - -typedef AvlSet<IterDef, CmpIterDef> IterDefSet; -typedef AvlSetEl<IterDef> IterDefSetEl; - - -/* - * Unique Types. - */ - -/* - * type_ref -> qualified_name - * type_ref -> '*' type_ref - * type_ref -> '&' type_ref - * type_ref -> list type_ref type_ref - * type_ref -> map type_ref type_ref - * type_ref -> vector type_ref - * type_ref -> parser type_ref - * type_ref -> iter_tree type_ref - * type_ref -> iter_child type_ref - * type_ref -> iter_revchild type_ref - * type_ref -> iter_repeat type_ref - * type_ref -> iter_revrepeat type_ref - * type_ref -> iter_user type_ref - * - * type -> nil - * type -> def term - * type -> def nonterm - * type -> '*' type - * type -> '&' type - * type -> list type - * type -> map type type - * type -> vector type - * type -> parser type - * type -> iter_tree type - * type -> iter_child type - * type -> iter_revchild type - * type -> iter_repeat type - * type -> iter_revrepeat type - * type -> iter_user type - */ - -struct UniqueType : public AvlTreeEl<UniqueType> -{ - UniqueType( int typeId ) : - typeId(typeId), - langEl(0), - iterDef(0) {} - - UniqueType( int typeId, LangEl *langEl ) : - typeId(typeId), - langEl(langEl), - iterDef(0) {} - - UniqueType( int typeId, IterDef *iterDef ) : - typeId(typeId), - langEl(langEl), - iterDef(iterDef) {} - - int typeId; - LangEl *langEl; - IterDef *iterDef; -}; - -struct CmpUniqueType -{ - static int compare( const UniqueType &ut1, const UniqueType &ut2 ); -}; - -typedef AvlBasic< UniqueType, CmpUniqueType > UniqueTypeMap; - -enum RepeatType { - RepeatNone = 1, - RepeatRepeat, - RepeatList, - RepeatOpt, -}; - -/* - * Repeat types. 
- */ - -struct UniqueRepeat - : public AvlTreeEl<UniqueRepeat> -{ - UniqueRepeat( RepeatType repeatType, LangEl *langEl ) : - repeatType(repeatType), - langEl(langEl), declLangEl(0) {} - - RepeatType repeatType; - LangEl *langEl; - LangEl *declLangEl; -}; - -struct CmpUniqueRepeat -{ - static int compare( const UniqueRepeat &ut1, const UniqueRepeat &ut2 ); -}; - -typedef AvlBasic< UniqueRepeat, CmpUniqueRepeat > UniqueRepeatMap; - -/* - * Unique Map Types - */ - -struct UniqueMap - : public AvlTreeEl<UniqueMap> -{ - UniqueMap( UniqueType *key, UniqueType *value ) : - key(key), value(value), generic(0) {} - - UniqueType *key; - UniqueType *value; - - GenericType *generic; -}; - -struct CmpUniqueMap -{ - static int compare( const UniqueMap &ut1, const UniqueMap &ut2 ); -}; - -typedef AvlBasic< UniqueMap, CmpUniqueMap > UniqueMapMap; - -/* - * Unique List Types - */ - -struct UniqueList - : public AvlTreeEl<UniqueList> -{ - UniqueList( UniqueType *value ) : - value(value), generic(0) {} - - UniqueType *value; - GenericType *generic; -}; - -struct CmpUniqueList -{ - static int compare( const UniqueList &ut1, const UniqueList &ut2 ); -}; - -typedef AvlBasic< UniqueList, CmpUniqueList > UniqueListMap; - -/* - * Unique Vector Types - */ - -struct UniqueVector - : public AvlTreeEl<UniqueVector> -{ - UniqueVector( UniqueType *value ) : - value(value), generic(0) {} - - UniqueType *value; - GenericType *generic; -}; - -struct CmpUniqueVector -{ - static int compare( const UniqueVector &ut1, const UniqueVector &ut2 ); -}; - -typedef AvlBasic< UniqueVector, CmpUniqueVector > UniqueVectorMap; - -/* - * Unique Parser Types - */ - -struct UniqueParser - : public AvlTreeEl<UniqueParser> -{ - UniqueParser( UniqueType *parseType ) : - parseType(parseType), generic(0) {} - - UniqueType *parseType; - GenericType *generic; -}; - -struct CmpUniqueParser -{ - static int compare( const UniqueParser &ut1, const UniqueParser &ut2 ); -}; - -typedef AvlBasic< UniqueParser, CmpUniqueParser > 
UniqueParserMap; - -/* - * - */ - -typedef AvlMap< StringVect, int, CmpStrVect > VectorTypeIdMap; -typedef AvlMapEl< StringVect, int > VectorTypeIdMapEl; - -typedef Vector<TypeRef*> TypeRefVect; - -struct TypeRef -{ - enum Type - { - Unspecified, - Name, - Literal, - Iterator, - Map, - List, - Vector, - Parser, - Ref, - Ptr, - }; - - /* Qualification and a type name. These require lookup. */ - TypeRef( const InputLoc &loc, NamespaceQual *nspaceQual, String typeName ) : - type(Name), loc(loc), nspaceQual(nspaceQual), typeName(typeName), pdaLiteral(0), iterDef(0), - typeRef1(0), typeRef2(0), - repeatType(RepeatNone), - nspace(0), uniqueType(0), searchUniqueType(0), generic(0) {} - - /* Qualification and a type name. These require lookup. */ - TypeRef( const InputLoc &loc, NamespaceQual *nspaceQual, PdaLiteral *pdaLiteral ) : - type(Literal), loc(loc), nspaceQual(nspaceQual), pdaLiteral(pdaLiteral), iterDef(0), - typeRef1(0), typeRef2(0), - repeatType(RepeatNone), - nspace(0), uniqueType(0), searchUniqueType(0), generic(0) {} - - /* Generics. */ - TypeRef( Type type, const InputLoc &loc, NamespaceQual *nspaceQual, TypeRef *typeRef1, TypeRef *typeRef2 ) : - type(type), loc(loc), nspaceQual(nspaceQual), pdaLiteral(0), iterDef(0), - typeRef1(typeRef1), typeRef2(typeRef2), - repeatType(RepeatNone), - nspace(0), uniqueType(0), searchUniqueType(0), generic(0) {} - - /* Pointers and Refs. */ - TypeRef( Type type, const InputLoc &loc, TypeRef *typeRef1 ) : - type(type), loc(loc), nspaceQual(0), pdaLiteral(0), iterDef(0), - typeRef1(typeRef1), typeRef2(0), - repeatType(RepeatNone), - nspace(0), uniqueType(0), searchUniqueType(0), generic(0) {} - - /* Resolution not needed. */ - - /* Iterator definition. 
*/ - TypeRef( const InputLoc &loc, IterDef *iterDef, UniqueType *uniqueType, - UniqueType *searchUniqueType ) : - type(Iterator), loc(loc), nspaceQual(0), pdaLiteral(0), iterDef(iterDef), - typeRef1(0), typeRef2(0), - repeatType(RepeatNone), - nspace(0), uniqueType(uniqueType), searchUniqueType(searchUniqueType), generic(0) {} - - /* Unique type is given directly. */ - TypeRef( const InputLoc &loc, UniqueType *uniqueType ) : - type(Unspecified), loc(loc), nspaceQual(0), pdaLiteral(0), iterDef(0), - typeRef1(0), typeRef2(0), - repeatType(RepeatNone), - nspace(0), uniqueType(uniqueType), searchUniqueType(0), generic(0) {} - - void resolveRepeat( Compiler *pd ); - - UniqueType *lookupTypeName( Compiler *pd ); - UniqueType *lookupTypeLiteral( Compiler *pd ); - UniqueType *lookupTypeMap( Compiler *pd ); - UniqueType *lookupTypeList( Compiler *pd ); - UniqueType *lookupTypeVector( Compiler *pd ); - UniqueType *lookupTypeParser( Compiler *pd ); - UniqueType *lookupType( Compiler *pd ); - UniqueType *lookupTypePtr( Compiler *pd ); - UniqueType *lookupTypeRef( Compiler *pd ); - - Type type; - InputLoc loc; - NamespaceQual *nspaceQual; - String typeName; - PdaLiteral *pdaLiteral; - IterDef *iterDef; - TypeRef *typeRef1; - TypeRef *typeRef2; - RepeatType repeatType; - - /* Resolved. 
*/ - Namespace *nspace; - UniqueType *uniqueType; - UniqueType *searchUniqueType; - GenericType *generic; -}; - -typedef DList<ObjField> ParameterList; - -struct ObjMethod -{ - ObjMethod( UniqueType *returnUT, String name, - int opcodeWV, int opcodeWC, int numParams, - UniqueType **types, ParameterList *paramList, bool isConst ) - : - returnUT(returnUT), - returnTypeId(0), - name(name), - opcodeWV(opcodeWV), - opcodeWC(opcodeWC), - numParams(numParams), - paramList(paramList), - isConst(isConst), - funcId(0), - useFuncId(false), - useCallObj(true), - isCustom(false), - func(0), - iterDef(0) - { - this->paramUTs = new UniqueType*[numParams]; - memcpy( this->paramUTs, types, sizeof(UniqueType*)*numParams ); - } - - UniqueType *returnUT; - long returnTypeId; - String name; - long opcodeWV; - long opcodeWC; - long numParams; - UniqueType **paramUTs; - ParameterList *paramList; - bool isConst; - long funcId; - bool useFuncId; - bool useCallObj; - bool isCustom; - Function *func; - IterDef *iterDef; -}; - -typedef AvlMap<String, ObjMethod*, CmpStr> ObjMethodMap; -typedef AvlMapEl<String, ObjMethod*> ObjMethodMapEl; - -struct RhsVal { RhsVal( int prodNum, int childNum ) : prodNum(prodNum), childNum(childNum) { } int prodNum; int childNum; }; - -struct ObjField -{ - ObjField( const InputLoc &loc, TypeRef *typeRef, const String &name ) : - loc(loc), typeRef(typeRef), name(name), - context(0), - pos(0), offset(0), - beenReferenced(false), - beenInitialized(false), - useOffset(true), - isConst(false), - isLhsEl(false), isRhsEl(false), - refActive(false), - isArgv(false), - isCustom(false), - isParam(false), - isRhsGet(false), - isExport(false), - dirtyTree(false), - inGetR( IN_HALT ), - inGetWC( IN_HALT ), - inGetWV( IN_HALT ), - inSetWC( IN_HALT ), - inSetWV( IN_HALT ) - {} - - InputLoc loc; - TypeRef *typeRef; - String name; - Context *context; - long pos; - long offset; - bool beenReferenced; - bool beenInitialized; - bool useOffset; - bool isConst; - bool isLhsEl; - bool 
isRhsEl; - bool refActive; - bool isArgv; - bool isCustom; - bool isParam; - bool isRhsGet; - bool isExport; - - /* True if some aspect of the tree has possibly been written to. This does - * not include attributes. This is here so we can optimize the storage of - * old lhs vars. If only a lhs attribute changes we don't need to preserve - * the original for backtracking. */ - bool dirtyTree; - - Vector<RhsVal> rhsVal; - - Code inGetR; - Code inGetWC; - Code inGetWV; - Code inSetWC; - Code inSetWV; - - ObjField *prev, *next; -}; - -typedef AvlMap<String, ObjField*, CmpStr> ObjFieldMap; -typedef AvlMapEl<String, ObjField*> ObjFieldMapEl; - -typedef DListVal<ObjField*> ObjFieldList; - -typedef DList<ObjField> ParameterList; - -struct TemplateType; - -/* Tree of name scopes for an object def. All of the object fields inside this - * tree live in one object def. This is used for scoping names in functions. */ -struct ObjNameScope -{ - ObjNameScope() - : parentScope(0), childIter(0) - {} - - ObjFieldMap *objFieldMap; - - ObjNameScope *parentScope; - DList<ObjNameScope> children; - - /* For iteration after declaration. */ - ObjNameScope *childIter; - - ObjNameScope *prev, *next; -}; - -struct ObjectDef -{ - enum Type { - UserType, - FrameType, - IterType, - BuiltinType - }; - - ObjectDef( Type type, String name, int id ) - : - type(type), name(name), id(id), - nextOffset(0), firstNonTree(0) - { - scope = new ObjNameScope; - scope->objFieldMap = new ObjFieldMap; - - objFieldList = new ObjFieldList; - objMethodMap = new ObjMethodMap(); - } - - Type type; - String name; - ObjFieldList *objFieldList; - ObjMethodMap *objMethodMap; - - /* Head of stack of name scopes. 
*/ - ObjNameScope *scope; - - void pushScope(); - void popScope(); - void iterPushScope(); - void iterPopScope(); - - long id; - long nextOffset; - long firstNonTree; - - void referenceField( Compiler *pd, ObjField *field ); - void initField( Compiler *pd, ObjField *field ); - void createCode( Compiler *pd, CodeVect &code ); - ObjField *checkRedecl( const String &name ); - ObjMethod *findMethod( const String &name ); - ObjField *findFieldInScope( const String &name, ObjNameScope *inScope ); - ObjField *findField( const String &name ); - void insertField( const String &name, ObjField *value ); - void resolve( Compiler *pd ); - ObjField *findFieldNum( long offset ); - - long size() { return nextOffset; } - long sizeTrees() { return firstNonTree; } -}; - -typedef Vector<LangExpr*> ExprVect; -typedef Vector<String> StringVect; - -struct FieldInit -{ - FieldInit( const InputLoc &loc, String name, LangExpr *expr ) - : loc(loc), name(name), expr(expr) {} - - InputLoc loc; - String name; - LangExpr *expr; - - UniqueType *exprUT; -}; - -typedef Vector<FieldInit*> FieldInitVect; - -struct VarRefLookup -{ - VarRefLookup( int lastPtrInQual, int firstConstPart, ObjectDef *inObject ) : - lastPtrInQual(lastPtrInQual), - firstConstPart(firstConstPart), - inObject(inObject), - objField(0), - objMethod(0), - uniqueType(0), - iterSearchUT(0) - {} - - int lastPtrInQual; - int firstConstPart; - ObjectDef *inObject; - ObjField *objField; - ObjMethod *objMethod; - UniqueType *uniqueType; - UniqueType *iterSearchUT; -}; - -struct QualItem -{ - enum Type { Dot, Arrow }; - - QualItem( const InputLoc &loc, const String &data, Type type ) - : loc(loc), data(data), type(type) {} - - InputLoc loc; - String data; - Type type; -}; - -typedef Vector<QualItem> QualItemVect; - -struct LangVarRef -{ - LangVarRef( const InputLoc &loc, QualItemVect *qual, String name ) - : loc(loc), qual(qual), name(name) {} - - void resolve( Compiler *pd ) const; - - UniqueType *loadFieldInstr( Compiler *pd, CodeVect 
&code, ObjectDef *inObject, - ObjField *el, bool forWriting, bool revert ) const; - void setFieldInstr( Compiler *pd, CodeVect &code, ObjectDef *inObject, - ObjField *el, UniqueType *exprUT, bool revert ) const; - - VarRefLookup lookupMethod( Compiler *pd ) ; - VarRefLookup lookupField( Compiler *pd ) const; - - VarRefLookup lookupQualification( Compiler *pd, ObjectDef *rootDef ) const; - VarRefLookup lookupObj( Compiler *pd ) const; - - bool isCustom( Compiler *pd ) const; - bool isLocalRef( Compiler *pd ) const; - bool isContextRef( Compiler *pd ) const; - void loadQualification( Compiler *pd, CodeVect &code, ObjectDef *rootObj, - int lastPtrInQual, bool forWriting, bool revert ) const; - void loadCustom( Compiler *pd, CodeVect &code, - int lastPtrInQual, bool forWriting ) const; - void loadLocalObj( Compiler *pd, CodeVect &code, - int lastPtrInQual, bool forWriting ) const; - void loadContextObj( Compiler *pd, CodeVect &code, int lastPtrInQual, bool forWriting ) const; - void loadGlobalObj( Compiler *pd, CodeVect &code, - int lastPtrInQual, bool forWriting ) const; - void loadObj( Compiler *pd, CodeVect &code, int lastPtrInQual, bool forWriting ) const; - void canTakeRef( Compiler *pd, VarRefLookup &lookup ) const; - - void setFieldIter( Compiler *pd, CodeVect &code, - ObjectDef *inObject, UniqueType *objUT, UniqueType *exprType, bool revert ) const; - void setFieldSearch( Compiler *pd, CodeVect &code, - ObjectDef *inObject, UniqueType *exprType ) const; - void setField( Compiler *pd, CodeVect &code, - ObjectDef *inObject, UniqueType *type, bool revert ) const; - - void assignValue( Compiler *pd, CodeVect &code, UniqueType *exprUT ) const; - ObjField **evaluateArgs( Compiler *pd, CodeVect &code, - VarRefLookup &lookup, ExprVect *args ) const; - void callOperation( Compiler *pd, CodeVect &code, VarRefLookup &lookup ) const; - UniqueType *evaluateCall( Compiler *pd, CodeVect &code, ExprVect *args ); - UniqueType *evaluate( Compiler *pd, CodeVect &code, bool 
forWriting = false ) const; - ObjField *evaluateRef( Compiler *pd, CodeVect &code, long pushCount ) const; - ObjField *preEvaluateRef( Compiler *pd, CodeVect &code ) const; - void resetActiveRefs( Compiler *pd, VarRefLookup &lookup, ObjField **paramRefs ) const; - long loadQualificationRefs( Compiler *pd, CodeVect &code ) const; - void popRefQuals( Compiler *pd, CodeVect &code, - VarRefLookup &lookup, ExprVect *args ) const; - - InputLoc loc; - QualItemVect *qual; - String name; -}; - -struct LangTerm -{ - enum Type { - VarRefType, - MethodCallType, - NumberType, - StringType, - MatchType, - NewType, - ConstructType, - TypeIdType, - SearchType, - NilType, - TrueType, - FalseType, - ParseType, - ParseStopType, - MakeTreeType, - MakeTokenType, - EmbedStringType - }; - - LangTerm( Type type, LangVarRef *varRef ) - : type(type), varRef(varRef) {} - - LangTerm( LangVarRef *varRef, ExprVect *args ) - : type(MethodCallType), varRef(varRef), args(args) {} - - LangTerm( const InputLoc &loc, Type type, ExprVect *args ) - : loc(loc), type(type), args(args) {} - - LangTerm( Type type, String data ) - : type(type), varRef(0), data(data) {} - - LangTerm( Type type, NamespaceQual *nspaceQual, const String &data ) - : type(type), varRef(0), nspaceQual(nspaceQual), data(data) {} - - LangTerm( const InputLoc &loc, Type type ) - : loc(loc), type(type), varRef(0), typeRef(0) {} - - LangTerm( const InputLoc &loc, Type type, TypeRef *typeRef ) - : loc(loc), type(type), varRef(0), typeRef(typeRef) {} - - LangTerm( const InputLoc &loc, Type type, LangVarRef *varRef ) - : loc(loc), type(type), varRef(varRef) {} - - LangTerm( Type type, LangVarRef *varRef, Pattern *pattern ) - : type(type), varRef(varRef), pattern(pattern) {} - - LangTerm( const InputLoc &loc, Type type, TypeRef *typeRef, LangVarRef *varRef ) - : loc(loc), type(type), varRef(varRef), typeRef(typeRef) {} - - LangTerm( const InputLoc &loc, Type type, TypeRef *typeRef, FieldInitVect *fieldInitArgs, - Replacement *replacement ) 
- : loc(loc), type(type), typeRef(typeRef), fieldInitArgs(fieldInitArgs), - replacement(replacement) {} - - LangTerm( const InputLoc &loc, Type type, LangVarRef *varRef, ObjField *objField, - TypeRef *typeRef, FieldInitVect *fieldInitArgs, Replacement *replacement ) - : loc(loc), type(type), varRef(varRef), objField(objField), typeRef(typeRef), - fieldInitArgs(fieldInitArgs), replacement(replacement) {} - - LangTerm( Type type, LangExpr *expr ) - : type(type), expr(expr) {} - - LangTerm( ReplItemList *replItemList ) - : type(EmbedStringType), replItemList(replItemList) {} - - LangTerm( const InputLoc &loc, Type type, LangVarRef *varRef, - ObjField *objField, TypeRef *typeRef, GenericType *generic, TypeRef *parserTypeRef, - Replacement *replacement ) - : loc(loc), type(type), varRef(varRef), objField(objField), - typeRef(typeRef), generic(generic), parserTypeRef(parserTypeRef), - replacement(replacement) {} - - void resolve( Compiler *pd ); - - UniqueType *evaluateParse( Compiler *pd, CodeVect &code, bool stop ) const; - UniqueType *evaluateNew( Compiler *pd, CodeVect &code ) const; - UniqueType *evaluateConstruct( Compiler *pd, CodeVect &code ) const; - UniqueType *evaluateMatch( Compiler *pd, CodeVect &code ) const; - UniqueType *evaluate( Compiler *pd, CodeVect &code ) const; - void assignFieldArgs( Compiler *pd, CodeVect &code, UniqueType *replUT ) const; - UniqueType *evaluateMakeToken( Compiler *pd, CodeVect &code ) const; - UniqueType *evaluateMakeTree( Compiler *pd, CodeVect &code ) const; - UniqueType *evaluateEmbedString( Compiler *pd, CodeVect &code ) const; - - InputLoc loc; - Type type; - LangVarRef *varRef; - ExprVect *args; - NamespaceQual *nspaceQual; - String data; - ObjField *objField; - TypeRef *typeRef; - Pattern *pattern; - FieldInitVect *fieldInitArgs; - GenericType *generic; - TypeRef *parserTypeRef; - Replacement *replacement; - LangExpr *expr; - ReplItemList *replItemList; -}; - -struct LangExpr -{ - enum Type { - BinaryType, - UnaryType, - 
TermType - }; - - LangExpr( const InputLoc &loc, LangExpr *left, char op, LangExpr *right ) - : loc(loc), type(BinaryType), left(left), op(op), right(right) {} - - LangExpr( const InputLoc &loc, char op, LangExpr *right ) - : loc(loc), type(UnaryType), left(0), op(op), right(right) {} - - LangExpr( LangTerm *term ) - : type(TermType), term(term) {} - - void resolve( Compiler *pd ) const; - - UniqueType *evaluate( Compiler *pd, CodeVect &code ) const; - - InputLoc loc; - Type type; - LangExpr *left; - char op; - LangExpr *right; - LangTerm *term; -}; - -struct LangStmt; -typedef DList<LangStmt> StmtList; - -struct LangStmt -{ - enum Type { - AssignType, - PrintType, - PrintXMLACType, - PrintXMLType, - PrintStreamType, - ExprType, - IfType, - ElseType, - RejectType, - WhileType, - ReturnType, - YieldType, - ForIterType, - BreakType, - ParserType - }; - - LangStmt( const InputLoc &loc, Type type, FieldInitVect *fieldInitVect ) : - loc(loc), type(type), varRef(0), expr(0), fieldInitVect(fieldInitVect), next(0) {} - - LangStmt( const InputLoc &loc, Type type, ExprVect *exprPtrVect ) : - loc(loc), type(type), varRef(0), expr(0), exprPtrVect(exprPtrVect), next(0) {} - - LangStmt( const InputLoc &loc, Type type, LangExpr *expr ) : - loc(loc), type(type), varRef(0), expr(expr), exprPtrVect(0), next(0) {} - - LangStmt( Type type, LangVarRef *varRef ) : - type(type), varRef(varRef), expr(0), exprPtrVect(0), next(0) {} - - LangStmt( const InputLoc &loc, Type type, ObjField *objField ) : - loc(loc), type(type), varRef(0), objField(objField), expr(0), - exprPtrVect(0), next(0) {} - - LangStmt( const InputLoc &loc, Type type, LangVarRef *varRef, LangExpr *expr ) : - loc(loc), type(type), varRef(varRef), expr(expr), exprPtrVect(0), next(0) {} - - LangStmt( Type type, LangExpr *expr, StmtList *stmtList ) : - type(type), expr(expr), stmtList(stmtList), next(0) {} - - LangStmt( Type type, StmtList *stmtList ) : - type(type), stmtList(stmtList), next(0) {} - - LangStmt( Type type, 
LangExpr *expr, StmtList *stmtList, LangStmt *elsePart ) : - type(type), expr(expr), stmtList(stmtList), elsePart(elsePart), next(0) {} - - LangStmt( const InputLoc &loc, Type type ) : - loc(loc), type(type), next(0) {} - - LangStmt( Type type, LangVarRef *varRef, Replacement *replacement ) : - type(type), varRef(varRef), expr(0), replacement(replacement), - exprPtrVect(0), next(0) {} - - LangStmt( Type type, LangVarRef *varRef, ParserText *parserText ) : - type(type), varRef(varRef), expr(0), parserText(parserText), - exprPtrVect(0), next(0) {} - - /* ForIterType */ - LangStmt( const InputLoc &loc, Type type, ObjField *objField, - TypeRef *typeRef, LangTerm *langTerm, StmtList *stmtList ) : - loc(loc), type(type), langTerm(langTerm), objField(objField), typeRef(typeRef), - stmtList(stmtList), next(0) {} - - LangStmt( Type type ) : - type(type), next(0) {} - - void resolve( Compiler *pd ) const; - void resolveParserItems( Compiler *pd ) const; - - void evaluateParserItems( Compiler *pd, CodeVect &code ) const; - LangTerm *chooseDefaultIter( Compiler *pd, LangTerm *fromVarRef ) const; - void compileWhile( Compiler *pd, CodeVect &code ) const; - void compileForIterBody( Compiler *pd, CodeVect &code, UniqueType *iterUT ) const; - void compileForIter( Compiler *pd, CodeVect &code ) const; - void compile( Compiler *pd, CodeVect &code ) const; - - InputLoc loc; - Type type; - LangVarRef *varRef; - LangTerm *langTerm; - ObjField *objField; - TypeRef *typeRef; - LangExpr *expr; - Replacement *replacement; - ParserText *parserText; - ExprVect *exprPtrVect; - FieldInitVect *fieldInitVect; - StmtList *stmtList; - /* Either another if, or an else. */ - LangStmt *elsePart; - String name; - - /* Normally you don't need to initialize double list pointers, however, we - * make use of the next pointer for returning a pair of statements using - * one pointer to a LangStmt, so we need to initialize it above. 
*/ - LangStmt *prev, *next; -}; - -struct CodeBlock -{ - CodeBlock( StmtList *stmtList ) - : - frameId(-1), - stmtList(stmtList), - localFrame(0), - context(0) {} - - void compile( Compiler *pd, CodeVect &code ) const; - void resolve( Compiler *pd ) const; - - long frameId; - StmtList *stmtList; - ObjectDef *localFrame; - CharSet trees; - Context *context; - - /* Each frame has two versions of - * the code: revert and commit. */ - CodeVect codeWV, codeWC; -}; - -struct Function -{ - Function( TypeRef *typeRef, const String &name, - ParameterList *paramList, CodeBlock *codeBlock, - int funcId, bool isUserIter ) - : - typeRef(typeRef), - name(name), - paramList(paramList), - codeBlock(codeBlock), - funcId(funcId), - isUserIter(isUserIter), - paramListSize(0), - paramUTs(0), - inContext(0) - {} - - TransBlock *transBlock; - TypeRef *typeRef; - String name; - ParameterList *paramList; - CodeBlock *codeBlock; - ObjectDef *localFrame; - long funcId; - bool isUserIter; - long paramListSize; - UniqueType **paramUTs; - Context *inContext; - - Function *prev, *next; -}; - -typedef DList<Function> FunctionList; - -#endif /* _PARSETREE_H */ diff --git a/colm/pcheck.cc b/colm/pcheck.cc deleted file mode 100644 index d5401f7d..00000000 --- a/colm/pcheck.cc +++ /dev/null @@ -1,154 +0,0 @@ -/* - * Copyright 2006-2012 Adrian Thurston <thurston@complang.org> - */ - -/* This file is part of Colm. - * - * Colm is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * Colm is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with Colm; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include "pcheck.h" -#include <assert.h> - -/* Construct a new parameter checker with for paramSpec. */ -ParamCheck::ParamCheck( const char *paramSpec, int argc, const char **argv ) -: - state(noparam), - argOffset(0), - curArg(0), - iCurArg(1), - paramSpec(paramSpec), - argc(argc), - argv(argv) -{ -} - -/* Check a single option. Returns the index of the next parameter. Sets p to - * the arg character if valid, 0 otherwise. Sets parg to the parameter arg if - * there is one, NULL otherwise. */ -bool ParamCheck::check() -{ - bool requiresParam; - - if ( iCurArg >= argc ) { /* Off the end of the arg list. */ - state = noparam; - return false; - } - - if ( argOffset != 0 && *argOffset == 0 ) { - /* We are at the end of an arg string. */ - iCurArg += 1; - if ( iCurArg >= argc ) { - state = noparam; - return false; - } - argOffset = 0; - } - - if ( argOffset == 0 ) { - /* Set the current arg. */ - curArg = argv[iCurArg]; - - /* We are at the beginning of an arg string. */ - if ( argv[iCurArg] == 0 || /* Argv[iCurArg] is null. */ - argv[iCurArg][0] != '-' || /* Not a param. */ - argv[iCurArg][1] == 0 ) { /* Only a dash. */ - parameter = 0; - parameterArg = 0; - - iCurArg += 1; - state = noparam; - return true; - } - argOffset = argv[iCurArg] + 1; - } - - /* Get the arg char. */ - char argChar = *argOffset; - - /* Loop over all the parms and look for a match. */ - const char *pSpec = paramSpec; - while ( *pSpec != 0 ) { - char pSpecChar = *pSpec; - - /* If there is a ':' following the char then - * it requires a parm. If a parm is required - * then move ahead two in the parmspec. Otherwise - * move ahead one in the parm spec. 
*/ - if ( pSpec[1] == ':' ) { - requiresParam = true; - pSpec += 2; - } - else { - requiresParam = false; - pSpec += 1; - } - - /* Do we have a match. */ - if ( argChar == pSpecChar ) { - if ( requiresParam ) { - if ( argOffset[1] == 0 ) { - /* The param must follow. */ - if ( iCurArg + 1 == argc ) { - /* We are the last arg so there - * cannot be a parameter to it. */ - parameter = argChar; - parameterArg = 0; - iCurArg += 1; - argOffset = 0; - state = invalid; - return true; - } - else { - /* the parameter to the arg is the next arg. */ - parameter = pSpecChar; - parameterArg = argv[iCurArg + 1]; - iCurArg += 2; - argOffset = 0; - state = match; - return true; - } - } - else { - /* The param for the arg is built in. */ - parameter = pSpecChar; - parameterArg = argOffset + 1; - iCurArg += 1; - argOffset = 0; - state = match; - return true; - } - } - else { - /* Good, we matched the parm and no - * arg is required. */ - parameter = pSpecChar; - parameterArg = 0; - argOffset += 1; - state = match; - return true; - } - } - } - - /* We did not find a match. Bad Argument. */ - parameter = argChar; - parameterArg = 0; - argOffset += 1; - state = invalid; - return true; -} - - diff --git a/colm/pcheck.h b/colm/pcheck.h deleted file mode 100644 index 5be60426..00000000 --- a/colm/pcheck.h +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Copyright 2001, 2002 Adrian Thurston <thurston@complang.org> - */ - -/* This file is part of Colm. - * - * Colm is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * Colm is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with Colm; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#ifndef _PCHECK_H -#define _PCHECK_H - -class ParamCheck -{ -public: - ParamCheck( const char *paramSpec, int argc, const char **argv ); - - bool check(); - - const char *parameterArg; /* The argument to the parameter. */ - char parameter; /* The parameter matched. */ - enum { match, invalid, noparam } state; - - const char *argOffset; /* If we are reading params inside an - * arg this points to the offset. */ - - const char *curArg; /* Pointer to the current arg. */ - int iCurArg; /* Index to the current arg. */ - -private: - const char *paramSpec; /* Parameter spec supplied by the coder. */ - int argc; /* Arguement data from the command line. */ - const char **argv; -}; - -#endif /* _PCHECK_H */ diff --git a/colm/pdabuild.cc b/colm/pdabuild.cc deleted file mode 100644 index 4bce96ce..00000000 --- a/colm/pdabuild.cc +++ /dev/null @@ -1,2091 +0,0 @@ -/* - * Copyright 2006-2012 Adrian Thurston <thurston@complang.org> - */ - -/* This file is part of Colm. - * - * Colm is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * Colm is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Colm; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include <iostream> -#include <iomanip> -#include <errno.h> -#include <stdlib.h> - -/* Parsing. 
*/ -#include "global.h" -#include "parsedata.h" -#include "pdacodegen.h" -#include "pdarun.h" -#include "redfsm.h" -#include "fsmcodegen.h" -#include "redbuild.h" -#include "fsmrun.h" - -/* Dumping the fsm. */ -#include "mergesort.h" - -using namespace std; - -char startDefName[] = "start"; - -/* Count the transitions in the fsm by walking the state list. */ -int countTransitions( PdaGraph *fsm ) -{ - int numTrans = 0; - PdaState *state = fsm->stateList.head; - while ( state != 0 ) { - numTrans += state->transMap.length(); - state = state->next; - } - return numTrans; -} - -LangEl::LangEl( Namespace *nspace, const String &name, Type type ) -: - nspace(nspace), - name(name), - lit(name), - type(type), - id(-1), - isUserTerm(false), - isContext(false), - displayString(0), - numAppearances(0), - commit(false), - ignore(false), - reduceFirst(false), - isLiteral(false), - isRepeat(false), - isList(false), - isOpt(false), - parseStop(false), - isEOF(false), - repeatOf(0), - tokenDef(0), - rootDef(0), - termDup(0), - eofLel(0), - pdaGraph(0), - pdaTables(0), - transBlock(0), - objectDef(0), - thisSize(0), - ofiOffset(0), - generic(0), - parserId(-1), - predType(PredNone), - predValue(0), - contextDef(0), - contextIn(0), - noPreIgnore(false), - noPostIgnore(false), - isCI(false), - ciRegion(0) -{ -} - -PdaGraph *ProdElList::walk( Compiler *pd, Definition *prod ) -{ - PdaGraph *prodFsm = new PdaGraph(); - PdaState *last = prodFsm->addState(); - prodFsm->setStartState( last ); - - if ( prod->collectIgnoreRegion != 0 ) { -// cerr << "production " << prod->data << " has collect ignore region " << -// prod->collectIgnoreRegion->name << endl; - - /* Use the IGNORE TOKEN lang el for the region. 
*/ - long value = prod->collectIgnoreRegion->ciLel->id; - - PdaState *newState = prodFsm->addState(); - PdaTrans *newTrans = prodFsm->appendNewTrans( last, newState, value, value ); - - newTrans->isShift = true; - newTrans->shiftPrior = 0; // WAT - last = newState; - } - - int prodLength = 0; - for ( Iter prodEl = first(); prodEl.lte(); prodEl++, prodLength++ ) { - //PdaGraph *itemFsm = prodEl->walk( pd ); - long value = prodEl->langEl->id; - - PdaState *newState = prodFsm->addState(); - PdaTrans *newTrans = prodFsm->appendNewTrans( last, newState, value, value ); - - newTrans->isShift = true; - newTrans->shiftPrior = prodEl->priorVal; - //cerr << "PRIOR VAL: " << newTrans->shiftPrior << endl; - - if ( prodEl->commit ) { - //cout << "COMMIT: inserting commit of length: " << pd->prodLength << endl; - /* Insert the commit into transitions out of last */ - for ( TransMap::Iter trans = last->transMap; trans.lte(); trans++ ) - trans->value->commits.insert( prodLength ); - } - - last = newState; - } - - /* Make the last state the final state. */ - prodFsm->setFinState( last ); - return prodFsm; -} - - -ProdElList *Compiler::makeProdElList( LangEl *langEl ) -{ - ProdElList *prodElList = new ProdElList(); - UniqueType *uniqueType = findUniqueType( TYPE_TREE, langEl ); - TypeRef *typeRef = new TypeRef( InputLoc(), uniqueType ); - prodElList->append( new ProdEl( InputLoc(), typeRef ) ); - prodElList->tail->langEl = langEl; - return prodElList; -} - -void Compiler::makeDefinitionNames() -{ - for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) { - int prodNum = 1; - for ( LelDefList::Iter def = lel->defList; def.lte(); def++ ) { - def->data.setAs( lel->name.length() + 32, "%s-%i", - lel->name.data, prodNum++ ); - } - } -} - -/* Make sure there there are no language elements whose type is unkonwn. This - * can happen when an id is used on the rhs of a definition but is not defined - * as anything. 
*/ -void Compiler::noUndefindLangEls() -{ - for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) { - if ( lel->type == LangEl::Unknown ) - error() << "'" << lel->name << "' was not defined as anything" << endp; - } -} - -void Compiler::makeLangElIds() -{ - /* The first id 0 is reserved for the stack sentinal. A negative id means - * error to the parsing function, inducing backtracking. */ - nextSymbolId = 1; - - /* First pass assigns to the user terminals. */ - for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) { - /* Must be a term, and not any of the special reserved terminals. - * Remember if the non terminal is a user non terminal. */ - if ( lel->type == LangEl::Term && - !lel->isEOF && - lel != errorLangEl && - lel != noTokenLangEl ) - { - lel->isUserTerm = true; - lel->id = nextSymbolId++; - } - } - - //eofLangEl->id = nextSymbolId++; - for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) { - /* Must be a term, and not any of the special reserved terminals. - * Remember if the non terminal is a user non terminal. */ - if ( lel->isEOF ) - lel->id = nextSymbolId++; - } - - /* Next assign to the eof notoken, which we always create. */ - noTokenLangEl->id = nextSymbolId++; - - /* Possibly assign to the error language element. */ - if ( errorLangEl != 0 ) - errorLangEl->id = nextSymbolId++; - - /* Save this for the code generation. */ - firstNonTermId = nextSymbolId; - - /* A third and final pass assigns to everything else. */ - for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) { - /* Anything else not yet assigned gets assigned now. 
*/ - if ( lel->id < 0 ) - lel->id = nextSymbolId++; - } - - assert( ptrLangEl->id == LEL_ID_PTR ); - assert( boolLangEl->id == LEL_ID_BOOL ); - assert( intLangEl->id == LEL_ID_INT ); - assert( strLangEl->id == LEL_ID_STR ); - assert( streamLangEl->id == LEL_ID_STREAM ); - assert( inputLangEl->id == LEL_ID_INPUT ); - assert( ignoreLangEl->id == LEL_ID_IGNORE ); -} - -void Compiler::refNameSpace( LangEl *lel, Namespace *nspace ) -{ - if ( nspace == defaultNamespace || nspace == rootNamespace ) { - lel->refName = "::" + lel->refName; - return; - } - - lel->refName = nspace->name + "::" + lel->refName; - lel->declName = nspace->name + "::" + lel->declName; - lel->xmlTag = nspace->name + "::" + lel->xmlTag; - refNameSpace( lel, nspace->parentNamespace ); -} - -void Compiler::makeLangElNames() -{ - for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) { - if ( lel->id == LEL_ID_INT ) { - lel->fullName = "_int"; - lel->fullLit = "_int"; - lel->refName = "_int"; - lel->declName = "_int"; - lel->xmlTag = "int"; - } - else if ( lel->id == LEL_ID_BOOL ) { - lel->fullName = "_bool"; - lel->fullLit = "_bool"; - lel->refName = "_bool"; - lel->declName = "_bool"; - lel->xmlTag = "bool"; - } - else { - lel->fullName = lel->name; - lel->fullLit = lel->lit; - lel->refName = lel->lit; - lel->declName = lel->lit; - lel->xmlTag = lel->name; - } - - /* If there is also a namespace next to the type, we add a prefix to - * the type. It's not convenient to name C++ classes the same as a - * namespace in the same scope. We don't want to restrict colm, so we - * add a workaround for the least-common case. The type gets t_ prefix. - * */ - Namespace *nspace = lel->nspace->findNamespace( lel->name ); - if ( nspace != 0 ) { - lel->refName = "t_" + lel->refName; - lel->fullName = "t_" + lel->fullName; - lel->declName = "t_" + lel->declName; - lel->xmlTag = "t_" + lel->xmlTag; - } - - refNameSpace( lel, lel->nspace ); - } -} - -/* Set up dot sets, shift info, and prod sets. 
*/ -void Compiler::makeProdFsms() -{ - /* There are two items in the index for each production (high and low). */ - int indexLen = prodList.length() * 2; - dotItemIndex.setAsNew( indexLen ); - int dsiLow = 0, indexPos = 0; - - /* Build FSMs for all production language elements. */ - for ( DefList::Iter prod = prodList; prod.lte(); prod++ ) - prod->fsm = prod->prodElList->walk( this, prod ); - - makeNonTermFirstSets(); - makeFirstSets(); - - /* Build FSMs for all production language elements. */ - for ( DefList::Iter prod = prodList; prod.lte(); prod++ ) { - if ( addUniqueEmptyProductions ) { - /* This must be re-implemented. */ - assert( false ); - //if ( !prod->isLeftRec && prod->uniqueEmptyLeader != 0 ) { - // PdaGraph *emptyLeader = prod->uniqueEmptyLeader->walk( this ); - // emptyLeader->concatOp( prod->fsm ); - // prod->fsm = emptyLeader; - //} - } - - /* Compute the machine's length. */ - prod->fsmLength = prod->fsm->fsmLength( ); - - /* Productions have a unique production id for each final state. - * This lets us use a production length specific to each final state. - * Start states are always isolated therefore if the start state is - * final then reductions from it will always have a fixed production - * length. This is a simple method for determining the length - * of zero-length derivations when reducing. */ - - /* Number of dot items needed for the production is elements + 1 - * because the dot can be before the first and after the last element. */ - int numForProd = prod->fsm->stateList.length() + 1; - - /* Set up the low and high values in the index for this production. */ - dotItemIndex.data[indexPos].key = dsiLow; - dotItemIndex.data[indexPos].value = prod; - dotItemIndex.data[indexPos+1].key = dsiLow + numForProd - 1; - dotItemIndex.data[indexPos+1].value = prod; - - int dsi = dsiLow; - for ( PdaStateList::Iter state = prod->fsm->stateList; state.lte(); state++, dsi++ ) { - /* All transitions are shifts. 
*/ - for ( TransMap::Iter out = state->transMap; out.lte(); out++ ) - assert( out->value->isShift ); - - state->dotSet.insert( dsi ); - } - - /* Move over the production. */ - dsiLow += numForProd; - indexPos += 2; - - if ( prod->prodCommit ) { - for ( PdaStateSet::Iter fin = prod->fsm->finStateSet; fin.lte(); fin++ ) { - int length = prod->fsmLength; - //cerr << "PENDING COMMIT IN FINAL STATE of " << prod->prodId << - // " with len: " << length << endl; - (*fin)->pendingCommits.insert( ProdIdPair( prod->prodId, length ) ); - } - } - } - - /* Make the final state specific prod id to prod id mapping. */ - prodIdIndex = new Definition*[prodList.length()]; - for ( DefList::Iter prod = prodList; prod.lte(); prod++ ) - prodIdIndex[prod->prodId] = prod; -} - -/* Want the first set of over src. If the first set contains epsilon, go over - * it and over tab. If overSrc is the end of the production, find the follow - * from the table, taking only the characters on which the parent is reduced. - * */ -void Compiler::findFollow( AlphSet &result, PdaState *overTab, - PdaState *overSrc, Definition *parentDef ) -{ - if ( overSrc->isFinState() ) { - assert( overSrc->transMap.length() == 0 ); - - /* At the end of the production. Turn to the table. */ - long redCode = makeReduceCode( parentDef->prodId, false ); - for ( TransMap::Iter tabTrans = overTab->transMap; tabTrans.lte(); tabTrans++ ) { - for ( ActDataList::Iter adl = tabTrans->value->actions; adl.lte(); adl++ ) { - if ( *adl == redCode ) - result.insert( tabTrans->key ); - } - } - } - else { - /* Get the first set of the item. If the first set contains epsilon - * then move over overSrc and overTab and recurse. 
*/ - assert( overSrc->transMap.length() == 1 ); - TransMap::Iter pastTrans = overSrc->transMap; - - LangEl *langEl = langElIndex[pastTrans->key]; - if ( langEl != 0 && langEl->type == LangEl::NonTerm ) { - bool hasEpsilon = false; - for ( LelDefList::Iter def = langEl->defList; def.lte(); def++ ) { - result.insert( def->firstSet ); - - if ( def->firstSet.find( -1 ) ) - hasEpsilon = true; - } - - /* Find the equivalent state in the parser. */ - if ( hasEpsilon ) { - PdaTrans *tabTrans = overTab->findTrans( pastTrans->key ); - findFollow( result, tabTrans->toState, - pastTrans->value->toState, parentDef ); - } - - /* Now possibly the dup. */ - if ( langEl->termDup != 0 ) - result.insert( langEl->termDup->id ); - } - else { - result.insert( pastTrans->key ); - } - } -} - -PdaState *Compiler::followProd( PdaState *tabState, PdaState *prodState ) -{ - while ( prodState->transMap.length() == 1 ) { - TransMap::Iter prodTrans = prodState->transMap; - PdaTrans *tabTrans = tabState->findTrans( prodTrans->key ); - prodState = prodTrans->value->toState; - tabState = tabTrans->toState; - } - return tabState; -} - -void Compiler::trySetTime( PdaTrans *trans, long code, long &time ) -{ - /* Find the item. */ - for ( ActDataList::Iter adl = trans->actions; adl.lte(); adl++ ) { - if ( *adl == code ) { - /* If the time of the shift is not already set, set it. */ - if ( trans->actOrds[adl.pos()] == 0 ) { - //cerr << "setting time: state = " << tabState->stateNum - // << ", trans = " << tabTrans->lowKey - // << ", time = " << time << endl; - trans->actOrds[adl.pos()] = time++; - } - break; - } - } -} - -/* Go down a defintiion and then handle the follow actions. */ -void Compiler::pdaOrderFollow( LangEl *rootEl, PdaState *tabState, - PdaTrans *tabTrans, PdaTrans *srcTrans, Definition *parentDef, - Definition *definition, long &time ) -{ - /* We need the follow from tabState/srcState over the defintion we are - * currently processing. 
*/ - PdaState *overTab = tabTrans->toState; - PdaState *overSrc = srcTrans->toState; - - AlphSet alphSet; - if ( parentDef == rootEl->rootDef ) - alphSet.insert( rootEl->eofLel->id ); - else - findFollow( alphSet, overTab, overSrc, parentDef ); - - /* Now follow the production to find out where it expands to. */ - PdaState *expandToState = followProd( tabState, definition->fsm->startState ); - - /* Find the reduce item. */ - long redCode = makeReduceCode( definition->prodId, false ); - - for ( TransMap::Iter tt = expandToState->transMap; tt.lte(); tt++ ) { - if ( alphSet.find( tt->key ) ) { - trySetTime( tt->value, redCode, time ); - - /* If the items token region is not recorded in the state, do it now. */ - addRegion( expandToState, tt->value, tt->key, - tt->value->noPreIgnore, tt->value->noPostIgnore ); - } - } -} - -bool regionVectHas( RegionVect ®Vect, TokenRegion *region ) -{ - for ( RegionVect::Iter trvi = regVect; trvi.lte(); trvi++ ) { - if ( *trvi == region ) - return true; - } - return false; -} - -void Compiler::addRegion( PdaState *tabState, PdaTrans *tabTrans, - long pdaKey, bool noPreIgnore, bool noPostIgnore ) -{ - LangEl *langEl = langElIndex[pdaKey]; - if ( langEl != 0 && langEl->type == LangEl::Term ) { - TokenRegion *region = 0; - - /* If it is not the eof, then use the region associated - * with the token definition. */ - if ( langEl->isCI ) { - //cerr << "isCI" << endl; - region = langEl->ciRegion->ciRegion; - } - else if ( !langEl->isEOF && langEl->tokenDef != 0 ) { - region = langEl->tokenDef->tokenRegion; - } - - if ( region != 0 ) { - /* region. 
*/ - TokenRegion *scanRegion = region; - - if ( langEl->noPreIgnore ) - scanRegion = region->tokenOnlyRegion; - - if ( !regionVectHas( tabState->regions, scanRegion ) ) { - tabState->regions.append( scanRegion ); - } - - /* Pre-region of to state */ - PdaState *toState = tabTrans->toState; - if ( !langEl->noPostIgnore && - region->ignoreOnlyRegion != 0 && - !regionVectHas( toState->preRegions, region->ignoreOnlyRegion ) ) - { - toState->preRegions.append( region->ignoreOnlyRegion ); - } - } - } -} - -#if 0 - orderState( tabState, prodState, time ): - if not tabState.dotSet.find( prodState.dotID ) - tabState.dotSet.insert( prodState.dotID ) - tabTrans = tabState.findMatchingTransition( prodState.getTransition() ) - - if tabTrans is NonTerminal: - for production in tabTrans.nonTerm.prodList: - orderState( tabState, production.startState, time ) - - for all expandToState in tabTrans.expandToStates: - for all followTrans in expandToState.transList - reduceAction = findAction( production.reduction ) - if reduceAction.time is unset: - reduceAction.time = time++ - end - end - end - end - end - - shiftAction = tabTrans.findAction( shift ) - if shiftAction.time is unset: - shiftAction.time = time++ - end - - orderState( tabTrans.toState, prodTrans.toState, time ) - end - end - - orderState( parseTable.startState, startProduction.startState, 1 ) -#endif - -void Compiler::pdaOrderProd( LangEl *rootEl, PdaState *tabState, - PdaState *srcState, Definition *parentDef, long &time ) -{ - assert( srcState->dotSet.length() == 1 ); - if ( tabState->dotSet2.find( srcState->dotSet[0] ) ) - return; - tabState->dotSet2.insert( srcState->dotSet[0] ); - - assert( srcState->transMap.length() == 0 || srcState->transMap.length() == 1 ); - - if ( srcState->transMap.length() == 1 ) { - TransMap::Iter srcTrans = srcState->transMap; - - /* Find the equivalent state in the parser. 
*/ - PdaTrans *tabTrans = tabState->findTrans( srcTrans->key ); - - /* Recurse into the transition if it is a non-terminal. */ - LangEl *langEl = langElIndex[srcTrans->key]; - if ( langEl != 0 ) { - if ( langEl->reduceFirst ) { - /* Use a shortest match ordering for the contents of this - * nonterminal. Does follows for all productions first, then - * goes down the productions. */ - for ( LelDefList::Iter expDef = langEl->defList; expDef.lte(); expDef++ ) { - pdaOrderFollow( rootEl, tabState, tabTrans, srcTrans->value, - parentDef, expDef, time ); - } - for ( LelDefList::Iter expDef = langEl->defList; expDef.lte(); expDef++ ) - pdaOrderProd( rootEl, tabState, expDef->fsm->startState, expDef, time ); - - } - else { - /* The default action ordering. For each prod, goes down the - * prod then sets the follow before going to the next prod. */ - for ( LelDefList::Iter expDef = langEl->defList; expDef.lte(); expDef++ ) { - pdaOrderProd( rootEl, tabState, expDef->fsm->startState, expDef, time ); - - pdaOrderFollow( rootEl, tabState, tabTrans, srcTrans->value, - parentDef, expDef, time ); - } - } - } - - trySetTime( tabTrans, SHIFT_CODE, time ); - - /* Now possibly for the dup. */ - if ( langEl != 0 && langEl->termDup != 0 ) { - PdaTrans *dupTrans = tabState->findTrans( langEl->termDup->id ); - trySetTime( dupTrans, SHIFT_CODE, time ); - } - - /* If the items token region is not recorded in the state, do it now. */ - addRegion( tabState, tabTrans, srcTrans->key, - srcTrans->value->noPreIgnore, srcTrans->value->noPostIgnore ); - - /* Go over one in the production. */ - pdaOrderProd( rootEl, tabTrans->toState, - srcTrans->value->toState, parentDef, time ); - } -} - -void Compiler::pdaActionOrder( PdaGraph *pdaGraph, LangElSet &parserEls ) -{ - for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) { - assert( (state->stateBits & SB_ISMARKED) == 0 ); - - /* Traverse the src state's transitions. 
*/ - long last = 0; - for ( TransMap::Iter trans = state->transMap; trans.lte(); trans++ ) { - if ( ! trans.first() ) - assert( last < trans->key ); - last = trans->key; - } - } - - /* Compute the action orderings, record the max value. */ - long time = 1; - for ( LangElSet::Iter pe = parserEls; pe.lte(); pe++ ) { - PdaState *startState = (*pe)->rootDef->fsm->startState; - pdaOrderProd( *pe, (*pe)->startState, startState, (*pe)->rootDef, time ); - - /* Walk over the start lang el and set the time for shift of - * the eof action that completes the parse. */ - PdaTrans *overStart = (*pe)->startState->findTrans( (*pe)->id ); - PdaTrans *eofTrans = overStart->toState->findTrans( (*pe)->eofLel->id ); - eofTrans->actOrds[0] = time++; - } - - for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) { - if ( state->regions.length() == 0 ) { - for ( TransMap::Iter tel = state->transMap; tel.lte(); tel++ ) { - /* There are no regions and EOF leaves the state. Add the eof - * token region. */ - PdaTrans *trans = tel->value; - LangEl *lel = langElIndex[trans->lowKey]; - if ( lel != 0 && lel->isEOF ) - state->regions.append( eofTokenRegion ); - } - } - } - - if ( colm_log_compile ) { - /* Warn about states with empty token region lists. */ - for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) { - if ( state->regions.length() == 0 ) { - warning() << "state has an empty token region, state: " << - state->stateNum << endl; - } - } - } - - /* Some actions may not have an ordering. I believe these to be actions - * that result in a parse error and they arise because the state tables - * are LALR(1) but the action ordering is LR(1). LALR(1) causes some - * reductions that lead nowhere. 
*/ - for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) { - assert( CmpDotSet::compare( state->dotSet, state->dotSet2 ) == 0 ); - for ( TransMap::Iter tel = state->transMap; tel.lte(); tel++ ) { - PdaTrans *trans = tel->value; - /* Check every action has an ordering. */ - for ( ActDataList::Iter adl = trans->actOrds; adl.lte(); adl++ ) { - if ( *adl == 0 ) - *adl = time++; - } - } - } -} - -void Compiler::advanceReductions( PdaGraph *pdaGraph ) -{ - /* Loop all states. */ - for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) { - if ( !state->advanceReductions ) - continue; - - bool outHasShift = false; - ReductionMap outReds; - LongSet outCommits; - for ( TransMap::Iter out = state->transMap; out.lte(); out++ ) { - /* Get the transition from the trans el. */ - if ( out->value->isShift ) - outHasShift = true; - outReds.insert( out->value->reductions ); - outCommits.insert( out->value->commits ); - } - - bool inHasShift = false; - ReductionMap inReds; - for ( PdaTransInList::Iter in = state->inRange; in.lte(); in++ ) { - /* Get the transition from the trans el. */ - if ( in->isShift ) - inHasShift = true; - inReds.insert( in->reductions ); - } - - if ( !outHasShift && outReds.length() == 1 && - inHasShift && inReds.length() == 0 ) - { - //cerr << "moving reduction to shift" << endl; - - /* Move the reduction to all in transitions. */ - for ( PdaTransInList::Iter in = state->inRange; in.lte(); in++ ) { - assert( in->actions.length() == 1 ); - assert( in->actions[0] == SHIFT_CODE ); - in->actions[0] = makeReduceCode( outReds[0].key, true ); - in->afterShiftCommits.insert( outCommits ); - } - - /* - * Remove all transitions out of the state. - */ - - /* Detach out range transitions. 
*/ - for ( TransMap::Iter trans = state->transMap; trans.lte(); trans++ ) { - pdaGraph->detachTrans( state, trans->value->toState, trans->value ); - delete trans->value; - } - state->transMap.empty(); - - /* Redirect all the in transitions to the actionDestState. */ - pdaGraph->inTransMove( actionDestState, state ); - } - } - - pdaGraph->removeUnreachableStates(); -} - -void Compiler::sortActions( PdaGraph *pdaGraph ) -{ - /* Sort the actions. */ - for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) { - assert( CmpDotSet::compare( state->dotSet, state->dotSet2 ) == 0 ); - for ( TransMap::Iter tel = state->transMap; tel.lte(); tel++ ) { - PdaTrans *trans = tel->value; - - /* Sort by the action ords. */ - ActDataList actions( trans->actions ); - ActDataList actOrds( trans->actOrds ); - ActDataList actPriors( trans->actPriors ); - trans->actions.empty(); - trans->actOrds.empty(); - trans->actPriors.empty(); - while ( actOrds.length() > 0 ) { - int min = 0; - for ( int i = 1; i < actOrds.length(); i++ ) { - if ( actPriors[i] > actPriors[min] || - (actPriors[i] == actPriors[min] && - actOrds[i] < actOrds[min] ) ) - { - min = i; - } - } - trans->actions.append( actions[min] ); - trans->actOrds.append( actOrds[min] ); - trans->actPriors.append( actPriors[min] ); - actions.remove(min); - actOrds.remove(min); - actPriors.remove(min); - } - - if ( branchPointInfo && trans->actions.length() > 1 ) { - cerr << "info: branch point" - << " state: " << state->stateNum - << " trans: "; - LangEl *lel = langElIndex[trans->lowKey]; - if ( lel == 0 ) - cerr << (char)trans->lowKey << endl; - else - cerr << lel->lit << endl; - - for ( ActDataList::Iter act = trans->actions; act.lte(); act++ ) { - switch ( *act & 0x3 ) { - case 1: - cerr << " shift" << endl; - break; - case 2: - cerr << " reduce " << - prodIdIndex[(*act >> 2)]->data << endl; - break; - case 3: - cerr << " shift-reduce" << endl; - break; - } - } - } - - /* Verify that shifts of nonterminals don't 
have any branch - * points or commits. */ - if ( trans->lowKey >= firstNonTermId ) { - if ( trans->actions.length() != 1 || - (trans->actions[0] & 0x3) != 1 ) - { - error() << "TRANS ON NONTERMINAL is something " - "other than a shift" << endl; - } - if ( trans->commits.length() > 0 ) - error() << "TRANS ON NONTERMINAL has a commit" << endl; - } - - /* TODO: Shift-reduces are optimizations. Verify that - * shift-reduces exist only if they don't entail a conflict. */ - } - } -} - -void Compiler::reduceActions( PdaGraph *pdaGraph ) -{ - /* Reduce the actions. */ - for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) { - for ( TransMap::Iter tel = state->transMap; tel.lte(); tel++ ) { - PdaTrans *trans = tel->value; - PdaActionSetEl *inSet; - - int commitLen = trans->commits.length() > 0 ? - trans->commits[trans->commits.length()-1] : 0; - - if ( trans->afterShiftCommits.length() > 0 ) { - int afterShiftCommit = trans->afterShiftCommits[ - trans->afterShiftCommits.length()-1]; - - if ( commitLen > 0 && commitLen+1 > afterShiftCommit ) - commitLen = ( commitLen + 1 ); - else - commitLen = afterShiftCommit; - } - else { - commitLen = commitLen * -1; - } - - //if ( commitLen != 0 ) { - // cerr << "FINAL ACTION COMMIT LEN: " << commitLen << endl; - //} - - pdaGraph->actionSet.insert( ActionData( trans->toState->stateNum, - trans->actions, commitLen ), &inSet ); - trans->actionSetEl = inSet; - } - } -} - -void Compiler::computeAdvanceReductions( LangEl *langEl, PdaGraph *pdaGraph ) -{ - /* Get the entry into the graph and traverse over the root. The resulting - * state can have eof, nothing else can. */ - PdaState *overStart = pdaGraph->followFsm( - langEl->startState, - langEl->rootDef->fsm ); - - /* The graph must reduce to root all on it's own. It cannot depend on - * require EOF. 
*/ - for ( PdaStateList::Iter st = pdaGraph->stateList; st.lte(); st++ ) { - if ( st == overStart ) - continue; - - for ( TransMap::Iter tr = st->transMap; tr.lte(); tr++ ) { - if ( tr->value->lowKey == langEl->eofLel->id ) - st->advanceReductions = true; - } - } -} - -void Compiler::verifyParseStopGrammar( LangEl *langEl, PdaGraph *pdaGraph ) -{ - /* Get the entry into the graph and traverse over the root. The resulting - * state can have eof, nothing else can. */ - PdaState *overStart = pdaGraph->followFsm( - langEl->startState, - langEl->rootDef->fsm ); - - /* The graph must reduce to root all on it's own. It cannot depend on - * require EOF. */ - for ( PdaStateList::Iter st = pdaGraph->stateList; st.lte(); st++ ) { - if ( st == overStart ) - continue; - - for ( TransMap::Iter tr = st->transMap; tr.lte(); tr++ ) { - if ( tr->value->lowKey == langEl->eofLel->id ) { - /* This needs a better error message. Appears to be voodoo. */ - error() << "grammar is not usable with parse_stop" << endp; - } - } - } -} - -LangEl *Compiler::predOf( PdaTrans *trans, long action ) -{ - LangEl *lel; - if ( action == SHIFT_CODE ) - lel = langElIndex[trans->lowKey]; - else - lel = prodIdIndex[action >> 2]->predOf; - return lel; -} - - -bool Compiler::precedenceSwap( long action1, long action2, LangEl *l1, LangEl *l2 ) -{ - bool swap = false; - if ( l2->predValue > l1->predValue ) - swap = true; - else if ( l1->predValue == l2->predValue ) { - if ( l1->predType == PredLeft && action1 == SHIFT_CODE ) - swap = true; - else if ( l1->predType == PredRight && action2 == SHIFT_CODE ) - swap = true; - } - return swap; -} - -bool Compiler::precedenceRemoveBoth( LangEl *l1, LangEl *l2 ) -{ - if ( l1->predValue == l2->predValue && l1->predType == PredNonassoc ) - return true; - return false; -} - -void Compiler::resolvePrecedence( PdaGraph *pdaGraph ) -{ - for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) { - assert( CmpDotSet::compare( state->dotSet, state->dotSet2 ) 
== 0 ); - - for ( long t = 0; t < state->transMap.length(); /* increment at end */ ) { - PdaTrans *trans = state->transMap[t].value; - -again: - /* Find action with precedence. */ - for ( int i = 0; i < trans->actions.length(); i++ ) { - LangEl *li = predOf( trans, trans->actions[i] ); - - if ( li != 0 && li->predType != PredNone ) { - /* Find another action with precedence. */ - for ( int j = i+1; j < trans->actions.length(); j++ ) { - LangEl *lj = predOf( trans, trans->actions[j] ); - - if ( lj != 0 && lj->predType != PredNone ) { - /* Conflict to check. */ - bool swap = precedenceSwap( trans->actions[i], - trans->actions[j], li, lj ); - - if ( swap ) { - long t = trans->actions[i]; - trans->actions[i] = trans->actions[j]; - trans->actions[j] = t; - } - - trans->actions.remove( j ); - if ( precedenceRemoveBoth( li, lj ) ) - trans->actions.remove( i ); - - goto again; - } - } - } - } - - /* If there are still actions then move to the next one. If not, - * (due to nonassoc) then remove the transition. 
*/ - if ( trans->actions.length() > 0 ) - t += 1; - else - state->transMap.vremove( t ); - } - } -} - -void Compiler::analyzeMachine( PdaGraph *pdaGraph, LangElSet &parserEls ) -{ - pdaGraph->maxState = pdaGraph->stateList.length() - 1; - pdaGraph->maxLelId = nextSymbolId - 1; - pdaGraph->maxOffset = pdaGraph->stateList.length() * pdaGraph->maxLelId; - - for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) { - for ( TransMap::Iter trans = state->transMap; trans.lte(); trans++ ) { - if ( trans->value->isShift ) { - trans->value->actions.append( SHIFT_CODE ); - trans->value->actPriors.append( trans->value->shiftPrior ); - } - for ( ReductionMap::Iter red = trans->value->reductions; red.lte(); red++ ) { - trans->value->actions.append( makeReduceCode( red->key, false ) ); - trans->value->actPriors.append( red->value ); - } - trans->value->actOrds.appendDup( 0, trans->value->actions.length() ); - } - } - - pdaActionOrder( pdaGraph, parserEls ); - sortActions( pdaGraph ); - resolvePrecedence( pdaGraph ); - - /* Verify that any type we parse_stop can actually be parsed that way. */ - for ( LangElSet::Iter pe = parserEls; pe.lte(); pe++ ) { - LangEl *lel = *pe; - if ( lel->parseStop ) - computeAdvanceReductions(lel , pdaGraph); - } - - advanceReductions( pdaGraph ); - pdaGraph->setStateNumbers(); - reduceActions( pdaGraph ); - - /* Set the action ids. */ - int actionSetId = 0; - for ( PdaActionSet::Iter asi = pdaGraph->actionSet; asi.lte(); asi++ ) - asi->key.id = actionSetId++; - - /* Get the max index. */ - pdaGraph->maxIndex = actionSetId - 1; - - /* Compute the max prod length. */ - pdaGraph->maxProdLen = 0; - for ( DefList::Iter prod = prodList; prod.lte(); prod++ ) { - if ( (unsigned)prod->fsmLength > pdaGraph->maxProdLen ) - pdaGraph->maxProdLen = prod->fsmLength; - } - - /* Asserts that any transition with a nonterminal has a single action - * which is either a shift or a shift-reduce. 
*/ - for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) { - for ( TransMap::Iter trans = state->transMap; trans.lte(); trans++ ) { - LangEl *langEl = langElIndex[trans->value->lowKey]; - if ( langEl != 0 && langEl->type == LangEl::NonTerm ) { - assert( trans->value->actions.length() == 1 ); - assert( trans->value->actions[0] == SHIFT_CODE || - (trans->value->actions[0] & 0x3) == SHIFT_REDUCE_CODE ); - } - } - } - - /* Assert that shift reduces always appear on their own. */ - for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) { - for ( TransMap::Iter trans = state->transMap; trans.lte(); trans++ ) { - for ( ActDataList::Iter act = trans->value->actions; act.lte(); act++ ) { - if ( (*act & 0x3) == SHIFT_REDUCE_CODE ) - assert( trans->value->actions.length() == 1 ); - } - } - } - - /* Verify that any type we parse_stop can actually be parsed that way. */ - for ( LangElSet::Iter pe = parserEls; pe.lte(); pe++ ) { - LangEl *lel = *pe; - if ( lel->parseStop ) - verifyParseStopGrammar(lel , pdaGraph); - } -} - -void Compiler::wrapNonTerminals() -{ - /* Make a language element that will be used to make the root productions. - * These are used for making parsers rooted at any production (including - * the start symbol). */ - rootLangEl = declareLangEl( this, rootNamespace, "_root", LangEl::NonTerm ); - - for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) { - /* Make a single production used when the lel is a root. */ - ProdElList *prodElList = makeProdElList( lel ); - lel->rootDef = new Definition( InputLoc(), rootLangEl, - prodElList, false, 0, - prodList.length(), rootLangEl->defList.length(), - Definition::Production ); - prodList.append( lel->rootDef ); - rootLangEl->defList.append( lel->rootDef ); - - /* First resolve. 
*/ - for ( ProdElList::Iter fact = *prodElList; fact.lte(); fact++ ) - resolveFactor( fact ); - } -} - -bool Compiler::makeNonTermFirstSetProd( Definition *prod, PdaState *state ) -{ - bool modified = false; - for ( TransMap::Iter trans = state->transMap; trans.lte(); trans++ ) { - if ( trans->key >= firstNonTermId ) { - long *inserted = prod->nonTermFirstSet.insert( trans->key ); - if ( inserted != 0 ) - modified = true; - - bool hasEpsilon = false; - LangEl *lel = langElIndex[trans->key]; - for ( LelDefList::Iter ldef = lel->defList; ldef.lte(); ldef++ ) { - for ( ProdIdSet::Iter pid = ldef->nonTermFirstSet; - pid.lte(); pid++ ) - { - if ( *pid == -1 ) - hasEpsilon = true; - else { - long *inserted = prod->nonTermFirstSet.insert( *pid ); - if ( inserted != 0 ) - modified = true; - } - } - } - - if ( hasEpsilon ) { - if ( trans->value->toState->isFinState() ) { - long *inserted = prod->nonTermFirstSet.insert( -1 ); - if ( inserted != 0 ) - modified = true; - } - - bool lmod = makeNonTermFirstSetProd( prod, trans->value->toState ); - if ( lmod ) - modified = true; - } - } - } - return modified; -} - - -void Compiler::makeNonTermFirstSets() -{ - bool modified = true; - while ( modified ) { - modified = false; - for ( DefList::Iter prod = prodList; prod.lte(); prod++ ) { - if ( prod->fsm->startState->isFinState() ) { - long *inserted = prod->nonTermFirstSet.insert( -1 ); - if ( inserted != 0 ) - modified = true; - } - - bool lmod = makeNonTermFirstSetProd( prod, prod->fsm->startState ); - if ( lmod ) - modified = true; - } - } - - for ( DefList::Iter prod = prodList; prod.lte(); prod++ ) { - if ( prod->nonTermFirstSet.find( prod->prodName->id ) ) - prod->isLeftRec = true; - } -} - -void Compiler::printNonTermFirstSets() -{ - for ( DefList::Iter prod = prodList; prod.lte(); prod++ ) { - cerr << prod->data << ": "; - for ( ProdIdSet::Iter pid = prod->nonTermFirstSet; pid.lte(); pid++ ) - { - if ( *pid < 0 ) - cerr << " <EPSILON>"; - else { - LangEl *lel = 
langElIndex[*pid]; - cerr << " " << lel->name; - } - } - cerr << endl; - - if ( prod->isLeftRec ) - cerr << "PROD IS LEFT REC: " << prod->data << endl; - } -} - -bool Compiler::makeFirstSetProd( Definition *prod, PdaState *state ) -{ - bool modified = false; - for ( TransMap::Iter trans = state->transMap; trans.lte(); trans++ ) { - if ( trans->key < firstNonTermId ) { - long *inserted = prod->firstSet.insert( trans->key ); - if ( inserted != 0 ) - modified = true; - } - else { - long *inserted = prod->firstSet.insert( trans->key ); - if ( inserted != 0 ) - modified = true; - - LangEl *klangEl = langElIndex[trans->key]; - if ( klangEl != 0 && klangEl->termDup != 0 ) { - long *inserted2 = prod->firstSet.insert( klangEl->termDup->id ); - if ( inserted2 != 0 ) - modified = true; - } - - bool hasEpsilon = false; - LangEl *lel = langElIndex[trans->key]; - for ( LelDefList::Iter ldef = lel->defList; ldef.lte(); ldef++ ) { - for ( ProdIdSet::Iter pid = ldef->firstSet; - pid.lte(); pid++ ) - { - if ( *pid == -1 ) - hasEpsilon = true; - else { - long *inserted = prod->firstSet.insert( *pid ); - if ( inserted != 0 ) - modified = true; - } - } - } - - if ( hasEpsilon ) { - if ( trans->value->toState->isFinState() ) { - long *inserted = prod->firstSet.insert( -1 ); - if ( inserted != 0 ) - modified = true; - } - - bool lmod = makeFirstSetProd( prod, trans->value->toState ); - if ( lmod ) - modified = true; - } - } - } - return modified; -} - - -void Compiler::makeFirstSets() -{ - bool modified = true; - while ( modified ) { - modified = false; - for ( DefList::Iter prod = prodList; prod.lte(); prod++ ) { - if ( prod->fsm->startState->isFinState() ) { - long *inserted = prod->firstSet.insert( -1 ); - if ( inserted != 0 ) - modified = true; - } - - bool lmod = makeFirstSetProd( prod, prod->fsm->startState ); - if ( lmod ) - modified = true; - } - } -} - -void Compiler::printFirstSets() -{ - for ( DefList::Iter prod = prodList; prod.lte(); prod++ ) { - cerr << prod->data << ": "; 
- for ( ProdIdSet::Iter pid = prod->firstSet; pid.lte(); pid++ ) - { - if ( *pid < 0 ) - cerr << " <EPSILON>"; - else { - LangEl *lel = langElIndex[*pid]; - if ( lel != 0 ) - cerr << endl << " " << lel->name; - else - cerr << endl << " " << *pid; - } - } - cerr << endl; - } -} - -void Compiler::insertUniqueEmptyProductions() -{ - int limit = prodList.length(); - for ( DefList::Iter prod = prodList; prod.lte(); prod++ ) { - if ( prod->prodId == limit ) - break; - - /* Get a language element. */ - char name[20]; - sprintf(name, "U%li", prodList.length()); - LangEl *prodName = addLangEl( this, rootNamespace, name, LangEl::NonTerm ); - Definition *newDef = new Definition( InputLoc(), prodName, - 0 /* FIXME new VarDef( name, 0 )*/, - false, 0, prodList.length(), prodName->defList.length(), - Definition::Production ); - prodName->defList.append( newDef ); - prodList.append( newDef ); - - prod->uniqueEmptyLeader = prodName; - } -} - -void Compiler::makeRuntimeData() -{ - long count = 0; - - /* - * ProdLengths - * ProdLhsIs - * ProdNames - * ProdCodeBlocks - * ProdCodeBlockLens - */ - - runtimeData->frameInfo = new FrameInfo[nextFrameId]; - runtimeData->numFrames = nextFrameId; - memset( runtimeData->frameInfo, 0, sizeof(FrameInfo) * nextFrameId ); - - /* - * Init code block. 
- */ - if ( rootCodeBlock == 0 ) { - runtimeData->rootCode = 0; - runtimeData->rootCodeLen = 0; - runtimeData->rootFrameId = 0; - } - else { - runtimeData->rootCode = rootCodeBlock->codeWC.data; - runtimeData->rootCodeLen = rootCodeBlock->codeWC.length(); - runtimeData->rootFrameId = rootCodeBlock->frameId; - } - - runtimeData->frameInfo[rootCodeBlock->frameId].codeWV = 0; - runtimeData->frameInfo[rootCodeBlock->frameId].codeLenWV = 0; - runtimeData->frameInfo[rootCodeBlock->frameId].trees = rootCodeBlock->trees.data; - runtimeData->frameInfo[rootCodeBlock->frameId].treesLen = rootCodeBlock->trees.length(); - runtimeData->frameInfo[rootCodeBlock->frameId].frameSize = rootLocalFrame->size(); - runtimeData->frameInfo[rootCodeBlock->frameId].argSize = 0; - - /* - * prodInfo - */ - count = prodList.length(); - runtimeData->prodInfo = new ProdInfo[count]; - runtimeData->numProds = count; - - count = 0; - for ( DefList::Iter prod = prodList; prod.lte(); prod++ ) { - runtimeData->prodInfo[count].lhsId = prod->prodName->id; - runtimeData->prodInfo[count].prodNum = prod->prodNum; - runtimeData->prodInfo[count].length = prod->fsmLength; - runtimeData->prodInfo[count].name = prod->data; - runtimeData->prodInfo[count].frameId = -1; - - CodeBlock *block = prod->redBlock; - if ( block != 0 ) { - runtimeData->prodInfo[count].frameId = block->frameId; - runtimeData->frameInfo[block->frameId].codeWV = block->codeWV.data; - runtimeData->frameInfo[block->frameId].codeLenWV = block->codeWV.length(); - - runtimeData->frameInfo[block->frameId].trees = block->trees.data; - runtimeData->frameInfo[block->frameId].treesLen = block->trees.length(); - - runtimeData->frameInfo[block->frameId].frameSize = block->localFrame->size(); - runtimeData->frameInfo[block->frameId].argSize = 0; - } - - runtimeData->prodInfo[count].lhsUpref = true; - runtimeData->prodInfo[count].copy = prod->copy.data; - runtimeData->prodInfo[count].copyLen = prod->copy.length() / 2; - count += 1; - } - - /* - * 
regionInfo - */ - runtimeData->numRegions = regionList.length()+1; - runtimeData->regionInfo = new RegionInfo[runtimeData->numRegions]; - memset( runtimeData->regionInfo, 0, sizeof(RegionInfo) * runtimeData->numRegions ); - - runtimeData->regionInfo[0].name = "___EMPTY"; - runtimeData->regionInfo[0].defaultToken = -1; - for ( RegionList::Iter reg = regionList; reg.lte(); reg++ ) { - long regId = reg->id+1; - runtimeData->regionInfo[regId].name = reg->name; - runtimeData->regionInfo[regId].defaultToken = - reg->defaultTokenDef == 0 ? -1 : reg->defaultTokenDef->tdLangEl->id; - runtimeData->regionInfo[regId].eofFrameId = -1; - runtimeData->regionInfo[regId].isIgnoreOnly = reg->isIgnoreOnly; - runtimeData->regionInfo[regId].isCiOnly = reg->isCiOnly; - runtimeData->regionInfo[regId].ciLelId = reg->isCiOnly ? reg->derivedFrom->ciLel->id : 0; - - CodeBlock *block = reg->preEofBlock; - if ( block != 0 ) { - runtimeData->regionInfo[regId].eofFrameId = block->frameId; - runtimeData->frameInfo[block->frameId].codeWV = block->codeWV.data; - runtimeData->frameInfo[block->frameId].codeLenWV = block->codeWV.length(); - - runtimeData->frameInfo[block->frameId].trees = block->trees.data; - runtimeData->frameInfo[block->frameId].treesLen = block->trees.length(); - - runtimeData->frameInfo[block->frameId].frameSize = block->localFrame->size(); - runtimeData->frameInfo[block->frameId].argSize = 0; - } - } - - /* - * lelInfo - */ - - count = nextSymbolId; - runtimeData->lelInfo = new LangElInfo[count]; - runtimeData->numLangEls = count; - memset( runtimeData->lelInfo, 0, sizeof(LangElInfo)*count ); - - for ( int i = 0; i < nextSymbolId; i++ ) { - LangEl *lel = langElIndex[i]; - if ( lel != 0 ) { - runtimeData->lelInfo[i].name = lel->fullLit; - runtimeData->lelInfo[i].xmlTag = lel->xmlTag; - runtimeData->lelInfo[i].repeat = lel->isRepeat; - runtimeData->lelInfo[i].list = lel->isList; - runtimeData->lelInfo[i].literal = lel->isLiteral; - runtimeData->lelInfo[i].ignore = lel->ignore; - 
runtimeData->lelInfo[i].frameId = -1; - - CodeBlock *block = lel->transBlock; - if ( block != 0 ) { - runtimeData->lelInfo[i].frameId = block->frameId; - runtimeData->frameInfo[block->frameId].codeWV = block->codeWV.data; - runtimeData->frameInfo[block->frameId].codeLenWV = block->codeWV.length(); - - runtimeData->frameInfo[block->frameId].trees = block->trees.data; - runtimeData->frameInfo[block->frameId].treesLen = block->trees.length(); - - runtimeData->frameInfo[block->frameId].frameSize = block->localFrame->size(); - runtimeData->frameInfo[block->frameId].argSize = 0; - } - - - runtimeData->lelInfo[i].objectTypeId = - lel->objectDef == 0 ? 0 : lel->objectDef->id; - runtimeData->lelInfo[i].ofiOffset = lel->ofiOffset; - runtimeData->lelInfo[i].objectLength = - ( lel->objectDef == 0 || lel->objectDef == tokenObj ) ? 0 : - lel->objectDef->size(); - -// runtimeData->lelInfo[i].contextTypeId = 0; -// lel->context == 0 ? 0 : lel->context->contextObjDef->id; -// runtimeData->lelInfo[i].contextLength = 0; //lel->context == 0 ? 0 : -// lel->context->contextObjDef->size(); -// if ( lel->context != 0 ) { -// cout << "type: " << runtimeData->lelInfo[i].contextTypeId << " length: " << -// runtimeData->lelInfo[i].contextLength << endl; -// } - - runtimeData->lelInfo[i].termDupId = lel->termDup == 0 ? 0 : lel->termDup->id; - runtimeData->lelInfo[i].genericId = lel->generic == 0 ? 
0 : lel->generic->id; - - if ( lel->tokenDef != 0 && lel->tokenDef->join != 0 && - lel->tokenDef->join->context != 0 ) - runtimeData->lelInfo[i].markId = lel->tokenDef->join->mark->markId; - else - runtimeData->lelInfo[i].markId = -1; - - runtimeData->lelInfo[i].numCaptureAttr = 0; - } - else { - memset(&runtimeData->lelInfo[i], 0, sizeof(LangElInfo) ); - runtimeData->lelInfo[i].name = "__UNUSED"; - runtimeData->lelInfo[i].xmlTag = "__UNUSED"; - runtimeData->lelInfo[i].frameId = -1; - } - } - - /* - * FunctionInfo - */ - count = functionList.length(); - - runtimeData->functionInfo = new FunctionInfo[count]; - runtimeData->numFunctions = count; - memset( runtimeData->functionInfo, 0, sizeof(FunctionInfo)*count ); - for ( FunctionList::Iter func = functionList; func.lte(); func++ ) { - runtimeData->functionInfo[func->funcId].name = func->name; - runtimeData->functionInfo[func->funcId].frameId = -1; - - CodeBlock *block = func->codeBlock; - if ( block != 0 ) { - runtimeData->functionInfo[func->funcId].frameId = block->frameId; - - runtimeData->frameInfo[block->frameId].codeWV = block->codeWV.data; - runtimeData->frameInfo[block->frameId].codeLenWV = block->codeWV.length(); - - runtimeData->frameInfo[block->frameId].codeWC = block->codeWC.data; - runtimeData->frameInfo[block->frameId].codeLenWC = block->codeWC.length(); - - runtimeData->frameInfo[block->frameId].trees = block->trees.data; - runtimeData->frameInfo[block->frameId].treesLen = block->trees.length(); - - runtimeData->frameInfo[block->frameId].frameSize = func->localFrame->size(); - runtimeData->frameInfo[block->frameId].argSize = func->paramListSize; - } - - runtimeData->functionInfo[func->funcId].frameSize = func->localFrame->size(); - runtimeData->functionInfo[func->funcId].argSize = func->paramListSize; - } - - /* - * PatReplInfo - */ - - /* Filled in later after patterns are parsed. 
*/ - runtimeData->patReplInfo = new PatReplInfo[nextPatReplId]; - memset( runtimeData->patReplInfo, 0, sizeof(PatReplInfo) * nextPatReplId ); - runtimeData->numPatterns = nextPatReplId; - runtimeData->patReplNodes = 0; - runtimeData->numPatternNodes = 0; - - - /* - * GenericInfo - */ - count = 1; - for ( NamespaceList::Iter nspace = namespaceList; nspace.lte(); nspace++ ) - count += nspace->genericList.length(); - assert( count == nextGenericId ); - - runtimeData->genericInfo = new GenericInfo[count]; - runtimeData->numGenerics = count; - memset( &runtimeData->genericInfo[0], 0, sizeof(GenericInfo) ); - for ( NamespaceList::Iter nspace = namespaceList; nspace.lte(); nspace++ ) { - for ( GenericList::Iter gen = nspace->genericList; gen.lte(); gen++ ) { - runtimeData->genericInfo[gen->id].type = gen->typeId; - runtimeData->genericInfo[gen->id].typeArg = gen->utArg->typeId; - runtimeData->genericInfo[gen->id].keyType = gen->keyUT != 0 ? - gen->keyUT->typeId : 0; - runtimeData->genericInfo[gen->id].keyOffset = 0; - runtimeData->genericInfo[gen->id].langElId = gen->langEl->id; - runtimeData->genericInfo[gen->id].parserId = gen->utArg->langEl->parserId; - } - } - - runtimeData->argvGenericId = argvTypeRef->generic->id; - - /* - * Literals - */ - runtimeData->numLiterals = literalStrings.length(); - runtimeData->litdata = new const char *[literalStrings.length()]; - runtimeData->litlen = new long [literalStrings.length()]; - runtimeData->literals = 0; - for ( StringMap::Iter el = literalStrings; el.lte(); el++ ) { - /* Data. */ - char *data = new char[el->key.length()+1]; - memcpy( data, el->key.data, el->key.length() ); - data[el->key.length()] = 0; - runtimeData->litdata[el->value] = data; - - /* Length. */ - runtimeData->litlen[el->value] = el->key.length(); - } - - /* Captured attributes. Loop over tokens and count first. 
*/ - long numCapturedAttr = 0; -// for ( RegionList::Iter reg = regionList; reg.lte(); reg++ ) { -// for ( TokenDefListReg::Iter td = reg->tokenDefList; td.lte(); td++ ) -// numCapturedAttr += td->reCaptureVect.length(); -// } - runtimeData->captureAttr = new CaptureAttr[numCapturedAttr]; - runtimeData->numCapturedAttr = numCapturedAttr; - memset( runtimeData->captureAttr, 0, sizeof( CaptureAttr ) * numCapturedAttr ); - - count = 0; -// for ( RegionList::Iter reg = regionList; reg.lte(); reg++ ) { -// for ( TokenDefListReg::Iter td = reg->tokenDefList; td.lte(); td++ ) { -// runtimeData->lelInfo[td->token->id].captureAttr = count; -// runtimeData->lelInfo[td->token->id].numCaptureAttr = td->reCaptureVect.length(); -// for ( ReCaptureVect::Iter c = td->reCaptureVect; c.lte(); c++ ) { -// runtimeData->captureAttr[count].mark_enter = c->markEnter->markId; -// runtimeData->captureAttr[count].mark_leave = c->markLeave->markId; -// runtimeData->captureAttr[count].offset = c->objField->offset; -// -// count += 1; -// } -// } -// } - - runtimeData->fsmTables = fsmTables; - runtimeData->pdaTables = pdaTables; - - /* FIXME: need a parser descriptor. */ - runtimeData->startStates = new int[nextParserId]; - runtimeData->eofLelIds = new int[nextParserId]; - runtimeData->parserLelIds = new int[nextParserId]; - runtimeData->numParsers = nextParserId; - for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) { - if ( lel->parserId >= 0 ) { - runtimeData->startStates[lel->parserId] = lel->startState->stateNum; - runtimeData->eofLelIds[lel->parserId] = lel->eofLel->id; - runtimeData->parserLelIds[lel->parserId] = lel->id; - } - } - - runtimeData->globalSize = globalObjectDef->size(); - - /* - * firstNonTermId - */ - runtimeData->firstNonTermId = firstNonTermId; - - /* Special trees. 
*/ - runtimeData->integerId = intLangEl->id; - runtimeData->stringId = strLangEl->id; - runtimeData->anyId = anyLangEl->id; - runtimeData->eofId = 0; //eofLangEl->id; - runtimeData->noTokenId = noTokenLangEl->id; -} - -/* Borrow alg->state for mapsTo. */ -void countNodes( Program *prg, int &count, ParseTree *parseTree, Kid *kid ) -{ - if ( kid != 0 ) { - count += 1; - - /* Should't have to recurse here. */ - Tree *ignoreList = treeLeftIgnore( prg, kid->tree ); - if ( ignoreList != 0 ) { - Kid *ignore = ignoreList->child; - while ( ignore != 0 ) { - count += 1; - ignore = ignore->next; - } - } - - ignoreList = treeRightIgnore( prg, kid->tree ); - if ( ignoreList != 0 ) { - Kid *ignore = ignoreList->child; - while ( ignore != 0 ) { - count += 1; - ignore = ignore->next; - } - } - - //count += prg->rtd->lelInfo[kid->tree->id].numCaptureAttr; - - if ( !( parseTree->flags & PF_NAMED ) && - !( parseTree->flags & PF_ARTIFICIAL ) && - treeChild( prg, kid->tree ) != 0 ) - { - countNodes( prg, count, parseTree->child, treeChild( prg, kid->tree ) ); - } - countNodes( prg, count, parseTree->next, kid->next ); - } -} - -void fillNodes( Program *prg, int &nextAvail, Bindings *bindings, long &bindId, - PatReplNode *nodes, ParseTree *parseTree, Kid *kid, int ind ) -{ - if ( kid != 0 ) { - PatReplNode &node = nodes[ind]; - - Kid *child = - !( parseTree->flags & PF_NAMED ) && - !( parseTree->flags & PF_ARTIFICIAL ) && - treeChild( prg, kid->tree ) != 0 - ? - treeChild( prg, kid->tree ) : 0; - - ParseTree *ptChild = - !( parseTree->flags & PF_NAMED ) && - !( parseTree->flags & PF_ARTIFICIAL ) && - treeChild( prg, kid->tree ) != 0 - ? - parseTree->child : 0; - - /* Set up the fields. */ - node.id = kid->tree->id; - node.prodNum = kid->tree->prodNum; - node.length = stringLength( kid->tree->tokdata ); - node.data = stringData( kid->tree->tokdata ); - - /* Ignore items. */ - Tree *ignoreList = treeLeftIgnore( prg, kid->tree ); - Kid *ignore = ignoreList == 0 ? 
0 : ignoreList->child; - node.leftIgnore = ignore == 0 ? -1 : nextAvail; - - while ( ignore != 0 ) { - PatReplNode &node = nodes[nextAvail++]; - - memset( &node, 0, sizeof(PatReplNode) ); - node.id = ignore->tree->id; - node.prodNum = ignore->tree->prodNum; - node.next = ignore->next == 0 ? -1 : nextAvail; - - node.length = stringLength( ignore->tree->tokdata ); - node.data = stringData( ignore->tree->tokdata ); - - ignore = ignore->next; - } - - /* Ignore items. */ - ignoreList = treeRightIgnore( prg, kid->tree ); - ignore = ignoreList == 0 ? 0 : ignoreList->child; - node.rightIgnore = ignore == 0 ? -1 : nextAvail; - - while ( ignore != 0 ) { - PatReplNode &node = nodes[nextAvail++]; - - memset( &node, 0, sizeof(PatReplNode) ); - node.id = ignore->tree->id; - node.prodNum = ignore->tree->prodNum; - node.next = ignore->next == 0 ? -1 : nextAvail; - - node.length = stringLength( ignore->tree->tokdata ); - node.data = stringData( ignore->tree->tokdata ); - - ignore = ignore->next; - } - - ///* The captured attributes. */ - //for ( int i = 0; i < prg->rtd->lelInfo[kid->tree->id].numCaptureAttr; i++ ) { - // CaptureAttr *cap = prg->rtd->captureAttr + - // prg->rtd->lelInfo[kid->tree->id].captureAttr + i; - // - // Tree *attr = getAttr( kid->tree, cap->offset ); - // - // PatReplNode &node = nodes[nextAvail++]; - // memset( &node, 0, sizeof(PatReplNode) ); - // - // node.id = attr->id; - // node.prodNum = attr->prodNum; - // node.length = stringLength( attr->tokdata ); - // node.data = stringData( attr->tokdata ); - //} - - node.stop = parseTree->flags & PF_TERM_DUP; - - node.child = child == 0 ? -1 : nextAvail++; - - /* Recurse. */ - fillNodes( prg, nextAvail, bindings, bindId, nodes, ptChild, child, node.child ); - - /* Since the parser is bottom up the bindings are in a bottom up - * traversal order. Check after recursing. 
*/ - node.bindId = 0; - if ( bindId < bindings->length() && bindings->data[bindId] == parseTree ) { - /* Remember that binding ids are indexed from one. */ - node.bindId = bindId++; - - //cout << "binding match in " << __PRETTY_FUNCTION__ << endl; - //cout << "bindId: " << node.bindId << endl; - } - - node.next = kid->next == 0 ? -1 : nextAvail++; - - /* Move to the next child. */ - fillNodes( prg, nextAvail, bindings, bindId, nodes, parseTree->next, kid->next, node.next ); - } -} - -void Compiler::fillInPatterns( Program *prg ) -{ - /* - * patReplNodes - */ - - /* Count is referenced and computed by mapNode. */ - int count = 0; - for ( PatternList::Iter pat = patternList; pat.lte(); pat++ ) { - countNodes( prg, count, - pat->pdaRun->stackTop->next, - pat->pdaRun->stackTop->next->shadow ); - } - - for ( ReplList::Iter repl = replList; repl.lte(); repl++ ) { - countNodes( prg, count, - repl->pdaRun->stackTop->next, - repl->pdaRun->stackTop->next->shadow ); - } - - runtimeData->patReplNodes = new PatReplNode[count]; - runtimeData->numPatternNodes = count; - - int nextAvail = 0; - - for ( PatternList::Iter pat = patternList; pat.lte(); pat++ ) { - int ind = nextAvail++; - runtimeData->patReplInfo[pat->patRepId].offset = ind; - - /* BindIds are indexed base one. */ - runtimeData->patReplInfo[pat->patRepId].numBindings = - pat->pdaRun->bindings->length() - 1; - - /* Init the bind */ - long bindId = 1; - fillNodes( prg, nextAvail, pat->pdaRun->bindings, bindId, - runtimeData->patReplNodes, - pat->pdaRun->stackTop->next, - pat->pdaRun->stackTop->next->shadow, - ind ); - } - - for ( ReplList::Iter repl = replList; repl.lte(); repl++ ) { - int ind = nextAvail++; - runtimeData->patReplInfo[repl->patRepId].offset = ind; - - /* BindIds are indexed base one. 
*/ - runtimeData->patReplInfo[repl->patRepId].numBindings = - repl->pdaRun->bindings->length() - 1; - - long bindId = 1; - fillNodes( prg, nextAvail, repl->pdaRun->bindings, bindId, - runtimeData->patReplNodes, - repl->pdaRun->stackTop->next, - repl->pdaRun->stackTop->next->shadow, - ind ); - } - - assert( nextAvail == count ); -} - - -int Compiler::findIndexOff( PdaTables *pdaTables, PdaGraph *pdaGraph, PdaState *state, int &curLen ) -{ - for ( int start = 0; start < curLen; ) { - int offset = start; - for ( TransMap::Iter trans = state->transMap; trans.lte(); trans++ ) { - if ( pdaTables->owners[offset] != -1 ) - goto next_start; - - offset++; - if ( ! trans.last() ) { - TransMap::Iter next = trans.next(); - offset += next->key - trans->key - 1; - } - } - - /* Got though the whole list without a conflict. */ - return start; - -next_start: - start++; - } - - return curLen; -} - -struct CmpSpan -{ - static int compare( PdaState *state1, PdaState *state2 ) - { - int dist1 = 0, dist2 = 0; - - if ( state1->transMap.length() > 0 ) { - TransMap::Iter first1 = state1->transMap.first(); - TransMap::Iter last1 = state1->transMap.last(); - dist1 = last1->key - first1->key; - } - - if ( state2->transMap.length() > 0 ) { - TransMap::Iter first2 = state2->transMap.first(); - TransMap::Iter last2 = state2->transMap.last(); - dist2 = last2->key - first2->key; - } - - if ( dist1 < dist2 ) - return 1; - else if ( dist2 < dist1 ) - return -1; - return 0; - } -}; - -PdaGraph *Compiler::makePdaGraph( LangElSet &parserEls ) -{ - //for ( DefList::Iter prod = prodList; prod.lte(); prod++ ) - // cerr << prod->prodId << " " << prod->data << endl; - - PdaGraph *pdaGraph = new PdaGraph(); - lalr1GenerateParser( pdaGraph, parserEls ); - pdaGraph->setStateNumbers(); - analyzeMachine( pdaGraph, parserEls ); - - //cerr << "NUMBER OF STATES: " << pdaGraph->stateList.length() << endl; - - return pdaGraph; -} - -PdaTables *Compiler::makePdaTables( PdaGraph *pdaGraph ) -{ - int count, pos; - 
PdaTables *pdaTables = new PdaTables; - - /* - * Counting max indices. - */ - count = 0; - for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) { - for ( TransMap::Iter trans = state->transMap; trans.lte(); trans++ ) { - count++; - if ( ! trans.last() ) { - TransMap::Iter next = trans.next(); - count += next->key - trans->key - 1; - } - } - } - - - /* Allocate indicies and owners. */ - pdaTables->numIndicies = count; - pdaTables->indicies = new int[count]; - pdaTables->owners = new int[count]; - for ( long i = 0; i < count; i++ ) { - pdaTables->indicies[i] = -1; - pdaTables->owners[i] = -1; - } - - /* Allocate offsets. */ - int numStates = pdaGraph->stateList.length(); - pdaTables->offsets = new unsigned int[numStates]; - pdaTables->numStates = numStates; - - /* Place transitions into indicies/owners */ - PdaState **states = new PdaState*[numStates]; - long ds = 0; - for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) - states[ds++] = state; - - /* Sorting baseded on span length. Gives an improvement, but incures a - * cost. Off for now. */ - //MergeSort< PdaState*, CmpSpan > mergeSort; - //mergeSort.sort( states, numStates ); - - int indLen = 0; - for ( int s = 0; s < numStates; s++ ) { - PdaState *state = states[s]; - - int indOff = findIndexOff( pdaTables, pdaGraph, state, indLen ); - pdaTables->offsets[state->stateNum] = indOff; - - for ( TransMap::Iter trans = state->transMap; trans.lte(); trans++ ) { - pdaTables->indicies[indOff] = trans->value->actionSetEl->key.id; - pdaTables->owners[indOff] = state->stateNum; - indOff++; - - if ( ! trans.last() ) { - TransMap::Iter next = trans.next(); - indOff += next->key - trans->key - 1; - } - } - - if ( indOff > indLen ) - indLen = indOff; - } - - /* We allocated the max, but cmpression gives us less. 
*/ - pdaTables->numIndicies = indLen; - delete[] states; - - - /* - * Keys - */ - count = pdaGraph->stateList.length() * 2;; - pdaTables->keys = new int[count]; - pdaTables->numKeys = count; - - count = 0; - for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) { - if ( state->transMap.length() == 0 ) { - pdaTables->keys[count+0] = 0; - pdaTables->keys[count+1] = 0; - } - else { - TransMap::Iter first = state->transMap.first(); - TransMap::Iter last = state->transMap.last(); - pdaTables->keys[count+0] = first->key; - pdaTables->keys[count+1] = last->key; - } - count += 2; - } - - /* - * Targs - */ - count = pdaGraph->actionSet.length(); - pdaTables->targs = new unsigned int[count]; - pdaTables->numTargs = count; - - count = 0; - for ( PdaActionSet::Iter asi = pdaGraph->actionSet; asi.lte(); asi++ ) - pdaTables->targs[count++] = asi->key.targ; - - /* - * ActInds - */ - count = pdaGraph->actionSet.length(); - pdaTables->actInds = new unsigned int[count]; - pdaTables->numActInds = count; - - count = pos = 0; - for ( PdaActionSet::Iter asi = pdaGraph->actionSet; asi.lte(); asi++ ) { - pdaTables->actInds[count++] = pos; - pos += asi->key.actions.length() + 1; - } - - /* - * Actions - */ - count = 0; - for ( PdaActionSet::Iter asi = pdaGraph->actionSet; asi.lte(); asi++ ) - count += asi->key.actions.length() + 1; - - pdaTables->actions = new unsigned int[count]; - pdaTables->numActions = count; - - count = 0; - for ( PdaActionSet::Iter asi = pdaGraph->actionSet; asi.lte(); asi++ ) { - for ( ActDataList::Iter ali = asi->key.actions; ali.lte(); ali++ ) - pdaTables->actions[count++] = *ali; - - pdaTables->actions[count++] = 0; - } - - /* - * CommitLen - */ - count = pdaGraph->actionSet.length(); - pdaTables->commitLen = new int[count]; - pdaTables->numCommitLen = count; - - count = 0; - for ( PdaActionSet::Iter asi = pdaGraph->actionSet; asi.lte(); asi++ ) - pdaTables->commitLen[count++] = asi->key.commitLen; - - /* - * tokenRegionInds. 
Start at one so region index 0 is null (unset). - */ - count = 0; - pos = 1; - pdaTables->tokenRegionInds = new int[pdaTables->numStates]; - for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) { - pdaTables->tokenRegionInds[count++] = pos; - pos += state->regions.length() + 1; - } - - - /* - * tokenRegions. Build in a null at the beginning. - */ - - count = 1; - for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) - count += state->regions.length() + 1; - - pdaTables->numRegionItems = count; - pdaTables->tokenRegions = new int[pdaTables->numRegionItems]; - - count = 0; - pdaTables->tokenRegions[count++] = 0; - for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) { - for ( RegionVect::Iter reg = state->regions; reg.lte(); reg++ ) - pdaTables->tokenRegions[count++] = (*reg)->id + 1; - - pdaTables->tokenRegions[count++] = 0; - } - - /* - * tokenPreRegions. Build in a null at the beginning. - */ - - count = 1; - for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) - count += state->regions.length() + 1; - - pdaTables->numPreRegionItems = count; - pdaTables->tokenPreRegions = new int[pdaTables->numPreRegionItems]; - - count = 0; - pdaTables->tokenPreRegions[count++] = 0; - for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) { - for ( RegionVect::Iter reg = state->regions; reg.lte(); reg++ ) { - assert( state->preRegions.length() <= 1 ); - if ( state->preRegions.length() == 0 || state->preRegions[0]->wasEmpty ) - pdaTables->tokenPreRegions[count++] = -1; - else - pdaTables->tokenPreRegions[count++] = state->preRegions[0]->id + 1; - } - - pdaTables->tokenPreRegions[count++] = 0; - } - - - return pdaTables; -} - -void Compiler::makeParser( LangElSet &parserEls ) -{ - pdaGraph = makePdaGraph( parserEls ); - pdaTables = makePdaTables( pdaGraph ); -} - diff --git a/colm/pdacodegen.cc b/colm/pdacodegen.cc deleted file mode 100644 index 9e3dca47..00000000 
--- a/colm/pdacodegen.cc +++ /dev/null @@ -1,653 +0,0 @@ -/* - * Copyright 2006-2012 Adrian Thurston <thurston@complang.org> - */ - -/* This file is part of Colm. - * - * Colm is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * Colm is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Colm; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include <iostream> -#include <stdlib.h> -#include <ctype.h> -#include <limits.h> -#include "global.h" -#include "parsedata.h" -#include "avlmap.h" -#include "avlbasic.h" -#include "avlset.h" -#include "mergesort.h" -#include "pdacodegen.h" - -using std::cerr; -using std::endl; - -#define FRESH_BLOCK 8128 -#define act_sb "0x1" -#define act_rb "0x2" -#define lower "0x0000ffff" -#define upper "0xffff0000" - -void escapeLiteralString( std::ostream &out, const char *path, int length ) -{ - for ( const char *pc = path, *end = path+length; pc != end; pc++ ) { - switch ( *pc ) { - case '\\': out << "\\\\"; break; - case '"': out << "\\\""; break; - case '\a': out << "\\a"; break; - case '\b': out << "\\b"; break; - case '\t': out << "\\t"; break; - case '\n': out << "\\n"; break; - case '\v': out << "\\v"; break; - case '\f': out << "\\f"; break; - case '\r': out << "\\r"; break; - default: out << *pc; break; - } - } -} - -void escapeLiteralString( std::ostream &out, const char *path ) -{ - escapeLiteralString( out, path, strlen(path) ); -} - -void PdaCodeGen::writeTokenIds() -{ - out << "/*\n"; - for ( 
LelList::Iter lel = pd->langEls; lel.lte(); lel++ ) { - if ( lel->name != 0 ) - out << " " << lel->name << " " << lel->id << endl; - else - out << " " << lel->id << endl; - } - out << "*/\n\n"; -} - -void PdaCodeGen::defineRuntime() -{ - out << - "extern RuntimeData main_runtimeData;\n" - "\n"; -} - -void PdaCodeGen::writeRuntimeData( RuntimeData *runtimeData, PdaTables *pdaTables ) -{ - /* - * Blocks of code in frames. - */ - for ( int i = 0; i < runtimeData->numFrames; i++ ) { - /* FIXME: horrible code cloning going on here. */ - if ( runtimeData->frameInfo[i].codeLenWV > 0 ) { - out << "Code code_" << i << "_wv[] = {\n\t"; - - Code *block = runtimeData->frameInfo[i].codeWV; - for ( int j = 0; j < runtimeData->frameInfo[i].codeLenWV; j++ ) { - out << (unsigned long) block[j]; - - if ( j < runtimeData->frameInfo[i].codeLenWV-1 ) { - out << ", "; - if ( (j+1) % 8 == 0 ) - out << "\n\t"; - } - } - out << "\n};\n\n"; - } - - if ( runtimeData->frameInfo[i].codeLenWC > 0 ) { - out << "Code code_" << i << "_wc[] = {\n\t"; - - Code *block = runtimeData->frameInfo[i].codeWC; - for ( int j = 0; j < runtimeData->frameInfo[i].codeLenWC; j++ ) { - out << (unsigned long) block[j]; - - if ( j < runtimeData->frameInfo[i].codeLenWC-1 ) { - out << ", "; - if ( (j+1) % 8 == 0 ) - out << "\n\t"; - } - } - out << "\n};\n\n"; - } - - if ( runtimeData->frameInfo[i].treesLen > 0 ) { - out << "char trees_" << i << "[] = {\n\t"; - - char *block = runtimeData->frameInfo[i].trees; - for ( int j = 0; j < runtimeData->frameInfo[i].treesLen; j++ ) { - out << (long) block[j]; - - if ( j < runtimeData->frameInfo[i].treesLen-1 ) { - out << ", "; - if ( (j+1) % 8 == 0 ) - out << "\n\t"; - } - } - out << "\n};\n\n"; - } - } - - /* - * Blocks in production info. 
- */ - for ( int i = 0; i < runtimeData->numProds; i++ ) { - if ( runtimeData->prodInfo[i].copyLen > 0 ) { - out << "unsigned char copy_" << i << "[] = {\n\t"; - - unsigned char *block = runtimeData->prodInfo[i].copy; - for ( int j = 0; j < runtimeData->prodInfo[i].copyLen; j++ ) { - out << (long) block[j*2] << ", " << (long) block[j*2+1]; - - if ( j < runtimeData->prodInfo[i].copyLen-1 ) { - out << ", "; - if ( (j+1) % 8 == 0 ) - out << "\n\t"; - } - } - out << "\n};\n\n"; - } - } - - /* - * Init code. - */ - out << "Code " << rootCode() << "[] = {\n\t"; - Code *block = runtimeData->rootCode ; - for ( int j = 0; j < runtimeData->rootCodeLen; j++ ) { - out << (unsigned int) block[j]; - - if ( j < runtimeData->rootCodeLen-1 ) { - out << ", "; - if ( (j+1) % 8 == 0 ) - out << "\n\t"; - } - } - out << "\n};\n\n"; - - /* - * lelInfo - */ - out << "LangElInfo " << lelInfo() << "[] = {\n"; - for ( int i = 0; i < runtimeData->numLangEls; i++ ) { - out << "\t{"; - - /* Name. */ - out << " \""; - escapeLiteralString( out, runtimeData->lelInfo[i].name ); - out << "\", "; - - /* Name. */ - out << " \""; - escapeLiteralString( out, runtimeData->lelInfo[i].xmlTag ); - out << "\", "; - - /* Repeat, literal, ignore flags. 
*/ - out << (int)runtimeData->lelInfo[i].repeat << ", " << - (int)runtimeData->lelInfo[i].list << ", " << - (int)runtimeData->lelInfo[i].literal << ", " << - (int)runtimeData->lelInfo[i].ignore << ", "; - - out << runtimeData->lelInfo[i].frameId << ", "; - - out << runtimeData->lelInfo[i].objectTypeId << ", "; - - out << runtimeData->lelInfo[i].ofiOffset << ", "; - - out << runtimeData->lelInfo[i].objectLength << ", "; - -// out << runtimeData->lelInfo[i].contextTypeId << ", "; -// out << runtimeData->lelInfo[i].contextLength << ", "; - - out << runtimeData->lelInfo[i].termDupId << ", "; - - out << runtimeData->lelInfo[i].genericId << ", "; - - out << runtimeData->lelInfo[i].markId << ", "; - - out << runtimeData->lelInfo[i].captureAttr << ", "; - - out << runtimeData->lelInfo[i].numCaptureAttr; - - out << " }"; - - if ( i < runtimeData->numLangEls-1 ) - out << ",\n"; - } - out << "\n};\n\n"; - - /* - * frameInfo - */ - out << "FrameInfo " << frameInfo() << "[] = {\n"; - for ( int i = 0; i < runtimeData->numFrames; i++ ) { - out << "\t{ "; - - if ( runtimeData->frameInfo[i].codeLenWV > 0 ) - out << "code_" << i << "_wv, "; - else - out << "0, "; - out << runtimeData->frameInfo[i].codeLenWV << ", "; - - if ( runtimeData->frameInfo[i].codeLenWC > 0 ) - out << "code_" << i << "_wc, "; - else - out << "0, "; - out << runtimeData->frameInfo[i].codeLenWC << ", "; - - if ( runtimeData->frameInfo[i].treesLen > 0 ) - out << "trees_" << i << ", "; - else - out << "0, "; - - out << - runtimeData->frameInfo[i].treesLen << ", " << - runtimeData->frameInfo[i].argSize << ", " << - runtimeData->frameInfo[i].frameSize; - - out << " }"; - - if ( i < runtimeData->numFrames-1 ) - out << ",\n"; - } - out << "\n};\n\n"; - - - /* - * prodInfo - */ - out << "ProdInfo " << prodInfo() << "[] = {\n"; - for ( int i = 0; i < runtimeData->numProds; i++ ) { - out << "\t{ "; - - out << runtimeData->prodInfo[i].lhsId << ", "; - out << runtimeData->prodInfo[i].prodNum << ", "; - out << 
runtimeData->prodInfo[i].length << ", "; - - out << - '"' << runtimeData->prodInfo[i].name << "\", " << - runtimeData->prodInfo[i].frameId << ", " << - (int)runtimeData->prodInfo[i].lhsUpref << ", "; - - if ( runtimeData->prodInfo[i].copyLen > 0 ) - out << "copy_" << i << ", "; - else - out << "0, "; - - out << runtimeData->prodInfo[i].copyLen << ", "; - - - out << " }"; - - if ( i < runtimeData->numProds-1 ) - out << ",\n"; - } - out << "\n};\n\n"; - - /* - * patReplInfo - */ - out << "PatReplInfo " << patReplInfo() << "[] = {\n"; - for ( int i = 0; i < runtimeData->numPatterns; i++ ) { - out << " { " << runtimeData->patReplInfo[i].offset << ", " << - runtimeData->patReplInfo[i].numBindings << " },\n"; - } - out << "};\n\n"; - - /* - * patReplNodes - */ - out << "PatReplNode " << patReplNodes() << "[] = {\n"; - for ( int i = 0; i < runtimeData->numPatternNodes; i++ ) { - PatReplNode &node = runtimeData->patReplNodes[i]; - out << " { " << node.id << ", " << - node.prodNum << ", " << node.next << ", " << - node.child << ", " << node.bindId << ", "; - if ( node.data == 0 ) - out << "0"; - else { - out << '\"'; - escapeLiteralString( out, node.data, node.length ); - out << '\"'; - } - out << ", " << node.length << ", "; - - out << node.leftIgnore << ", "; - out << node.rightIgnore << ", "; - - out << (int)node.stop << " },\n"; - } - out << "};\n\n"; - - /* - * functionInfo - */ - out << "FunctionInfo " << functionInfo() << "[] = {\n"; - for ( int i = 0; i < runtimeData->numFunctions; i++ ) { - out << "\t{ " << - "\"" << runtimeData->functionInfo[i].name << "\", " << - runtimeData->functionInfo[i].frameId << ", " << - runtimeData->functionInfo[i].argSize << ", " << - runtimeData->functionInfo[i].frameSize; - out << " }"; - - if ( i < runtimeData->numFunctions-1 ) - out << ",\n"; - } - out << "\n};\n\n"; - - /* - * regionInfo - */ - out << "RegionInfo " << regionInfo() << "[] = {\n"; - for ( int i = 0; i < runtimeData->numRegions; i++ ) { - out << "\t{ \""; - /* Name. 
*/ - escapeLiteralString( out, runtimeData->regionInfo[i].name ); - out << "\", " << runtimeData->regionInfo[i].defaultToken << - ", " << runtimeData->regionInfo[i].eofFrameId << - ", " << runtimeData->regionInfo[i].isIgnoreOnly << - ", " << runtimeData->regionInfo[i].isCiOnly << - ", " << runtimeData->regionInfo[i].ciLelId << - " }"; - - if ( i < runtimeData->numRegions-1 ) - out << ",\n"; - } - out << "\n};\n\n"; - - /* - * genericInfo - */ - out << "GenericInfo " << genericInfo() << "[] = {\n"; - for ( int i = 0; i < runtimeData->numGenerics; i++ ) { - out << "\t{ " << - runtimeData->genericInfo[i].type << ", " << - runtimeData->genericInfo[i].typeArg << ", " << - runtimeData->genericInfo[i].keyOffset << ", " << - runtimeData->genericInfo[i].keyType << ", " << - runtimeData->genericInfo[i].langElId << ", " << - runtimeData->genericInfo[i].parserId << " },\n"; - } - out << "};\n\n"; - - /* - * literals - */ - out << "const char *" << litdata() << "[] = {\n"; - for ( int i = 0; i < runtimeData->numLiterals; i++ ) { - out << "\t\""; - escapeLiteralString( out, runtimeData->litdata[i] ); - out << "\",\n"; - } - out << "};\n\n"; - - out << "long " << litlen() << "[] = {\n\t"; - for ( int i = 0; i < runtimeData->numLiterals; i++ ) - out << runtimeData->litlen[i] << ", "; - out << "};\n\n"; - - out << "Head *" << literals() << "[] = {\n\t"; - for ( int i = 0; i < runtimeData->numLiterals; i++ ) - out << "0, "; - out << "};\n\n"; - - out << "int startStates[] = {\n\t"; - for ( long i = 0; i < runtimeData->numParsers; i++ ) { - out << runtimeData->startStates[i] << ", "; - } - out << "};\n\n"; - - out << "int eofLelIds[] = {\n\t"; - for ( long i = 0; i < runtimeData->numParsers; i++ ) { - out << runtimeData->eofLelIds[i] << ", "; - } - out << "};\n\n"; - - out << "int parserLelIds[] = {\n\t"; - for ( long i = 0; i < runtimeData->numParsers; i++ ) { - out << runtimeData->parserLelIds[i] << ", "; - } - out << "};\n\n"; - - out << "CaptureAttr captureAttr[] = {\n"; - for ( 
long i = 0; i < runtimeData->numCapturedAttr; i++ ) { - out << "\t{ " << - runtimeData->captureAttr[i].mark_enter << ", " << - runtimeData->captureAttr[i].mark_leave << ", " << - runtimeData->captureAttr[i].offset << " },\n"; - } - - out << "};\n\n"; - - out << - "RuntimeData main_runtimeData = \n" - "{\n" - " " << lelInfo() << ",\n" - " " << runtimeData->numLangEls << ",\n" - "\n" - " " << prodInfo() << ",\n" - " " << runtimeData->numProds << ",\n" - "\n" - " " << regionInfo() << ",\n" - " " << runtimeData->numRegions << ",\n" - "\n" - " " << rootCode() << ",\n" - " " << runtimeData->rootCodeLen << ",\n" - " " << runtimeData->rootFrameId << ",\n" - "\n" - " " << frameInfo() << ",\n" - " " << runtimeData->numFrames << ",\n" - "\n" - " " << functionInfo() << ",\n" - " " << runtimeData->numFunctions << ",\n" - "\n" - " " << patReplInfo() << ",\n" - " " << runtimeData->numPatterns << ",\n" - "\n" - " " << patReplNodes() << ",\n" - " " << runtimeData->numPatternNodes << ",\n" - "\n" - " " << genericInfo() << ",\n" - " " << runtimeData->numGenerics << ",\n" - " " << runtimeData->argvGenericId << ",\n" - "\n" - " " << litdata() << ",\n" - " " << litlen() << ",\n" - " " << literals() << ",\n" - " " << runtimeData->numLiterals << ",\n" - "\n" - " captureAttr,\n" - " " << runtimeData->numCapturedAttr << ",\n" - "\n" - " &fsmTables_start,\n" - " &pid_0_pdaTables,\n" - " startStates, eofLelIds, parserLelIds, " << runtimeData->numParsers << ",\n" - "\n" - " " << runtimeData->globalSize << ",\n" - "\n" - " " << runtimeData->firstNonTermId << ",\n" - " " << runtimeData->integerId << ",\n" - " " << runtimeData->stringId << ",\n" - " " << runtimeData->anyId << ",\n" - " " << runtimeData->eofId << ",\n" - " " << runtimeData->noTokenId << "\n" - "};\n" - "\n"; -} - -void PdaCodeGen::writeParserData( long id, PdaTables *tables ) -{ - String prefix = "pid_" + String(0, "%ld", id) + "_"; - - out << "int " << prefix << indicies() << "[] = {\n\t"; - for ( int i = 0; i < 
tables->numIndicies; i++ ) { - out << tables->indicies[i]; - - if ( i < tables->numIndicies-1 ) { - out << ", "; - if ( (i+1) % 8 == 0 ) - out << "\n\t"; - } - } - out << "\n};\n\n"; - - out << "int " << prefix << owners() << "[] = {\n\t"; - for ( int i = 0; i < tables->numIndicies; i++ ) { - out << tables->owners[i]; - - if ( i < tables->numIndicies-1 ) { - out << ", "; - if ( (i+1) % 8 == 0 ) - out << "\n\t"; - } - } - out << "\n};\n\n"; - - out << "int " << prefix << keys() << "[] = {\n\t"; - for ( int i = 0; i < tables->numKeys; i++ ) { - out << tables->keys[i]; - - if ( i < tables->numKeys-1 ) { - out << ", "; - if ( (i+1) % 8 == 0 ) - out << "\n\t"; - } - } - out << "\n};\n\n"; - - out << "unsigned int " << prefix << offsets() << "[] = {\n\t"; - for ( int i = 0; i < tables->numStates; i++ ) { - out << tables->offsets[i]; - - if ( i < tables->numStates-1 ) { - out << ", "; - if ( (i+1) % 8 == 0 ) - out << "\n\t"; - } - } - out << "\n};\n\n"; - - out << "unsigned int " << prefix << targs() << "[] = {\n\t"; - for ( int i = 0; i < tables->numTargs; i++ ) { - out << tables->targs[i]; - - if ( i < tables->numTargs-1 ) { - out << ", "; - if ( (i+1) % 8 == 0 ) - out << "\n\t"; - } - } - out << "\n};\n\n"; - - out << "unsigned int " << prefix << actInds() << "[] = {\n\t"; - for ( int i = 0; i < tables->numActInds; i++ ) { - out << tables->actInds[i]; - - if ( i < tables->numActInds-1 ) { - out << ", "; - if ( (i+1) % 8 == 0 ) - out << "\n\t"; - } - } - out << "\n};\n\n"; - - out << "unsigned int " << prefix << actions() << "[] = {\n\t"; - for ( int i = 0; i < tables->numActions; i++ ) { - out << tables->actions[i]; - - if ( i < tables->numActions-1 ) { - out << ", "; - if ( (i+1) % 8 == 0 ) - out << "\n\t"; - } - } - out << "\n};\n\n"; - - out << "int " << prefix << commitLen() << "[] = {\n\t"; - for ( int i = 0; i < tables->numCommitLen; i++ ) { - out << tables->commitLen[i]; - - if ( i < tables->numCommitLen-1 ) { - out << ", "; - if ( (i+1) % 8 == 0 ) - out << 
"\n\t"; - } - } - out << "\n};\n\n"; - - out << "int " << prefix << tokenRegionInds() << "[] = {\n\t"; - for ( int i = 0; i < tables->numStates; i++ ) { - out << tables->tokenRegionInds[i]; - - if ( i < tables->numStates-1 ) { - out << ", "; - if ( (i+1) % 8 == 0 ) - out << "\n\t"; - } - } - out << "\n};\n\n"; - - out << "int " << prefix << tokenRegions() << "[] = {\n\t"; - for ( int i = 0; i < tables->numRegionItems; i++ ) { - out << tables->tokenRegions[i]; - - if ( i < tables->numRegionItems-1 ) { - out << ", "; - if ( (i+1) % 8 == 0 ) - out << "\n\t"; - } - } - out << "\n};\n\n"; - - out << "int " << prefix << tokenPreRegions() << "[] = {\n\t"; - for ( int i = 0; i < tables->numPreRegionItems; i++ ) { - out << tables->tokenPreRegions[i]; - - if ( i < tables->numPreRegionItems-1 ) { - out << ", "; - if ( (i+1) % 8 == 0 ) - out << "\n\t"; - } - } - out << "\n};\n\n"; - - out << - "PdaTables " << prefix << "pdaTables =\n" - "{\n" - " " << prefix << indicies() << ",\n" - " " << prefix << owners() << ",\n" - " " << prefix << keys() << ",\n" - " " << prefix << offsets() << ",\n" - " " << prefix << targs() << ",\n" - " " << prefix << actInds() << ",\n" - " " << prefix << actions() << ",\n" - " " << prefix << commitLen() << ",\n" - - " " << prefix << tokenRegionInds() << ",\n" - " " << prefix << tokenRegions() << ",\n" - " " << prefix << tokenPreRegions() << ",\n" - "\n" - " " << tables->numIndicies << ",\n" - " " << tables->numKeys << ",\n" - " " << tables->numStates << ",\n" - " " << tables->numTargs << ",\n" - " " << tables->numActInds << ",\n" - " " << tables->numActions << ",\n" - " " << tables->numCommitLen << ",\n" - " " << tables->numRegionItems << ",\n" - " " << tables->numPreRegionItems << "\n" - "};\n" - "\n"; -} - diff --git a/colm/pdacodegen.h b/colm/pdacodegen.h deleted file mode 100644 index 8e5e7a3a..00000000 --- a/colm/pdacodegen.h +++ /dev/null @@ -1,106 +0,0 @@ -/* - * Copyright 2007-2012 Adrian Thurston <thurston@complang.org> - */ - -/* This file 
is part of Colm. - * - * Colm is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * Colm is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Colm; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - - -#ifndef _PDACODEGEN_H -#define _PDACODEGEN_H - -struct Compiler; - -struct PdaCodeGen -{ - PdaCodeGen( const char *fileName, const char *parserName, Compiler *pd, ostream &out ) - : - fileName(fileName), - parserName(parserName), - pd(pd), - out(out) - {} - - /* - * Code Generation. 
- */ - void startCodeGen(); - void endCodeGen( int endLine ); - - void writeTokenIds(); - void writeLangEls(); - - void writeReference( Definition *prod, char *data ); - void writeUndoReference( Definition *prod, char *data ); - void writeFinalReference( Definition *prod, char *data ); - void writeFirstLocate( Definition *prod ); - void writeRhsLocate( Definition *prod ); - - void defineRuntime(); - void writeRuntimeData( RuntimeData *runtimeData, PdaTables *pdaTables ); - void writeParserData( long id, PdaTables *tables ); - - String PARSER() { return "parser_"; } - - String startState() { return PARSER() + "startState"; } - String indicies() { return PARSER() + "indicies"; } - String owners() { return PARSER() + "owners"; } - String keys() { return PARSER() + "keys"; } - String offsets() { return PARSER() + "offsets"; } - String targs() { return PARSER() + "targs"; } - String actInds() { return PARSER() + "actInds"; } - String actions() { return PARSER() + "actions"; } - String commitLen() { return PARSER() + "commitLen"; } - String fssProdIdIndex() { return PARSER() + "fssProdIdIndex"; } - String prodLengths() { return PARSER() + "prodLengths"; } - String prodLhsIds() { return PARSER() + "prodLhsIds"; } - String prodNames() { return PARSER() + "prodNames"; } - String lelInfo() { return PARSER() + "lelInfo"; } - String prodInfo() { return PARSER() + "prodInfo"; } - String tokenRegionInds() { return PARSER() + "tokenRegionInds"; } - String tokenRegions() { return PARSER() + "tokenRegions"; } - String tokenPreRegions() { return PARSER() + "tokenPreRegions"; } - String prodCodeBlocks() { return PARSER() + "prodCodeBlocks"; } - String prodCodeBlockLens() { return PARSER() + "prodCodeBlockLens"; } - String rootCode() { return PARSER() + "rootCode"; } - String frameInfo() { return PARSER() + "frameInfo"; } - String functionInfo() { return PARSER() + "functionInfo"; } - String objFieldInfo() { return PARSER() + "objFieldInfo"; } - String patReplInfo() { return PARSER() 
+ "patReplInfo"; } - String patReplNodes() { return PARSER() + "patReplNodes"; } - String regionInfo() { return PARSER() + "regionInfo"; } - String genericInfo() { return PARSER() + "genericInfo"; } - String litdata() { return PARSER() + "litdata"; } - String litlen() { return PARSER() + "litlen"; } - String literals() { return PARSER() + "literals"; } - String fsmTables() { return PARSER() + "fsmTables"; } - - /* - * Graphviz Generation - */ - void writeTransList( PdaState *state ); - void writeDotFile( PdaGraph *graph ); - void writeDotFile( ); - - - const char *fileName; - const char *parserName; - Compiler *pd; - ostream &out; -}; - -#endif diff --git a/colm/pdagraph.cc b/colm/pdagraph.cc deleted file mode 100644 index 8f17b7a5..00000000 --- a/colm/pdagraph.cc +++ /dev/null @@ -1,533 +0,0 @@ -/* - * Copyright 2006-2012 Adrian Thurston <thurston@complang.org> - */ - -/* This file is part of Colm. - * - * Colm is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * Colm is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Colm; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include <string.h> -#include <iostream> -#include <string.h> -#include <assert.h> -#include "global.h" -#include "pdagraph.h" -#include "mergesort.h" - -using std::cerr; -using std::endl; - -/* Create a new fsm state. State has not out transitions or in transitions, not - * out out transition data and not number. */ -PdaState::PdaState() -: - /* No in transitions. 
*/ - inRange(), - - /* No entry points, or epsilon trans. */ - pendingCommits(), - - stateSet(0), - - /* Only used during merging. Normally null. */ - stateDictEl(0), - - /* No state identification bits. */ - stateBits(0), - - onClosureQueue(false), - inClosedMap(false), - followMarked(false), - - advanceReductions(false) -{ -} - -/* Copy everything except the action transitions. That is left up to the - * PdaGraph copy constructor. */ -PdaState::PdaState(const PdaState &other) -: - inRange(), - - /* Duplicate the entry id set, epsilon transitions and context sets. These - * are sets of integers and as such need no fixing. */ - pendingCommits(other.pendingCommits), - - stateSet(0), - - /* This is only used during merging. Normally null. */ - stateDictEl(0), - - /* Fsm state data. */ - stateBits(other.stateBits), - - dotSet(other.dotSet), - onClosureQueue(false), - inClosedMap(false), - followMarked(false), - - transMap() -{ - /* Duplicate all the transitions. */ - for ( TransMap::Iter trans = other.transMap; trans.lte(); trans++ ) { - /* Dupicate and store the orginal target in the transition. This will - * be corrected once all the states have been created. */ - PdaTrans *newTrans = new PdaTrans(*trans->value); - newTrans->toState = trans->value->toState; - transMap.append( TransMapEl( newTrans->lowKey, newTrans ) ); - } -} - -/* If there is a state dict element, then delete it. Everything else is left - * up to the FsmGraph destructor. */ -PdaState::~PdaState() -{ - if ( stateDictEl != 0 ) - delete stateDictEl; -} - -/* Graph constructor. */ -PdaGraph::PdaGraph() -: - /* No start state. */ - startState(0) -{ -} - -/* Copy all graph data including transitions. */ -PdaGraph::PdaGraph( const PdaGraph &graph ) -: - /* Lists start empty. Will be filled by copy. */ - stateList(), - misfitList(), - - /* Copy in the entry points, - * pointers will be resolved later. */ - startState(graph.startState), - - /* Will be filled by copy. 
*/ - finStateSet() -{ - /* Create the states and record their map in the original state. */ - PdaStateList::Iter origState = graph.stateList; - for ( ; origState.lte(); origState++ ) { - /* Make the new state. */ - PdaState *newState = new PdaState( *origState ); - - /* Add the state to the list. */ - stateList.append( newState ); - - /* Set the mapsTo item of the old state. */ - origState->stateMap = newState; - } - - /* Derefernce all the state maps. */ - for ( PdaStateList::Iter state = stateList; state.lte(); state++ ) { - for ( TransMap::Iter trans = state->transMap; trans.lte(); trans++ ) { - /* The points to the original in the src machine. The taget's duplicate - * is in the statemap. */ - PdaState *toState = trans->value->toState != 0 ? - trans->value->toState->stateMap : 0; - - /* Attach The transition to the duplicate. */ - trans->value->toState = 0; - attachTrans( state, toState, trans->value ); - } - } - - /* Fix the start state pointer and the new start state's count of in - * transiions. */ - startState = startState->stateMap; - - /* Build the final state set. */ - PdaStateSet::Iter st = graph.finStateSet; - for ( ; st.lte(); st++ ) - finStateSet.insert((*st)->stateMap); -} - -/* Deletes all transition data then deletes each state. */ -PdaGraph::~PdaGraph() -{ - /* Delete all the transitions. */ - PdaStateList::Iter state = stateList; - for ( ; state.lte(); state++ ) { - for ( TransMap::Iter trans = state->transMap; trans.lte(); trans++ ) - delete trans->value; - } - - /* Delete all the states. */ - stateList.empty(); -} - -/* Set a state final. The state has its isFinState set to true and the state - * is added to the finStateSet. */ -void PdaGraph::setFinState( PdaState *state ) -{ - /* Is it already a fin state. 
*/ - if ( state->stateBits & SB_ISFINAL ) - return; - - state->stateBits |= SB_ISFINAL; - finStateSet.insert( state ); -} - -void PdaGraph::unsetAllFinStates( ) -{ - for ( PdaStateSet::Iter st = finStateSet; st.lte(); st++ ) { - PdaState *state = *st; - state->stateBits &= ~ SB_ISFINAL; - } - finStateSet.empty(); -} - -/* Set and unset a state as the start state. */ -void PdaGraph::setStartState( PdaState *state ) -{ - /* Sould change from unset to set. */ - assert( startState == 0 ); - startState = state; -} - -/* Mark all states reachable from state. Traverses transitions forward. Used - * for removing states that have no path into them. */ -void PdaGraph::markReachableFromHere( PdaState *state ) -{ - /* Base case: return; */ - if ( state->stateBits & SB_ISMARKED ) - return; - - /* Set this state as processed. We are going to visit all states that this - * state has a transition to. */ - state->stateBits |= SB_ISMARKED; - - /* Recurse on all out transitions. */ - for ( TransMap::Iter trans = state->transMap; trans.lte(); trans++ ) { - if ( trans->value->toState != 0 ) - markReachableFromHere( trans->value->toState ); - } -} - -void PdaGraph::setStateNumbers() -{ - int curNum = 0; - PdaStateList::Iter state = stateList; - for ( ; state.lte(); state++ ) - state->stateNum = curNum++; -} - -/* Insert a transition into an inlist. The head must be supplied. */ -void PdaGraph::attachToInList( PdaState *from, PdaState *to, - PdaTrans *&head, PdaTrans *trans ) -{ - trans->ilnext = head; - trans->ilprev = 0; - - /* If in trans list is not empty, set the head->prev to trans. */ - if ( head != 0 ) - head->ilprev = trans; - - /* Now insert ourselves at the front of the list. */ - head = trans; -}; - -/* Detach a transition from an inlist. The head of the inlist must be supplied. */ -void PdaGraph::detachFromInList( PdaState *from, PdaState *to, - PdaTrans *&head, PdaTrans *trans ) -{ - /* Detach in the inTransList. 
*/ - if ( trans->ilprev == 0 ) - head = trans->ilnext; - else - trans->ilprev->ilnext = trans->ilnext; - - if ( trans->ilnext != 0 ) - trans->ilnext->ilprev = trans->ilprev; -} - -/* Attach states on the default transition, range list or on out/in list key. - * Type of attaching and is controlled by keyType. First makes a new - * transition. If there is already a transition out from fromState on the - * default, then will assertion fail. */ -PdaTrans *PdaGraph::appendNewTrans( PdaState *from, PdaState *to, long lowKey, long ) -{ - /* Make the new transition. */ - PdaTrans *retVal = new PdaTrans(); - - /* The transition is now attached. Remember the parties involved. */ - retVal->fromState = from; - retVal->toState = to; - - /* Make the entry in the out list for the transitions. */ - from->transMap.append( TransMapEl( lowKey, retVal ) ); - - /* Set the the keys of the new trans. */ - retVal->lowKey = lowKey; - - /* Attach using inRange as the head pointer. */ - attachToInList( from, to, to->inRange.head, retVal ); - - return retVal; -} - -PdaTrans *PdaGraph::insertNewTrans( PdaState *from, PdaState *to, long lowKey, long ) -{ - /* Make the new transition. */ - PdaTrans *retVal = new PdaTrans(); - - /* The transition is now attached. Remember the parties involved. */ - retVal->fromState = from; - retVal->toState = to; - - /* Make the entry in the out list for the transitions. */ - from->transMap.insert( lowKey, retVal ); - - /* Set the the keys of the new trans. */ - retVal->lowKey = lowKey; - - /* Attach using inRange as the head pointer. */ - attachToInList( from, to, to->inRange.head, retVal ); - - return retVal; -} - -/* Attach for range lists or for the default transition. Type of attaching is - * controlled by the keyType parameter. This attach should be used when a - * transition already is allocated and must be attached to a target state. - * Does not handle adding the transition into the out list. 
*/ -void PdaGraph::attachTrans( PdaState *from, PdaState *to, PdaTrans *trans ) -{ - assert( trans->fromState == 0 && trans->toState == 0 ); - trans->fromState = from; - trans->toState = to; - - /* Attach using the inRange pointer as the head pointer. */ - attachToInList( from, to, to->inRange.head, trans ); -} - -/* Detach for out/in lists or for default transition. The type of detaching is - * controlled by the keyType parameter. */ -void PdaGraph::detachTrans( PdaState *from, PdaState *to, PdaTrans *trans ) -{ - assert( trans->fromState == from && trans->toState == to ); - trans->fromState = 0; - trans->toState = 0; - - /* Detach using to's inRange pointer as the head. */ - detachFromInList( from, to, to->inRange.head, trans ); -} - - -/* Detach a state from the graph. Detaches and deletes transitions in and out - * of the state. Empties inList and outList. Removes the state from the final - * state set. A detached state becomes useless and should be deleted. */ -void PdaGraph::detachState( PdaState *state ) -{ - /* Detach the in transitions from the inRange list of transitions. */ - while ( state->inRange.head != 0 ) { - /* Get pointers to the trans and the state. */ - PdaTrans *trans = state->inRange.head; - PdaState *fromState = trans->fromState; - - /* Detach the transitions from the source state. */ - detachTrans( fromState, state, trans ); - - /* Ok to delete the transition. */ - fromState->transMap.remove( trans->lowKey ); - delete trans; - } - - /* Detach out range transitions. */ - for ( TransMap::Iter trans = state->transMap; trans.lte(); trans++ ) { - detachTrans( state, trans->value->toState, trans->value ); - delete trans->value; - } - - /* Delete all of the out range pointers. */ - state->transMap.empty(); - - /* Unset final stateness before detaching from graph. */ - if ( state->stateBits & SB_ISFINAL ) - finStateSet.remove( state ); -} - -/* Move all the transitions that go into src so that they go into dest. 
*/ -void PdaGraph::inTransMove( PdaState *dest, PdaState *src ) -{ - /* Do not try to move in trans to and from the same state. */ - assert( dest != src ); - - /* If src is the start state, dest becomes the start state. */ - assert( src != startState ); - - /* Move the transitions in inRange. */ - while ( src->inRange.head != 0 ) { - /* Get trans and from state. */ - PdaTrans *trans = src->inRange.head; - PdaState *fromState = trans->fromState; - - /* Detach from src, reattach to dest. */ - detachTrans( fromState, src, trans ); - attachTrans( fromState, dest, trans ); - } -} - -void PdaGraph::addInReduction( PdaTrans *dest, long prodId, long prior ) -{ - /* Look for the reduction. If not there insert it, otherwise take - * the max of the priorities. */ - ReductionMapEl *redMapEl = dest->reductions.find( prodId ); - if ( redMapEl == 0 ) - dest->reductions.insert( prodId, prior ); - else if ( prior > redMapEl->value ) - redMapEl->value = prior; -} - -/* Callback invoked when another trans (or possibly this) is added into this - * transition during the merging process. Draw in any properties of srcTrans - * into this transition. AddInTrans is called when a new transitions is made - * that will be a duplicate of another transition or a combination of several - * other transitions. AddInTrans will be called for each transition that the - * new transition is to represent. */ -void PdaGraph::addInTrans( PdaTrans *destTrans, PdaTrans *srcTrans ) -{ - /* Protect against adding in from ourselves. */ - if ( srcTrans != destTrans ) { - - /* Add in the shift priority. */ - if ( destTrans->isShift && srcTrans->isShift ) { - /* Both shifts are set. We want the max of the two. */ - if ( srcTrans->shiftPrior > destTrans->shiftPrior ) - destTrans->shiftPrior = srcTrans->shiftPrior; - } - else if ( srcTrans->isShift ) { - /* Just the source is set, copy the source prior over. */ - destTrans->shiftPrior = srcTrans->shiftPrior; - } - - /* If either is a shift, dest is a shift. 
*/ - destTrans->isShift = destTrans->isShift || srcTrans->isShift; - - /* Add in the reductions. */ - for ( ReductionMap::Iter red = srcTrans->reductions; red.lte(); red++ ) - addInReduction( destTrans, red->key, red->value ); - - /* Add in the commit points. */ - destTrans->commits.insert( srcTrans->commits ); - - if ( srcTrans->toState->advanceReductions ) - destTrans->toState->advanceReductions = true; - - if ( srcTrans->noPreIgnore ) - destTrans->noPreIgnore = true; - if ( srcTrans->noPostIgnore ) - destTrans->noPostIgnore = true; - } -} - -/* NO LONGER USED. */ -void PdaGraph::addInState( PdaState *destState, PdaState *srcState ) -{ - /* Draw in any properties of srcState into destState. */ - if ( srcState != destState ) { - /* Get the epsilons, context, out priorities. */ - destState->pendingCommits.insert( srcState->pendingCommits ); - if ( srcState->pendingCommits.length() > 0 ) - cerr << "THERE ARE PENDING COMMITS DRAWN IN" << endl; - - /* Parser generation data. */ - destState->dotSet.insert( srcState->dotSet ); - - if ( srcState->onClosureQueue && !destState->onClosureQueue ) { - stateClosureQueue.append( destState ); - destState->onClosureQueue = true; - } - } -} - -/* Make a new state. The new state will be put on the graph's - * list of state. The new state can be created final or non final. */ -PdaState *PdaGraph::addState() -{ - /* Make the new state to return. */ - PdaState *state = new PdaState(); - - /* Create the new state. */ - stateList.append( state ); - - return state; -} - - -/* Follow from to the final state of srcFsm. */ -PdaState *PdaGraph::followFsm( PdaState *from, PdaGraph *srcFsm ) -{ - PdaState *followSrc = srcFsm->startState; - - while ( ! 
followSrc->isFinState() ) { - assert( followSrc->transMap.length() == 1 ); - PdaTrans *followTrans = followSrc->transMap[0].value; - - PdaTrans *inTrans = from->findTrans( followTrans->lowKey ); - assert( inTrans != 0 ); - - from = inTrans->toState; - followSrc = followTrans->toState; - } - - return from; -} - -int PdaGraph::fsmLength( ) -{ - int length = 0; - PdaState *state = startState; - while ( ! state->isFinState() ) { - length += 1; - state = state->transMap[0].value->toState; - } - return length; -} - -/* Remove states that have no path to them from the start state. Recursively - * traverses the graph marking states that have paths into them. Then removes - * all states that did not get marked. */ -void PdaGraph::removeUnreachableStates() -{ - /* Mark all the states that can be reached - * through the existing set of entry points. */ - if ( startState != 0 ) - markReachableFromHere( startState ); - - for ( PdaStateSet::Iter si = entryStateSet; si.lte(); si++ ) - markReachableFromHere( *si ); - - /* Delete all states that are not marked - * and unmark the ones that are marked. */ - PdaState *state = stateList.head; - while ( state ) { - PdaState *next = state->next; - - if ( state->stateBits & SB_ISMARKED ) - state->stateBits &= ~ SB_ISMARKED; - else { - detachState( state ); - stateList.detach( state ); - delete state; - } - - state = next; - } -} diff --git a/colm/pdagraph.h b/colm/pdagraph.h deleted file mode 100644 index dc11b3e1..00000000 --- a/colm/pdagraph.h +++ /dev/null @@ -1,515 +0,0 @@ -/* - * Copyright 2006-2012 Adrian Thurston <thurston@complang.org> - */ - -/* This file is part of Colm. - * - * Colm is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. 
- * - * Colm is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Colm; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#ifndef _PDAGRAPH_H -#define _PDAGRAPH_H - -#include <assert.h> -#include "vector.h" -#include "bstset.h" -#include "compare.h" -#include "avltree.h" -#include "dlist.h" -#include "bstmap.h" -#include "sbstmap.h" -#include "sbstset.h" -#include "sbsttable.h" -#include "avlset.h" -#include "dlistmel.h" -#include "avltree.h" - -/* Flags for states. */ -#define SB_ISFINAL 0x04 -#define SB_ISMARKED 0x08 -#define SB_ISSTART 0x10 - -/* Flags for transitions. */ -#define TB_ISMARKED 0x01 - -struct PdaTrans; -struct PdaState; -struct PdaGraph; -struct TokenDef; -struct Definition; -struct LangEl; -struct TokenRegion; - -typedef Vector<TokenRegion*> RegionVect; - -typedef Vector<long> ActDataList; - -struct ActionData -{ - ActionData( int targ, ActDataList &actions, int commitLen ) - : targ(targ), commitLen(commitLen), id(0), actions(actions) { } - - int targ; - int commitLen; - int id; - - ActDataList actions; -}; - - -struct CmpActionData -{ - static int compare( const ActionData &ap1, const ActionData &ap2 ) - { - if ( ap1.targ < ap2.targ ) - return -1; - else if ( ap1.targ > ap2.targ ) - return 1; - else if ( ap1.commitLen < ap2.commitLen ) - return -1; - else if ( ap1.commitLen > ap2.commitLen ) - return 1; - else if ( ap1.id < ap2.id ) - return -1; - else if ( ap1.id > ap2.id ) - return 1; - - return CmpTable< long, CmpOrd<long> >:: - compare( ap1.actions, ap2.actions ); - } -}; - -typedef AvlSet<ActionData, CmpActionData> PdaActionSet; -typedef AvlSetEl<ActionData> PdaActionSetEl; - -/* List pointers for the closure 
queue. Goes into state. */ -struct ClosureQueueListEl { PdaState *prev, *next; }; - -/* Queue of states, transitions to be closed. */ -typedef DListMel< PdaState, ClosureQueueListEl > StateClosureQueue; -typedef DList<PdaTrans> TransClosureQueue; - -typedef BstSet< Definition*, CmpOrd<Definition*> > DefSet; -typedef CmpTable< Definition*, CmpOrd<Definition*> > CmpDefSet; -typedef BstSet< DefSet, CmpDefSet > DefSetSet; - -typedef Vector< Definition* > DefVect; -typedef BstSet< long, CmpOrd<long> > AlphSet; - -struct ExpandToEl -{ - ExpandToEl( PdaState *state, int prodId ) - : state(state), prodId(prodId) { } - - PdaState *state; - int prodId; -}; - -struct CmpExpandToEl -{ - static inline int compare( const ExpandToEl &etel1, const ExpandToEl &etel2 ) - { - if ( etel1.state < etel2.state ) - return -1; - else if ( etel1.state > etel2.state ) - return 1; - else if ( etel1.prodId < etel2.prodId ) - return -1; - else if ( etel1.prodId > etel2.prodId ) - return 1; - else - return 0; - } -}; - -typedef BstSet<ExpandToEl, CmpExpandToEl> ExpandToSet; -typedef BstSet< int, CmpOrd<int> > IntSet; -typedef CmpTable< int, CmpOrd<int> > CmpIntSet; - -typedef BstSet< long, CmpOrd<long> > LongSet; -typedef CmpTable< long, CmpOrd<long> > CmpLongSet; - -typedef BstMap< long, long, CmpOrd<long> > LongMap; -typedef BstMapEl< long, long > LongMapEl; - -typedef LongSet ProdIdSet; -typedef CmpLongSet CmpProdIdSet; - -/* Set of states, list of states. 
*/ -typedef BstSet<PdaState*> PdaStateSet; -typedef Vector<PdaState*> StateVect; -typedef DList<PdaState> PdaStateList; - -typedef LongMap FollowToAdd; -typedef LongMap ReductionMap; -typedef LongMapEl ReductionMapEl; - -struct ProdIdPair -{ - ProdIdPair( int onReduce, int length ) - : onReduce(onReduce), length(length) {} - - int onReduce; - int length; -}; - -struct CmpProdIdPair -{ - static inline int compare( const ProdIdPair &pair1, const ProdIdPair &pair2 ) - { - if ( pair1.onReduce < pair2.onReduce ) - return -1; - else if ( pair1.onReduce > pair2.onReduce ) - return 1; - else if ( pair1.length < pair2.length ) - return -1; - else if ( pair1.length > pair2.length ) - return 1; - else - return 0; - } -}; - -typedef BstSet< ProdIdPair, CmpProdIdPair > ProdIdPairSet; - -/* Transition class that implements actions and priorities. */ -struct PdaTrans -{ - PdaTrans() : - fromState(0), - toState(0), - isShift(false), - isShiftReduce(false), - shiftPrior(0), - noPreIgnore(false), - noPostIgnore(false) - { } - - PdaTrans( const PdaTrans &other ) : - lowKey(other.lowKey), - fromState(0), toState(0), - isShift(other.isShift), - isShiftReduce(other.isShiftReduce), - shiftPrior(other.shiftPrior), - reductions(other.reductions), - commits(other.commits), - noPreIgnore(false), - noPostIgnore(false) - { } - - long lowKey; - PdaState *fromState; - PdaState *toState; - - /* Pointers for outlist. */ - PdaTrans *prev, *next; - - /* Pointers for in-list. */ - PdaTrans *ilprev, *ilnext; - - long maxPrior(); - - /* Parse Table construction data. */ - bool isShift, isShiftReduce; - int shiftPrior; - ReductionMap reductions; - ActDataList actions; - ActDataList actOrds; - ActDataList actPriors; - - ExpandToSet expandTo; - - PdaActionSetEl *actionSetEl; - - LongSet commits; - LongSet afterShiftCommits; - - bool noPreIgnore; - bool noPostIgnore; -}; - -/* In transition list. Like DList except only has head pointers, which is all - * that is required. 
Insertion and deletion is handled by the graph. This - * class provides the iterator of a single list. */ -struct PdaTransInList -{ - PdaTransInList() : head(0) { } - - PdaTrans *head; - - struct Iter - { - /* Default construct. */ - Iter() : ptr(0) { } - - /* Construct, assign from a list. */ - Iter( const PdaTransInList &il ) : ptr(il.head) { } - Iter &operator=( const PdaTransInList &dl ) { ptr = dl.head; return *this; } - - /* At the end */ - bool lte() const { return ptr != 0; } - bool end() const { return ptr == 0; } - - /* At the first, last element. */ - bool first() const { return ptr && ptr->ilprev == 0; } - bool last() const { return ptr && ptr->ilnext == 0; } - - /* Cast, dereference, arrow ops. */ - operator PdaTrans*() const { return ptr; } - PdaTrans &operator *() const { return *ptr; } - PdaTrans *operator->() const { return ptr; } - - /* Increment, decrement. */ - inline void operator++(int) { ptr = ptr->ilnext; } - inline void operator--(int) { ptr = ptr->ilprev; } - - /* The iterator is simply a pointer. */ - PdaTrans *ptr; - }; -}; - -typedef DList<PdaTrans> PdaTransList; - -/* A element in a state dict. */ -struct PdaStateDictEl -: - public AvlTreeEl<PdaStateDictEl> -{ - PdaStateDictEl(const PdaStateSet &stateSet) - : stateSet(stateSet) { } - - const PdaStateSet &getKey() { return stateSet; } - PdaStateSet stateSet; - PdaState *targState; -}; - -/* Dictionary mapping a set of states to a target state. */ -typedef AvlTree< PdaStateDictEl, PdaStateSet, CmpTable<PdaState*> > PdaStateDict; - -/* What items does a particular state encompass. */ -typedef BstSet< long, CmpOrd<long> > DotSet; -typedef CmpTable< long, CmpOrd<long> > CmpDotSet; - -/* Map of dot sets to states. */ -typedef AvlTree< PdaState, DotSet, CmpDotSet > DotSetMap; -typedef PdaState DotSetMapEl; - -typedef BstMap< long, PdaTrans* > TransMap; -typedef BstMapEl< long, PdaTrans* > TransMapEl; - -/* State class that implements actions and priorities. 
*/ -struct PdaState -: - public ClosureQueueListEl, - public AvlTreeEl< PdaState > -{ - PdaState(); - PdaState(const PdaState &other); - ~PdaState(); - - /* Is the state final? */ - bool isFinState() { return stateBits & SB_ISFINAL; } - - PdaTrans *findTrans( long key ) - { - TransMapEl *transMapEl = transMap.find( key ); - if ( transMapEl == 0 ) - return 0; - return transMapEl->value; - } - - /* In transition list. */ - PdaTransInList inRange; - - ProdIdPairSet pendingCommits; - - /* When duplicating the fsm we need to map each - * state to the new state representing it. */ - PdaState *stateMap; - - /* When merging states (state machine operations) this next pointer is - * used for the list of states that need to be filled in. */ - PdaState *alg_next; - - PdaStateSet *stateSet; - - /* Identification for printing and stable minimization. */ - int stateNum; - - /* A pointer to a dict element that contains the set of states this state - * represents. This cannot go into alg, because alg.next is used during - * the merging process. */ - PdaStateDictEl *stateDictEl; - - /* Bits controlling the behaviour of the state during collapsing to dfa. */ - int stateBits; - - /* State list elements. */ - PdaState *next, *prev; - - /* For dotset map. */ - DotSet &getKey() { return dotSet; } - - /* Closure management. */ - DotSet dotSet; - DotSet dotSet2; - bool onClosureQueue; - bool inClosedMap; - bool followMarked; - bool onStateList; - - TransMap transMap; - - RegionVect regions; - RegionVect preRegions; - - bool advanceReductions; -}; - -/* Compare lists of epsilon transitions. Entries are name ids of targets. */ -typedef CmpTable< int, CmpOrd<int> > CmpEpsilonTrans; - -/* Compare sets of context values. */ -typedef CmpTable< int, CmpOrd<int> > CmpContextSets; - -/* Graph class that implements actions and priorities. */ -struct PdaGraph -{ - /* Constructors/Destructors. */ - PdaGraph(); - PdaGraph( const PdaGraph &graph ); - ~PdaGraph(); - - /* The list of states. 
*/ - PdaStateList stateList; - PdaStateList misfitList; - - /* The start state. */ - PdaState *startState; - PdaStateSet entryStateSet; - - /* The set of final states. */ - PdaStateSet finStateSet; - - /* Closure queues and maps. */ - DotSetMap closedMap; - StateClosureQueue stateClosureQueue; - StateClosureQueue stateClosedList; - - TransClosureQueue transClosureQueue; - PdaState *stateClosureHead; - - LangEl **langElIndex; - - void setStartState( PdaState *state ); - void unsetStartState( ); - - /* - * Basic attaching and detaching. - */ - - /* Common to attaching/detaching list and default. */ - void attachToInList( PdaState *from, PdaState *to, PdaTrans *&head, PdaTrans *trans ); - void detachFromInList( PdaState *from, PdaState *to, PdaTrans *&head, PdaTrans *trans ); - - /* Attach with a new transition. */ - PdaTrans *appendNewTrans( PdaState *from, PdaState *to, long onChar1, long ); - PdaTrans *insertNewTrans( PdaState *from, PdaState *to, long lowKey, long ); - - /* Attach with an existing transition that already in an out list. */ - void attachTrans( PdaState *from, PdaState *to, PdaTrans *trans ); - - /* Detach a transition from a target state. */ - void detachTrans( PdaState *from, PdaState *to, PdaTrans *trans ); - - /* Detach a state from the graph. */ - void detachState( PdaState *state ); - - /* - * Callbacks. - */ - - /* Add in the properties of srcTrans into this. */ - void addInReduction( PdaTrans *dest, long prodId, long prior ); - void addInTrans( PdaTrans *destTrans, PdaTrans *srcTrans ); - void addInState( PdaState *destState, PdaState *srcState ); - - /* - * Allocation. - */ - - /* New up a state and add it to the graph. */ - PdaState *addState(); - - /* - * Fsm operators. - */ - - /* Follow to the fin state of src fsm. */ - PdaState *followFsm( PdaState *from, PdaGraph *srcFsm ); - - /* - * Final states - */ - - /* Set and Unset a state as final. 
*/ - void setFinState( PdaState *state ); - void unsetFinState( PdaState *state ); - void unsetAllFinStates( ); - - /* Set State numbers starting at 0. */ - void setStateNumbers(); - - /* - * Path pruning - */ - - /* Mark all states reachable from state. */ - void markReachableFromHere( PdaState *state ); - - /* Removes states that cannot be reached by any path in the fsm and are - * thus wasted silicon. */ - void removeUnreachableStates(); - - /* Remove error actions from states on which the error transition will - * never be taken. */ - bool outListCovers( PdaState *state ); - - /* Remove states that are on the misfit list. */ - void removeMisfits(); - - - /* - * Other - */ - - /* Move the in trans into src into dest. */ - void inTransMove(PdaState *dest, PdaState *src); - - int fsmLength( ); - - /* Collected machine information. */ - unsigned long long maxState; - unsigned long long maxAction; - unsigned long long maxLelId; - unsigned long long maxOffset; - unsigned long long maxIndex; - unsigned long long maxProdLen; - - PdaActionSet actionSet; -}; - - -#endif /* _FSMGRAPH_H */ diff --git a/colm/pdarun.c b/colm/pdarun.c deleted file mode 100644 index 62ab107e..00000000 --- a/colm/pdarun.c +++ /dev/null @@ -1,2272 +0,0 @@ -/* - * Copyright 2007-2012 Adrian Thurston <thurston@complang.org> - */ - -/* This file is part of Colm. - * - * Colm is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * Colm is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with Colm; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include "config.h" -#include "debug.h" -#include "pdarun.h" -#include "fsmrun.h" -#include "bytecode.h" -#include "tree.h" -#include "pool.h" - -#include <errno.h> -#include <stdio.h> -#include <string.h> -#include <stdlib.h> -#include <assert.h> - -#define true 1 -#define false 0 - -#define act_sb 0x1 -#define act_rb 0x2 -#define lower 0x0000ffff -#define upper 0xffff0000 - -#define read_word_p( i, p ) do { \ - i = ((Word) p[0]); \ - i |= ((Word) p[1]) << 8; \ - i |= ((Word) p[2]) << 16; \ - i |= ((Word) p[3]) << 24; \ -} while(0) - -#define read_tree_p( i, p ) do { \ - Word w; \ - w = ((Word) p[0]); \ - w |= ((Word) p[1]) << 8; \ - w |= ((Word) p[2]) << 16; \ - w |= ((Word) p[3]) << 24; \ - i = (Tree*)w; \ -} while(0) - -void initFsmRun( FsmRun *fsmRun, Program *prg ) -{ - fsmRun->tables = prg->rtd->fsmTables; - fsmRun->runBuf = 0; - - /* Run buffers need to stick around because - * token strings point into them. */ - fsmRun->runBuf = newRunBuf(); - fsmRun->runBuf->next = 0; - - fsmRun->p = fsmRun->pe = fsmRun->runBuf->data; - fsmRun->peof = 0; - - fsmRun->attachedInput = 0; - fsmRun->attachedSource = 0; - fsmRun->preRegion = -1; -} - -void clearFsmRun( Program *prg, FsmRun *fsmRun ) -{ - if ( fsmRun->runBuf != 0 ) { - /* Transfer the run buf list to the program */ - RunBuf *head = fsmRun->runBuf; - RunBuf *tail = head; - while ( tail->next != 0 ) - tail = tail->next; - - tail->next = prg->allocRunBuf; - prg->allocRunBuf = head; - } -} - -/* Keep the position up to date after consuming text. 
*/ -void updatePosition( InputStream *inputStream, const char *data, long length ) -{ - if ( !inputStream->handlesLine ) { - int i; - for ( i = 0; i < length; i++ ) { - if ( data[i] != '\n' ) - inputStream->column += 1; - else { - inputStream->line += 1; - inputStream->column = 1; - } - } - } - - inputStream->byte += length; -} - -/* Keep the position up to date after sending back text. */ -void undoPosition( InputStream *inputStream, const char *data, long length ) -{ - /* FIXME: this needs to fetch the position information from the parsed - * token and restore based on that.. */ - int i; - if ( !inputStream->handlesLine ) { - for ( i = 0; i < length; i++ ) { - if ( data[i] == '\n' ) - inputStream->line -= 1; - } - } - - inputStream->byte -= length; -} - -void incrementSteps( PdaRun *pdaRun ) -{ - pdaRun->steps += 1; - debug( REALM_PARSE, "steps up to %ld\n", pdaRun->steps ); -} - -void decrementSteps( PdaRun *pdaRun ) -{ - pdaRun->steps -= 1; - debug( REALM_PARSE, "steps down to %ld\n", pdaRun->steps ); -} - -/* Load up a token, starting from tokstart if it is set. If not set then - * start it at data. */ -Head *streamPull( Program *prg, FsmRun *fsmRun, InputStream *inputStream, long length ) -{ - /* We should not be in the midst of getting a token. 
*/ - assert( fsmRun->tokstart == 0 ); - - RunBuf *runBuf = newRunBuf(); - runBuf->next = fsmRun->runBuf; - fsmRun->runBuf = runBuf; - - int len = 0; - getData( fsmRun, inputStream, 0, runBuf->data, length, &len ); - consumeData( inputStream, length ); - fsmRun->p = fsmRun->pe = runBuf->data + length; - - Head *tokdata = stringAllocPointer( prg, runBuf->data, length ); - updatePosition( inputStream, runBuf->data, length ); - - return tokdata; -} - -void undoStreamPull( FsmRun *fsmRun, InputStream *inputStream, const char *data, long length ) -{ - debug( REALM_PARSE, "undoing stream pull\n" ); - - prependData( inputStream, data, length ); -} - -void streamPushText( FsmRun *fsmRun, InputStream *inputStream, const char *data, long length ) -{ - prependData( inputStream, data, length ); -} - -void streamPushTree( FsmRun *fsmRun, InputStream *inputStream, Tree *tree, int ignore ) -{ - prependTree( inputStream, tree, ignore ); -} - -void undoStreamPush( Program *prg, Tree **sp, FsmRun *fsmRun, InputStream *inputStream, long length ) -{ - if ( length < 0 ) { - Tree *tree = undoPrependTree( inputStream ); - treeDownref( prg, sp, tree ); - } - else { - undoPrependData( inputStream, length ); - } -} - -void undoStreamAppend( Program *prg, Tree **sp, FsmRun *fsmRun, InputStream *inputStream, Tree *input, long length ) -{ - if ( input->id == LEL_ID_STR ) - undoAppendData( inputStream, length ); - else if ( input->id == LEL_ID_STREAM ) - undoAppendStream( inputStream ); - else { - Tree *tree = undoAppendTree( inputStream ); - treeDownref( prg, sp, tree ); - } -} - -/* Should only be sending back whole tokens/ignores, therefore the send back - * should never cross a buffer boundary. Either we slide back data, or we move to - * a previous buffer and slide back data. 
*/ -static void sendBackText( FsmRun *fsmRun, InputStream *inputStream, const char *data, long length ) -{ - debug( REALM_PARSE, "push back of %ld characters\n", length ); - - if ( length == 0 ) - return; - - debug( REALM_PARSE, "sending back text: %.*s\n", - (int)length, data ); - - undoConsumeData( fsmRun, inputStream, data, length ); - undoPosition( inputStream, data, length ); -} - -void sendBackTree( InputStream *inputStream, Tree *tree ) -{ - undoConsumeTree( inputStream, tree, false ); -} - -/* - * Stops on: - * PcrRevIgnore - */ -static void sendBackIgnore( Program *prg, Tree **sp, PdaRun *pdaRun, FsmRun *fsmRun, - InputStream *inputStream, ParseTree *parseTree ) -{ - #ifdef DEBUG - LangElInfo *lelInfo = prg->rtd->lelInfo; - debug( REALM_PARSE, "sending back: %s%s\n", - lelInfo[parseTree->shadow->tree->id].name, - parseTree->flags & PF_ARTIFICIAL ? " (artificial)" : "" ); - #endif - - Head *head = parseTree->shadow->tree->tokdata; - int artificial = parseTree->flags & PF_ARTIFICIAL; - - if ( head != 0 && !artificial ) - sendBackText( fsmRun, inputStream, stringData( head ), head->length ); - - decrementSteps( pdaRun ); - - /* Check for reverse code. 
*/ - if ( parseTree->flags & PF_HAS_RCODE ) { - pdaRun->onDeck = true; - parseTree->flags &= ~PF_HAS_RCODE; - } - - if ( pdaRun->steps == pdaRun->targetSteps ) { - debug( REALM_PARSE, "trigger parse stop, steps = target = %d\n", pdaRun->targetSteps ); - pdaRun->stop = true; - } - -} - -void attachInput( FsmRun *fsmRun, InputStream *is ) -{ - if ( is->attached != 0 && is->attached != fsmRun ) - detachInput( is->attached, is ); - - if ( is->attached != fsmRun ) { - debug( REALM_INPUT, "attaching fsm run to input stream: %p %p\n", fsmRun, is ); - fsmRun->attachedInput = is; - is->attached = fsmRun; - } -} - -void attachSource( FsmRun *fsmRun, SourceStream *ss ) -{ - if ( ss->attached != 0 && ss->attached != fsmRun ) - detachSource( ss->attached, ss ); - - if ( ss->attached != fsmRun ) { - debug( REALM_INPUT, "attaching fsm run to source stream: %p %p\n", fsmRun, ss ); - fsmRun->attachedSource = ss; - ss->attached = fsmRun; - } -} - -void detachInput( FsmRun *fsmRun, InputStream *is ) -{ - debug( REALM_INPUT, "detaching fsm run from input stream: %p %p\n", fsmRun, is ); - - fsmRun->attachedInput = 0; - is->attached = 0; - - clearBuffered( fsmRun ); - - if ( fsmRun->attachedSource != 0 ) { - fsmRun->attachedSource->attached = 0; - fsmRun->attachedSource = 0; - } -} - -void detachSource( FsmRun *fsmRun, SourceStream *is ) -{ - debug( REALM_INPUT, "detaching fsm run from source stream: %p %p\n", fsmRun, is ); - - fsmRun->attachedSource = 0; - is->attached = 0; - - clearBuffered( fsmRun ); - - if ( fsmRun->attachedInput != 0 ) { - fsmRun->attachedInput->attached = 0; - fsmRun->attachedInput = 0; - } -} - -void clearBuffered( FsmRun *fsmRun ) -{ - /* If there is data in the current buffer then send the whole send back - * should be in this buffer. 
*/ - if ( fsmRun->tokstart != 0 ) { - fsmRun->p = fsmRun->pe = fsmRun->tokstart; - fsmRun->tokstart = 0; - } - else { - fsmRun->pe = fsmRun->p; - } -} - -void resetToken( FsmRun *fsmRun ) -{ - /* If there is a token started, but never finished for a lack of data, we - * must first backup over it. */ - if ( fsmRun->tokstart != 0 ) { - fsmRun->p = fsmRun->tokstart; - fsmRun->tokstart = 0; - } -} - -/* Stops on: - * PcrRevToken - */ - -static void sendBack( Program *prg, Tree **sp, PdaRun *pdaRun, FsmRun *fsmRun, - InputStream *inputStream, ParseTree *parseTree ) -{ - debug( REALM_PARSE, "sending back: %s\n", prg->rtd->lelInfo[parseTree->id].name ); - - if ( parseTree->flags & PF_NAMED ) { - ///* Send back anything in the buffer that has not been parsed. */ - //if ( fsmRun->p == fsmRun->runBuf->data ) - // sendBackRunBufHead( fsmRun, inputStream ); - - /* Send the named lang el back first, then send back any leading - * whitespace. */ - undoConsumeLangEl( inputStream ); - } - - decrementSteps( pdaRun ); - - /* Artifical were not parsed, instead sent in as items. */ - if ( parseTree->flags & PF_ARTIFICIAL ) { - /* Check for reverse code. */ - if ( parseTree->flags & PF_HAS_RCODE ) { - debug( REALM_PARSE, "tree has rcode, setting on deck\n" ); - pdaRun->onDeck = true; - parseTree->flags &= ~PF_HAS_RCODE; - } - - treeUpref( parseTree->shadow->tree ); - - sendBackTree( inputStream, parseTree->shadow->tree ); - } - else { - /* Check for reverse code. */ - if ( parseTree->flags & PF_HAS_RCODE ) { - debug( REALM_PARSE, "tree has rcode, setting on deck\n" ); - pdaRun->onDeck = true; - parseTree->flags &= ~PF_HAS_RCODE; - } - - /* Push back the token data. */ - sendBackText( fsmRun, inputStream, stringData( parseTree->shadow->tree->tokdata ), - stringLength( parseTree->shadow->tree->tokdata ) ); - - /* If eof was just sent back remember that it needs to be sent again. 
*/ - if ( parseTree->id == prg->rtd->eofLelIds[pdaRun->parserId] ) - inputStream->eofSent = false; - - /* If the item is bound then store remove it from the bindings array. */ - popBinding( pdaRun, parseTree ); - } - - if ( pdaRun->steps == pdaRun->targetSteps ) { - debug( REALM_PARSE, "trigger parse stop, steps = target = %d\n", pdaRun->targetSteps ); - pdaRun->stop = true; - } - - /* Downref the tree that was sent back and free the kid. */ - treeDownref( prg, sp, parseTree->shadow->tree ); - kidFree( prg, parseTree->shadow ); - parseTreeFree( prg, parseTree ); -} - -void setRegion( PdaRun *pdaRun, int emptyIgnore, ParseTree *tree ) -{ - if ( emptyIgnore ) { - /* Recording the next region. */ - tree->region = pdaRun->nextRegionInd; - if ( pdaRun->tables->tokenRegions[tree->region+1] != 0 ) - pdaRun->numRetry += 1; - } -} - -void ignoreTree( Program *prg, FsmRun *fsmRun, PdaRun *pdaRun, Tree *tree ) -{ - int emptyIgnore = pdaRun->accumIgnore == 0; - - incrementSteps( pdaRun ); - - ParseTree *parseTree = parseTreeAllocate( prg ); - parseTree->shadow = kidAllocate( prg ); - parseTree->shadow->tree = tree; - - parseTree->next = pdaRun->accumIgnore; - pdaRun->accumIgnore = parseTree; - - transferReverseCode( pdaRun, parseTree ); - - if ( fsmRun->preRegion >= 0 ) - parseTree->flags |= PF_RIGHT_IGNORE; - - setRegion( pdaRun, emptyIgnore, pdaRun->accumIgnore ); -} - -void ignoreTree2( Program *prg, PdaRun *pdaRun, Tree *tree ) -{ - int emptyIgnore = pdaRun->accumIgnore == 0; - - incrementSteps( pdaRun ); - - ParseTree *parseTree = parseTreeAllocate( prg ); - parseTree->flags |= PF_ARTIFICIAL; - parseTree->shadow = kidAllocate( prg ); - parseTree->shadow->tree = tree; - - parseTree->next = pdaRun->accumIgnore; - pdaRun->accumIgnore = parseTree; - - transferReverseCode( pdaRun, parseTree ); - - setRegion( pdaRun, emptyIgnore, pdaRun->accumIgnore ); -} - -Kid *makeTokenWithData( Program *prg, PdaRun *pdaRun, FsmRun *fsmRun, - InputStream *inputStream, int id, Head *tokdata ) 
-{ - /* Make the token object. */ - long objectLength = prg->rtd->lelInfo[id].objectLength; - Kid *attrs = allocAttrs( prg, objectLength ); - - Kid *input = 0; - input = kidAllocate( prg ); - input->tree = treeAllocate( prg ); - - debug( REALM_PARSE, "made token %p\n", input->tree ); - - input->tree->refs = 1; - input->tree->id = id; - input->tree->tokdata = tokdata; - - /* No children and ignores get added later. */ - input->tree->child = attrs; - - LangElInfo *lelInfo = prg->rtd->lelInfo; - if ( lelInfo[id].numCaptureAttr > 0 ) { - int i; - for ( i = 0; i < lelInfo[id].numCaptureAttr; i++ ) { - CaptureAttr *ca = &prg->rtd->captureAttr[lelInfo[id].captureAttr + i]; - Head *data = stringAllocFull( prg, - fsmRun->mark[ca->mark_enter], fsmRun->mark[ca->mark_leave] - - fsmRun->mark[ca->mark_enter] ); - Tree *string = constructString( prg, data ); - treeUpref( string ); - setAttr( input->tree, ca->offset, string ); - } - } - - return input; -} - -void clearIgnoreList( Program *prg, Tree **sp, Kid *kid ) -{ - while ( kid != 0 ) { - Kid *next = kid->next; - treeDownref( prg, sp, kid->tree ); - kidFree( prg, kid ); - kid = next; - } -} - -static void reportParseError( Program *prg, Tree **sp, PdaRun *pdaRun ) -{ - Kid *kid = pdaRun->btPoint; - Head *deepest = 0; - while ( kid != 0 ) { - Head *head = kid->tree->tokdata; - if ( head != 0 && head->location != 0 ) { - if ( deepest == 0 || head->location->byte > deepest->location->byte ) - deepest = head; - } - kid = kid->next; - } - - Head *errorHead = 0; - - /* If there are no error points on record assume the error occurred at the beginning of the stream. 
*/ - if ( deepest == 0 ) - errorHead = stringAllocFull( prg, "PARSE ERROR at 1:1", 18 ); - else { - debug( REALM_PARSE, "deepest location byte: %d\n", deepest->location->byte ); - - long line = deepest->location->line; - long i, column = deepest->location->column; - - for ( i = 0; i < deepest->length; i++ ) { - if ( deepest->data[i] != '\n' ) - column += 1; - else { - line += 1; - column = 1; - } - } - - char formatted[128]; - sprintf( formatted, "PARSE ERROR at %ld:%ld", line, column ); - errorHead = stringAllocFull( prg, formatted, strlen(formatted) ); - } - - Tree *tree = constructString( prg, errorHead ); - treeDownref( prg, sp, prg->lastParseError ); - prg->lastParseError = tree; - treeUpref( prg->lastParseError ); -} - -static void attachRightIgnore( Program *prg, Tree **sp, PdaRun *pdaRun, ParseTree *parseTree ) -{ - if ( pdaRun->accumIgnore == 0 ) - return; - - if ( pdaRun->stackTop->id > 0 && pdaRun->stackTop->id < prg->rtd->firstNonTermId ) { - /* OK, do it */ - debug( REALM_PARSE, "attaching right ignore\n" ); - - /* Reset. */ - assert( ! ( parseTree->flags & PF_RIGHT_IL_ATTACHED ) ); - - ParseTree *accum = pdaRun->accumIgnore; - - ParseTree *stopAt = 0, *use = accum; - while ( use != 0 ) { - if ( ! (use->flags & PF_RIGHT_IGNORE) ) - stopAt = use; - use = use->next; - } - - if ( stopAt != 0 ) { - /* Stop at was set. Make it the last item in the igore list. Take - * the rest. */ - accum = stopAt->next; - stopAt->next = 0; - } - else { - /* Stop at was never set. All right ignore. Use it all. */ - pdaRun->accumIgnore = 0; - } - - /* The data list needs to be extracted and reversed. The parse tree list - * can remain in stack order. */ - ParseTree *child = accum, *last = 0; - Kid *dataChild = 0, *dataLast = 0; - - while ( child ) { - dataChild = child->shadow; - ParseTree *next = child->next; - - /* Reverse the lists. */ - dataChild->next = dataLast; - child->next = last; - - /* Detach the parse tree from the data tree. 
*/ - child->shadow = 0; - - /* Keep the last for reversal. */ - dataLast = dataChild; - last = child; - - child = next; - } - - /* Last is now the first. */ - parseTree->rightIgnore = last; - - if ( dataChild != 0 ) { - debug( REALM_PARSE, "attaching ignore right\n" ); - - Kid *ignoreKid = dataLast; - - /* Copy the ignore list first if we need to attach it as a right - * ignore. */ - Tree *rightIgnore = 0; - - rightIgnore = treeAllocate( prg ); - rightIgnore->id = LEL_ID_IGNORE; - rightIgnore->child = ignoreKid; - - Tree *pushTo = parseTree->shadow->tree; - - pushTo = pushRightIgnore( prg, pushTo, rightIgnore ); - - parseTree->shadow->tree = pushTo; - - parseTree->flags |= PF_RIGHT_IL_ATTACHED; - } - } -} - -static void attachLeftIgnore( Program *prg, Tree **sp, PdaRun *pdaRun, ParseTree *parseTree ) -{ - /* Reset. */ - assert( ! ( parseTree->flags & PF_LEFT_IL_ATTACHED ) ); - - ParseTree *accum = pdaRun->accumIgnore; - pdaRun->accumIgnore = 0; - - /* The data list needs to be extracted and reversed. The parse tree list - * can remain in stack order. */ - ParseTree *child = accum, *last = 0; - Kid *dataChild = 0, *dataLast = 0; - - while ( child ) { - dataChild = child->shadow; - ParseTree *next = child->next; - - /* Reverse the lists. */ - dataChild->next = dataLast; - child->next = last; - - /* Detach the parse tree from the data tree. */ - child->shadow = 0; - - /* Keep the last for reversal. */ - dataLast = dataChild; - last = child; - - child = next; - } - - /* Last is now the first. */ - parseTree->leftIgnore = last; - - if ( dataChild != 0 ) { - debug( REALM_PARSE, "attaching left ignore\n" ); - - Kid *ignoreKid = dataChild; - - /* Make the ignore list for the left-ignore. 
*/ - Tree *leftIgnore = treeAllocate( prg ); - leftIgnore->id = LEL_ID_IGNORE; - leftIgnore->child = ignoreKid; - - Tree *pushTo = parseTree->shadow->tree; - - pushTo = pushLeftIgnore( prg, pushTo, leftIgnore ); - - parseTree->shadow->tree = pushTo; - - parseTree->flags |= PF_LEFT_IL_ATTACHED; - } -} - -/* Not currently used. Need to revive this. WARNING: untested changes here */ -static void detachRightIgnore( Program *prg, Tree **sp, PdaRun *pdaRun, ParseTree *parseTree ) -{ - /* Right ignore are immediately discarded since they are copies of - * left-ignores. */ - Tree *rightIgnore = 0; - if ( parseTree->flags & PF_RIGHT_IL_ATTACHED ) { - Tree *popFrom = parseTree->shadow->tree; - - popFrom = popRightIgnore( prg, sp, popFrom, &rightIgnore ); - - parseTree->shadow->tree = popFrom; - - parseTree->flags &= ~PF_RIGHT_IL_ATTACHED; - } - - if ( parseTree->rightIgnore != 0 ) { - assert( rightIgnore != 0 ); - - /* Transfer the trees to accumIgnore. */ - ParseTree *ignore = parseTree->rightIgnore; - parseTree->rightIgnore = 0; - - Kid *dataIgnore = rightIgnore->child; - rightIgnore->child = 0; - - ParseTree *last = 0; - Kid *dataLast = 0; - while ( ignore != 0 ) { - ParseTree *next = ignore->next; - Kid *dataNext = dataIgnore->next; - - /* Put the data trees underneath the parse trees. */ - ignore->shadow = dataIgnore; - - /* Reverse. */ - ignore->next = last; - dataIgnore->next = dataLast; - - /* Keep last for reversal. */ - last = ignore; - dataLast = dataIgnore; - - ignore = next; - dataIgnore = dataNext; - } - - pdaRun->accumIgnore = last; - - treeDownref( prg, sp, rightIgnore ); - } -} - -static void detachLeftIgnore( Program *prg, Tree **sp, PdaRun *pdaRun, FsmRun *fsmRun, ParseTree *parseTree ) -{ - /* Detach left. 
*/ - Tree *leftIgnore = 0; - if ( parseTree->flags & PF_LEFT_IL_ATTACHED ) { - Tree *popFrom = parseTree->shadow->tree; - - popFrom = popLeftIgnore( prg, sp, popFrom, &leftIgnore ); - - parseTree->shadow->tree = popFrom; - - parseTree->flags &= ~PF_LEFT_IL_ATTACHED; - } - - if ( parseTree->leftIgnore != 0 ) { - assert( leftIgnore != 0 ); - - /* Transfer the trees to accumIgnore. */ - ParseTree *ignore = parseTree->leftIgnore; - parseTree->leftIgnore = 0; - - Kid *dataIgnore = leftIgnore->child; - leftIgnore->child = 0; - - ParseTree *last = 0; - Kid *dataLast = 0; - while ( ignore != 0 ) { - ParseTree *next = ignore->next; - Kid *dataNext = dataIgnore->next; - - /* Put the data trees underneath the parse trees. */ - ignore->shadow = dataIgnore; - - /* Reverse. */ - ignore->next = last; - dataIgnore->next = dataLast; - - /* Keep last for reversal. */ - last = ignore; - dataLast = dataIgnore; - - ignore = next; - dataIgnore = dataNext; - } - - pdaRun->accumIgnore = last; - } - - treeDownref( prg, sp, leftIgnore ); -} - -void handleError( Program *prg, Tree **sp, PdaRun *pdaRun ) -{ - /* Check the result. */ - if ( pdaRun->parseError ) { - /* Error occured in the top-level parser. */ - reportParseError( prg, sp, pdaRun ); - } - else { - if ( isParserStopFinished( pdaRun ) ) { - debug( REALM_PARSE, "stopping the parse\n" ); - pdaRun->stopParsing = true; - } - } -} - -void sendIgnore( Program *prg, Tree **sp, InputStream *inputStream, FsmRun *fsmRun, PdaRun *pdaRun, long id ) -{ - debug( REALM_PARSE, "ignoring: %s\n", prg->rtd->lelInfo[id].name ); - - /* Make the ignore string. */ - Head *ignoreStr = extractMatch( prg, fsmRun, inputStream ); - updatePosition( inputStream, fsmRun->tokstart, ignoreStr->length ); - - debug( REALM_PARSE, "ignoring: %.*s\n", ignoreStr->length, ignoreStr->data ); - - Tree *tree = treeAllocate( prg ); - tree->refs = 1; - tree->id = id; - tree->tokdata = ignoreStr; - - /* Send it to the pdaRun. 
*/ - ignoreTree( prg, fsmRun, pdaRun, tree ); -} - - -/* Doesn't consume. */ -Head *peekMatch( Program *prg, FsmRun *fsmRun, InputStream *inputStream ) -{ - long length = fsmRun->p - fsmRun->tokstart; - Head *head = stringAllocPointer( prg, fsmRun->tokstart, length ); - head->location = locationAllocate( prg ); - head->location->line = inputStream->line; - head->location->column = inputStream->column; - head->location->byte = inputStream->byte; - - debug( REALM_PARSE, "location byte: %d\n", inputStream->byte ); - - return head; -} - -/* Consumes. */ -Head *extractMatch( Program *prg, FsmRun *fsmRun, InputStream *inputStream ) -{ - long length = fsmRun->p - fsmRun->tokstart; - Head *head = stringAllocPointer( prg, fsmRun->tokstart, length ); - head->location = locationAllocate( prg ); - head->location->line = inputStream->line; - head->location->column = inputStream->column; - head->location->byte = inputStream->byte; - - debug( REALM_PARSE, "location byte: %d\n", inputStream->byte ); - - consumeData( inputStream, length ); - - return head; -} - -static void sendToken( Program *prg, Tree **sp, InputStream *inputStream, FsmRun *fsmRun, PdaRun *pdaRun, long id ) -{ - int emptyIgnore = pdaRun->accumIgnore == 0; - - /* Make the token data. */ - Head *tokdata = extractMatch( prg, fsmRun, inputStream ); - - debug( REALM_PARSE, "token: %s text: %.*s\n", - prg->rtd->lelInfo[id].name, - stringLength(tokdata), stringData(tokdata) ); - - updatePosition( inputStream, fsmRun->tokstart, tokdata->length ); - - Kid *input = makeTokenWithData( prg, pdaRun, fsmRun, inputStream, id, tokdata ); - - incrementSteps( pdaRun ); - - ParseTree *parseTree = parseTreeAllocate( prg ); - parseTree->id = input->tree->id; - parseTree->shadow = input; - - pdaRun->parseInput = parseTree; - - /* Store any alternate scanning region. 
*/ - if ( input != 0 && pdaRun->cs >= 0 ) - setRegion( pdaRun, emptyIgnore, parseTree ); -} - -static void sendTree( Program *prg, Tree **sp, PdaRun *pdaRun, FsmRun *fsmRun, InputStream *inputStream ) -{ - Kid *input = kidAllocate( prg ); - input->tree = consumeTree( inputStream ); - - incrementSteps( pdaRun ); - - ParseTree *parseTree = parseTreeAllocate( prg ); - parseTree->id = input->tree->id; - parseTree->flags |= PF_ARTIFICIAL; - parseTree->shadow = input; - - pdaRun->parseInput = parseTree; -} - -static void sendIgnoreTree( Program *prg, Tree **sp, PdaRun *pdaRun, FsmRun *fsmRun, InputStream *inputStream ) -{ - Tree *tree = consumeTree( inputStream ); - ignoreTree2( prg, pdaRun, tree ); -} - -static void sendCi( Program *prg, Tree **sp, InputStream *inputStream, FsmRun *fsmRun, PdaRun *pdaRun, int id ) -{ - debug( REALM_PARSE, "token: CI\n" ); - -/**/ - - int emptyIgnore = pdaRun->accumIgnore == 0; - - /* Make the token data. */ - Head *tokdata = headAllocate( prg ); - tokdata->location = locationAllocate( prg ); - tokdata->location->line = inputStream->line; - tokdata->location->column = inputStream->column; - tokdata->location->byte = inputStream->byte; - - debug( REALM_PARSE, "token: %s text: %.*s\n", - prg->rtd->lelInfo[id].name, - stringLength(tokdata), stringData(tokdata) ); - - updatePosition( inputStream, fsmRun->tokstart, tokdata->length ); - - Kid *input = makeTokenWithData( prg, pdaRun, fsmRun, inputStream, id, tokdata ); - - incrementSteps( pdaRun ); - - ParseTree *parseTree = parseTreeAllocate( prg ); - parseTree->id = input->tree->id; - parseTree->shadow = input; - - pdaRun->parseInput = parseTree; - - /* Store any alternate scanning region. 
*/ - if ( input != 0 && pdaRun->cs >= 0 ) - setRegion( pdaRun, emptyIgnore, parseTree ); -} - - -static void sendEof( Program *prg, Tree **sp, InputStream *inputStream, FsmRun *fsmRun, PdaRun *pdaRun ) -{ - debug( REALM_PARSE, "token: _EOF\n" ); - - incrementSteps( pdaRun ); - - Head *head = headAllocate( prg ); - head->location = locationAllocate( prg ); - head->location->line = inputStream->line; - head->location->column = inputStream->column; - head->location->byte = inputStream->byte; - - Kid *input = kidAllocate( prg ); - input->tree = treeAllocate( prg ); - - input->tree->refs = 1; - input->tree->id = prg->rtd->eofLelIds[pdaRun->parserId]; - input->tree->tokdata = head; - - /* Set the state using the state of the parser. */ - fsmRun->region = pdaRunGetNextRegion( pdaRun, 0 ); - fsmRun->preRegion = pdaRunGetNextPreRegion( pdaRun ); - fsmRun->cs = fsmRun->tables->entryByRegion[fsmRun->region]; - - ParseTree *parseTree = parseTreeAllocate( prg ); - parseTree->id = input->tree->id; - parseTree->shadow = input; - - pdaRun->parseInput = parseTree; -} - -void newToken( Program *prg, PdaRun *pdaRun, FsmRun *fsmRun ) -{ - /* Init the scanner vars. */ - fsmRun->act = 0; - fsmRun->tokstart = 0; - fsmRun->tokend = 0; - fsmRun->matchedToken = 0; - - /* Set the state using the state of the parser. */ - fsmRun->region = pdaRunGetNextRegion( pdaRun, 0 ); - fsmRun->preRegion = pdaRunGetNextPreRegion( pdaRun ); - if ( fsmRun->preRegion > 0 ) { - debug( REALM_PARSE, "pre region for next token: %s\n", - prg->rtd->regionInfo[fsmRun->preRegion].name ); - fsmRun->cs = fsmRun->tables->entryByRegion[fsmRun->preRegion]; - fsmRun->ncs = fsmRun->tables->entryByRegion[fsmRun->region]; - } - else { - debug( REALM_PARSE, "scanning using token region: %s\n", - prg->rtd->regionInfo[fsmRun->region].name ); - - fsmRun->cs = fsmRun->tables->entryByRegion[fsmRun->region]; - } - - - /* Clear the mark array. 
*/ - memset( fsmRun->mark, 0, sizeof(fsmRun->mark) ); -} - -static void pushBtPoint( Program *prg, PdaRun *pdaRun ) -{ - Tree *tree = 0; - if ( pdaRun->accumIgnore != 0 ) - tree = pdaRun->accumIgnore->shadow->tree; - else if ( pdaRun->tokenList != 0 ) - tree = pdaRun->tokenList->kid->tree; - - if ( tree != 0 ) { - debug( REALM_PARSE, "pushing bt point with location byte %d\n", - ( tree != 0 && tree->tokdata != 0 && tree->tokdata->location != 0 ) ? - tree->tokdata->location->byte : 0 ); - - Kid *kid = kidAllocate( prg ); - kid->tree = tree; - treeUpref( tree ); - kid->next = pdaRun->btPoint; - pdaRun->btPoint = kid; - } -} - - -#define SCAN_UNDO -7 -#define SCAN_IGNORE -6 -#define SCAN_TREE -5 -#define SCAN_TRY_AGAIN_LATER -4 -#define SCAN_ERROR -3 -#define SCAN_LANG_EL -2 -#define SCAN_EOF -1 - -long scanToken( Program *prg, PdaRun *pdaRun, FsmRun *fsmRun, InputStream *inputStream ) -{ - if ( pdaRun->triggerUndo ) - return SCAN_UNDO; - - while ( true ) { - fsmExecute( fsmRun, inputStream ); - - /* First check if scanning stopped because we have a token. */ - if ( fsmRun->matchedToken > 0 ) { - /* If the token has a marker indicating the end (due to trailing - * context) then adjust data now. */ - LangElInfo *lelInfo = prg->rtd->lelInfo; - if ( lelInfo[fsmRun->matchedToken].markId >= 0 ) - fsmRun->p = fsmRun->mark[lelInfo[fsmRun->matchedToken].markId]; - - return fsmRun->matchedToken; - } - - /* Check for error. */ - if ( fsmRun->cs == fsmRun->tables->errorState ) { - /* If a token was started, but not finished (tokstart != 0) then - * restore data to the beginning of that token. */ - if ( fsmRun->tokstart != 0 ) - fsmRun->p = fsmRun->tokstart; - - /* Check for a default token in the region. If one is there - * then send it and continue with the processing loop. 
*/ - if ( prg->rtd->regionInfo[fsmRun->region].defaultToken >= 0 ) { - fsmRun->tokstart = fsmRun->tokend = fsmRun->p; - return prg->rtd->regionInfo[fsmRun->region].defaultToken; - } - - return SCAN_ERROR; - } - - /* Got here because the state machine didn't match a token or - * encounter an error. Must be because we got to the end of the buffer - * data. */ - assert( fsmRun->p == fsmRun->pe ); - - /* There may be space left in the current buffer. If not then we need - * to make some. */ - long space = fsmRun->runBuf->data + FSM_BUFSIZE - fsmRun->pe; - if ( space == 0 ) { - /* Create a new run buf. */ - RunBuf *newBuf = newRunBuf(); - - /* If partway through a token then preserve the prefix. */ - long have = 0; - - if ( fsmRun->tokstart == 0 ) { - /* No prefix. We filled the previous buffer. */ - fsmRun->runBuf->length = FSM_BUFSIZE; - } - else { - int i; - - debug( REALM_SCAN, "copying data over to new buffer\n" ); - assert( fsmRun->runBuf->offset == 0 ); - - if ( fsmRun->tokstart == fsmRun->runBuf->data ) { - /* A token is started and it is already at the beginning - * of the current buffer. This means buffer is full and it - * must be grown. Probably need to do this sooner. */ - fatal( "OUT OF BUFFER SPACE\n" ); - } - - /* There is data that needs to be shifted over. */ - have = fsmRun->pe - fsmRun->tokstart; - memcpy( newBuf->data, fsmRun->tokstart, have ); - - /* Compute the length of the previous buffer. */ - fsmRun->runBuf->length = FSM_BUFSIZE - have; - - /* Compute tokstart and tokend. */ - long dist = fsmRun->tokstart - newBuf->data; - - fsmRun->tokend -= dist; - fsmRun->tokstart = newBuf->data; - - /* Shift any markers. */ - for ( i = 0; i < MARK_SLOTS; i++ ) { - if ( fsmRun->mark[i] != 0 ) - fsmRun->mark[i] -= dist; - } - } - - fsmRun->p = fsmRun->pe = newBuf->data + have; - fsmRun->peof = 0; - - newBuf->next = fsmRun->runBuf; - fsmRun->runBuf = newBuf; - } - - /* We don't have any data. What is next in the input inputStream? 
*/ - space = fsmRun->runBuf->data + FSM_BUFSIZE - fsmRun->pe; - assert( space > 0 ); - - /* Get more data. */ - int have = fsmRun->tokstart != 0 ? fsmRun->p - fsmRun->tokstart : 0; - int len = 0; - debug( REALM_SCAN, "fetching data: have: %d space: %d\n", have, space ); - int type = getData( fsmRun, inputStream, have, fsmRun->p, space, &len ); - - switch ( type ) { - case INPUT_DATA: - fsmRun->pe = fsmRun->p + len; - break; - - case INPUT_EOF: - if ( fsmRun->tokstart != 0 ) - fsmRun->peof = fsmRun->pe; - else - return SCAN_EOF; - break; - - case INPUT_EOD: - return SCAN_TRY_AGAIN_LATER; - - case INPUT_LANG_EL: - if ( fsmRun->tokstart != 0 ) - fsmRun->peof = fsmRun->pe; - else - return SCAN_LANG_EL; - break; - - case INPUT_TREE: - if ( fsmRun->tokstart != 0 ) - fsmRun->peof = fsmRun->pe; - else - return SCAN_TREE; - break; - case INPUT_IGNORE: - if ( fsmRun->tokstart != 0 ) - fsmRun->peof = fsmRun->pe; - else - return SCAN_IGNORE; - break; - } - } - - /* Should not be reached. */ - return SCAN_ERROR; -} - -/* - * Stops on: - * PcrPreEof - * PcrGeneration - * PcrReduction - * PcrRevReduction - * PcrRevIgnore - * PcrRevToken - */ - -long parseLoop( Program *prg, Tree **sp, PdaRun *pdaRun, - FsmRun *fsmRun, InputStream *inputStream, long entry ) -{ - LangElInfo *lelInfo = prg->rtd->lelInfo; - -switch ( entry ) { -case PcrStart: - - pdaRun->stop = false; - - while ( true ) { - debug( REALM_PARSE, "parse loop start %d:%d\n", inputStream->line, inputStream->column ); - - /* Pull the current scanner from the parser. This can change during - * parsing due to inputStream pushes, usually for the purpose of includes. 
- * */ - pdaRun->tokenId = scanToken( prg, pdaRun, fsmRun, inputStream ); - - if ( pdaRun->tokenId == SCAN_ERROR ) { - if ( fsmRun->preRegion >= 0 ) { - fsmRun->preRegion = -1; - fsmRun->cs = fsmRun->ncs; - debug( REALM_PARSE, "moving from pre region to main region: %s\n", - prg->rtd->regionInfo[fsmRun->region].name ); - continue; - } - } - - if ( pdaRun->tokenId == SCAN_ERROR && - ( prg->rtd->regionInfo[fsmRun->region].ciLelId > 0 ) ) - { - debug( REALM_PARSE, "sending a collect ignore\n" ); - sendCi( prg, sp, inputStream, fsmRun, pdaRun, prg->rtd->regionInfo[fsmRun->region].ciLelId ); - goto yes; - } - - if ( pdaRun->tokenId == SCAN_TRY_AGAIN_LATER ) { - debug( REALM_PARSE, "scanner says try again later\n" ); - break; - } - - assert( pdaRun->parseInput == 0 ); - pdaRun->parseInput = 0; - - /* Check for EOF. */ - if ( pdaRun->tokenId == SCAN_EOF ) { - inputStream->eofSent = true; - sendEof( prg, sp, inputStream, fsmRun, pdaRun ); - - pdaRun->frameId = prg->rtd->regionInfo[fsmRun->region].eofFrameId; - - if ( prg->ctxDepParsing && pdaRun->frameId >= 0 ) { - debug( REALM_PARSE, "HAVE PRE_EOF BLOCK\n" ); - - pdaRun->fi = &prg->rtd->frameInfo[pdaRun->frameId]; - pdaRun->code = pdaRun->fi->codeWV; - -return PcrPreEof; -case PcrPreEof: - makeReverseCode( pdaRun ); - } - } - else if ( pdaRun->tokenId == SCAN_UNDO ) { - /* Fall through with parseInput = 0. FIXME: Do we need to send back ignore? */ - debug( REALM_PARSE, "invoking undo from the scanner\n" ); - } - else if ( pdaRun->tokenId == SCAN_ERROR ) { - /* Scanner error, maybe retry. */ - if ( pdaRun->accumIgnore == 0 && pdaRunGetNextRegion( pdaRun, 1 ) != 0 ) { - debug( REALM_PARSE, "scanner failed, trying next region\n" ); - - pdaRun->nextRegionInd += 1; - goto skipSend; - } - else if ( pdaRun->numRetry > 0 ) { - debug( REALM_PARSE, "invoking parse error from the scanner\n" ); - - /* Fall through to send null (error). 
*/ - pushBtPoint( prg, pdaRun ); - } - else { - debug( REALM_PARSE, "no alternate scanning regions\n" ); - - /* There are no alternative scanning regions to try, nor are - * there any alternatives stored in the current parse tree. No - * choice but to end the parse. */ - pushBtPoint( prg, pdaRun ); - - reportParseError( prg, sp, pdaRun ); - pdaRun->parseError = 1; - goto skipSend; - } - } - else if ( pdaRun->tokenId == SCAN_LANG_EL ) { - debug( REALM_PARSE, "sending an named lang el\n" ); - - /* A named language element (parsing colm program). */ - sendNamedLangEl( prg, sp, pdaRun, fsmRun, inputStream ); - } - else if ( pdaRun->tokenId == SCAN_TREE ) { - debug( REALM_PARSE, "sending a tree\n" ); - - /* A tree already built. */ - sendTree( prg, sp, pdaRun, fsmRun, inputStream ); - } - else if ( pdaRun->tokenId == SCAN_IGNORE ) { - debug( REALM_PARSE, "sending an ignore token\n" ); - - /* A tree to ignore. */ - sendIgnoreTree( prg, sp, pdaRun, fsmRun, inputStream ); - goto skipSend; - } - else if ( prg->ctxDepParsing && lelInfo[pdaRun->tokenId].frameId >= 0 ) { - /* Has a generation action. */ - debug( REALM_PARSE, "token gen action: %s\n", - prg->rtd->lelInfo[pdaRun->tokenId].name ); - - /* Make the token data. */ - pdaRun->tokdata = peekMatch( prg, fsmRun, inputStream ); - - /* Note that we don't update the position now. It is done when the token - * data is pulled from the inputStream. */ - - fsmRun->p = fsmRun->tokstart; - fsmRun->tokstart = 0; - - pdaRun->fi = &prg->rtd->frameInfo[prg->rtd->lelInfo[pdaRun->tokenId].frameId]; - pdaRun->frameId = prg->rtd->lelInfo[pdaRun->tokenId].frameId; - pdaRun->code = pdaRun->fi->codeWV; - -return PcrGeneration; -case PcrGeneration: - - makeReverseCode( pdaRun ); - - /* Finished with the match text. 
*/ - stringFree( prg, pdaRun->tokdata ); - - goto skipSend; - } - else if ( lelInfo[pdaRun->tokenId].ignore ) { - debug( REALM_PARSE, "sending an ignore token: %s\n", - prg->rtd->lelInfo[pdaRun->tokenId].name ); - - /* Is an ignore token. */ - sendIgnore( prg, sp, inputStream, fsmRun, pdaRun, pdaRun->tokenId ); - goto skipSend; - } - else { - debug( REALM_PARSE, "sending an a plain old token: %s\n", - prg->rtd->lelInfo[pdaRun->tokenId].name ); - - /* Is a plain token. */ - sendToken( prg, sp, inputStream, fsmRun, pdaRun, pdaRun->tokenId ); - } -yes: - - if ( pdaRun->parseInput != 0 ) - transferReverseCode( pdaRun, pdaRun->parseInput ); - - if ( pdaRun->parseInput != 0 ) { - /* If it's a nonterminal with a termdup then flip the parse tree to the terminal. */ - if ( pdaRun->parseInput->id >= prg->rtd->firstNonTermId ) { - pdaRun->parseInput->id = prg->rtd->lelInfo[pdaRun->parseInput->id].termDupId; - pdaRun->parseInput->flags |= PF_TERM_DUP; - } - } - - long pcr = parseToken( prg, sp, pdaRun, fsmRun, inputStream, PcrStart ); - - while ( pcr != PcrDone ) { - -return pcr; -case PcrReduction: -case PcrReverse: - - pcr = parseToken( prg, sp, pdaRun, fsmRun, inputStream, entry ); - } - - assert( pcr == PcrDone ); - - handleError( prg, sp, pdaRun ); - -skipSend: - newToken( prg, pdaRun, fsmRun ); - - /* Various stop conditions. This should all be coverned by one test - * eventually. 
*/ - - if ( pdaRun->triggerUndo ) { - debug( REALM_PARSE, "parsing stopped by triggerUndo\n" ); - break; - } - - if ( inputStream->eofSent ) { - debug( REALM_PARSE, "parsing stopped by EOF\n" ); - break; - } - - if ( pdaRun->stopParsing ) { - debug( REALM_PARSE, "scanner has been stopped\n" ); - break; - } - - if ( pdaRun->stop ) { - debug( REALM_PARSE, "parsing has been stopped by consumedCount\n" ); - break; - } - - if ( prg->induceExit ) { - debug( REALM_PARSE, "parsing has been stopped by a call to exit\n" ); - break; - } - - if ( pdaRun->parseError ) { - debug( REALM_PARSE, "parsing stopped by a parse error\n" ); - break; - } - } - -case PcrDone: -break; } - - return PcrDone; -} - -/* Offset can be used to look at the next nextRegionInd. */ -int pdaRunGetNextRegion( PdaRun *pdaRun, int offset ) -{ - return pdaRun->tables->tokenRegions[pdaRun->nextRegionInd+offset]; -} - -int pdaRunGetNextPreRegion( PdaRun *pdaRun ) -{ - return pdaRun->tables->tokenPreRegions[pdaRun->nextRegionInd]; -} - -Tree *getParsedRoot( PdaRun *pdaRun, int stop ) -{ - if ( pdaRun->parseError ) - return 0; - else if ( stop ) { - if ( pdaRun->stackTop->shadow != 0 ) - return pdaRun->stackTop->shadow->tree; - } - else { - if ( pdaRun->stackTop->next->shadow != 0 ) - return pdaRun->stackTop->next->shadow->tree; - } - return 0; -} - -void clearParseTree( Program *prg, Tree **sp, ParseTree *parseTree ) -{ - /* Traverse the stack downreffing. 
*/ - ParseTree *pt = parseTree; - while ( pt != 0 ) { - ParseTree *next = pt->next; - if ( pt->shadow != 0 ) { - treeDownref( prg, sp, pt->shadow->tree ); - kidFree( prg, pt->shadow ); - } - if ( pt->child != 0 ) - clearParseTree( prg, sp, pt->child ); - if ( pt->leftIgnore != 0 ) - clearParseTree( prg, sp, pt->leftIgnore ); - if ( pt->rightIgnore != 0 ) - clearParseTree( prg, sp, pt->rightIgnore ); - parseTreeFree( prg, pt ); - pt = next; - } -} - -void clearPdaRun( Program *prg, Tree **sp, PdaRun *pdaRun ) -{ - /* Remaining stack and parse trees underneath. */ - clearParseTree( prg, sp, pdaRun->stackTop ); - pdaRun->stackTop = 0; - - /* Traverse the token list downreffing. */ - Ref *ref = pdaRun->tokenList; - while ( ref != 0 ) { - Ref *next = ref->next; - kidFree( prg, (Kid*)ref ); - ref = next; - } - pdaRun->tokenList = 0; - - /* Traverse the btPoint list downreffing */ - Kid *btp = pdaRun->btPoint; - while ( btp != 0 ) { - Kid *next = btp->next; - treeDownref( prg, sp, btp->tree ); - kidFree( prg, (Kid*)btp ); - btp = next; - } - pdaRun->btPoint = 0; - - /* Clear out any remaining ignores. 
*/ - clearParseTree( prg, sp, pdaRun->accumIgnore ); - pdaRun->accumIgnore = 0; - - if ( pdaRun->context != 0 ) - treeDownref( prg, sp, pdaRun->context ); - - rcodeDownrefAll( prg, sp, &pdaRun->reverseCode ); - rtCodeVectEmpty( &pdaRun->reverseCode ); - rtCodeVectEmpty( &pdaRun->rcodeCollect ); -} - -int isParserStopFinished( PdaRun *pdaRun ) -{ - int done = - pdaRun->stackTop->next != 0 && - pdaRun->stackTop->next->next == 0 && - pdaRun->stackTop->id == pdaRun->stopTarget; - return done; -} - -void initPdaRun( PdaRun *pdaRun, Program *prg, PdaTables *tables, - FsmRun *fsmRun, int parserId, long stopTarget, int revertOn, Tree *context ) -{ - memset( pdaRun, 0, sizeof(PdaRun) ); - pdaRun->tables = tables; - pdaRun->parserId = parserId; - pdaRun->stopTarget = stopTarget; - pdaRun->revertOn = revertOn; - pdaRun->targetSteps = -1; - - debug( REALM_PARSE, "initializing PdaRun\n" ); - - /* FIXME: need the right one here. */ - pdaRun->cs = prg->rtd->startStates[pdaRun->parserId]; - - Kid *sentinal = kidAllocate( prg ); - sentinal->tree = treeAllocate( prg ); - sentinal->tree->refs = 1; - - /* Init the element allocation variables. 
*/ - pdaRun->stackTop = parseTreeAllocate( prg ); - pdaRun->stackTop->state = -1; - pdaRun->stackTop->shadow = sentinal; - - pdaRun->numRetry = 0; - pdaRun->nextRegionInd = pdaRun->tables->tokenRegionInds[pdaRun->cs]; - pdaRun->stopParsing = false; - pdaRun->accumIgnore = 0; - pdaRun->btPoint = 0; - pdaRun->checkNext = false; - pdaRun->checkStop = false; - - initBindings( pdaRun ); - - initRtCodeVect( &pdaRun->reverseCode ); - initRtCodeVect( &pdaRun->rcodeCollect ); - - pdaRun->context = splitTree( prg, context ); - pdaRun->parseError = 0; - pdaRun->parseInput = 0; - pdaRun->triggerUndo = 0; - - pdaRun->tokenId = 0; - - pdaRun->onDeck = false; - pdaRun->parsed = 0; - pdaRun->reject = false; - - pdaRun->rcBlockCount = 0; -} - -long stackTopTarget( Program *prg, PdaRun *pdaRun ) -{ - long state; - if ( pdaRun->stackTop->state < 0 ) - state = prg->rtd->startStates[pdaRun->parserId]; - else { - state = pdaRun->tables->targs[(int)pdaRun->tables->indicies[pdaRun->tables->offsets[ - pdaRun->stackTop->state] + - (pdaRun->stackTop->id - pdaRun->tables->keys[pdaRun->stackTop->state<<1])]]; - } - return state; -} - -/* - * Local commit: - * -clears reparse flags underneath - * -must be possible to backtrack after - * Global commit (revertOn) - * -clears all reparse flags - * -must be possible to backtrack after - * Global commit (!revertOn) - * -clears all reparse flags - * -clears all 'parsed' reverse code - * -clears all reverse code - * -clears all alg structures - */ - -int beenCommitted( ParseTree *parseTree ) -{ - return parseTree->flags & PF_COMMITTED; -} - -Code *backupOverRcode( Code *rcode ) -{ - Word len; - rcode -= SIZEOF_WORD; - read_word_p( len, rcode ); - rcode -= len; - return rcode; -} - -/* The top level of the stack is linked right-to-left. Trees underneath are - * linked left-to-right. 
*/ -void commitKid( Program *prg, PdaRun *pdaRun, Tree **root, ParseTree *lel, Code **rcode, long *causeReduce ) -{ - ParseTree *tree = 0; - Tree **sp = root; - //Tree *restore = 0; - -head: - /* Commit */ - debug( REALM_PARSE, "commit: visiting %s\n", - prg->rtd->lelInfo[lel->id].name ); - - /* Load up the parsed tree. */ - tree = lel; - - /* Check for reverse code. */ - //restore = 0; - if ( tree->flags & PF_HAS_RCODE ) { - /* If tree caused some reductions, now is not the right time to backup - * over the reverse code. We need to backup over the reductions first. Store - * the count of the reductions and do it when the count drops to zero. */ - if ( tree->causeReduce > 0 ) { - /* The top reduce block does not correspond to this alg. */ - debug( REALM_PARSE, "commit: causeReduce found, delaying backup: %ld\n", - (long)tree->causeReduce ); - *causeReduce = tree->causeReduce; - } - else { - *rcode = backupOverRcode( *rcode ); - - //if ( **rcode == IN_RESTORE_LHS ) { - // debug( REALM_PARSE, "commit: has restore_lhs\n" ); - // read_tree_p( restore, (*rcode+1) ); - //} - } - } - - //FIXME: what was this about? - //if ( restore != 0 ) - // tree = restore; - - /* All the parse algorithm data except for the RCODE flag is in the - * original. That is why we restore first, then we can clear the retry - * values. */ - - /* Check causeReduce, might be time to backup over the reverse code - * belonging to a nonterminal that caused previous reductions. */ - if ( *causeReduce > 0 && - tree->id >= prg->rtd->firstNonTermId && - !(tree->flags & PF_TERM_DUP) ) - { - *causeReduce -= 1; - - if ( *causeReduce == 0 ) { - debug( REALM_PARSE, "commit: causeReduce dropped to zero, backing up over rcode\n" ); - - /* Cause reduce just dropped down to zero. */ - *rcode = backupOverRcode( *rcode ); - } - } - - ///* FIXME: why was this here? - // * Reset retries. 
*/ - //if ( tree->flags & AF_PARSED ) { - // if ( tree->retryLower > 0 ) { - // pdaRun->numRetry -= 1; - // tree->retryLower = 0; - // } - // if ( tree->retryUpper > 0 ) { - // pdaRun->numRetry -= 1; - // tree->retryUpper = 0; - // } - //} - - tree->flags |= PF_COMMITTED; - - /* Do not recures on trees that are terminal dups. */ - if ( !(tree->flags & PF_TERM_DUP) && - !(tree->flags & PF_NAMED) && - !(tree->flags & PF_ARTIFICIAL) && - tree->child != 0 ) - { - vm_push( (Tree*)lel ); - lel = tree->child; - - if ( lel != 0 ) { - while ( lel != 0 ) { - vm_push( (Tree*)lel ); - lel = lel->next; - } - } - } - -backup: - if ( sp != root ) { - ParseTree *next = (ParseTree*)vm_pop(); - if ( next->next == lel ) { - /* Moving backwards. */ - lel = next; - - if ( !beenCommitted( lel ) ) - goto head; - } - else { - /* Moving upwards. */ - lel = next; - } - - goto backup; - } - - pdaRun->numRetry = 0; - assert( sp == root ); -} - -void commitFull( Program *prg, Tree **sp, PdaRun *pdaRun, long causeReduce ) -{ - debug( REALM_PARSE, "running full commit" ); - - ParseTree *parseTree = pdaRun->stackTop; - Code *rcode = pdaRun->reverseCode.data + pdaRun->reverseCode.tabLen; - - /* The top level of the stack is linked right to left. This is the - * traversal order we need for committing. */ - while ( parseTree != 0 && !beenCommitted( parseTree ) ) { - commitKid( prg, pdaRun, sp, parseTree, &rcode, &causeReduce ); - parseTree = parseTree->next; - } - - /* We cannot always clear all the rcode here. We may need to backup over - * the parse statement. We depend on the context flag. */ - if ( !pdaRun->revertOn ) - rcodeDownrefAll( prg, sp, &pdaRun->reverseCode ); -} - -/* - * shift: retry goes into lower of shifted node. - * reduce: retry goes into upper of reduced node. 
- * shift-reduce: cannot be a retry - */ - -/* Stops on: - * PcrReduction - * PcrRevToken - * PcrRevReduction - */ -long parseToken( Program *prg, Tree **sp, PdaRun *pdaRun, - FsmRun *fsmRun, InputStream *inputStream, long entry ) -{ - int pos; - unsigned int *action; - int rhsLen; - int owner; - int induceReject; - int indPos; - //LangElInfo *lelInfo = prg->rtd->lelInfo; - -switch ( entry ) { -case PcrStart: - - /* The scanner will send a null token if it can't find a token. */ - if ( pdaRun->parseInput == 0 ) - goto parseError; - - /* This will cause parseInput to be lost. This - * path should be traced. */ - if ( pdaRun->cs < 0 ) - return PcrDone; - - /* Record the state in the parse tree. */ - pdaRun->parseInput->state = pdaRun->cs; - -again: - if ( pdaRun->parseInput == 0 ) - goto _out; - - pdaRun->lel = pdaRun->parseInput; - pdaRun->curState = pdaRun->cs; - - if ( pdaRun->lel->id < pdaRun->tables->keys[pdaRun->curState<<1] || - pdaRun->lel->id > pdaRun->tables->keys[(pdaRun->curState<<1)+1] ) { - debug( REALM_PARSE, "parse error, no transition 1\n" ); - pushBtPoint( prg, pdaRun ); - goto parseError; - } - - indPos = pdaRun->tables->offsets[pdaRun->curState] + - (pdaRun->lel->id - pdaRun->tables->keys[pdaRun->curState<<1]); - - owner = pdaRun->tables->owners[indPos]; - if ( owner != pdaRun->curState ) { - debug( REALM_PARSE, "parse error, no transition 2\n" ); - pushBtPoint( prg, pdaRun ); - goto parseError; - } - - pos = pdaRun->tables->indicies[indPos]; - if ( pos < 0 ) { - debug( REALM_PARSE, "parse error, no transition 3\n" ); - pushBtPoint( prg, pdaRun ); - goto parseError; - } - - /* Checking complete. */ - - induceReject = false; - pdaRun->cs = pdaRun->tables->targs[pos]; - action = pdaRun->tables->actions + pdaRun->tables->actInds[pos]; - if ( pdaRun->lel->retryLower ) - action += pdaRun->lel->retryLower; - - /* - * Shift - */ - - if ( *action & act_sb ) { - debug( REALM_PARSE, "shifted: %s\n", - prg->rtd->lelInfo[pdaRun->lel->id].name ); - /* Consume. 
*/ - pdaRun->parseInput = pdaRun->parseInput->next; - - pdaRun->lel->state = pdaRun->curState; - - /* If its a token then attach ignores and record it in the token list - * of the next ignore attachment to use. */ - if ( pdaRun->lel->id < prg->rtd->firstNonTermId ) { - if ( pdaRun->lel->causeReduce == 0 ) - attachRightIgnore( prg, sp, pdaRun, pdaRun->stackTop ); - } - - pdaRun->lel->next = pdaRun->stackTop; - pdaRun->stackTop = pdaRun->lel; - - /* If its a token then attach ignores and record it in the token list - * of the next ignore attachment to use. */ - if ( pdaRun->lel->id < prg->rtd->firstNonTermId ) { - attachLeftIgnore( prg, sp, pdaRun, pdaRun->lel ); - - Ref *ref = (Ref*)kidAllocate( prg ); - ref->kid = pdaRun->lel->shadow; - //treeUpref( pdaRun->tree ); - ref->next = pdaRun->tokenList; - pdaRun->tokenList = ref; - } - - if ( action[1] == 0 ) - pdaRun->lel->retryLower = 0; - else { - debug( REALM_PARSE, "retry: %p\n", pdaRun->stackTop ); - pdaRun->lel->retryLower += 1; - assert( pdaRun->lel->retryUpper == 0 ); - /* FIXME: Has the retry already been counted? */ - pdaRun->numRetry += 1; - } - } - - /* - * Commit - */ - - if ( pdaRun->tables->commitLen[pos] != 0 ) { - long causeReduce = 0; - if ( pdaRun->parseInput != 0 ) { - if ( pdaRun->parseInput->flags & PF_HAS_RCODE ) - causeReduce = pdaRun->parseInput->causeReduce; - } - commitFull( prg, sp, pdaRun, causeReduce ); - } - - /* - * Reduce - */ - - if ( *action & act_rb ) { - int r, objectLength; - ParseTree *last, *child; - Kid *attrs; - Kid *dataLast, *dataChild; - - /* If there was shift don't attach again. 
*/ - if ( !( *action & act_sb ) && pdaRun->lel->id < prg->rtd->firstNonTermId ) - attachRightIgnore( prg, sp, pdaRun, pdaRun->stackTop ); - - pdaRun->reduction = *action >> 2; - - if ( pdaRun->parseInput != 0 ) - pdaRun->parseInput->causeReduce += 1; - - Kid *value = kidAllocate( prg ); - value->tree = treeAllocate( prg ); - value->tree->refs = 1; - value->tree->id = prg->rtd->prodInfo[pdaRun->reduction].lhsId; - value->tree->prodNum = prg->rtd->prodInfo[pdaRun->reduction].prodNum; - - pdaRun->redLel = parseTreeAllocate( prg ); - pdaRun->redLel->id = prg->rtd->prodInfo[pdaRun->reduction].lhsId; - pdaRun->redLel->next = 0; - pdaRun->redLel->causeReduce = 0; - pdaRun->redLel->retryLower = 0; - pdaRun->redLel->shadow = value; - - /* Transfer. */ - pdaRun->redLel->retryUpper = pdaRun->lel->retryLower; - pdaRun->lel->retryLower = 0; - - /* Allocate the attributes. */ - objectLength = prg->rtd->lelInfo[pdaRun->redLel->id].objectLength; - attrs = allocAttrs( prg, objectLength ); - - /* Build the list of children. We will be giving up a reference when we - * detach parse tree and data tree, but gaining the reference when we - * put the children under the new data tree. No need to alter refcounts - * here. */ - rhsLen = prg->rtd->prodInfo[pdaRun->reduction].length; - child = last = 0; - dataChild = dataLast = 0; - for ( r = 0; r < rhsLen; r++ ) { - - /* The child. */ - child = pdaRun->stackTop; - dataChild = child->shadow; - - /* Pop. */ - pdaRun->stackTop = pdaRun->stackTop->next; - - /* Detach the parse tree from the data. */ - child->shadow = 0; - - /* Reverse list. */ - child->next = last; - dataChild->next = dataLast; - - /* Track last for reversal. 
*/ - last = child; - dataLast = dataChild; - } - - pdaRun->redLel->child = child; - pdaRun->redLel->shadow->tree->child = kidListConcat( attrs, dataChild ); - - debug( REALM_PARSE, "reduced: %s rhsLen %d\n", - prg->rtd->prodInfo[pdaRun->reduction].name, rhsLen ); - if ( action[1] == 0 ) - pdaRun->redLel->retryUpper = 0; - else { - pdaRun->redLel->retryUpper += 1; - assert( pdaRun->lel->retryLower == 0 ); - pdaRun->numRetry += 1; - debug( REALM_PARSE, "retry: %p\n", pdaRun->redLel ); - } - - /* When the production is of zero length we stay in the same state. - * Otherwise we use the state stored in the first child. */ - pdaRun->cs = rhsLen == 0 ? pdaRun->curState : child->state; - - if ( prg->ctxDepParsing && prg->rtd->prodInfo[pdaRun->reduction].frameId >= 0 ) { - /* Frame info for reduction. */ - pdaRun->fi = &prg->rtd->frameInfo[prg->rtd->prodInfo[pdaRun->reduction].frameId]; - pdaRun->frameId = prg->rtd->prodInfo[pdaRun->reduction].frameId; - pdaRun->reject = false; - pdaRun->parsed = 0; - pdaRun->code = pdaRun->fi->codeWV; - -return PcrReduction; -case PcrReduction: - - if ( prg->induceExit ) - goto fail; - - /* If the lhs was stored and it changed then we need to restore the - * original upon backtracking, otherwise downref since we took a - * copy above. */ - if ( pdaRun->parsed != 0 ) { - if ( pdaRun->parsed != pdaRun->redLel->shadow->tree ) { - debug( REALM_PARSE, "lhs tree was modified, adding a restore instruction\n" ); -// -// /* Make it into a parse tree. */ -// Tree *newPt = prepParseTree( prg, sp, pdaRun->redLel->tree ); -// treeDownref( prg, sp, pdaRun->redLel->tree ); -// -// /* Copy it in. */ -// pdaRun->redLel->tree = newPt; -// treeUpref( pdaRun->redLel->tree ); - - /* Add the restore instruct. */ - append( &pdaRun->rcodeCollect, IN_RESTORE_LHS ); - appendWord( &pdaRun->rcodeCollect, (Word)pdaRun->parsed ); - append( &pdaRun->rcodeCollect, SIZEOF_CODE + SIZEOF_WORD ); - } - else { - /* Not changed. Done with parsed. 
*/ - treeDownref( prg, sp, pdaRun->parsed ); - } - pdaRun->parsed = 0; - } - - /* Pull out the reverse code, if any. */ - makeReverseCode( pdaRun ); - transferReverseCode( pdaRun, pdaRun->redLel ); - - /* Perhaps the execution environment is telling us we need to - * reject the reduction. */ - induceReject = pdaRun->reject; - } - - /* If the left hand side was replaced then the only parse algorithm - * data that is contained in it will the PF_HAS_RCODE flag. Everthing - * else will be in the original. This requires that we restore first - * when going backwards and when doing a commit. */ - - if ( induceReject ) { - debug( REALM_PARSE, "error induced during reduction of %s\n", - prg->rtd->lelInfo[pdaRun->redLel->id].name ); - pdaRun->redLel->state = pdaRun->curState; - pdaRun->redLel->next = pdaRun->stackTop; - pdaRun->stackTop = pdaRun->redLel; - /* FIXME: What is the right argument here? */ - pushBtPoint( prg, pdaRun ); - goto parseError; - } - - pdaRun->redLel->next = pdaRun->parseInput; - pdaRun->parseInput = pdaRun->redLel; - } - - goto again; - -parseError: - debug( REALM_PARSE, "hit error, backtracking\n" ); - - if ( pdaRun->numRetry == 0 ) { - debug( REALM_PARSE, "out of retries failing parse\n" ); - goto fail; - } - - while ( 1 ) { - if ( pdaRun->onDeck ) { - debug( REALM_BYTECODE, "dropping out for reverse code call\n" ); - - pdaRun->frameId = -1; - pdaRun->code = popReverseCode( &pdaRun->reverseCode ); - -return PcrReverse; -case PcrReverse: - - decrementSteps( pdaRun ); - } - else if ( pdaRun->checkNext ) { - pdaRun->checkNext = false; - - if ( pdaRun->next > 0 && pdaRun->tables->tokenRegions[pdaRun->next] != 0 ) { - debug( REALM_PARSE, "found a new region\n" ); - pdaRun->numRetry -= 1; - pdaRun->cs = stackTopTarget( prg, pdaRun ); - pdaRun->nextRegionInd = pdaRun->next; - return PcrDone; - } - } - else if ( pdaRun->checkStop ) { - pdaRun->checkStop = false; - - if ( pdaRun->stop ) { - debug( REALM_PARSE, "stopping the backtracking, steps is %d\n", 
pdaRun->steps ); - - pdaRun->cs = stackTopTarget( prg, pdaRun ); - goto _out; - } - } - else if ( pdaRun->parseInput != 0 ) { - /* Either we are dealing with a terminal that was - * shifted or a nonterminal that was reduced. */ - if ( pdaRun->parseInput->id < prg->rtd->firstNonTermId ) { - assert( pdaRun->parseInput->retryUpper == 0 ); - - if ( pdaRun->parseInput->retryLower != 0 ) { - debug( REALM_PARSE, "found retry targ: %p\n", pdaRun->parseInput ); - - pdaRun->numRetry -= 1; - pdaRun->cs = pdaRun->parseInput->state; - goto again; - } - - if ( pdaRun->parseInput->causeReduce != 0 ) { - pdaRun->undoLel = pdaRun->stackTop; - - /* Check if we've arrived at the stack sentinal. This guard - * is here to allow us to initially set numRetry to one to - * cause the parser to backup all the way to the beginning - * when an error occurs. */ - if ( pdaRun->undoLel->next == 0 ) - break; - - /* Either we are dealing with a terminal that was - * shifted or a nonterminal that was reduced. */ - assert( !(pdaRun->stackTop->id < prg->rtd->firstNonTermId) ); - - debug( REALM_PARSE, "backing up over non-terminal: %s\n", - prg->rtd->lelInfo[pdaRun->stackTop->id].name ); - - /* Pop the item from the stack. */ - pdaRun->stackTop = pdaRun->stackTop->next; - - /* Queue it as next parseInput item. */ - pdaRun->undoLel->next = pdaRun->parseInput; - pdaRun->parseInput = pdaRun->undoLel; - } - else { - long region = pdaRun->parseInput->region; - pdaRun->next = region > 0 ? region + 1 : 0; - pdaRun->checkNext = true; - pdaRun->checkStop = true; - - sendBack( prg, sp, pdaRun, fsmRun, inputStream, pdaRun->parseInput ); - - pdaRun->parseInput = 0; - } - } - else if ( pdaRun->parseInput->flags & PF_HAS_RCODE ) { - debug( REALM_PARSE, "tree has rcode, setting on deck\n" ); - pdaRun->onDeck = true; - pdaRun->parsed = 0; - - /* Only the RCODE flag was in the replaced lhs. All the rest is in - * the the original. We read it after restoring. 
*/ - - pdaRun->parseInput->flags &= ~PF_HAS_RCODE; - } - else { - /* Remove it from the input queue. */ - pdaRun->undoLel = pdaRun->parseInput; - pdaRun->parseInput = pdaRun->parseInput->next; - - /* Extract children from the child list. */ - ParseTree *first = pdaRun->undoLel->child; - pdaRun->undoLel->child = 0; - - /* This will skip the ignores/attributes, etc. */ - Kid *dataFirst = treeExtractChild( prg, pdaRun->undoLel->shadow->tree ); - - /* Walk the child list and and push the items onto the parsing - * stack one at a time. */ - while ( first != 0 ) { - /* Get the next item ahead of time. */ - ParseTree *next = first->next; - Kid *dataNext = dataFirst->next; - - /* Push onto the stack. */ - first->next = pdaRun->stackTop; - pdaRun->stackTop = first; - - /* Reattach the data and the parse tree. */ - first->shadow = dataFirst; - - first = next; - dataFirst = dataNext; - } - - /* If there is an parseInput queued, this is one less reduction it has - * caused. */ - if ( pdaRun->parseInput != 0 ) - pdaRun->parseInput->causeReduce -= 1; - - if ( pdaRun->undoLel->retryUpper != 0 ) { - /* There is always an parseInput item here because reduce - * conflicts only happen on a lookahead character. */ - assert( pdaRun->parseInput != pdaRun->undoLel ); - assert( pdaRun->parseInput != 0 ); - assert( pdaRun->undoLel->retryLower == 0 ); - assert( pdaRun->parseInput->retryUpper == 0 ); - - /* Transfer the retry from undoLel to parseInput. */ - pdaRun->parseInput->retryLower = pdaRun->undoLel->retryUpper; - pdaRun->parseInput->retryUpper = 0; - pdaRun->parseInput->state = stackTopTarget( prg, pdaRun ); - } - - /* Free the reduced item. */ - treeDownref( prg, sp, pdaRun->undoLel->shadow->tree ); - kidFree( prg, pdaRun->undoLel->shadow ); - parseTreeFree( prg, pdaRun->undoLel ); - - /* If the stacktop had right ignore attached, detach now. 
*/ - if ( pdaRun->stackTop->flags & PF_RIGHT_IL_ATTACHED ) - detachRightIgnore( prg, sp, pdaRun, pdaRun->stackTop ); - } - } - else if ( pdaRun->accumIgnore != 0 ) { - debug( REALM_PARSE, "have accumulated ignore to undo\n" ); - - /* Send back any accumulated ignore tokens, then trigger error - * in the the parser. */ - ParseTree *ignore = pdaRun->accumIgnore; - pdaRun->accumIgnore = pdaRun->accumIgnore->next; - ignore->next = 0; - - long region = ignore->region; - pdaRun->next = region > 0 ? region + 1 : 0; - pdaRun->checkNext = true; - pdaRun->checkStop = true; - - sendBackIgnore( prg, sp, pdaRun, fsmRun, inputStream, ignore ); - - treeDownref( prg, sp, ignore->shadow->tree ); - kidFree( prg, ignore->shadow ); - parseTreeFree( prg, ignore ); - } - else { - /* Now it is time to undo something. Pick an element from the top of - * the stack. */ - pdaRun->undoLel = pdaRun->stackTop; - - /* Check if we've arrived at the stack sentinal. This guard is - * here to allow us to initially set numRetry to one to cause the - * parser to backup all the way to the beginning when an error - * occurs. */ - if ( pdaRun->undoLel->next == 0 ) - break; - - /* Either we are dealing with a terminal that was - * shifted or a nonterminal that was reduced. */ - if ( pdaRun->stackTop->id < prg->rtd->firstNonTermId ) { - debug( REALM_PARSE, "backing up over effective terminal: %s\n", - prg->rtd->lelInfo[pdaRun->stackTop->id].name ); - - /* Pop the item from the stack. */ - pdaRun->stackTop = pdaRun->stackTop->next; - - /* Queue it as next parseInput item. */ - pdaRun->undoLel->next = pdaRun->parseInput; - pdaRun->parseInput = pdaRun->undoLel; - - /* Pop from the token list. 
*/ - Ref *ref = pdaRun->tokenList; - pdaRun->tokenList = ref->next; - kidFree( prg, (Kid*)ref ); - - assert( pdaRun->accumIgnore == 0 ); - detachLeftIgnore( prg, sp, pdaRun, fsmRun, pdaRun->parseInput ); - } - else { - debug( REALM_PARSE, "backing up over non-terminal: %s\n", - prg->rtd->lelInfo[pdaRun->stackTop->id].name ); - - /* Pop the item from the stack. */ - pdaRun->stackTop = pdaRun->stackTop->next; - - /* Queue it as next parseInput item. */ - pdaRun->undoLel->next = pdaRun->parseInput; - pdaRun->parseInput = pdaRun->undoLel; - } - - /* Undo attach of right ignore. */ - if ( pdaRun->stackTop->flags & PF_RIGHT_IL_ATTACHED ) - detachRightIgnore( prg, sp, pdaRun, pdaRun->stackTop ); - } - } - -fail: - pdaRun->cs = -1; - pdaRun->parseError = 1; - - /* If we failed parsing on tree we must free it. The caller expected us to - * either consume it or send it back to the parseInput. */ - if ( pdaRun->parseInput != 0 ) { - //treeDownref( prg, sp, (Tree*)pdaRun->parseInput->tree ); - //ptKidFree( prg, pdaRun->parseInput ); - pdaRun->parseInput = 0; - } - - /* FIXME: do we still need to fall through here? A fail is permanent now, - * no longer called into again. */ - - return PcrDone; - -_out: - pdaRun->nextRegionInd = pdaRun->tables->tokenRegionInds[pdaRun->cs]; - -case PcrDone: -break; } - - return PcrDone; -} diff --git a/colm/pdarun.h b/colm/pdarun.h deleted file mode 100644 index 4ab648a6..00000000 --- a/colm/pdarun.h +++ /dev/null @@ -1,473 +0,0 @@ -/* - * Copyright 2007-2012 Adrian Thurston <thurston@complang.org> - */ - -/* This file is part of Colm. - * - * Colm is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. 
- * - * Colm is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Colm; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#ifndef __COLM_PDARUN_H -#define __COLM_PDARUN_H - -#include <colm/input.h> -#include <colm/fsmrun.h> -#include <colm/defs.h> -#include <colm/tree.h> - -#ifdef __cplusplus -extern "C" { -#endif - -struct ColmProgram; - -#define MARK_SLOTS 32 - -typedef struct _FsmTables -{ - long *actions; - long *keyOffsets; - char *transKeys; - long *singleLengths; - long *rangeLengths; - long *indexOffsets; - long *transTargsWI; - long *transActionsWI; - long *toStateActions; - long *fromStateActions; - long *eofActions; - long *eofTargs; - long *entryByRegion; - - long numStates; - long numActions; - long numTransKeys; - long numSingleLengths; - long numRangeLengths; - long numIndexOffsets; - long numTransTargsWI; - long numTransActionsWI; - long numRegions; - - long startState; - long firstFinal; - long errorState; - - struct GenAction **actionSwitch; - long numActionSwitch; -} FsmTables; - -typedef struct _FsmRun -{ - FsmTables *tables; - - RunBuf *runBuf; - - /* FsmRun State. 
*/ - long region, preRegion; - long cs, ncs, act; - char *tokstart, *tokend; - char *p, *pe, *peof; - int returnResult; - char *mark[MARK_SLOTS]; - long matchedToken; - - InputStream *attachedInput; - SourceStream *attachedSource; -} FsmRun; - -void initFsmRun( FsmRun *fsmRun, struct ColmProgram *prg ); -void clearFsmRun( struct ColmProgram *prg, FsmRun *fsmRun ); -void updatePosition( InputStream *inputStream, const char *data, long length ); -void undoPosition( InputStream *inputStream, const char *data, long length ); -void sendBackRunBufHead( FsmRun *fsmRun, InputStream *inputStream ); -void undoStreamPull( FsmRun *fsmRun, InputStream *inputStream, const char *data, long length ); - - -#if SIZEOF_LONG != 4 && SIZEOF_LONG != 8 - #error "SIZEOF_LONG contained an unexpected value" -#endif - -struct _Execution; - -typedef struct _RtCodeVect -{ - Code *data; - long tabLen; - long allocLen; - - /* FIXME: leak when freed. */ -} RtCodeVect; - -void listAddAfter( List *list, ListEl *prev_el, ListEl *new_el ); -void listAddBefore( List *list, ListEl *next_el, ListEl *new_el ); - -void listPrepend( List *list, ListEl *new_el ); -void listAppend( List *list, ListEl *new_el ); - -ListEl *listDetach( List *list, ListEl *el ); -ListEl *listDetachFirst(List *list ); -ListEl *listDetachLast(List *list ); - -long listLength(List *list); - -typedef struct _FunctionInfo -{ - const char *name; - long frameId; - long argSize; - long frameSize; -} FunctionInfo; - -/* - * Program Data. - */ - -typedef struct _PatReplInfo -{ - long offset; - long numBindings; -} PatReplInfo; - -typedef struct _PatReplNode -{ - long id; - long prodNum; - long next; - long child; - long bindId; - const char *data; - long length; - long leftIgnore; - long rightIgnore; - - /* Just match nonterminal, don't go inside. */ - unsigned char stop; -} PatReplNode; - -/* FIXME: should have a descriptor for object types to give the length. 
*/ - -typedef struct _LangElInfo -{ - const char *name; - const char *xmlTag; - unsigned char repeat; - unsigned char list; - unsigned char literal; - unsigned char ignore; - - long frameId; - - long objectTypeId; - long ofiOffset; - long objectLength; - -// long contextTypeId; -// long contextLength; - - long termDupId; - long genericId; - long markId; - long captureAttr; - long numCaptureAttr; -} LangElInfo; - -typedef struct _ObjFieldInfo -{ - int typeId; -} ObjFieldInfo; - -typedef struct _ProdInfo -{ - unsigned long lhsId; - short prodNum; - long length; - const char *name; - long frameId; - unsigned char lhsUpref; - unsigned char *copy; - long copyLen; -} ProdInfo; - -typedef struct _FrameInfo -{ - Code *codeWV; - long codeLenWV; - Code *codeWC; - long codeLenWC; - char *trees; - long treesLen; - long argSize; - long frameSize; -} FrameInfo; - -typedef struct _RegionInfo -{ - const char *name; - long defaultToken; - long eofFrameId; - int isIgnoreOnly; - int isCiOnly; - int ciLelId; -} RegionInfo; - -typedef struct _CaptureAttr -{ - long mark_enter; - long mark_leave; - long offset; -} CaptureAttr; - -typedef struct _PdaTables -{ - /* Parser table data. 
*/ - int *indicies; - int *owners; - int *keys; - unsigned int *offsets; - unsigned int *targs; - unsigned int *actInds; - unsigned int *actions; - int *commitLen; - int *tokenRegionInds; - int *tokenRegions; - int *tokenPreRegions; - - int numIndicies; - int numKeys; - int numStates; - int numTargs; - int numActInds; - int numActions; - int numCommitLen; - int numRegionItems; - int numPreRegionItems; -} PdaTables; - -typedef struct _PoolBlock -{ - void *data; - struct _PoolBlock *next; -} PoolBlock; - -typedef struct _PoolItem -{ - struct _PoolItem *next; -} PoolItem; - -typedef struct _PoolAlloc -{ - PoolBlock *head; - long nextel; - PoolItem *pool; - int sizeofT; -} PoolAlloc; - -typedef struct _PdaRun -{ - int numRetry; - ParseTree *stackTop; - Ref *tokenList; - int cs; - int nextRegionInd; - - PdaTables *tables; - int parserId; - - /* Reused. */ - RtCodeVect rcodeCollect; - RtCodeVect reverseCode; - - int stopParsing; - long stopTarget; - - ParseTree *accumIgnore; - - Kid *btPoint; - - struct Bindings *bindings; - - int revertOn; - - Tree *context; - - int stop; - int parseError; - - long steps; - long targetSteps; - - int onDeck; - - /* - * Data we added when refactoring the parsing engine into a coroutine. - */ - - ParseTree *parseInput; - FrameInfo *fi; - int reduction; - ParseTree *redLel; - int curState; - ParseTree *lel; - int triggerUndo; - - int tokenId; - Head *tokdata; - int frameId; - int next; - ParseTree *undoLel; - - int checkNext; - int checkStop; - - /* The lhs is sometimes saved before reduction actions in case it is - * replaced and we need to restore it on backtracking */ - Tree *parsed; - - int reject; - - /* Instruction pointer to use when we stop parsing and execute code. 
*/ - Code *code; - - int rcBlockCount; -} PdaRun; - -void rtCodeVectReplace( RtCodeVect *vect, long pos, const Code *val, long len ); -void rtCodeVectEmpty( RtCodeVect *vect ); -void rtCodeVectRemove( RtCodeVect *vect, long pos, long len ); - -void initRtCodeVect( RtCodeVect *codeVect ); - -//inline static void remove( RtCodeVect *vect, long pos ); -inline static void append( RtCodeVect *vect, const Code val ); -inline static void append2( RtCodeVect *vect, const Code *val, long len ); -inline static void appendHalf( RtCodeVect *vect, Half half ); -inline static void appendWord( RtCodeVect *vect, Word word ); - -inline static void append2( RtCodeVect *vect, const Code *val, long len ) -{ - rtCodeVectReplace( vect, vect->tabLen, val, len ); -} - -inline static void append( RtCodeVect *vect, const Code val ) -{ - rtCodeVectReplace( vect, vect->tabLen, &val, 1 ); -} - -inline static void appendHalf( RtCodeVect *vect, Half half ) -{ - /* not optimal. */ - append( vect, half & 0xff ); - append( vect, (half>>8) & 0xff ); -} - -inline static void appendWord( RtCodeVect *vect, Word word ) -{ - /* not optimal. 
*/ - append( vect, word & 0xff ); - append( vect, (word>>8) & 0xff ); - append( vect, (word>>16) & 0xff ); - append( vect, (word>>24) & 0xff ); - #if SIZEOF_LONG == 8 - append( vect, (word>>32) & 0xff ); - append( vect, (word>>40) & 0xff ); - append( vect, (word>>48) & 0xff ); - append( vect, (word>>56) & 0xff ); - #endif -} - -void incrementSteps( PdaRun *pdaRun ); -void decrementSteps( PdaRun *pdaRun ); - -int makeReverseCode( PdaRun *pdaRun ); -void transferReverseCode( PdaRun *pdaRun, ParseTree *tree ); - -void initPdaRun( PdaRun *pdaRun, struct ColmProgram *prg, PdaTables *tables, - FsmRun *fsmRun, int parserId, long stopTarget, int revertOn, Tree *context ); -void clearPdaRun( struct ColmProgram *prg, Tree **root, PdaRun *pdaRun ); - -void initInputStream( InputStream *inputStream ); -void clearInputStream( struct ColmProgram *prg, Tree **sp, InputStream *inputStream ); -void initSourceStream( SourceStream *in ); -void clearSourceStream( struct ColmProgram *prg, Tree **sp, SourceStream *sourceStream ); - - -void clearContext( PdaRun *pdaRun, Tree **sp ); -Kid *extractIgnore( PdaRun *pdaRun ); -long stackTopTarget( struct ColmProgram *prg, PdaRun *pdaRun ); -void runCommit( PdaRun *pdaRun ); -int isParserStopFinished( PdaRun *pdaRun ); -void pdaRunMatch( PdaRun *pdaRun, Kid *tree, Kid *pattern ); - -/* Offset can be used to look at the next nextRegionInd. 
*/ -int pdaRunGetNextRegion( PdaRun *pdaRun, int offset ); -int pdaRunGetNextPreRegion( PdaRun *pdaRun ); - -#define PcrStart 1 -#define PcrDone 2 -#define PcrReduction 3 -#define PcrGeneration 4 -#define PcrPreEof 5 -#define PcrReverse 6 - -long parseToken( struct ColmProgram *prg, Tree **sp, PdaRun *pdaRun, - FsmRun *fsmRun, InputStream *inputStream, long entry ); - -long undoParse( Tree **sp, PdaRun *pdaRun, FsmRun *fsmRun, InputStream *inputStream, Tree *tree ); - -Head *streamPull( struct ColmProgram *prg, FsmRun *fsmRun, InputStream *inputStream, long length ); -Head *stringAllocPointer( struct ColmProgram *prg, const char *data, long length ); - -void streamPushText( FsmRun *fsmRun, InputStream *inputStream, const char *data, long length ); -void streamPushTree( FsmRun *fsmRun, InputStream *inputStream, Tree *tree, int ignore ); -void undoStreamPush( struct ColmProgram *prg, Tree **sp, FsmRun *fsmRun, InputStream *inputStream, long length ); -void undoStreamAppend( struct ColmProgram *prg, Tree **sp, FsmRun *fsmRun, InputStream *inputStream, struct ColmTree *tree, long length ); -Kid *makeTokenWithData( struct ColmProgram *prg, PdaRun *pdaRun, FsmRun *fsmRun, - InputStream *inputStream, int id, Head *tokdata ); - -void pushBinding( PdaRun *pdaRun, ParseTree *parseTree ); -void popBinding( PdaRun *pdaRun, ParseTree *parseTree ); - -void executeGenerationAction( struct ColmProgram *prg, Tree **sp, FsmRun *fsmRun, PdaRun *pdaRun, - InputStream *inputStream, int frameId, Code *code, long id, Head *tokdata ); -Kid *extractIgnore( PdaRun *pdaRun ); -long sendBackQueuedIgnore( struct ColmProgram *prg, Tree **sp, InputStream *inputStream, - FsmRun *fsmRun, PdaRun *pdaRun, long entry ); -void clearIgnoreList( struct ColmProgram *prg, Tree **sp, Kid *kid ); -Head *extractMatch( struct ColmProgram *prg, FsmRun *fsmRun, InputStream *inputStream ); -Head *extractMatch( struct ColmProgram *prg, FsmRun *fsmRun, InputStream *inputStream ); -void newToken( struct ColmProgram 
*prg, PdaRun *pdaRun, FsmRun *fsmRun ); -void fsmExecute( FsmRun *fsmRun, InputStream *inputStream ); -void sendNamedLangEl( struct ColmProgram *prg, Tree **sp, PdaRun *pdaRun, FsmRun *fsmRun, InputStream *inputStream ); -long parseLoop( struct ColmProgram *prg, Tree **sp, PdaRun *pdaRun, - FsmRun *fsmRun, InputStream *inputStream, long entry ); -void initBindings( PdaRun *pdaRun ); -Tree *getParsedRoot( PdaRun *pdaRun, int stop ); -void undoParseStream( struct ColmProgram *prg, Tree **sp, InputStream *inputStream, FsmRun *fsmRun, - PdaRun *pdaRun, long steps ); - -void clearBuffered( FsmRun *fsmRun ); -void resetToken( FsmRun *fsmRun ); - -void detachInput( FsmRun *fsmRun, InputStream *is ); -void attachInput( FsmRun *fsmRun, InputStream *is ); -void detachSource( FsmRun *fsmRun, SourceStream *ss ); -void attachSource( FsmRun *fsmRun, SourceStream *ss ); - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/colm/pool.c b/colm/pool.c deleted file mode 100644 index f531338d..00000000 --- a/colm/pool.c +++ /dev/null @@ -1,330 +0,0 @@ -/* - * Copyright 2010-2012 Adrian Thurston <thurston@complang.org> - */ - -/* This file is part of Colm. - * - * Colm is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * Colm is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with Colm; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include <string.h> -#include <stdlib.h> - -#include <colm/pdarun.h> -#include <colm/pool.h> -#include <colm/debug.h> - -void initPoolAlloc( PoolAlloc *poolAlloc, int sizeofT ) -{ - poolAlloc->head = 0; - poolAlloc->nextel = FRESH_BLOCK; - poolAlloc->pool = 0; - poolAlloc->sizeofT = sizeofT; -} - -void *poolAllocAllocate( PoolAlloc *poolAlloc ) -{ - debug( REALM_POOL, "pool allocation\n" ); - -#ifdef POOL_MALLOC - void *res = malloc( poolAlloc->sizeofT ); - memset( res, 0, poolAlloc->sizeofT ); - return res; -#else - //#ifdef COLM_LOG_BYTECODE - //cerr << "allocating in: " << __PRETTY_FUNCTION__ << endl; - //#endif - - void *newEl = 0; - if ( poolAlloc->pool == 0 ) { - if ( poolAlloc->nextel == FRESH_BLOCK ) { - //#ifdef COLM_LOG_BYTECODE - //if ( colm_log_bytecode ) - // cerr << "allocating " << FRESH_BLOCK << " Elements of type T" << endl; - //#endif - - PoolBlock *newBlock = (PoolBlock*)malloc( sizeof(PoolBlock) ); - newBlock->data = malloc( poolAlloc->sizeofT * FRESH_BLOCK ); - newBlock->next = poolAlloc->head; - poolAlloc->head = newBlock; - poolAlloc->nextel = 0; - } - - newEl = (char*)poolAlloc->head->data + poolAlloc->sizeofT * poolAlloc->nextel++; - } - else { - newEl = poolAlloc->pool; - poolAlloc->pool = poolAlloc->pool->next; - } - memset( newEl, 0, poolAlloc->sizeofT ); - return newEl; -#endif -} - -void poolAllocFree( PoolAlloc *poolAlloc, void *el ) -{ - #if 0 - /* Some sanity checking. Best not to normally run with this on. 
*/ - char *p = (char*)el + sizeof(PoolItem*); - char *pe = (char*)el + sizeof(T); - for ( ; p < pe; p++ ) - assert( *p != 0xcc ); - memset( el, 0xcc, sizeof(T) ); - #endif - -#ifdef POOL_MALLOC - free( el ); -#else - PoolItem *pi = (PoolItem*) el; - pi->next = poolAlloc->pool; - poolAlloc->pool = pi; -#endif -} - -void poolAllocClear( PoolAlloc *poolAlloc ) -{ - PoolBlock *block = poolAlloc->head; - while ( block != 0 ) { - PoolBlock *next = block->next; - free( block->data ); - free( block ); - block = next; - } - - poolAlloc->head = 0; - poolAlloc->nextel = 0; - poolAlloc->pool = 0; -} - -long poolAllocNumLost( PoolAlloc *poolAlloc ) -{ - /* Count the number of items allocated. */ - long lost = 0; - PoolBlock *block = poolAlloc->head; - if ( block != 0 ) { - lost = poolAlloc->nextel; - block = block->next; - while ( block != 0 ) { - lost += FRESH_BLOCK; - block = block->next; - } - } - - /* Subtract. Items that are on the free list. */ - PoolItem *pi = poolAlloc->pool; - while ( pi != 0 ) { - lost -= 1; - pi = pi->next; - } - - return lost; -} - -/* - * Kid - */ - -Kid *kidAllocate( Program *prg ) -{ - return (Kid*) poolAllocAllocate( &prg->kidPool ); -} - -void kidFree( Program *prg, Kid *el ) -{ - poolAllocFree( &prg->kidPool, el ); -} - -void kidClear( Program *prg ) -{ - poolAllocClear( &prg->kidPool ); -} - -long kidNumLost( Program *prg ) -{ - return poolAllocNumLost( &prg->kidPool ); -} - -/* - * Tree - */ - -Tree *treeAllocate( Program *prg ) -{ - return (Tree*) poolAllocAllocate( &prg->treePool ); -} - -void treeFree( Program *prg, Tree *el ) -{ - poolAllocFree( &prg->treePool, el ); -} - -void treeClear( Program *prg ) -{ - poolAllocClear( &prg->treePool ); -} - -long treeNumLost( Program *prg ) -{ - return poolAllocNumLost( &prg->treePool ); -} - -/* - * ParseTree - */ - -ParseTree *parseTreeAllocate( Program *prg ) -{ - return (ParseTree*) poolAllocAllocate( &prg->parseTreePool ); -} - -void parseTreeFree( Program *prg, ParseTree *el ) -{ - 
poolAllocFree( &prg->parseTreePool, el ); -} - -void parseTreeClear( Program *prg ) -{ - poolAllocClear( &prg->parseTreePool ); -} - -long parseTreeNumLost( Program *prg ) -{ - return poolAllocNumLost( &prg->parseTreePool ); -} - -/* - * ListEl - */ - -ListEl *listElAllocate( Program *prg ) -{ - return (ListEl*) poolAllocAllocate( &prg->listElPool ); -} - -void listElFree( Program *prg, ListEl *el ) -{ - poolAllocFree( &prg->listElPool, el ); -} - -void listElClear( Program *prg ) -{ - poolAllocClear( &prg->listElPool ); -} - -long listElNumLost( Program *prg ) -{ - return poolAllocNumLost( &prg->listElPool ); -} - -/* - * MapEl - */ - -MapEl *mapElAllocate( Program *prg ) -{ - return (MapEl*) poolAllocAllocate( &prg->mapElPool ); -} - -void mapElFree( Program *prg, MapEl *el ) -{ - poolAllocFree( &prg->mapElPool, el ); -} - -void mapElClear( Program *prg ) -{ - poolAllocClear( &prg->mapElPool ); -} - -long mapElNumLost( Program *prg ) -{ - return poolAllocNumLost( &prg->mapElPool ); -} - -/* - * Head - */ - -Head *headAllocate( Program *prg ) -{ - return (Head*) poolAllocAllocate( &prg->headPool ); -} - -void headFree( Program *prg, Head *el ) -{ - poolAllocFree( &prg->headPool, el ); -} - -void headClear( Program *prg ) -{ - poolAllocClear( &prg->headPool ); -} - -long headNumLost( Program *prg ) -{ - return poolAllocNumLost( &prg->headPool ); -} - -/* - * Location - */ - -Location *locationAllocate( Program *prg ) -{ - return (Location*) poolAllocAllocate( &prg->locationPool ); -} - -void locationFree( Program *prg, Location *el ) -{ - poolAllocFree( &prg->locationPool, el ); -} - -void locationClear( Program *prg ) -{ - poolAllocClear( &prg->locationPool ); -} - -long locationNumLost( Program *prg ) -{ - return poolAllocNumLost( &prg->locationPool ); -} - -/* - * Stream - */ - -Stream *streamAllocate( Program *prg ) -{ - return (Stream*)mapElAllocate( prg ); -} - -void streamFree( Program *prg, Stream *stream ) -{ - mapElFree( prg, (MapEl*)stream ); -} - - -/* 
- * Input - */ - -Input *inputAllocate( Program *prg ) -{ - return (Input*)mapElAllocate( prg ); -} - -void inputFree( Program *prg, Input *accumStream ) -{ - mapElFree( prg, (MapEl*)accumStream ); -} diff --git a/colm/pool.h b/colm/pool.h deleted file mode 100644 index 454a5354..00000000 --- a/colm/pool.h +++ /dev/null @@ -1,86 +0,0 @@ -/* - * Copyright 2010-2012 Adrian Thurston <thurston@complang.org> - */ - -/* This file is part of Colm. - * - * Colm is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * Colm is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Colm; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#ifndef _POOL_H -#define _POOL_H - -/* Allocation, number of items. 
*/ -#define FRESH_BLOCK 8128 - -#include <colm/pdarun.h> -#include <colm/map.h> -#include <colm/tree.h> - -#ifdef __cplusplus -extern "C" { -#endif - -void initPoolAlloc( PoolAlloc *poolAlloc, int sizeofT ); - -Kid *kidAllocate( Program *prg ); -void kidFree( Program *prg, Kid *el ); -void kidClear( Program *prg ); -long kidNumLost( Program *prg ); - -Tree *treeAllocate( Program *prg ); -void treeFree( Program *prg, Tree *el ); -void treeClear( Program *prg ); -long treeNumLost( Program *prg ); - -ParseTree *parseTreeAllocate( Program *prg ); -void parseTreeFree( Program *prg, ParseTree *el ); -void parseTreeClear( Program *prg ); -long parseTreeNumLost( Program *prg ); - -ListEl *listElAllocate( Program *prg ); -void listElFree( Program *prg, ListEl *el ); -void listElClear( Program *prg ); -long listElNumLost( Program *prg ); - -MapEl *mapElAllocate( Program *prg ); -void mapElFree( Program *prg, MapEl *el ); -void mapElClear( Program *prg ); -long mapElNumLost( Program *prg ); - -Head *headAllocate( Program *prg ); -void headFree( Program *prg, Head *el ); -void headClear( Program *prg ); -long headNumLost( Program *prg ); - -Location *locationAllocate( Program *prg ); -void locationFree( Program *prg, Location *el ); -void locationClear( Program *prg ); -long locationNumLost( Program *prg ); - -Stream *streamAllocate( Program *prg ); -void streamFree( Program *prg, Stream *stream ); - -Input *inputAllocate( Program *prg ); -void inputFree( Program *prg, Input *stream ); - -/* Wrong place. */ -TreePair mapRemove( Program *prg, Map *map, Tree *key ); - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/colm/program.c b/colm/program.c deleted file mode 100644 index 50b41fa1..00000000 --- a/colm/program.c +++ /dev/null @@ -1,254 +0,0 @@ -/* - * Copyright 2006-2012 Adrian Thurston <thurston@complang.org> - */ - -/* This file is part of Colm. 
- * - * Colm is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * Colm is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Colm; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include <colm/pdarun.h> -#include <colm/fsmrun.h> -#include <colm/tree.h> -#include <colm/bytecode.h> -#include <colm/pool.h> -#include <colm/debug.h> -#include <colm/config.h> - -#include <alloca.h> -#include <sys/mman.h> -#include <string.h> -#include <assert.h> -#include <stdlib.h> - -void colmInit( long debugRealm ) -{ - /* Always on because because logging is controlled with ifdefs in\n" the - * runtime lib. */ - colm_log_bytecode = 1; - colm_log_parse = 1; - colm_log_match = 1; - colm_log_compile = 1; - colm_log_conds = 1; - colmActiveRealm = debugRealm; - initInputFuncs(); -} - -void colmRunProgram( Program *prg ) -{ - assert( sizeof(Int) <= sizeof(Tree) ); - assert( sizeof(Str) <= sizeof(Tree) ); - assert( sizeof(Pointer) <= sizeof(Tree) ); - assert( sizeof(Map) <= sizeof(MapEl) ); - assert( sizeof(List) <= sizeof(MapEl) ); - assert( sizeof(Stream) <= sizeof(MapEl) ); - assert( sizeof(Parser) <= sizeof(MapEl) ); - - /* Allocate the global variable. */ - allocGlobal( prg ); - - /* - * Allocate the VM stack. 
- */ - - prg->vm_stack = stackAlloc(); - prg->vm_root = &prg->vm_stack[VM_STACK_SIZE]; - - /* - * Execute - */ - if ( prg->rtd->rootCodeLen > 0 ) { - //RtCodeVect rcodeCollect; - Execution execution; - - initExecution( &execution, 0, 0, 0, 0, prg->rtd->rootFrameId ); - mainExecution( prg, &execution, prg->rtd->rootCode ); - } -} - -void clearGlobal( Program *prg, Tree **sp ) -{ - /* Downref all the fields in the global object. */ - int g; - for ( g = 0; g < prg->rtd->globalSize; g++ ) { - //assert( getAttr( global, g )->refs == 1 ); - treeDownref( prg, sp, getAttr( prg->global, g ) ); - } - - /* Free the global object. */ - if ( prg->rtd->globalSize > 0 ) - freeAttrs( prg, prg->global->child ); - treeFree( prg, prg->global ); -} - -void allocGlobal( Program *prg ) -{ - /* Alloc the global. */ - Tree *tree = treeAllocate( prg ); - tree->child = allocAttrs( prg, prg->rtd->globalSize ); - tree->refs = 1; - prg->global = tree; -} - -Tree **stackAlloc() -{ - //return new Tree*[VM_STACK_SIZE]; - - return (Tree**)mmap( 0, sizeof(Tree*)*VM_STACK_SIZE, - PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, -1, 0 ); -} - -Tree **vm_root( struct ColmProgram *prg ) -{ - return prg->vm_root; -} - -Tree *returnVal( struct ColmProgram *prg ) -{ - return prg->returnVal; -} - - -Program *colmNewProgram( RuntimeData *rtd, int argc, const char **argv ) -{ - Program *prg = malloc(sizeof(Program)); - memset( prg, 0, sizeof(Program) ); - prg->argc = argc; - prg->argv = argv; - prg->rtd = rtd; - prg->ctxDepParsing = 1; - prg->global = 0; - prg->heap = 0; - prg->stdinVal = 0; - prg->stdoutVal = 0; - prg->stderrVal = 0; - prg->induceExit = 0; - prg->exitStatus = 0; - - initPoolAlloc( &prg->kidPool, sizeof(Kid) ); - initPoolAlloc( &prg->treePool, sizeof(Tree) ); - initPoolAlloc( &prg->parseTreePool, sizeof(ParseTree) ); - initPoolAlloc( &prg->listElPool, sizeof(ListEl) ); - initPoolAlloc( &prg->mapElPool, sizeof(MapEl) ); - initPoolAlloc( &prg->headPool, sizeof(Head) ); - initPoolAlloc( 
&prg->locationPool, sizeof(Location) ); - - Int *trueInt = (Int*) treeAllocate( prg ); - trueInt->id = LEL_ID_BOOL; - trueInt->refs = 1; - trueInt->value = 1; - - Int *falseInt = (Int*) treeAllocate( prg ); - falseInt->id = LEL_ID_BOOL; - falseInt->refs = 1; - falseInt->value = 0; - - prg->trueVal = (Tree*)trueInt; - prg->falseVal = (Tree*)falseInt; - - prg->allocRunBuf = 0; - prg->returnVal = 0; - prg->lastParseError = 0; - - return prg; -} - -int colmDeleteProgram( Program *prg ) -{ - Tree **sp = prg->vm_root; - int exitStatus = prg->exitStatus; - - #ifdef COLM_LOG_BYTECODE - if ( colm_log_bytecode ) { - cerr << "clearing the prg" << endl; - } - #endif - - treeDownref( prg, sp, prg->returnVal ); - treeDownref( prg, sp, prg->lastParseError ); - clearGlobal( prg, sp ); - - /* Clear the heap. */ - Kid *a = prg->heap; - while ( a != 0 ) { - Kid *next = a->next; - treeDownref( prg, sp, a->tree ); - kidFree( prg, a ); - a = next; - } - - //assert( trueVal->refs == 1 ); - //assert( falseVal->refs == 1 ); - treeDownref( prg, sp, prg->trueVal ); - treeDownref( prg, sp, prg->falseVal ); - - treeDownref( prg, sp, (Tree*)prg->stdinVal ); - treeDownref( prg, sp, (Tree*)prg->stdoutVal ); - treeDownref( prg, sp, (Tree*)prg->stderrVal ); - -#if DEBUG - long kidLost = kidNumLost( prg ); - long treeLost = treeNumLost( prg ); - long parseTreeLost = parseTreeNumLost( prg ); - long listLost = listElNumLost( prg ); - long mapLost = mapElNumLost( prg ); - long headLost = headNumLost( prg ); - long locationLost = locationNumLost( prg ); - - if ( kidLost ) - message( "warning: lost kids: %ld\n", kidLost ); - - if ( treeLost ) - message( "warning: lost trees: %ld\n", treeLost ); - - if ( parseTreeLost ) - message( "warning: lost parse trees: %ld\n", parseTreeLost ); - - if ( listLost ) - message( "warning: lost listEls: %ld\n", listLost ); - - if ( mapLost ) - message( "warning: lost mapEls: %ld\n", mapLost ); - - if ( headLost ) - message( "warning: lost heads: %ld\n", headLost ); - - if 
( locationLost ) - message( "warning: lost locations: %ld\n", locationLost ); -#endif - - kidClear( prg ); - treeClear( prg ); - headClear( prg ); - parseTreeClear( prg ); - listElClear( prg ); - mapElClear( prg ); - locationClear( prg ); - - //memset( vm_stack, 0, sizeof(Tree*) * VM_STACK_SIZE); - - RunBuf *rb = prg->allocRunBuf; - while ( rb != 0 ) { - RunBuf *next = rb->next; - free( rb ); - rb = next; - } - - free( prg ); - - return exitStatus; -} - - diff --git a/colm/program.h b/colm/program.h deleted file mode 100644 index b5de90b3..00000000 --- a/colm/program.h +++ /dev/null @@ -1,128 +0,0 @@ -/* - * Copyright 2007-2012 Adrian Thurston <thurston@complang.org> - */ - -/* This file is part of Colm. - * - * Colm is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * Colm is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with Colm; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#ifndef __COLM_PROGRAM_H -#define __COLM_PROGRAM_H - -#include <colm/pdarun.h> - -typedef struct ColmRuntimeData -{ - LangElInfo *lelInfo; - long numLangEls; - - ProdInfo *prodInfo; - long numProds; - - RegionInfo *regionInfo; - long numRegions; - - Code *rootCode; - long rootCodeLen; - long rootFrameId; - - FrameInfo *frameInfo; - long numFrames; - - FunctionInfo *functionInfo; - long numFunctions; - - PatReplInfo *patReplInfo; - long numPatterns; - - PatReplNode *patReplNodes; - long numPatternNodes; - - GenericInfo *genericInfo; - long numGenerics; - - long argvGenericId; - - const char **litdata; - long *litlen; - Head **literals; - long numLiterals; - - CaptureAttr *captureAttr; - long numCapturedAttr; - - FsmTables *fsmTables; - PdaTables *pdaTables; - int *startStates; - int *eofLelIds; - int *parserLelIds; - long numParsers; - - long globalSize; - - long firstNonTermId; - - long integerId; - long stringId; - long anyId; - long eofId; - long noTokenId; -} RuntimeData; - - -typedef struct ColmProgram -{ - int argc; - const char **argv; - - unsigned char ctxDepParsing; - RuntimeData *rtd; - Tree *global; - int induceExit; - int exitStatus; - - PoolAlloc kidPool; - PoolAlloc treePool; - PoolAlloc parseTreePool; - PoolAlloc listElPool; - PoolAlloc mapElPool; - PoolAlloc headPool; - PoolAlloc locationPool; - - Tree *trueVal; - Tree *falseVal; - - Kid *heap; - - Tree **se; - - Stream *stdinVal; - Stream *stdoutVal; - Stream *stderrVal; - - RunBuf *allocRunBuf; - - Tree **vm_stack; - Tree **vm_root; - - /* Returned from the main line. Should have exports instead. */ - Tree *returnVal; - - /* The most recent parse error. Should be returned from the parsing function. 
*/ - Tree *lastParseError; -} Program; - -#endif diff --git a/colm/redbuild.cc b/colm/redbuild.cc deleted file mode 100644 index ae5faf38..00000000 --- a/colm/redbuild.cc +++ /dev/null @@ -1,650 +0,0 @@ -/* - * Copyright 2006-2012 Adrian Thurston <thurston@complang.org> - */ - -/* This file is part of Colm. - * - * Colm is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * Colm is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Colm; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - - -#include "global.h" -#include "redbuild.h" -#include "fsmgraph.h" -#include "redfsm.h" -#include "fsmcodegen.h" -#include <string.h> - -using namespace std; - -RedFsmBuild::RedFsmBuild( char *fsmName, Compiler *pd, FsmGraph *fsm ) -: - fsmName(fsmName), - pd(pd), - fsm(fsm), - nextActionTableId(0), - startState(-1), - errState(-1) -{ -} - -void RedFsmBuild::initActionList( unsigned long length ) -{ - redFsm->allActions = new GenAction[length]; - memset( redFsm->allActions, 0, sizeof(GenAction) * length ); - for ( unsigned long a = 0; a < length; a++ ) - redFsm->genActionList.append( redFsm->allActions+a ); -} - - -void RedFsmBuild::makeActionList() -{ - /* Determine which actions to write. 
*/ - int nextActionId = 0; - for ( ActionList::Iter act = pd->actionList; act.lte(); act++ ) { - if ( act->numRefs() > 0 || act->numCondRefs > 0 ) - act->actionId = nextActionId++; - } - - initActionList( nextActionId ); - curAction = 0; - - for ( ActionList::Iter act = pd->actionList; act.lte(); act++ ) { - if ( act->actionId >= 0 ) - makeAction( act ); - } -} - -void RedFsmBuild::initActionTableList( unsigned long length ) -{ - redFsm->allActionTables = new RedAction[length]; -} - -void RedFsmBuild::initStateList( unsigned long length ) -{ - redFsm->allStates = new RedState[length]; - for ( unsigned long s = 0; s < length; s++ ) - redFsm->stateList.append( redFsm->allStates+s ); - - /* We get the start state as an offset, set the pointer now. */ - assert( startState >= 0 ); - redFsm->startState = redFsm->allStates + startState; - if ( errState >= 0 ) - redFsm->errState = redFsm->allStates + errState; - for ( EntryIdVect::Iter en = redFsm->entryPointIds; en.lte(); en++ ) - redFsm->entryPoints.insert( redFsm->allStates + *en ); - - /* The nextStateId is no longer used to assign state ids (they come in set - * from the frontend now), however generation code still depends on it. - * Should eventually remove this variable. */ - redFsm->nextStateId = redFsm->stateList.length(); -} - -void RedFsmBuild::addEntryPoint( int entryId, char *name, unsigned long entryState ) -{ - redFsm->entryPointIds.append( entryState ); - redFsm->entryPointNames.append( name ); - redFsm->redEntryMap.insert( entryId, entryState ); -} - -void RedFsmBuild::addRegionToEntry( int regionId, int entryId ) -{ - assert( regionId == redFsm->regionToEntry.length() ); - redFsm->regionToEntry.append( entryId ); -} - -void RedFsmBuild::initTransList( int snum, unsigned long length ) -{ - /* Could preallocate the out range to save time growing it. For now do - * nothing. 
*/ -} - -void RedFsmBuild::newTrans( int snum, int tnum, Key lowKey, - Key highKey, long targ, long action ) -{ - /* Get the current state and range. */ - RedState *curState = redFsm->allStates + snum; - RedTransList &destRange = curState->outRange; - - if ( curState == redFsm->errState ) - return; - - /* Make the new transitions. */ - RedState *targState = targ >= 0 ? (redFsm->allStates + targ) : - redFsm->wantComplete ? redFsm->getErrorState() : 0; - RedAction *actionTable = action >= 0 ? (redFsm->allActionTables + action) : 0; - RedTrans *trans = redFsm->allocateTrans( targState, actionTable ); - RedTransEl transEl( lowKey, highKey, trans ); - - if ( redFsm->wantComplete ) { - /* If the machine is to be complete then we need to fill any gaps with - * the error transitions. */ - if ( destRange.length() == 0 ) { - /* Range is currently empty. */ - if ( keyOps->minKey < lowKey ) { - /* The first range doesn't start at the low end. */ - Key fillHighKey = lowKey; - fillHighKey.decrement(); - - /* Create the filler with the state's error transition. */ - RedTransEl newTel( keyOps->minKey, fillHighKey, redFsm->getErrorTrans() ); - destRange.append( newTel ); - } - } - else { - /* The range list is not empty, get the the last range. */ - RedTransEl *last = &destRange[destRange.length()-1]; - Key nextKey = last->highKey; - nextKey.increment(); - if ( nextKey < lowKey ) { - /* There is a gap to fill. Make the high key. */ - Key fillHighKey = lowKey; - fillHighKey.decrement(); - - /* Create the filler with the state's error transtion. */ - RedTransEl newTel( nextKey, fillHighKey, redFsm->getErrorTrans() ); - destRange.append( newTel ); - } - } - } - - /* Filler taken care of. Append the range. */ - destRange.append( RedTransEl( lowKey, highKey, trans ) ); -} - -void RedFsmBuild::finishTransList( int snum ) -{ - /* Get the current state and range. 
*/ - RedState *curState = redFsm->allStates + snum; - RedTransList &destRange = curState->outRange; - - if ( curState == redFsm->errState ) - return; - - /* If building a complete machine we may need filler on the end. */ - if ( redFsm->wantComplete ) { - /* Check if there are any ranges already. */ - if ( destRange.length() == 0 ) { - /* Fill with the whole alphabet. */ - /* Add the range on the lower and upper bound. */ - RedTransEl newTel( keyOps->minKey, keyOps->maxKey, redFsm->getErrorTrans() ); - destRange.append( newTel ); - } - else { - /* Get the last and check for a gap on the end. */ - RedTransEl *last = &destRange[destRange.length()-1]; - if ( last->highKey < keyOps->maxKey ) { - /* Make the high key. */ - Key fillLowKey = last->highKey; - fillLowKey.increment(); - - /* Create the new range with the error trans and append it. */ - RedTransEl newTel( fillLowKey, keyOps->maxKey, redFsm->getErrorTrans() ); - destRange.append( newTel ); - } - } - } -} - -void RedFsmBuild::setId( int snum, int id ) -{ - RedState *curState = redFsm->allStates + snum; - curState->id = id; -} - -void RedFsmBuild::setEofTrans( int snum, int eofTarget, int actId ) -{ - RedState *curState = redFsm->allStates + snum; - RedState *targState = redFsm->allStates + eofTarget; - RedAction *eofAct = redFsm->allActionTables + actId; - curState->eofTrans = redFsm->allocateTrans( targState, eofAct ); -} - -void RedFsmBuild::setFinal( int snum ) -{ - RedState *curState = redFsm->allStates + snum; - curState->isFinal = true; -} - - -void RedFsmBuild::setStateActions( int snum, long toStateAction, - long fromStateAction, long eofAction ) -{ - RedState *curState = redFsm->allStates + snum; - if ( toStateAction >= 0 ) - curState->toStateAction = redFsm->allActionTables + toStateAction; - if ( fromStateAction >= 0 ) - curState->fromStateAction = redFsm->allActionTables + fromStateAction; - if ( eofAction >= 0 ) - curState->eofAction = redFsm->allActionTables + eofAction; -} - -void 
RedFsmBuild::closeMachine() -{ - //for ( GenActionList::Iter a = redFsm->actionList; a.lte(); a++ ) - // resolveTargetStates( a->inlineList ); - - /* Note that even if we want a complete graph we do not give the error - * state a default transition. All machines break out of the processing - * loop when in the error state. */ - - for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { - for ( GenStateCondList::Iter sci = st->stateCondList; sci.lte(); sci++ ) - st->stateCondVect.append( sci ); - } -} - -void RedFsmBuild::initCondSpaceList( ulong length ) -{ - redFsm->allCondSpaces = new GenCondSpace[length]; - for ( ulong c = 0; c < length; c++ ) - redFsm->condSpaceList.append( redFsm->allCondSpaces + c ); -} - -void RedFsmBuild::newCondSpace( int cnum, int condSpaceId, Key baseKey ) -{ - GenCondSpace *cond = redFsm->allCondSpaces + cnum; - cond->condSpaceId = condSpaceId; - cond->baseKey = baseKey; -} - -void RedFsmBuild::condSpaceItem( int cnum, long condActionId ) -{ - GenCondSpace *cond = redFsm->allCondSpaces + cnum; - cond->condSet.append( redFsm->allActions + condActionId ); -} - -void RedFsmBuild::initStateCondList( int snum, ulong length ) -{ - /* Could preallocate these, as we could with transitions. */ -} - -void RedFsmBuild::addStateCond( int snum, Key lowKey, Key highKey, long condNum ) -{ - RedState *curState = redFsm->allStates + snum; - - /* Create the new state condition. */ - GenStateCond *stateCond = new GenStateCond; - stateCond->lowKey = lowKey; - stateCond->highKey = highKey; - - /* Assign it a cond space. 
*/ - GenCondSpace *condSpace = redFsm->allCondSpaces + condNum; - stateCond->condSpace = condSpace; - - curState->stateCondList.append( stateCond ); -} - - -void RedFsmBuild::setForcedErrorState() -{ - redFsm->forcedErrorState = true; -} - -Key RedFsmBuild::findMaxKey() -{ - Key maxKey = keyOps->maxKey; - for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { - assert( st->outSingle.length() == 0 ); - assert( st->defTrans == 0 ); - - long rangeLen = st->outRange.length(); - if ( rangeLen > 0 ) { - Key highKey = st->outRange[rangeLen-1].highKey; - if ( highKey > maxKey ) - maxKey = highKey; - } - } - return maxKey; -} - - -void RedFsmBuild::makeActionTableList() -{ - /* Must first order the action tables based on their id. */ - int numTables = nextActionTableId; - RedActionTable **tables = new RedActionTable*[numTables]; - for ( ActionTableMap::Iter at = actionTableMap; at.lte(); at++ ) - tables[at->id] = at; - - initActionTableList( numTables ); - curActionTable = 0; - - for ( int t = 0; t < numTables; t++ ) { - long length = tables[t]->key.length(); - - /* Collect the action table. */ - RedAction *redAct = redFsm->allActionTables + curActionTable; - redAct->actListId = curActionTable; - redAct->key.setAsNew( length ); - - int pos = 0; - for ( ActionTable::Iter atel = tables[t]->key; atel.lte(); atel++ ) { - int actionId = atel->value->actionId; - redAct->key[pos].key = 0; - redAct->key[pos].value = redFsm->allActions+actionId; - pos += 1; - } - - /* Insert into the action table map. */ - redFsm->actionMap.insert( redAct ); - - curActionTable += 1; - - } - - delete[] tables; -} - -void RedFsmBuild::reduceActionTables() -{ - /* Reduce the actions tables to a set. */ - for ( StateList::Iter st = fsm->stateList; st.lte(); st++ ) { - RedActionTable *actionTable = 0; - - /* Reduce To State Actions. 
*/ - if ( st->toStateActionTable.length() > 0 ) { - if ( actionTableMap.insert( st->toStateActionTable, &actionTable ) ) - actionTable->id = nextActionTableId++; - } - - /* Reduce From State Actions. */ - if ( st->fromStateActionTable.length() > 0 ) { - if ( actionTableMap.insert( st->fromStateActionTable, &actionTable ) ) - actionTable->id = nextActionTableId++; - } - - /* Reduce EOF actions. */ - if ( st->eofActionTable.length() > 0 ) { - if ( actionTableMap.insert( st->eofActionTable, &actionTable ) ) - actionTable->id = nextActionTableId++; - } - - /* Loop the transitions and reduce their actions. */ - for ( TransList::Iter trans = st->outList; trans.lte(); trans++ ) { - if ( trans->actionTable.length() > 0 ) { - if ( actionTableMap.insert( trans->actionTable, &actionTable ) ) - actionTable->id = nextActionTableId++; - } - } - } -} - -void RedFsmBuild::appendTrans( TransListVect &outList, Key lowKey, - Key highKey, FsmTrans *trans ) -{ - if ( trans->toState != 0 || trans->actionTable.length() > 0 ) - outList.append( TransEl( lowKey, highKey, trans ) ); -} - -void RedFsmBuild::makeTrans( Key lowKey, Key highKey, FsmTrans *trans ) -{ - /* First reduce the action. */ - RedActionTable *actionTable = 0; - if ( trans->actionTable.length() > 0 ) - actionTable = actionTableMap.find( trans->actionTable ); - - long targ = trans->toState == 0 ? -1 : trans->toState->alg.stateNum; - long action = actionTable == 0 ? -1 : actionTable->id; - - newTrans( curState, curTrans++, lowKey, highKey, targ, action ); -} - -void RedFsmBuild::makeTransList( FsmState *state ) -{ - TransListVect outList; - - /* If there is only are no ranges the task is simple. */ - if ( state->outList.length() > 0 ) { - /* Loop each source range. */ - for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) { - /* Reduce the transition. If it reduced to anything then add it. 
*/ - appendTrans( outList, trans->lowKey, trans->highKey, trans ); - } - } - - long length = outList.length(); - initTransList( curState, length ); - curTrans = 0; - - for ( TransListVect::Iter tvi = outList; tvi.lte(); tvi++ ) - makeTrans( tvi->lowKey, tvi->highKey, tvi->value ); - finishTransList( curState ); -} - -void RedFsmBuild::newAction( int anum, char *name, int line, int col, Action *action ) -{ - redFsm->allActions[anum].actionId = anum; - redFsm->allActions[anum].name = name; - redFsm->allActions[anum].loc.line = line; - redFsm->allActions[anum].loc.col = col; - redFsm->allActions[anum].inlineList = action->inlineList; - redFsm->allActions[anum].objField = action->objField; - redFsm->allActions[anum].markType = action->markType; - redFsm->allActions[anum].markId = action->markId + 1; -} - -void RedFsmBuild::makeAction( Action *action ) -{ - int line = action->loc.line; - int col = action->loc.col; - - char *name = 0; - if ( action->name != 0 ) - name = action->name; - - newAction( curAction++, name, line, col, action ); -} - -void xmlEscapeHost( std::ostream &out, char *data, int len ) -{ - char *end = data + len; - while ( data != end ) { - switch ( *data ) { - case '<': out << "<"; break; - case '>': out << ">"; break; - case '&': out << "&"; break; - default: out << *data; break; - } - data += 1; - } -} - -void RedFsmBuild::makeStateActions( FsmState *state ) -{ - RedActionTable *toStateActions = 0; - if ( state->toStateActionTable.length() > 0 ) - toStateActions = actionTableMap.find( state->toStateActionTable ); - - RedActionTable *fromStateActions = 0; - if ( state->fromStateActionTable.length() > 0 ) - fromStateActions = actionTableMap.find( state->fromStateActionTable ); - - RedActionTable *eofActions = 0; - if ( state->eofActionTable.length() > 0 ) - eofActions = actionTableMap.find( state->eofActionTable ); - - if ( toStateActions != 0 || fromStateActions != 0 || eofActions != 0 ) { - long toStateAction = -1; - long fromStateAction = -1; - 
long eofAction = -1; - - if ( toStateActions != 0 ) - toStateAction = toStateActions->id; - if ( fromStateActions != 0 ) - fromStateAction = fromStateActions->id; - if ( eofActions != 0 ) - eofAction = eofActions->id; - - setStateActions( curState, toStateAction, - fromStateAction, eofAction ); - } -} - -void RedFsmBuild::makeStateConditions( FsmState *state ) -{ - if ( state->stateCondList.length() > 0 ) { - - long length = state->stateCondList.length(); - initStateCondList( curState, length ); - curStateCond = 0; - - for ( StateCondList::Iter scdi = state->stateCondList; scdi.lte(); scdi++ ) { - Key lowKey = scdi->lowKey; - Key highKey = scdi->highKey; - long condId = scdi->condSpace->condSpaceId; - addStateCond( curState, lowKey, highKey, condId ); - } - } -} - -void RedFsmBuild::makeStateList() -{ - /* Write the list of states. */ - long length = fsm->stateList.length(); - initStateList( length ); - curState = 0; - - for ( StateList::Iter st = fsm->stateList; st.lte(); st++ ) { - /* Both or neither should be set. */ - assert( !( (st->eofTarget != 0) xor (st->eofActionTable.length() > 0) ) ); - - makeStateActions( st ); - makeStateConditions( st ); - makeTransList( st ); - - setId( curState, st->alg.stateNum ); - if ( st->isFinState() ) - setFinal( curState ); - - /* If there is an eof target, make an eof transition. */ - if ( st->eofTarget != 0 ) { - /* Find the eof actions. */ - RedActionTable *eofActions = 0; - eofActions = actionTableMap.find( st->eofActionTable ); - setEofTrans( curState, st->eofTarget->alg.stateNum, eofActions->id ); - } - - curState += 1; - } -} - -void RedFsmBuild::makeEntryPoints() -{ - if ( fsm->lmRequiresErrorState ) - setForcedErrorState(); - - for ( EntryMap::Iter en = fsm->entryPoints; en.lte(); en++ ) { - /* Get the name instantiation from nameIndex. 
*/ - NameInst *nameInst = fsm->nameIndex[en->key]; - FsmState *state = en->value; - char *name = nameInst->name; - long entry = state->alg.stateNum; - addEntryPoint( en->key, name, entry ); - } - - for ( RegionList::Iter reg = pd->regionList; reg.lte(); reg++ ) { - assert( reg->regionNameInst != 0 ); - - TokenRegion *use = reg; - - if ( use->isCiOnly ) - use = use->derivedFrom->ignoreOnlyRegion; - - NameInst *regionName = use->regionNameInst; - addRegionToEntry( reg->id, regionName->id ); - } -} - -void RedFsmBuild::makeMachine() -{ - /* Action tables. */ - reduceActionTables(); - - makeActionList(); - makeActionTableList(); - makeConditions(); - - /* Start state. */ - startState = fsm->startState->alg.stateNum; - - /* Error state. */ - if ( fsm->errState != 0 ) - errState = fsm->errState->alg.stateNum; - - makeEntryPoints(); - makeStateList(); -} - -void RedFsmBuild::makeConditions() -{ - if ( condData->condSpaceMap.length() > 0 ) { - long nextCondSpaceId = 0; - for ( CondSpaceMap::Iter cs = condData->condSpaceMap; cs.lte(); cs++ ) - cs->condSpaceId = nextCondSpaceId++; - - long length = condData->condSpaceMap.length(); - initCondSpaceList( length ); - curCondSpace = 0; - - for ( CondSpaceMap::Iter cs = condData->condSpaceMap; cs.lte(); cs++ ) { - long condSpaceId = cs->condSpaceId; - Key baseKey = cs->baseKey; - - newCondSpace( curCondSpace, condSpaceId, baseKey ); - for ( CondSet::Iter csi = cs->condSet; csi.lte(); csi++ ) { - long actionOffset = (*csi)->actionId; - condSpaceItem( curCondSpace, actionOffset ); - } - - curCondSpace += 1; - } - } -} - -RedFsm *RedFsmBuild::reduceMachine() -{ - redFsm = new RedFsm(); - redFsm->wantComplete = true; - - /* Open the definition. */ - makeMachine(); - - /* Do this before distributing transitions out to singles and defaults - * makes life easier. */ - redFsm->maxKey = findMaxKey(); - - redFsm->assignActionLocs(); - - /* Find the first final state (The final state with the lowest id). 
*/ - redFsm->findFirstFinState(); - - /* Choose default transitions and the single transition. */ - redFsm->chooseDefaultSpan(); - - /* Maybe do flat expand, otherwise choose single. */ - redFsm->chooseSingle(); - - /* Set up incoming transitions. */ - redFsm->setInTrans(); - - /* Anlayze Machine will find the final action reference counts, among - * other things. We will use these in reporting the usage - * of fsm directives in action code. */ - redFsm->analyzeMachine(); - - return redFsm; -} - diff --git a/colm/redbuild.h b/colm/redbuild.h deleted file mode 100644 index dbbb3e19..00000000 --- a/colm/redbuild.h +++ /dev/null @@ -1,161 +0,0 @@ -/* - * Copyright 2006-2012 Adrian Thurston <thurston@complang.org> - */ - -/* This file is part of Colm. - * - * Colm is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * Colm is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Colm; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#ifndef _FSMREDUCE_H -#define _FSMREDUCE_H - -#include <iostream> -#include "avltree.h" -#include "fsmgraph.h" -#include "parsedata.h" -#include "fsmrun.h" - -/* Forwards. 
*/ -struct FsmTrans; -struct FsmGraph; -struct Compiler; -struct FsmCodeGen; -struct RedFsm; -struct GenCondSpace; -struct Condition; - -struct RedActionTable -: - public AvlTreeEl<RedActionTable> -{ - RedActionTable( const ActionTable &key ) - : - key(key), - id(0) - { } - - const ActionTable &getKey() - { return key; } - - ActionTable key; - int id; -}; - -typedef AvlTree<RedActionTable, ActionTable, CmpActionTable> ActionTableMap; - -struct NextRedTrans -{ - Key lowKey, highKey; - FsmTrans *trans; - FsmTrans *next; - - void load() { - if ( trans != 0 ) { - next = trans->next; - lowKey = trans->lowKey; - highKey = trans->highKey; - } - } - - NextRedTrans( FsmTrans *t ) { - trans = t; - load(); - } - - void increment() { - trans = next; - load(); - } -}; - -class RedFsmBuild -{ -public: - RedFsmBuild( char *fsmName, Compiler *pd, FsmGraph *fsm ); - RedFsm *reduceMachine( ); - -private: - void appendTrans( TransListVect &outList, Key lowKey, Key highKey, FsmTrans *trans ); - void makeStateActions( FsmState *state ); - void makeStateList(); - void makeStateConditions( FsmState *state ); - - void initActionList( unsigned long length ); - void newAction( int anum, char *name, int line, int col, Action *action ); - void initActionTableList( unsigned long length ); - void initCondSpaceList( ulong length ); - void condSpaceItem( int cnum, long condActionId ); - void newCondSpace( int cnum, int condSpaceId, Key baseKey ); - void initStateCondList( int snum, ulong length ); - void addStateCond( int snum, Key lowKey, Key highKey, long condNum ); - void initStateList( unsigned long length ); - void addRegionToEntry( int regionId, int entryId ); - void addEntryPoint( int entryId, char *name, unsigned long entryState ); - void setId( int snum, int id ); - void initTransList( int snum, unsigned long length ); - void newTrans( int snum, int tnum, Key lowKey, Key highKey, - long targ, long act ); - void finishTransList( int snum ); - void setFinal( int snum ); - void setEofTrans( 
int snum, int eofTarget, int actId ); - void setStateActions( int snum, long toStateAction, - long fromStateAction, long eofAction ); - void setForcedErrorState(); - void closeMachine(); - Key findMaxKey(); - - - void makeEntryPoints(); - void makeGetKeyExpr(); - void makeAccessExpr(); - void makeCurStateExpr(); - void makeConditions(); - void makeInlineList( InlineList *inlineList, InlineItem *context ); - void makeActionList(); - void makeActionTableList(); - void reduceTrans( FsmTrans *trans ); - void reduceActionTables(); - void makeTransList( FsmState *state ); - void makeTrans( Key lowKey, Key highKey, FsmTrans *defTrans ); - void makeAction( Action *action ); - void makeLmSwitch( InlineItem *item ); - void makeMachine(); - void makeActionExec( InlineItem *item ); - void makeActionExecTE( InlineItem *item ); - - char *fsmName; - Compiler *pd; - FsmGraph *fsm; - ActionTableMap actionTableMap; - int nextActionTableId; - - int startState; - int errState; - -public: - RedFsm *redFsm; - -private: - int curAction; - int curActionTable; - int curTrans; - int curState; - int curCondSpace; - int curStateCond; -}; - - -#endif /* _FSMREDUCE_H */ diff --git a/colm/redfsm.cc b/colm/redfsm.cc deleted file mode 100644 index d3a65b7c..00000000 --- a/colm/redfsm.cc +++ /dev/null @@ -1,1112 +0,0 @@ -/* - * Copyright 2006-2012 Adrian Thurston <thurston@complang.org> - */ - -/* This file is part of Colm. - * - * Colm is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * Colm is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with Colm; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include <iostream> -#include <sstream> -#include "redfsm.h" -#include "avlmap.h" -#include "mergesort.h" -#include "fsmgraph.h" -#include "parsetree.h" -#include "fsmrun.h" - -using std::ostringstream; - -string nameOrLoc( GenAction *genAction ) -{ - if ( genAction->name != 0 ) - return string(genAction->name); - else { - ostringstream ret; - ret << genAction->loc.line << ":" << genAction->loc.col; - return ret.str(); - } -} - -RedFsm::RedFsm() -: - wantComplete(false), - forcedErrorState(false), - nextActionId(0), - nextTransId(0), - errState(0), - errTrans(0), - firstFinState(0), - numFinStates(0), - allActions(0), - allActionTables(0), - allConditions(0), - allCondSpaces(0), - allStates(0), - bAnyToStateActions(false), - bAnyFromStateActions(false), - bAnyRegActions(false), - bAnyEofActions(false), - bAnyActionGotos(false), - bAnyActionCalls(false), - bAnyActionRets(false), - bAnyRegActionRets(false), - bAnyRegActionByValControl(false), - bAnyRegNextStmt(false), - bAnyRegCurStateRef(false), - bAnyRegBreak(false), - bAnyLmSwitchError(false), - bAnyConditions(false) -{ -} - -/* Does the machine have any actions. */ -bool RedFsm::anyActions() -{ - return actionMap.length() > 0; -} - -void RedFsm::depthFirstOrdering( RedState *state ) -{ - /* Nothing to do if the state is already on the list. */ - if ( state->onStateList ) - return; - - /* Doing depth first, put state on the list. */ - state->onStateList = true; - stateList.append( state ); - -// /* At this point transitions should only be in ranges. */ -// assert( state->outSingle.length() == 0 ); -// assert( state->defTrans == 0 ); - - /* Recurse on singles. 
*/ - for ( RedTransList::Iter stel = state->outSingle; stel.lte(); stel++ ) { - if ( stel->value->targ != 0 ) - depthFirstOrdering( stel->value->targ ); - } - - /* Recurse on everything ranges. */ - for ( RedTransList::Iter rtel = state->outRange; rtel.lte(); rtel++ ) { - if ( rtel->value->targ != 0 ) - depthFirstOrdering( rtel->value->targ ); - } - - if ( state->defTrans != 0 && state->defTrans->targ != 0 ) - depthFirstOrdering( state->defTrans->targ ); -} - -/* Ordering states by transition connections. */ -void RedFsm::depthFirstOrdering() -{ - /* Init on state list flags. */ - for ( RedStateList::Iter st = stateList; st.lte(); st++ ) - st->onStateList = false; - - /* Clear out the state list, we will rebuild it. */ - int stateListLen = stateList.length(); - stateList.abandon(); - - /* Add back to the state list from the start state and all other entry - * points. */ - depthFirstOrdering( startState ); - for ( RedStateSet::Iter en = entryPoints; en.lte(); en++ ) - depthFirstOrdering( *en ); - if ( forcedErrorState ) - depthFirstOrdering( errState ); - - /* Make sure we put everything back on. */ - assert( stateListLen == stateList.length() ); -} - -/* Assign state ids by appearance in the state list. */ -void RedFsm::sequentialStateIds() -{ - /* Table based machines depend on the state numbers starting at zero. */ - nextStateId = 0; - for ( RedStateList::Iter st = stateList; st.lte(); st++ ) - st->id = nextStateId++; -} - -/* Stable sort the states by final state status. */ -void RedFsm::sortStatesByFinal() -{ - /* Move forward through the list and throw final states onto the end. */ - RedState *state = 0; - RedState *next = stateList.head; - RedState *last = stateList.tail; - while ( state != last ) { - /* Move forward and load up the next. */ - state = next; - next = state->next; - - /* Throw to the end? */ - if ( state->isFinal ) { - stateList.detach( state ); - stateList.append( state ); - } - } -} - -/* Assign state ids by final state state status. 
*/ -void RedFsm::sortStateIdsByFinal() -{ - /* Table based machines depend on this starting at zero. */ - nextStateId = 0; - - /* First pass to assign non final ids. */ - for ( RedStateList::Iter st = stateList; st.lte(); st++ ) { - if ( ! st->isFinal ) - st->id = nextStateId++; - } - - /* Second pass to assign final ids. */ - for ( RedStateList::Iter st = stateList; st.lte(); st++ ) { - if ( st->isFinal ) - st->id = nextStateId++; - } -} - -struct CmpStateById -{ - static int compare( RedState *st1, RedState *st2 ) - { - if ( st1->id < st2->id ) - return -1; - else if ( st1->id > st2->id ) - return 1; - else - return 0; - } -}; - -void RedFsm::sortByStateId() -{ - /* Make the array. */ - int pos = 0; - RedState **ptrList = new RedState*[stateList.length()]; - for ( RedStateList::Iter st = stateList; st.lte(); st++ ) - ptrList[pos++] = st; - - MergeSort<RedState*, CmpStateById> mergeSort; - mergeSort.sort( ptrList, stateList.length() ); - - stateList.abandon(); - for ( int st = 0; st < pos; st++ ) - stateList.append( ptrList[st] ); - - delete[] ptrList; -} - -/* Find the final state with the lowest id. */ -void RedFsm::findFirstFinState() -{ - for ( RedStateList::Iter st = stateList; st.lte(); st++ ) { - if ( st->isFinal && (firstFinState == 0 || st->id < firstFinState->id) ) - firstFinState = st; - } -} - -void RedFsm::assignActionLocs() -{ - int nextLocation = 0; - for ( GenActionTableMap::Iter act = actionMap; act.lte(); act++ ) { - /* Store the loc, skip over the array and a null terminator. */ - act->location = nextLocation; - nextLocation += act->key.length() + 1; - } -} - -/* Check if we can extend the current range by displacing any ranges - * ahead to the singles. */ -bool RedFsm::canExtend( const RedTransList &list, int pos ) -{ - /* Get the transition that we want to extend. */ - RedTrans *extendTrans = list[pos].value; - - /* Look ahead in the transition list. 
*/ - for ( int next = pos + 1; next < list.length(); pos++, next++ ) { - /* If they are not continuous then cannot extend. */ - Key nextKey = list[next].lowKey; - nextKey.decrement(); - if ( list[pos].highKey != nextKey ) - break; - - /* Check for the extenstion property. */ - if ( extendTrans == list[next].value ) - return true; - - /* If the span of the next element is more than one, then don't keep - * checking, it won't be moved to single. */ - unsigned long long nextSpan = keyOps->span( list[next].lowKey, list[next].highKey ); - if ( nextSpan > 1 ) - break; - } - return false; -} - -/* Move ranges to the singles list. */ -void RedFsm::moveTransToSingle( RedState *state ) -{ - RedTransList &range = state->outRange; - RedTransList &single = state->outSingle; - for ( int rpos = 0; rpos < range.length(); ) { - /* Check if this is a range we can extend. */ - if ( canExtend( range, rpos ) ) { - /* Transfer singles over. */ - while ( range[rpos].value != range[rpos+1].value ) { - /* Transfer the range to single. */ - single.append( range[rpos+1] ); - range.remove( rpos+1 ); - } - - /* Extend. */ - range[rpos].highKey = range[rpos+1].highKey; - range.remove( rpos+1 ); - } - /* Maybe move it to the singles. */ - else if ( keyOps->span( range[rpos].lowKey, range[rpos].highKey ) == 1 ) { - single.append( range[rpos] ); - range.remove( rpos ); - } - else { - /* Keeping it in the ranges. */ - rpos += 1; - } - } -} - -/* Look through ranges and choose suitable single character transitions. */ -void RedFsm::chooseSingle() -{ - /* Loop the states. */ - for ( RedStateList::Iter st = stateList; st.lte(); st++ ) { - /* Rewrite the transition list taking out the suitable single - * transtions. 
*/ - moveTransToSingle( st ); - } -} - -void RedFsm::makeFlat() -{ - for ( RedStateList::Iter st = stateList; st.lte(); st++ ) { - if ( st->stateCondList.length() == 0 ) { - st->condLowKey = 0; - st->condHighKey = 0; - } - else { - st->condLowKey = st->stateCondList.head->lowKey; - st->condHighKey = st->stateCondList.tail->highKey; - - unsigned long long span = keyOps->span( st->condLowKey, st->condHighKey ); - st->condList = new GenCondSpace*[ span ]; - memset( st->condList, 0, sizeof(GenCondSpace*)*span ); - - for ( GenStateCondList::Iter sci = st->stateCondList; sci.lte(); sci++ ) { - unsigned long long base, trSpan; - base = keyOps->span( st->condLowKey, sci->lowKey )-1; - trSpan = keyOps->span( sci->lowKey, sci->highKey ); - for ( unsigned long long pos = 0; pos < trSpan; pos++ ) - st->condList[base+pos] = sci->condSpace; - } - } - - if ( st->outRange.length() == 0 ) { - st->lowKey = st->highKey = 0; - st->transList = 0; - } - else { - st->lowKey = st->outRange[0].lowKey; - st->highKey = st->outRange[st->outRange.length()-1].highKey; - unsigned long long span = keyOps->span( st->lowKey, st->highKey ); - st->transList = new RedTrans*[ span ]; - memset( st->transList, 0, sizeof(RedTrans*)*span ); - - for ( RedTransList::Iter trans = st->outRange; trans.lte(); trans++ ) { - unsigned long long base, trSpan; - base = keyOps->span( st->lowKey, trans->lowKey )-1; - trSpan = keyOps->span( trans->lowKey, trans->highKey ); - for ( unsigned long long pos = 0; pos < trSpan; pos++ ) - st->transList[base+pos] = trans->value; - } - - /* Fill in the gaps with the default transition. */ - for ( unsigned long long pos = 0; pos < span; pos++ ) { - if ( st->transList[pos] == 0 ) - st->transList[pos] = st->defTrans; - } - } - } -} - - -/* A default transition has been picked, move it from the outRange to the - * default pointer. */ -void RedFsm::moveToDefault( RedTrans *defTrans, RedState *state ) -{ - /* Rewrite the outRange, omitting any ranges that use - * the picked default. 
*/ - RedTransList outRange; - for ( RedTransList::Iter rtel = state->outRange; rtel.lte(); rtel++ ) { - /* If it does not take the default, copy it over. */ - if ( rtel->value != defTrans ) - outRange.append( *rtel ); - } - - /* Save off the range we just created into the state's range. */ - state->outRange.transfer( outRange ); - - /* Store the default. */ - state->defTrans = defTrans; -} - -bool RedFsm::alphabetCovered( RedTransList &outRange ) -{ - /* Cannot cover without any out ranges. */ - if ( outRange.length() == 0 ) - return false; - - /* If the first range doesn't start at the the lower bound then the - * alphabet is not covered. */ - RedTransList::Iter rtel = outRange; - if ( keyOps->minKey < rtel->lowKey ) - return false; - - /* Check that every range is next to the previous one. */ - rtel.increment(); - for ( ; rtel.lte(); rtel++ ) { - Key highKey = rtel[-1].highKey; - highKey.increment(); - if ( highKey != rtel->lowKey ) - return false; - } - - /* The last must extend to the upper bound. */ - RedTransEl *last = &outRange[outRange.length()-1]; - if ( last->highKey < keyOps->maxKey ) - return false; - - return true; -} - -RedTrans *RedFsm::chooseDefaultSpan( RedState *state ) -{ - /* Make a set of transitions from the outRange. */ - RedTransPtrSet stateTransSet; - for ( RedTransList::Iter rtel = state->outRange; rtel.lte(); rtel++ ) - stateTransSet.insert( rtel->value ); - - /* For each transition in the find how many alphabet characters the - * transition spans. */ - unsigned long long *span = new unsigned long long[stateTransSet.length()]; - memset( span, 0, sizeof(unsigned long long) * stateTransSet.length() ); - for ( RedTransList::Iter rtel = state->outRange; rtel.lte(); rtel++ ) { - /* Lookup the transition in the set. */ - RedTrans **inSet = stateTransSet.find( rtel->value ); - int pos = inSet - stateTransSet.data; - span[pos] += keyOps->span( rtel->lowKey, rtel->highKey ); - } - - /* Find the max span, choose it for making the default. 
*/ - RedTrans *maxTrans = 0; - unsigned long long maxSpan = 0; - for ( RedTransPtrSet::Iter rtel = stateTransSet; rtel.lte(); rtel++ ) { - if ( span[rtel.pos()] > maxSpan ) { - maxSpan = span[rtel.pos()]; - maxTrans = *rtel; - } - } - - delete[] span; - return maxTrans; -} - -/* Pick default transitions from ranges for the states. */ -void RedFsm::chooseDefaultSpan() -{ - /* Loop the states. */ - for ( RedStateList::Iter st = stateList; st.lte(); st++ ) { - /* Only pick a default transition if the alphabet is covered. This - * avoids any transitions in the out range that go to error and avoids - * the need for an ERR state. */ - if ( alphabetCovered( st->outRange ) ) { - /* Pick a default transition by largest span. */ - RedTrans *defTrans = chooseDefaultSpan( st ); - - /* Rewrite the transition list taking out the transition we picked - * as the default and store the default. */ - moveToDefault( defTrans, st ); - } - } -} - -RedTrans *RedFsm::chooseDefaultGoto( RedState *state ) -{ - /* Make a set of transitions from the outRange. */ - RedTransPtrSet stateTransSet; - for ( RedTransList::Iter rtel = state->outRange; rtel.lte(); rtel++ ) { - if ( rtel->value->targ == state->next ) - return rtel->value; - } - return 0; -} - -void RedFsm::chooseDefaultGoto() -{ - /* Loop the states. */ - for ( RedStateList::Iter st = stateList; st.lte(); st++ ) { - /* Pick a default transition. */ - RedTrans *defTrans = chooseDefaultGoto( st ); - if ( defTrans == 0 ) - defTrans = chooseDefaultSpan( st ); - - /* Rewrite the transition list taking out the transition we picked - * as the default and store the default. */ - moveToDefault( defTrans, st ); - } -} - -RedTrans *RedFsm::chooseDefaultNumRanges( RedState *state ) -{ - /* Make a set of transitions from the outRange. 
*/ - RedTransPtrSet stateTransSet; - for ( RedTransList::Iter rtel = state->outRange; rtel.lte(); rtel++ ) - stateTransSet.insert( rtel->value ); - - /* For each transition in the find how many ranges use the transition. */ - int *numRanges = new int[stateTransSet.length()]; - memset( numRanges, 0, sizeof(int) * stateTransSet.length() ); - for ( RedTransList::Iter rtel = state->outRange; rtel.lte(); rtel++ ) { - /* Lookup the transition in the set. */ - RedTrans **inSet = stateTransSet.find( rtel->value ); - numRanges[inSet - stateTransSet.data] += 1; - } - - /* Find the max number of ranges. */ - RedTrans *maxTrans = 0; - int maxNumRanges = 0; - for ( RedTransPtrSet::Iter rtel = stateTransSet; rtel.lte(); rtel++ ) { - if ( numRanges[rtel.pos()] > maxNumRanges ) { - maxNumRanges = numRanges[rtel.pos()]; - maxTrans = *rtel; - } - } - - delete[] numRanges; - return maxTrans; -} - -void RedFsm::chooseDefaultNumRanges() -{ - /* Loop the states. */ - for ( RedStateList::Iter st = stateList; st.lte(); st++ ) { - /* Pick a default transition. */ - RedTrans *defTrans = chooseDefaultNumRanges( st ); - - /* Rewrite the transition list taking out the transition we picked - * as the default and store the default. */ - moveToDefault( defTrans, st ); - } -} - -RedTrans *RedFsm::getErrorTrans( ) -{ - /* If the error trans has not been made aready, make it. */ - if ( errTrans == 0 ) { - /* This insert should always succeed since no transition created by - * the user can point to the error state. */ - errTrans = new RedTrans( getErrorState(), 0, nextTransId++ ); - RedTrans *inRes = transSet.insert( errTrans ); - assert( inRes != 0 ); - } - return errTrans; -} - -RedState *RedFsm::getErrorState() -{ - /* Something went wrong. An error state is needed but one was not supplied - * by the frontend. 
*/ - assert( errState != 0 ); - return errState; -} - - -RedTrans *RedFsm::allocateTrans( RedState *targ, RedAction *action ) -{ - /* Create a reduced trans and look for it in the transiton set. */ - RedTrans redTrans( targ, action, 0 ); - RedTrans *inDict = transSet.find( &redTrans ); - if ( inDict == 0 ) { - inDict = new RedTrans( targ, action, nextTransId++ ); - transSet.insert( inDict ); - } - return inDict; -} - -void RedFsm::partitionFsm( int nparts ) -{ - /* At this point the states are ordered by a depth-first traversal. We - * will allocate to partitions based on this ordering. */ - this->nParts = nparts; - int partSize = stateList.length() / nparts; - int remainder = stateList.length() % nparts; - int numInPart = partSize; - int partition = 0; - if ( remainder-- > 0 ) - numInPart += 1; - for ( RedStateList::Iter st = stateList; st.lte(); st++ ) { - st->partition = partition; - - numInPart -= 1; - if ( numInPart == 0 ) { - partition += 1; - numInPart = partSize; - if ( remainder-- > 0 ) - numInPart += 1; - } - } -} - -void RedFsm::setInTrans() -{ - /* First pass counts the number of transitions. */ - for ( RedTransSet::Iter trans = transSet; trans.lte(); trans++ ) - trans->targ->numInTrans += 1; - - /* Pass over states to allocate the needed memory. Reset the counts so we - * can use them as the current size. */ - for ( RedStateList::Iter st = stateList; st.lte(); st++ ) { - st->inTrans = new RedTrans*[st->numInTrans]; - st->numInTrans = 0; - } - - /* Second pass over transitions copies pointers into the in trans list. 
*/ - for ( RedTransSet::Iter trans = transSet; trans.lte(); trans++ ) - trans->targ->inTrans[trans->targ->numInTrans++] = trans; -} - -GenCondSpace *RedFsm::findCondSpace( Key lowKey, Key highKey ) -{ - for ( CondSpaceList::Iter cs = condSpaceList; cs.lte(); cs++ ) { - Key csHighKey = cs->baseKey; - csHighKey += keyOps->alphSize() * (1 << cs->condSet.length()); - - if ( lowKey >= cs->baseKey && highKey <= csHighKey ) - return cs; - } - return 0; -} - -Condition *RedFsm::findCondition( Key key ) -{ - for ( ConditionList::Iter cond = conditionList; cond.lte(); cond++ ) { - Key upperKey = cond->baseKey + (1 << cond->condSet.length()); - if ( cond->baseKey <= key && key <= upperKey ) - return cond; - } - return 0; -} - -void RedFsm::setValueLimits() -{ - maxSingleLen = 0; - maxRangeLen = 0; - maxKeyOffset = 0; - maxIndexOffset = 0; - maxActListId = 0; - maxActionLoc = 0; - maxActArrItem = 0; - maxSpan = 0; - maxCondSpan = 0; - maxFlatIndexOffset = 0; - maxCondOffset = 0; - maxCondLen = 0; - maxCondSpaceId = 0; - maxCondIndexOffset = 0; - - /* In both of these cases the 0 index is reserved for no value, so the max - * is one more than it would be if they started at 0. */ - maxIndex = transSet.length(); - maxCond = condSpaceList.length(); - - /* The nextStateId - 1 is the last state id assigned. */ - maxState = nextStateId - 1; - - for ( CondSpaceList::Iter csi = condSpaceList; csi.lte(); csi++ ) { - if ( csi->condSpaceId > maxCondSpaceId ) - maxCondSpaceId = csi->condSpaceId; - } - - for ( RedStateList::Iter st = stateList; st.lte(); st++ ) { - /* Maximum cond length. */ - if ( st->stateCondList.length() > maxCondLen ) - maxCondLen = st->stateCondList.length(); - - /* Maximum single length. */ - if ( st->outSingle.length() > maxSingleLen ) - maxSingleLen = st->outSingle.length(); - - /* Maximum range length. 
*/ - if ( st->outRange.length() > maxRangeLen ) - maxRangeLen = st->outRange.length(); - - /* The key offset index offset for the state after last is not used, skip it.. */ - if ( ! st.last() ) { - maxCondOffset += st->stateCondList.length(); - maxKeyOffset += st->outSingle.length() + st->outRange.length()*2; - maxIndexOffset += st->outSingle.length() + st->outRange.length() + 1; - } - - /* Max cond span. */ - if ( st->condList != 0 ) { - unsigned long long span = keyOps->span( st->condLowKey, st->condHighKey ); - if ( span > maxCondSpan ) - maxCondSpan = span; - } - - /* Max key span. */ - if ( st->transList != 0 ) { - unsigned long long span = keyOps->span( st->lowKey, st->highKey ); - if ( span > maxSpan ) - maxSpan = span; - } - - /* Max cond index offset. */ - if ( ! st.last() ) { - if ( st->condList != 0 ) - maxCondIndexOffset += keyOps->span( st->condLowKey, st->condHighKey ); - } - - /* Max flat index offset. */ - if ( ! st.last() ) { - if ( st->transList != 0 ) - maxFlatIndexOffset += keyOps->span( st->lowKey, st->highKey ); - maxFlatIndexOffset += 1; - } - } - - for ( GenActionTableMap::Iter at = actionMap; at.lte(); at++ ) { - /* Maximum id of action lists. */ - if ( at->actListId+1 > maxActListId ) - maxActListId = at->actListId+1; - - /* Maximum location of items in action array. */ - if ( at->location+1 > maxActionLoc ) - maxActionLoc = at->location+1; - - /* Maximum values going into the action array. */ - if ( at->key.length() > maxActArrItem ) - maxActArrItem = at->key.length(); - for ( GenActionTable::Iter item = at->key; item.lte(); item++ ) { - if ( item->value->actionId > maxActArrItem ) - maxActArrItem = item->value->actionId; - } - } -} - -void RedFsm::findFinalActionRefs() -{ - for ( RedStateList::Iter st = stateList; st.lte(); st++ ) { - /* Rerence count out of single transitions. 
*/ - for ( RedTransList::Iter rtel = st->outSingle; rtel.lte(); rtel++ ) { - if ( rtel->value->action != 0 ) { - rtel->value->action->numTransRefs += 1; - for ( GenActionTable::Iter item = rtel->value->action->key; item.lte(); item++ ) - item->value->numTransRefs += 1; - } - } - - /* Reference count out of range transitions. */ - for ( RedTransList::Iter rtel = st->outRange; rtel.lte(); rtel++ ) { - if ( rtel->value->action != 0 ) { - rtel->value->action->numTransRefs += 1; - for ( GenActionTable::Iter item = rtel->value->action->key; item.lte(); item++ ) - item->value->numTransRefs += 1; - } - } - - /* Reference count default transition. */ - if ( st->defTrans != 0 && st->defTrans->action != 0 ) { - st->defTrans->action->numTransRefs += 1; - for ( GenActionTable::Iter item = st->defTrans->action->key; item.lte(); item++ ) - item->value->numTransRefs += 1; - } - - /* Reference count to state actions. */ - if ( st->toStateAction != 0 ) { - st->toStateAction->numToStateRefs += 1; - for ( GenActionTable::Iter item = st->toStateAction->key; item.lte(); item++ ) - item->value->numToStateRefs += 1; - } - - /* Reference count from state actions. */ - if ( st->fromStateAction != 0 ) { - st->fromStateAction->numFromStateRefs += 1; - for ( GenActionTable::Iter item = st->fromStateAction->key; item.lte(); item++ ) - item->value->numFromStateRefs += 1; - } - - /* Reference count EOF actions. */ - if ( st->eofAction != 0 ) { - st->eofAction->numEofRefs += 1; - for ( GenActionTable::Iter item = st->eofAction->key; item.lte(); item++ ) - item->value->numEofRefs += 1; - } - } -} - -void RedFsm::analyzeAction( GenAction *act, InlineList *inlineList ) -{ - for ( InlineList::Iter item = *inlineList; item.lte(); item++ ) { - /* Check for various things in regular actions. 
*/ - if ( act->numTransRefs > 0 || act->numToStateRefs > 0 || - act->numFromStateRefs > 0 || act->numEofRefs > 0 ) - { - if ( item->type == InlineItem::LmSwitch && - item->tokenRegion->lmSwitchHandlesError ) - { - bAnyLmSwitchError = true; - } - } - - if ( item->children != 0 ) - analyzeAction( act, item->children ); - } -} - -void RedFsm::analyzeActionList( RedAction *redAct, InlineList *inlineList ) -{ - for ( InlineList::Iter item = *inlineList; item.lte(); item++ ) { - if ( item->children != 0 ) - analyzeActionList( redAct, item->children ); - } -} - -/* Assign ids to referenced actions. */ -void RedFsm::assignActionIds() -{ - int nextActionId = 0; - for ( GenActionList::Iter act = genActionList; act.lte(); act++ ) { - /* Only ever interested in referenced actions. */ - if ( numRefs( act ) > 0 ) - act->actionId = nextActionId++; - } -} - -/* Gather various info on the machine. */ -void RedFsm::analyzeMachine() -{ - /* Find the true count of action references. */ - findFinalActionRefs(); - - /* Check if there are any calls in action code. */ - for ( GenActionList::Iter act = genActionList; act.lte(); act++ ) { - /* Record the occurrence of various kinds of actions. */ - if ( act->numToStateRefs > 0 ) - bAnyToStateActions = true; - if ( act->numFromStateRefs > 0 ) - bAnyFromStateActions = true; - if ( act->numEofRefs > 0 ) - bAnyEofActions = true; - if ( act->numTransRefs > 0 ) - bAnyRegActions = true; - - /* Recurse through the action's parse tree looking for various things. */ - analyzeAction( act, act->inlineList ); - } - - /* Analyze reduced action lists. */ - for ( GenActionTableMap::Iter redAct = actionMap; redAct.lte(); redAct++ ) { - for ( GenActionTable::Iter act = redAct->key; act.lte(); act++ ) - analyzeActionList( redAct, act->value->inlineList ); - } - - /* Find states that have transitions with actions that have next - * statements. */ - for ( RedStateList::Iter st = stateList; st.lte(); st++ ) { - /* Check any actions out of outSinge. 
*/ - for ( RedTransList::Iter rtel = st->outSingle; rtel.lte(); rtel++ ) { - if ( rtel->value->action != 0 && rtel->value->action->anyCurStateRef() ) - st->bAnyRegCurStateRef = true; - } - - /* Check any actions out of outRange. */ - for ( RedTransList::Iter rtel = st->outRange; rtel.lte(); rtel++ ) { - if ( rtel->value->action != 0 && rtel->value->action->anyCurStateRef() ) - st->bAnyRegCurStateRef = true; - } - - /* Check any action out of default. */ - if ( st->defTrans != 0 && st->defTrans->action != 0 && - st->defTrans->action->anyCurStateRef() ) - st->bAnyRegCurStateRef = true; - - if ( st->stateCondList.length() > 0 ) - bAnyConditions = true; - } - - /* Assign ids to actions that are referenced. */ - assignActionIds(); - - /* Set the maximums of various values used for deciding types. */ - setValueLimits(); -} - -int transAction( RedTrans *trans ) -{ - int retAct = 0; - if ( trans->action != 0 ) - retAct = trans->action->location+1; - return retAct; -} - -int toStateAction( RedState *state ) -{ - int act = 0; - if ( state->toStateAction != 0 ) - act = state->toStateAction->location+1; - return act; -} - -int fromStateAction( RedState *state ) -{ - int act = 0; - if ( state->fromStateAction != 0 ) - act = state->fromStateAction->location+1; - return act; -} - -int eofAction( RedState *state ) -{ - int act = 0; - if ( state->eofAction != 0 ) - act = state->eofAction->location+1; - return act; -} - - -FsmTables *RedFsm::makeFsmTables() -{ - /* The fsm runtime needs states sorted by id. 
*/ - sortByStateId(); - - int pos, curKeyOffset, curIndOffset; - FsmTables *fsmTables = new FsmTables; - fsmTables->numStates = stateList.length(); - - /* - * actions - */ - - fsmTables->numActions = 1; - for ( GenActionTableMap::Iter act = actionMap; act.lte(); act++ ) - fsmTables->numActions += 1 + act->key.length(); - - pos = 0; - fsmTables->actions = new long[fsmTables->numActions]; - fsmTables->actions[pos++] = 0; - for ( GenActionTableMap::Iter act = actionMap; act.lte(); act++ ) { - fsmTables->actions[pos++] = act->key.length(); - for ( GenActionTable::Iter item = act->key; item.lte(); item++ ) - fsmTables->actions[pos++] = item->value->actionId; - } - - /* - * keyOffset - */ - pos = 0, curKeyOffset = 0; - fsmTables->keyOffsets = new long[fsmTables->numStates]; - for ( RedStateList::Iter st = stateList; st.lte(); st++ ) { - /* Store the current offset. */ - fsmTables->keyOffsets[pos++] = curKeyOffset; - - /* Move the key offset ahead. */ - curKeyOffset += st->outSingle.length() + st->outRange.length()*2; - } - - /* - * transKeys - */ - fsmTables->numTransKeys = 0; - for ( RedStateList::Iter st = stateList; st.lte(); st++ ) { - fsmTables->numTransKeys += st->outSingle.length(); - fsmTables->numTransKeys += 2 * st->outRange.length(); - } - - pos = 0; - fsmTables->transKeys = new char[fsmTables->numTransKeys]; - for ( RedStateList::Iter st = stateList; st.lte(); st++ ) { - for ( RedTransList::Iter stel = st->outSingle; stel.lte(); stel++ ) - fsmTables->transKeys[pos++] = stel->lowKey.getVal(); - for ( RedTransList::Iter rtel = st->outRange; rtel.lte(); rtel++ ) { - fsmTables->transKeys[pos++] = rtel->lowKey.getVal(); - fsmTables->transKeys[pos++] = rtel->highKey.getVal(); - } - } - - /* - * singleLengths - */ - pos = 0; - fsmTables->singleLengths = new long[fsmTables->numStates]; - for ( RedStateList::Iter st = stateList; st.lte(); st++ ) - fsmTables->singleLengths[pos++] = st->outSingle.length(); - - /* - * rangeLengths - */ - pos = 0; - 
fsmTables->rangeLengths = new long[fsmTables->numStates]; - for ( RedStateList::Iter st = stateList; st.lte(); st++ ) - fsmTables->rangeLengths[pos++] = st->outRange.length(); - - /* - * indexOffsets - */ - pos = 0, curIndOffset = 0; - fsmTables->indexOffsets = new long[fsmTables->numStates]; - for ( RedStateList::Iter st = stateList; st.lte(); st++ ) { - fsmTables->indexOffsets[pos++] = curIndOffset; - - curIndOffset += st->outSingle.length() + st->outRange.length(); - if ( st->defTrans != 0 ) - curIndOffset += 1; - } - - /* - * transTargsWI - */ - fsmTables->numTransTargsWI = 0; - for ( RedStateList::Iter st = stateList; st.lte(); st++ ) { - fsmTables->numTransTargsWI += st->outSingle.length(); - fsmTables->numTransTargsWI += st->outRange.length(); - if ( st->defTrans != 0 ) - fsmTables->numTransTargsWI += 1; - } - - pos = 0; - fsmTables->transTargsWI = new long[fsmTables->numTransTargsWI]; - for ( RedStateList::Iter st = stateList; st.lte(); st++ ) { - for ( RedTransList::Iter stel = st->outSingle; stel.lte(); stel++ ) - fsmTables->transTargsWI[pos++] = stel->value->targ->id; - - for ( RedTransList::Iter rtel = st->outRange; rtel.lte(); rtel++ ) - fsmTables->transTargsWI[pos++] = rtel->value->targ->id; - - if ( st->defTrans != 0 ) - fsmTables->transTargsWI[pos++] = st->defTrans->targ->id; - } - - /* - * transActionsWI - */ - fsmTables->numTransActionsWI = 0; - for ( RedStateList::Iter st = stateList; st.lte(); st++ ) { - fsmTables->numTransActionsWI += st->outSingle.length(); - fsmTables->numTransActionsWI += st->outRange.length(); - if ( st->defTrans != 0 ) - fsmTables->numTransActionsWI += 1; - } - - pos = 0; - fsmTables->transActionsWI = new long[fsmTables->numTransActionsWI]; - for ( RedStateList::Iter st = stateList; st.lte(); st++ ) { - for ( RedTransList::Iter stel = st->outSingle; stel.lte(); stel++ ) - fsmTables->transActionsWI[pos++] = transAction( stel->value ); - - for ( RedTransList::Iter rtel = st->outRange; rtel.lte(); rtel++ ) - 
fsmTables->transActionsWI[pos++] = transAction( rtel->value ); - - if ( st->defTrans != 0 ) - fsmTables->transActionsWI[pos++] = transAction( st->defTrans ); - } - - /* - * toStateActions - */ - pos = 0; - fsmTables->toStateActions = new long[fsmTables->numStates]; - for ( RedStateList::Iter st = stateList; st.lte(); st++ ) - fsmTables->toStateActions[pos++] = toStateAction( st ); - - /* - * fromStateActions - */ - pos = 0; - fsmTables->fromStateActions = new long[fsmTables->numStates]; - for ( RedStateList::Iter st = stateList; st.lte(); st++ ) - fsmTables->fromStateActions[pos++] = fromStateAction( st ); - - /* - * eofActions - */ - pos = 0; - fsmTables->eofActions = new long[fsmTables->numStates]; - for ( RedStateList::Iter st = stateList; st.lte(); st++ ) - fsmTables->eofActions[pos++] = eofAction( st ); - - /* - * eofTargs - */ - pos = 0; - fsmTables->eofTargs = new long[fsmTables->numStates]; - for ( RedStateList::Iter st = stateList; st.lte(); st++ ) { - int targ = -1; - if ( st->eofTrans != 0 ) - targ = st->eofTrans->targ->id; - fsmTables->eofTargs[pos++] = targ; - } - - /* Start state. */ - fsmTables->startState = startState->id; - - /* First final state. */ - fsmTables->firstFinal = ( firstFinState != 0 ) ? - firstFinState->id : nextStateId; - - /* The error state. */ - fsmTables->errorState = ( errState != 0 ) ? - errState->id : -1; - - /* The array pointing to actions. */ - pos = 0; - fsmTables->numActionSwitch = genActionList.length(); - fsmTables->actionSwitch = new GenAction*[fsmTables->numActionSwitch]; - for ( GenActionList::Iter act = genActionList; act.lte(); act++ ) - fsmTables->actionSwitch[pos++] = act; - - /* - * entryByRegion - */ - - fsmTables->numRegions = regionToEntry.length()+1; - fsmTables->entryByRegion = new long[fsmTables->numRegions]; - fsmTables->entryByRegion[0] = fsmTables->errorState; - - pos = 1; - for ( RegionToEntry::Iter en = regionToEntry; en.lte(); en++ ) { - /* Find the entry state from the entry id. 
*/ - RedEntryMapEl *entryMapEl = redEntryMap.find( *en ); - - /* Save it off. */ - fsmTables->entryByRegion[pos++] = entryMapEl != 0 ? entryMapEl->value - : fsmTables->errorState; - } - - return fsmTables; -} - - diff --git a/colm/redfsm.h b/colm/redfsm.h deleted file mode 100644 index 39b98d5f..00000000 --- a/colm/redfsm.h +++ /dev/null @@ -1,524 +0,0 @@ -/* - * Copyright 2006-2012 Adrian Thurston <thurston@complang.org> - */ - -/* This file is part of Colm. - * - * Colm is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * Colm is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Colm; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#ifndef _REDFSM_H -#define _REDFSM_H - -#include <assert.h> -#include <string.h> -#include <string> -#include "keyops.h" -#include "vector.h" -#include "dlist.h" -#include "compare.h" -#include "bstmap.h" -#include "bstset.h" -#include "avlmap.h" -#include "avltree.h" -#include "avlbasic.h" -#include "mergesort.h" -#include "sbstmap.h" -#include "sbstset.h" -#include "sbsttable.h" -#include "global.h" -#include "pdarun.h" - -#define TRANS_ERR_TRANS 0 -#define STATE_ERR_STATE 0 -#define FUNC_NO_FUNC 0 - -using std::string; - -struct RedState; -struct InlineList; -struct Compiler; -struct ObjField; - -/* Element in list of actions. Contains the string for the code to exectute. */ -struct GenAction -{ - /* Data collected during parse. 
*/ - InputLoc loc; - char *name; - InlineList *inlineList; - int actionId; - MarkType markType; - ObjField *objField; - long markId; - - int numTransRefs; - int numToStateRefs; - int numFromStateRefs; - int numEofRefs; - - GenAction *prev, *next; -}; - -typedef DList<GenAction> GenActionList; -string nameOrLoc( GenAction *genAction ); - -/* Number of references in the final machine. */ -inline int numRefs( GenAction *genAction ) -{ - return genAction->numTransRefs + - genAction->numToStateRefs + - genAction->numFromStateRefs + - genAction->numEofRefs; -} - - -/* Forwards. */ -struct RedState; -struct FsmState; - -/* Transistion GenAction Element. */ -typedef SBstMapEl< int, GenAction* > GenActionTableEl; - -/* Transition GenAction Table. */ -struct GenActionTable - : public SBstMap< int, GenAction*, CmpOrd<int> > -{ - void setAction( int ordering, GenAction *action ); - void setActions( int *orderings, GenAction **actions, int nActs ); - void setActions( const GenActionTable &other ); -}; - -/* Compare of a whole action table element (key & value). */ -struct GenCmpActionTableEl -{ - static int compare( const GenActionTableEl &action1, - const GenActionTableEl &action2 ) - { - if ( action1.key < action2.key ) - return -1; - else if ( action1.key > action2.key ) - return 1; - else if ( action1.value < action2.value ) - return -1; - else if ( action1.value > action2.value ) - return 1; - return 0; - } -}; - -/* Compare for GenActionTable. */ -typedef CmpSTable< GenActionTableEl, GenCmpActionTableEl > GenCmpActionTable; - -/* Set of states. */ -typedef BstSet<RedState*> RedStateSet; -typedef BstSet<int> IntSet; - -/* Reduced action. 
*/ -struct RedAction -: - public AvlTreeEl<RedAction> -{ - RedAction( ) - : - key(), - eofRefs(0), - numTransRefs(0), - numToStateRefs(0), - numFromStateRefs(0), - numEofRefs(0), - bAnyNextStmt(false), - bAnyCurStateRef(false), - bAnyBreakStmt(false) - { } - - const GenActionTable &getKey() - { return key; } - - GenActionTable key; - int actListId; - int location; - IntSet *eofRefs; - - /* Number of references in the final machine. */ - bool numRefs() - { return numTransRefs + numToStateRefs + numFromStateRefs + numEofRefs; } - int numTransRefs; - int numToStateRefs; - int numFromStateRefs; - int numEofRefs; - - bool anyNextStmt() { return bAnyNextStmt; } - bool anyCurStateRef() { return bAnyCurStateRef; } - bool anyBreakStmt() { return bAnyBreakStmt; } - - bool bAnyNextStmt; - bool bAnyCurStateRef; - bool bAnyBreakStmt; -}; -typedef AvlTree<RedAction, GenActionTable, GenCmpActionTable> GenActionTableMap; - -/* Reduced transition. */ -struct RedTrans -: - public AvlTreeEl<RedTrans> -{ - RedTrans( RedState *targ, RedAction *action, int id ) - : targ(targ), action(action), id(id), labelNeeded(true) { } - - RedState *targ; - RedAction *action; - int id; - bool partitionBoundary; - bool labelNeeded; -}; - -/* Compare of transitions for the final reduction of transitions. Comparison - * is on target and the pointer to the shared action table. It is assumed that - * when this is used the action tables have been reduced. */ -struct CmpRedTrans -{ - static int compare( const RedTrans &t1, const RedTrans &t2 ) - { - if ( t1.targ < t2.targ ) - return -1; - else if ( t1.targ > t2.targ ) - return 1; - else if ( t1.action < t2.action ) - return -1; - else if ( t1.action > t2.action ) - return 1; - else - return 0; - } -}; - -typedef AvlBasic<RedTrans, CmpRedTrans> RedTransSet; - -/* Element in out range. */ -struct RedTransEl -{ - /* Constructors. 
*/ - RedTransEl( Key lowKey, Key highKey, RedTrans *value ) - : lowKey(lowKey), highKey(highKey), value(value) { } - - Key lowKey, highKey; - RedTrans *value; -}; - -typedef Vector<RedTransEl> RedTransList; -typedef Vector<RedState*> RedStateVect; - -typedef BstMapEl<RedState*, unsigned long long> RedSpanMapEl; -typedef BstMap<RedState*, unsigned long long> RedSpanMap; - -/* Compare used by span map sort. Reverse sorts by the span. */ -struct CmpRedSpanMapEl -{ - static int compare( const RedSpanMapEl &smel1, const RedSpanMapEl &smel2 ) - { - if ( smel1.value > smel2.value ) - return -1; - else if ( smel1.value < smel2.value ) - return 1; - else - return 0; - } -}; - -/* Sorting state-span map entries by span. */ -typedef MergeSort<RedSpanMapEl, CmpRedSpanMapEl> RedSpanMapSort; - -/* Set of entry ids that go into this state. */ -typedef Vector<int> EntryIdVect; -typedef Vector<char*> EntryNameVect; - -/* Maps entry ids (defined by the frontend, to reduced state ids. */ -typedef BstMap<int, int> RedEntryMap; -typedef BstMapEl<int, int> RedEntryMapEl; - -typedef Vector<int> RegionToEntry; - -typedef Vector< GenAction* > GenCondSet; - -struct Condition -{ - Condition( ) - : key(0), baseKey(0) {} - - Key key; - Key baseKey; - GenCondSet condSet; - - Condition *next, *prev; -}; -typedef DList<Condition> ConditionList; - -struct GenCondSpace -{ - Key baseKey; - GenCondSet condSet; - int condSpaceId; - - GenCondSpace *next, *prev; -}; -typedef DList<GenCondSpace> CondSpaceList; - -struct GenStateCond -{ - Key lowKey; - Key highKey; - - GenCondSpace *condSpace; - - GenStateCond *prev, *next; -}; -typedef DList<GenStateCond> GenStateCondList; -typedef Vector<GenStateCond*> StateCondVect; - -/* Reduced state. 
*/ -struct RedState -{ - RedState() - : - defTrans(0), - condList(0), - transList(0), - isFinal(false), - labelNeeded(false), - outNeeded(false), - onStateList(false), - toStateAction(0), - fromStateAction(0), - eofAction(0), - eofTrans(0), - id(0), - bAnyRegCurStateRef(false), - partitionBoundary(false), - inTrans(0), - numInTrans(0) - { } - - /* Transitions out. */ - RedTransList outSingle; - RedTransList outRange; - RedTrans *defTrans; - - /* For flat conditions. */ - Key condLowKey, condHighKey; - GenCondSpace **condList; - - /* For flat keys. */ - Key lowKey, highKey; - RedTrans **transList; - - /* The list of states that transitions from this state go to. */ - RedStateVect targStates; - - bool isFinal; - bool labelNeeded; - bool outNeeded; - bool onStateList; - RedAction *toStateAction; - RedAction *fromStateAction; - RedAction *eofAction; - RedTrans *eofTrans; - int id; - GenStateCondList stateCondList; - StateCondVect stateCondVect; - - /* Pointers for the list of states. */ - RedState *prev, *next; - - bool anyRegCurStateRef() { return bAnyRegCurStateRef; } - bool bAnyRegCurStateRef; - - int partition; - bool partitionBoundary; - - RedTrans **inTrans; - int numInTrans; -}; - -/* List of states. */ -typedef DList<RedState> RedStateList; - -/* Set of reduced transitons. Comparison is by pointer. */ -typedef BstSet< RedTrans*, CmpOrd<RedTrans*> > RedTransPtrSet; - -/* Next version of the fsm machine. */ -struct RedFsm -{ - RedFsm(); - - bool wantComplete; - bool forcedErrorState; - - int nextActionId; - int nextTransId; - - /* Next State Id doubles as the total number of state ids. 
*/ - int nextStateId; - - RedTransSet transSet; - GenActionTableMap actionMap; - RedStateList stateList; - RedStateSet entryPoints; - RedState *startState; - RedState *errState; - RedTrans *errTrans; - RedTrans *errActionTrans; - RedState *firstFinState; - int numFinStates; - int nParts; - - GenAction *allActions; - RedAction *allActionTables; - Condition *allConditions; - GenCondSpace *allCondSpaces; - RedState *allStates; - GenActionList genActionList; - ConditionList conditionList; - CondSpaceList condSpaceList; - EntryIdVect entryPointIds; - EntryNameVect entryPointNames; - RedEntryMap redEntryMap; - RegionToEntry regionToEntry; - - bool bAnyToStateActions; - bool bAnyFromStateActions; - bool bAnyRegActions; - bool bAnyEofActions; - bool bAnyActionGotos; - bool bAnyActionCalls; - bool bAnyActionRets; - bool bAnyRegActionRets; - bool bAnyRegActionByValControl; - bool bAnyRegNextStmt; - bool bAnyRegCurStateRef; - bool bAnyRegBreak; - bool bAnyLmSwitchError; - bool bAnyConditions; - - int maxState; - int maxSingleLen; - int maxRangeLen; - int maxKeyOffset; - int maxIndexOffset; - int maxIndex; - int maxActListId; - int maxActionLoc; - int maxActArrItem; - unsigned long long maxSpan; - unsigned long long maxCondSpan; - int maxFlatIndexOffset; - Key maxKey; - int maxCondOffset; - int maxCondLen; - int maxCondSpaceId; - int maxCondIndexOffset; - int maxCond; - - bool anyActions(); - bool anyToStateActions() { return bAnyToStateActions; } - bool anyFromStateActions() { return bAnyFromStateActions; } - bool anyRegActions() { return bAnyRegActions; } - bool anyEofActions() { return bAnyEofActions; } - bool anyActionGotos() { return bAnyActionGotos; } - bool anyActionCalls() { return bAnyActionCalls; } - bool anyActionRets() { return bAnyActionRets; } - bool anyRegActionRets() { return bAnyRegActionRets; } - bool anyRegActionByValControl() { return bAnyRegActionByValControl; } - bool anyRegNextStmt() { return bAnyRegNextStmt; } - bool anyRegCurStateRef() { return 
bAnyRegCurStateRef; } - bool anyRegBreak() { return bAnyRegBreak; } - bool anyLmSwitchError() { return bAnyLmSwitchError; } - bool anyConditions() { return bAnyConditions; } - - GenCondSpace *findCondSpace( Key lowKey, Key highKey ); - Condition *findCondition( Key key ); - - /* Is is it possible to extend a range by bumping ranges that span only - * one character to the singles array. */ - bool canExtend( const RedTransList &list, int pos ); - - /* Pick single transitions from the ranges. */ - void moveTransToSingle( RedState *state ); - void chooseSingle(); - - void makeFlat(); - - /* Move a selected transition from ranges to default. */ - void moveToDefault( RedTrans *defTrans, RedState *state ); - - /* Pick a default transition by largest span. */ - RedTrans *chooseDefaultSpan( RedState *state ); - void chooseDefaultSpan(); - - /* Pick a default transition by most number of ranges. */ - RedTrans *chooseDefaultNumRanges( RedState *state ); - void chooseDefaultNumRanges(); - - /* Pick a default transition tailored towards goto driven machine. */ - RedTrans *chooseDefaultGoto( RedState *state ); - void chooseDefaultGoto(); - - /* Ordering states by transition connections. */ - void optimizeStateOrdering( RedState *state ); - void optimizeStateOrdering(); - - /* Ordering states by transition connections. */ - void depthFirstOrdering( RedState *state ); - void depthFirstOrdering(); - - /* Set state ids. */ - void sequentialStateIds(); - void sortStateIdsByFinal(); - - /* Arrange states in by final id. This is a stable sort. */ - void sortStatesByFinal(); - - /* Sorting states by id. */ - void sortByStateId(); - - /* Locating the first final state. This is the final state with the lowest - * id. */ - void findFirstFinState(); - - void assignActionLocs(); - - RedTrans *getErrorTrans(); - RedState *getErrorState(); - - /* Is every char in the alphabet covered? 
*/ - bool alphabetCovered( RedTransList &outRange ); - - RedTrans *allocateTrans( RedState *targState, RedAction *actionTable ); - - void partitionFsm( int nParts ); - - void setInTrans(); - void setValueLimits(); - void assignActionIds(); - void analyzeActionList( RedAction *redAct, InlineList *inlineList ); - void analyzeAction( GenAction *act, InlineList *inlineList ); - void findFinalActionRefs(); - void analyzeMachine(); - - FsmTables *makeFsmTables(); -}; - - -#endif /* _REDFSM_H */ diff --git a/colm/resolve.cc b/colm/resolve.cc deleted file mode 100644 index a661e68e..00000000 --- a/colm/resolve.cc +++ /dev/null @@ -1,805 +0,0 @@ -/* - * Copyright 2009-2012 Adrian Thurston <thurston@complang.org> - */ - -/* This file is part of Colm. - * - * Colm is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * Colm is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Colm; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include "bytecode.h" -#include "parsedata.h" -#include "fsmrun.h" -#include <iostream> -#include <assert.h> - -using std::cout; -using std::cerr; -using std::endl; - -UniqueType *TypeRef::lookupTypeName( Compiler *pd ) -{ - /* Lookup up the qualifiction and then the name. */ - nspace = nspaceQual->getQual( pd ); - - if ( nspace == 0 ) - error(loc) << "do not have region for resolving reference" << endp; - - while ( nspace != 0 ) { - /* Search for the token in the region by typeName. 
*/ - TypeMapEl *inDict = nspace->typeMap.find( typeName ); - - if ( inDict != 0 ) { - switch ( inDict->type ) { - /* Defer to the typeRef we are an alias of. We need to guard against loops here. */ - case TypeMapEl::TypeAliasType: - return inDict->typeRef->lookupType( pd ); - - case TypeMapEl::LangElType: - return pd->findUniqueType( TYPE_TREE, inDict->value ); - } - } - - nspace = nspace->parentNamespace; - } - - error(loc) << "unknown type in typeof expression" << endp; - return 0; -} - -UniqueType *TypeRef::lookupTypeLiteral( Compiler *pd ) -{ - /* Lookup up the qualifiction and then the name. */ - nspace = nspaceQual->getQual( pd ); - - if ( nspace == 0 ) - error(loc) << "do not have region for resolving reference" << endp; - - /* Interpret escape sequences and remove quotes. */ - bool unusedCI; - String interp; - prepareLitString( interp, unusedCI, pdaLiteral->token.data, - pdaLiteral->token.loc ); - - while ( nspace != 0 ) { - LiteralDictEl *ldel = nspace->literalDict.find( interp ); - - if ( ldel != 0 ) - return pd->findUniqueType( TYPE_TREE, ldel->value->tdLangEl ); - - nspace = nspace->parentNamespace; - } - - error(loc) << "unknown type in typeof expression" << endp; - return 0; -} - -UniqueType *TypeRef::lookupTypeMap( Compiler *pd ) -{ - /* Lookup up the qualifiction and then the name. */ - nspace = nspaceQual->getQual( pd ); - - UniqueType *utKey = typeRef1->lookupType( pd ); - UniqueType *utValue = typeRef2->lookupType( pd ); - - UniqueMap searchKey( utKey, utValue ); - UniqueMap *inMap = pd->uniqueMapMap.find( &searchKey ); - if ( inMap == 0 ) { - inMap = new UniqueMap( utKey, utValue ); - pd->uniqueMapMap.insert( inMap ); - - /* FIXME: Need uniqe name allocator for types. 
*/ - static int mapId = 0; - String name( 36, "__map%d", mapId++ ); - - GenericType *generic = new GenericType( name, GEN_MAP, - pd->nextGenericId++, 0/*langEl*/, typeRef2 ); - generic->keyTypeArg = typeRef1; - - nspace->genericList.append( generic ); - - generic->declare( pd, nspace ); - - inMap->generic = generic; - } - - generic = inMap->generic; - return pd->findUniqueType( TYPE_TREE, inMap->generic->langEl ); -} - -UniqueType *TypeRef::lookupTypeList( Compiler *pd ) -{ - /* Lookup up the qualifiction and then the name. */ - nspace = nspaceQual->getQual( pd ); - - UniqueType *utValue = typeRef1->lookupType( pd ); - - UniqueList searchKey( utValue ); - UniqueList *inMap = pd->uniqueListMap.find( &searchKey ); - if ( inMap == 0 ) { - inMap = new UniqueList( utValue ); - pd->uniqueListMap.insert( inMap ); - - /* FIXME: Need uniqe name allocator for types. */ - static int listId = 0; - String name( 36, "__list%d", listId++ ); - - GenericType *generic = new GenericType( name, GEN_LIST, - pd->nextGenericId++, 0/*langEl*/, typeRef1 ); - - nspace->genericList.append( generic ); - - generic->declare( pd, nspace ); - - inMap->generic = generic; - } - - generic = inMap->generic; - return pd->findUniqueType( TYPE_TREE, inMap->generic->langEl ); -} - -UniqueType *TypeRef::lookupTypeVector( Compiler *pd ) -{ - /* Lookup up the qualifiction and then the name. */ - nspace = nspaceQual->getQual( pd ); - - UniqueType *utValue = typeRef1->lookupType( pd ); - - UniqueVector searchKey( utValue ); - UniqueVector *inMap = pd->uniqueVectorMap.find( &searchKey ); - if ( inMap == 0 ) { - inMap = new UniqueVector( utValue ); - pd->uniqueVectorMap.insert( inMap ); - - /* FIXME: Need uniqe name allocator for types. 
*/ - static int vectorId = 0; - String name( 36, "__vector%d", vectorId++ ); - - GenericType *generic = new GenericType( name, GEN_VECTOR, - pd->nextGenericId++, 0/*langEl*/, typeRef1 ); - - nspace->genericList.append( generic ); - - generic->declare( pd, nspace ); - - inMap->generic = generic; - } - - generic = inMap->generic; - return pd->findUniqueType( TYPE_TREE, inMap->generic->langEl ); -} - -UniqueType *TypeRef::lookupTypeParser( Compiler *pd ) -{ - /* Lookup up the qualifiction and then the name. */ - nspace = nspaceQual->getQual( pd ); - - UniqueType *utParse = typeRef1->lookupType( pd ); - - UniqueParser searchKey( utParse ); - UniqueParser *inMap = pd->uniqueParserMap.find( &searchKey ); - if ( inMap == 0 ) { - inMap = new UniqueParser( utParse ); - pd->uniqueParserMap.insert( inMap ); - - /* FIXME: Need uniqe name allocator for types. */ - static int accumId = 0; - String name( 36, "__accum%d", accumId++ ); - - GenericType *generic = new GenericType( name, GEN_PARSER, - pd->nextGenericId++, 0/*langEl*/, typeRef1 ); - - nspace->genericList.append( generic ); - - generic->declare( pd, nspace ); - - inMap->generic = generic; - } - - generic = inMap->generic; - return pd->findUniqueType( TYPE_TREE, inMap->generic->langEl ); -} - -UniqueType *TypeRef::lookupTypePtr( Compiler *pd ) -{ - typeRef1->lookupType( pd ); - return pd->findUniqueType( TYPE_PTR, typeRef1->uniqueType->langEl ); -} - -UniqueType *TypeRef::lookupTypeRef( Compiler *pd ) -{ - typeRef1->lookupType( pd ); - return pd->findUniqueType( TYPE_REF, typeRef1->uniqueType->langEl ); -} - -void TypeRef::resolveRepeat( Compiler *pd ) -{ - if ( uniqueType->typeId != TYPE_TREE ) - error(loc) << "cannot repeat non-tree type" << endp; - - UniqueRepeat searchKey( repeatType, uniqueType->langEl ); - UniqueRepeat *uniqueRepeat = pd->uniqeRepeatMap.find( &searchKey ); - if ( uniqueRepeat == 0 ) { - uniqueRepeat = new UniqueRepeat( repeatType, uniqueType->langEl ); - pd->uniqeRepeatMap.insert( uniqueRepeat ); - 
- LangEl *declLangEl = 0; - - switch ( repeatType ) { - case RepeatRepeat: { - /* If the factor is a repeat, create the repeat element and link the - * factor to it. */ - String repeatName( 128, "_repeat_%s", typeName.data ); - declLangEl = pd->makeRepeatProd( nspace, repeatName, nspaceQual, typeName ); - break; - } - case RepeatList: { - /* If the factor is a repeat, create the repeat element and link the - * factor to it. */ - String listName( 128, "_list_%s", typeName.data ); - declLangEl = pd->makeListProd( nspace, listName, nspaceQual, typeName ); - break; - } - case RepeatOpt: { - /* If the factor is an opt, create the opt element and link the factor - * to it. */ - String optName( 128, "_opt_%s", typeName.data ); - declLangEl = pd->makeOptProd( nspace, optName, nspaceQual, typeName ); - break; - } - - case RepeatNone: - break; - } - - uniqueRepeat->declLangEl = declLangEl; - declLangEl->repeatOf = uniqueRepeat->langEl; - } - - uniqueType = pd->findUniqueType( TYPE_TREE, uniqueRepeat->declLangEl ); -} - - -UniqueType *TypeRef::lookupType( Compiler *pd ) -{ - if ( uniqueType != 0 ) - return uniqueType; - - /* Not an iterator. May be a reference. */ - switch ( type ) { - case Name: - uniqueType = lookupTypeName( pd ); - break; - case Literal: - uniqueType = lookupTypeLiteral( pd ); - break; - case Map: - uniqueType = lookupTypeMap( pd ); - break; - case List: - uniqueType = lookupTypeList( pd ); - break; - case Vector: - uniqueType = lookupTypeVector( pd ); - break; - case Parser: - uniqueType = lookupTypeParser( pd ); - break; - case Ptr: - uniqueType = lookupTypePtr( pd ); - break; - case Ref: - uniqueType = lookupTypeRef( pd ); - break; - case Iterator: - case Unspecified: - /* No lookup needed, unique type(s) set when constructed. 
*/ - break; - } - - if ( repeatType != RepeatNone ) - resolveRepeat( pd ); - - return uniqueType; -} - -void Compiler::resolveFactor( ProdEl *fact ) -{ - fact->typeRef->lookupType( this ); - fact->langEl = fact->typeRef->uniqueType->langEl; -} - -void LangTerm::resolve( Compiler *pd ) -{ - switch ( type ) { - case ConstructType: - typeRef->lookupType( pd ); - - /* Evaluate the initialization expressions. */ - if ( fieldInitArgs != 0 ) { - for ( FieldInitVect::Iter pi = *fieldInitArgs; pi.lte(); pi++ ) - (*pi)->expr->resolve( pd ); - } - break; - case VarRefType: - break; - - case MakeTreeType: - case MakeTokenType: - case MethodCallType: - if ( args != 0 ) { - for ( ExprVect::Iter pe = *args; pe.lte(); pe++ ) - (*pe)->resolve( pd ); - } - break; - - case NumberType: - case StringType: - case MatchType: - break; - case NewType: - expr->resolve( pd ); - break; - case TypeIdType: - typeRef->lookupType( pd ); - break; - case SearchType: - typeRef->lookupType( pd ); - break; - case NilType: - case TrueType: - case FalseType: - break; - - case ParseType: - case ParseStopType: - typeRef->lookupType( pd ); - parserTypeRef->lookupType( pd ); - generic = parserTypeRef->generic; - break; - - case EmbedStringType: - break; - } -} - -void LangVarRef::resolve( Compiler *pd ) const -{ - -} - -void LangExpr::resolve( Compiler *pd ) const -{ - switch ( type ) { - case BinaryType: { - left->resolve( pd ); - right->resolve( pd ); - break; - } - case UnaryType: { - right->resolve( pd ); - break; - } - case TermType: { - term->resolve( pd ); - break; - } - } -} - -void LangStmt::resolveParserItems( Compiler *pd ) const -{ - /* Assign bind ids to the variables in the replacement. 
*/ - for ( ReplItemList::Iter item = *parserText->list; item.lte(); item++ ) { - varRef->resolve( pd ); - - switch ( item->type ) { - case ReplItem::FactorType: - break; - case ReplItem::InputText: - break; - case ReplItem::ExprType: - item->expr->resolve( pd ); - break; - } - } -} - -void LangStmt::resolve( Compiler *pd ) const -{ - switch ( type ) { - case PrintType: - case PrintXMLACType: - case PrintXMLType: - case PrintStreamType: { - /* Push the args backwards. */ - for ( ExprVect::Iter pex = exprPtrVect->last(); pex.gtb(); pex-- ) - (*pex)->resolve( pd ); - break; - } - case ExprType: { - /* Evaluate the exrepssion, then pop it immediately. */ - expr->resolve( pd ); - break; - } - case IfType: { - /* Evaluate the test. */ - expr->resolve( pd ); - - /* Analyze the if true branch. */ - for ( StmtList::Iter stmt = *stmtList; stmt.lte(); stmt++ ) - stmt->resolve( pd ); - - if ( elsePart != 0 ) - elsePart->resolve( pd ); - break; - } - case ElseType: { - for ( StmtList::Iter stmt = *stmtList; stmt.lte(); stmt++ ) - stmt->resolve( pd ); - break; - } - case RejectType: - break; - case WhileType: { - expr->resolve( pd ); - - /* Compute the while block. */ - for ( StmtList::Iter stmt = *stmtList; stmt.lte(); stmt++ ) - stmt->resolve( pd ); - break; - } - case AssignType: { - /* Evaluate the exrepssion. */ -// cout << "Assign Type" << endl; - expr->resolve( pd ); - break; - } - case ForIterType: { - typeRef->lookupType( pd ); - - /* Evaluate and push the arguments. */ - langTerm->resolve( pd ); - - /* Compile the contents. */ - for ( StmtList::Iter stmt = *stmtList; stmt.lte(); stmt++ ) - stmt->resolve( pd ); - - break; - } - case ReturnType: { - /* Evaluate the exrepssion. */ - expr->resolve( pd ); - break; - } - case BreakType: { - break; - } - case YieldType: { - /* take a reference and yield it. Immediately reset the referece. 
*/ - varRef->resolve( pd ); - break; - } - case ParserType: { - //for ( ) - break; - } - } -} - -void ObjectDef::resolve( Compiler *pd ) -{ - for ( ObjFieldList::Iter fli = *objFieldList; fli.lte(); fli++ ) { - ObjField *field = fli->value; - - if ( field->typeRef != 0 ) { - field->typeRef->lookupType( pd ); - } - } -} - -void CodeBlock::resolve( Compiler *pd ) const -{ - if ( localFrame != 0 ) - localFrame->resolve( pd ); - - for ( StmtList::Iter stmt = *stmtList; stmt.lte(); stmt++ ) - stmt->resolve( pd ); -} - -void Compiler::resolveFunction( Function *func ) -{ - CodeBlock *block = func->codeBlock; - block->resolve( this ); -} - -void Compiler::resolveUserIter( Function *func ) -{ - CodeBlock *block = func->codeBlock; - block->resolve( this ); -} - -void Compiler::resolvePreEof( TokenRegion *region ) -{ - CodeBlock *block = region->preEofBlock; - block->resolve( this ); -} - -void Compiler::resolveRootBlock() -{ - rootLocalFrame->resolve( this ); - - CodeBlock *block = rootCodeBlock; - block->resolve( this ); -} - -void Compiler::resolveTranslateBlock( LangEl *langEl ) -{ - CodeBlock *block = langEl->transBlock; - block->resolve( this ); -} - -void Compiler::resolveReductionCode( Definition *prod ) -{ - CodeBlock *block = prod->redBlock; - block->resolve( this ); -} - -void Compiler::resolveParseTree() -{ - /* Compile functions. */ - for ( FunctionList::Iter f = functionList; f.lte(); f++ ) { - if ( f->isUserIter ) - resolveUserIter( f ); - else - resolveFunction( f ); - - if ( f->typeRef != 0 ) - f->typeRef->lookupType( this ); - - for ( ParameterList::Iter param = *f->paramList; param.lte(); param++ ) - param->typeRef->lookupType( this ); - } - - /* Compile the reduction code. */ - for ( DefList::Iter prod = prodList; prod.lte(); prod++ ) { - if ( prod->redBlock != 0 ) - resolveReductionCode( prod ); - } - - /* Compile the token translation code. 
*/ - for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) { - if ( lel->transBlock != 0 ) - resolveTranslateBlock( lel ); - } - - /* Compile preeof blocks. */ - for ( RegionList::Iter r = regionList; r.lte(); r++ ) { - if ( r->preEofBlock != 0 ) - resolvePreEof( r ); - } - - /* Compile the init code */ - resolveRootBlock( ); - - /* Init all user object fields (need consistent size). */ - for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) { - ObjectDef *objDef = lel->objectDef; - if ( objDef != 0 ) { - /* Init all fields of the object. */ - for ( ObjFieldList::Iter f = *objDef->objFieldList; f.lte(); f++ ) { - f->value->typeRef->lookupType( this ); - } - } - } - - /* Init all fields of the global object. */ - for ( ObjFieldList::Iter f = *globalObjectDef->objFieldList; f.lte(); f++ ) { - f->value->typeRef->lookupType( this ); - } - -} - - -void Compiler::resolveUses() -{ - for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) { - if ( lel->objectDefUses != 0 ) { - /* Look for the production's associated region. */ - Namespace *nspace = lel->objectDefUsesQual->getQual( this ); - - if ( nspace == 0 ) - error() << "do not have namespace for resolving reference" << endp; - - /* Look up the language element in the region. */ - LangEl *langEl = findType( this, nspace, lel->objectDefUses ); - lel->objectDef = langEl->objectDef; - } - } -} - -void Compiler::resolvePatternEls() -{ - for ( PatternList::Iter pat = patternList; pat.lte(); pat++ ) { - for ( PatternItemList::Iter item = *pat->list; item.lte(); item++ ) { - switch ( item->type ) { - case PatternItem::FactorType: - /* Use pdaFactor reference resolving. */ - resolveFactor( item->factor ); - break; - case PatternItem::InputText: - /* Nothing to do here. 
*/ - break; - } - } - } -} - -void Compiler::resolveReplacementEls() -{ - for ( ReplList::Iter repl = replList; repl.lte(); repl++ ) { - for ( ReplItemList::Iter item = *repl->list; item.lte(); item++ ) { - switch ( item->type ) { - case ReplItem::FactorType: - /* Use pdaFactor reference resolving. */ - resolveFactor( item->factor ); - break; - case ReplItem::InputText: - case ReplItem::ExprType: - break; - } - } - } -} - -void Compiler::resolveParserEls() -{ - for ( ParserTextList::Iter accum = parserTextList; accum.lte(); accum++ ) { - for ( ReplItemList::Iter item = *accum->list; item.lte(); item++ ) { - switch ( item->type ) { - case ReplItem::FactorType: - resolveFactor( item->factor ); - break; - case ReplItem::InputText: - case ReplItem::ExprType: - break; - } - } - } -} - -/* Resolves production els and computes the precedence of each prod. */ -void Compiler::resolveProductionEls() -{ - /* NOTE: as we process this list it may be growing! */ - for ( DefList::Iter prod = prodList; prod.lte(); prod++ ) { - /* First resolve. */ - for ( ProdElList::Iter fact = *prod->prodElList; fact.lte(); fact++ ) - resolveFactor( fact ); - - /* If there is no explicit precdence ... */ - if ( prod->predOf == 0 ) { - /* Compute the precedence of the productions. */ - for ( ProdElList::Iter fact = prod->prodElList->last(); fact.gtb(); fact-- ) { - /* Production inherits the precedence of the last terminal with - * precedence. 
*/ - if ( fact->langEl->predType != PredNone ) { - prod->predOf = fact->langEl; - break; - } - } - } - } -} - -void Compiler::resolveGenericTypes() -{ - for ( NamespaceList::Iter ns = namespaceList; ns.lte(); ns++ ) { - for ( GenericList::Iter gen = ns->genericList; gen.lte(); gen++ ) { -// cout << __PRETTY_FUNCTION__ << " " << gen->name.data << " " << gen->typeArg << endl; - - gen->utArg = gen->typeArg->lookupType( this ); - - if ( gen->typeId == GEN_MAP ) - gen->keyUT = gen->keyTypeArg->lookupType( this ); - } - } -} - -void Compiler::makeTerminalWrappers() -{ - /* Make terminal language elements corresponding to each nonterminal in - * the grammar. */ - for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) { - if ( lel->type == LangEl::NonTerm ) { - String name( lel->name.length() + 5, "_T_%s", lel->name.data ); - LangEl *termDup = new LangEl( lel->nspace, name, LangEl::Term ); - - /* Give the dup the attributes of the nonterminal. This ensures - * that the attributes are allocated when patterns and - * constructors are parsed. */ - termDup->objectDef = lel->objectDef; - - langEls.append( termDup ); - lel->termDup = termDup; - termDup->termDup = lel; - } - } -} - -void Compiler::makeEofElements() -{ - /* Make eof language elements for each user terminal. This is a bit excessive and - * need to be reduced to the ones that we need parsers for, but we don't know that yet. - * Another pass before this one is needed. 
*/ - for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) { - if ( lel->eofLel == 0 && - lel != eofLangEl && - lel != errorLangEl && - lel != noTokenLangEl && - !( lel->tokenDef != 0 && lel->tokenDef->dupOf != 0 ) ) - { - String name( lel->name.length() + 5, "_eof_%s", lel->name.data ); - LangEl *eofLel = new LangEl( lel->nspace, name, LangEl::Term ); - - langEls.append( eofLel ); - lel->eofLel = eofLel; - eofLel->eofLel = lel; - eofLel->isEOF = true; - } - } -} - -void Compiler::makeIgnoreCollectors() -{ - for ( RegionList::Iter region = regionList; region.lte(); region++ ) { - if ( region->isFullRegion ) { - String name( region->name.length() + 5, "_ign_%s", region->name.data ); - LangEl *ignLel = new LangEl( rootNamespace, name, LangEl::Term ); - langEls.append( ignLel ); - ignLel->isCI = true; - ignLel->ciRegion = region; - - region->ciLel = ignLel; - } - } -} - -void Compiler::typeResolve() -{ - /* - * Type Resolving. - */ - - /* Resolve uses statements. */ - resolveUses(); - - /* Resolve pattern and replacement elements. */ - resolvePatternEls(); - resolveReplacementEls(); - resolveParserEls(); - - resolveParseTree(); - - resolveGenericTypes(); - - argvTypeRef->lookupType( this ); - - /* We must do this as the last step in the type resolution process because - * all type resolves can cause new language elments with associated - * productions. They get tacked onto the end of the list of productions. - * Doing it at the end results processing a growing list. */ - resolveProductionEls(); -} diff --git a/colm/rtvector.h b/colm/rtvector.h deleted file mode 100644 index e03a17f9..00000000 --- a/colm/rtvector.h +++ /dev/null @@ -1,34 +0,0 @@ -/* - * Copyright 2002, 2006, 2009 Adrian Thurston <thurston@complang.org> - */ - -/* This file is part of Aapl. 
- * - * Aapl is free software; you can redistribute it and/or modify it under the - * terms of the GNU Lesser General Public License as published by the Free - * Software Foundation; either version 2.1 of the License, or (at your option) - * any later version. - * - * Aapl is distributed in the hope that it will be useful, but WITHOUT ANY - * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS - * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for - * more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with Aapl; if not, write to the Free Software Foundation, Inc., 59 - * Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#ifndef _RT_VECTOR_H -#define _RT_VECTOR_H - -#ifdef __cplusplus -extern "C" { -#endif - -#ifdef __cplusplus -} -#endif - -#endif - diff --git a/colm/string.c b/colm/string.c deleted file mode 100644 index d670b68c..00000000 --- a/colm/string.c +++ /dev/null @@ -1,240 +0,0 @@ -/* - * Copyright 2007-2012 Adrian Thurston <thurston@complang.org> - */ - -/* This file is part of Colm. - * - * Colm is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * Colm is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with Colm; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include <colm/pool.h> -#include <colm/pdarun.h> -#include <colm/bytecode.h> - -#include <assert.h> -#include <string.h> -#include <stdio.h> -#include <stdlib.h> -#include <ctype.h> - -/* - * In this system strings are not null terminated. Often strings come from a - * parse, in which case the string is just a pointer into the the data string. - * A block in a parsed stream can house many tokens and there is no room for - * nulls. - */ - -Head *stringCopy( Program *prg, Head *head ) -{ - Head *result = 0; - if ( head != 0 ) { - if ( (char*)(head+1) == head->data ) - result = stringAllocFull( prg, head->data, head->length ); - else - result = stringAllocPointer( prg, head->data, head->length ); - } - return result; -} - -void stringFree( Program *prg, Head *head ) -{ - if ( head != 0 ) { - if ( head->location != 0 ) - locationFree( prg, head->location ); - - if ( (char*)(head+1) == head->data ) { - /* Full string allocation. */ - free( head ); - } - else { - /* Just a string head. */ - headFree( prg, head ); - } - } -} - -const char *stringData( Head *head ) -{ - if ( head == 0 ) - return 0; - return head->data; -} - -long stringLength( Head *head ) -{ - if ( head == 0 ) - return 0; - return head->length; -} - -void stringShorten( Head *head, long newlen ) -{ - assert( newlen <= head->length ); - head->length = newlen; -} - -Head *initStrSpace( long length ) -{ - /* Find the length and allocate the space for the shared string. */ - Head *head = (Head*) malloc( sizeof(Head) + length ); - //if ( head == 0 ) - // throw std::bad_alloc(); - - /* Init the header. */ - head->data = (char*)(head+1); - head->length = length; - head->location = 0; - - /* Save the pointer to the data. */ - return head; -} - -/* Create from a c-style string. 
*/ -Head *stringAllocFull( Program *prg, const char *data, long length ) -{ - /* Init space for the data. */ - Head *head = initStrSpace( length ); - - /* Copy in the data. */ - memcpy( (head+1), data, length ); - - return head; -} - -/* Create from a c-style string. */ -Head *stringAllocPointer( Program *prg, const char *data, long length ) -{ - /* Find the length and allocate the space for the shared string. */ - Head *head = headAllocate( prg ); - - /* Init the header. */ - head->data = data; - head->length = length; - - return head; -} - -Head *concatStr( Head *s1, Head *s2 ) -{ - long s1Len = s1->length; - long s2Len = s2->length; - - /* Init space for the data. */ - Head *head = initStrSpace( s1Len + s2Len ); - - /* Copy in the data. */ - memcpy( (head+1), s1->data, s1Len ); - memcpy( (char*)(head+1) + s1Len, s2->data, s2Len ); - - return head; -} - -Head *stringToUpper( Head *s ) -{ - /* Init space for the data. */ - long len = s->length; - Head *head = initStrSpace( len ); - - /* Copy in the data. */ - const char *src = s->data; - char *dst = (char*)(head+1); - int i; - for ( i = 0; i < len; i++ ) - *dst++ = toupper( *src++ ); - - return head; -} - -Head *stringToLower( Head *s ) -{ - /* Init space for the data. */ - long len = s->length; - Head *head = initStrSpace( len ); - - /* Copy in the data. */ - const char *src = s->data; - char *dst = (char*)(head+1); - int i; - for ( i = 0; i < len; i++ ) - *dst++ = tolower( *src++ ); - - return head; -} - - -/* Compare two strings. If identical returns 1, otherwise 0. */ -Word cmpString( Head *s1, Head *s2 ) -{ - if ( s1->length < s2->length ) - return -1; - else if ( s1->length > s2->length ) - return 1; - else { - char *d1 = (char*)(s1->data); - char *d2 = (char*)(s2->data); - return memcmp( d1, d2, s1->length ); - } -} - -Word strAtoi( Head *str ) -{ - /* FIXME: need to implement this by hand. There is no null terminator. 
*/ - char *nulled = (char*)malloc( str->length + 1 ); - memcpy( nulled, str->data, str->length ); - nulled[str->length] = 0; - int res = atoi( nulled ); - free( nulled ); - return res; -} - -Head *intToStr( Program *prg, Word i ) -{ - char data[20]; - sprintf( data, "%ld", i ); - return stringAllocFull( prg, data, strlen(data) ); -} - -Word strUord16( Head *head ) -{ - uchar *data = (uchar*)(head->data); - ulong res; - res = (ulong)data[1]; - res |= ((ulong)data[0]) << 8; - return res; -} - -Word strUord8( Head *head ) -{ - uchar *data = (uchar*)(head->data); - ulong res = (ulong)data[0]; - return res; -} - -Head *makeLiteral( Program *prg, long offset ) -{ - return stringAllocPointer( prg, - prg->rtd->litdata[offset], - prg->rtd->litlen[offset] ); -} - -Head *stringSprintf( Program *prg, Str *format, Int *integer ) -{ - Head *formatHead = format->value; - long written = snprintf( 0, 0, stringData(formatHead), integer->value ); - Head *head = initStrSpace( written+1 ); - written = snprintf( (char*)head->data, written+1, stringData(formatHead), integer->value ); - head->length -= 1; - return head; -} diff --git a/colm/synthesis.cc b/colm/synthesis.cc deleted file mode 100644 index 794927ad..00000000 --- a/colm/synthesis.cc +++ /dev/null @@ -1,3277 +0,0 @@ -/* - * Copyright 2007-2012 Adrian Thurston <thurston@complang.org> - */ - -/* This file is part of Colm. - * - * Colm is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * Colm is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with Colm; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include "bytecode.h" -#include "parsedata.h" -#include "fsmrun.h" -#include "pdarun.h" -#include "input.h" -#include <iostream> -#include <assert.h> - -using std::cout; -using std::cerr; -using std::endl; - -void Compiler::initUniqueTypes( ) -{ - uniqueTypeNil = new UniqueType( TYPE_NIL ); - uniqueTypePtr = new UniqueType( TYPE_TREE, ptrLangEl ); - uniqueTypeBool = new UniqueType( TYPE_TREE, boolLangEl ); - uniqueTypeInt = new UniqueType( TYPE_TREE, intLangEl ); - uniqueTypeStr = new UniqueType( TYPE_TREE, strLangEl ); - uniqueTypeStream = new UniqueType( TYPE_TREE, streamLangEl ); - uniqueTypeInput = new UniqueType( TYPE_TREE, inputLangEl ); - uniqueTypeIgnore = new UniqueType( TYPE_TREE, ignoreLangEl ); - uniqueTypeAny = new UniqueType( TYPE_TREE, anyLangEl ); - - uniqeTypeMap.insert( uniqueTypeNil ); - uniqeTypeMap.insert( uniqueTypePtr ); - uniqeTypeMap.insert( uniqueTypeBool ); - uniqeTypeMap.insert( uniqueTypeInt ); - uniqeTypeMap.insert( uniqueTypeStr ); - uniqeTypeMap.insert( uniqueTypeStream ); - uniqeTypeMap.insert( uniqueTypeInput ); - uniqeTypeMap.insert( uniqueTypeIgnore ); - uniqeTypeMap.insert( uniqueTypeAny ); -} - -IterDef::IterDef( Type type ) : - type(type), - func(0), - useFuncId(false), - useSearchUT(false) -{ - switch ( type ) { - case Tree: - inCreateWV = IN_TRITER_FROM_REF; - inCreateWC = IN_TRITER_FROM_REF; - inDestroy = IN_TRITER_DESTROY; - inAdvance = IN_TRITER_ADVANCE; - - inGetCurR = IN_TRITER_GET_CUR_R; - inGetCurWC = IN_TRITER_GET_CUR_WC; - inSetCurWC = IN_TRITER_SET_CUR_WC; - inRefFromCur = IN_TRITER_REF_FROM_CUR; - useSearchUT = true; - break; - case Child: - inCreateWV = IN_TRITER_FROM_REF; - inCreateWC = IN_TRITER_FROM_REF; - inDestroy = IN_TRITER_DESTROY; - inAdvance = IN_TRITER_NEXT_CHILD; - - inGetCurR = 
IN_TRITER_GET_CUR_R; - inGetCurWC = IN_TRITER_GET_CUR_WC; - inSetCurWC = IN_TRITER_SET_CUR_WC; - inRefFromCur = IN_TRITER_REF_FROM_CUR; - useSearchUT = true; - break; - case RevChild: - inCreateWV = IN_REV_TRITER_FROM_REF; - inCreateWC = IN_REV_TRITER_FROM_REF; - inDestroy = IN_REV_TRITER_DESTROY; - inAdvance = IN_REV_TRITER_PREV_CHILD; - - inGetCurR = IN_TRITER_GET_CUR_R; - inGetCurWC = IN_TRITER_GET_CUR_WC; - inSetCurWC = IN_TRITER_SET_CUR_WC; - inRefFromCur = IN_TRITER_REF_FROM_CUR; - useSearchUT = true; - break; - - case Repeat: - inCreateWV = IN_TRITER_FROM_REF; - inCreateWC = IN_TRITER_FROM_REF; - inDestroy = IN_TRITER_DESTROY; - inAdvance = IN_TRITER_NEXT_REPEAT; - - inGetCurR = IN_TRITER_GET_CUR_R; - inGetCurWC = IN_TRITER_GET_CUR_WC; - inSetCurWC = IN_TRITER_SET_CUR_WC; - inRefFromCur = IN_TRITER_REF_FROM_CUR; - useSearchUT = true; - break; - - case RevRepeat: - inCreateWV = IN_TRITER_FROM_REF; - inCreateWC = IN_TRITER_FROM_REF; - inDestroy = IN_TRITER_DESTROY; - inAdvance = IN_TRITER_PREV_REPEAT; - - inGetCurR = IN_TRITER_GET_CUR_R; - inGetCurWC = IN_TRITER_GET_CUR_WC; - inSetCurWC = IN_TRITER_SET_CUR_WC; - inRefFromCur = IN_TRITER_REF_FROM_CUR; - useSearchUT = true; - break; - - case User: - assert(false); - } -} - -IterDef::IterDef( Type type, Function *func ) : - type(type), - func(func), - useFuncId(true), - useSearchUT(true), - inCreateWV(IN_UITER_CREATE_WV), - inCreateWC(IN_UITER_CREATE_WC), - inDestroy(IN_UITER_DESTROY), - inAdvance(IN_UITER_ADVANCE), - inGetCurR(IN_UITER_GET_CUR_R), - inGetCurWC(IN_UITER_GET_CUR_WC), - inSetCurWC(IN_UITER_SET_CUR_WC), - inRefFromCur(IN_UITER_REF_FROM_CUR) -{} - -ObjMethod *initFunction( UniqueType *retType, ObjectDef *obj, - const String &name, int methIdWV, int methIdWC, bool isConst ) -{ - ObjMethod *objMethod = new ObjMethod( retType, name, - methIdWV, methIdWC, 0, 0, 0, isConst ); - obj->objMethodMap->insert( name, objMethod ); - return objMethod; -} - -ObjMethod *initFunction( UniqueType *retType, ObjectDef 
*obj, - const String &name, int methIdWV, int methIdWC, UniqueType *arg1, bool isConst ) -{ - UniqueType *args[] = { arg1 }; - ObjMethod *objMethod = new ObjMethod( retType, name, - methIdWV, methIdWC, 1, args, 0, isConst ); - obj->objMethodMap->insert( name, objMethod ); - return objMethod; -} - -ObjMethod *initFunction( UniqueType *retType, ObjectDef *obj, - const String &name, int methIdWV, int methIdWC, - UniqueType *arg1, UniqueType *arg2, bool isConst ) -{ - UniqueType *args[] = { arg1, arg2 }; - ObjMethod *objMethod = new ObjMethod( retType, name, - methIdWV, methIdWC, 2, args, 0, isConst ); - obj->objMethodMap->insert( name, objMethod ); - return objMethod; -} - -IterDef *Compiler::findIterDef( IterDef::Type type, Function *func ) -{ - IterDefSetEl *el = iterDefSet.find( IterDef( type, func ) ); - if ( el == 0 ) - el = iterDefSet.insert( IterDef( type, func ) ); - return &el->key; -} - -IterDef *Compiler::findIterDef( IterDef::Type type ) -{ - IterDefSetEl *el = iterDefSet.find( IterDef( type ) ); - if ( el == 0 ) - el = iterDefSet.insert( IterDef( type ) ); - return &el->key; -} - -UniqueType *Compiler::findUniqueType( int typeId ) -{ - UniqueType searchKey( typeId ); - UniqueType *uniqueType = uniqeTypeMap.find( &searchKey ); - if ( uniqueType == 0 ) { - uniqueType = new UniqueType( typeId ); - uniqeTypeMap.insert( uniqueType ); - } - return uniqueType; -} - -UniqueType *Compiler::findUniqueType( int typeId, LangEl *langEl ) -{ - UniqueType searchKey( typeId, langEl ); - UniqueType *uniqueType = uniqeTypeMap.find( &searchKey ); - if ( uniqueType == 0 ) { - uniqueType = new UniqueType( typeId, langEl ); - uniqeTypeMap.insert( uniqueType ); - } - return uniqueType; -} - -UniqueType *Compiler::findUniqueType( int typeId, IterDef *iterDef ) -{ - UniqueType searchKey( typeId, iterDef ); - UniqueType *uniqueType = uniqeTypeMap.find( &searchKey ); - if ( uniqueType == 0 ) { - uniqueType = new UniqueType( typeId, iterDef ); - uniqeTypeMap.insert( uniqueType ); - 
} - return uniqueType; -} - -void ObjectDef::iterPushScope() -{ - //cout << "iter push scope "; - if ( scope->childIter == 0 ) { - scope->childIter = scope->children.head; - } - else { - scope->childIter = scope->childIter->next; - /* Resetting. */ - if ( scope->childIter == 0 ) - scope ->childIter = scope->children.head; - } - - scope = scope->childIter; -} - -void ObjectDef::iterPopScope() -{ - //cout << "iter pop scope" << endl; - scope = scope->parentScope; -} - -void ObjectDef::pushScope() -{ - ObjNameScope *newScope = new ObjNameScope; - newScope->objFieldMap = new ObjFieldMap; - - newScope->parentScope = scope; - scope->children.append( newScope ); - - scope = newScope; -} - -void ObjectDef::popScope() -{ - scope = scope->parentScope; -} - -void ObjectDef::insertField( const String &name, ObjField *value ) -{ - scope->objFieldMap->insert( name, value ); - objFieldList->append( value ); -} - -/* Recurisve find through a single object def's scope. */ -ObjField *ObjectDef::findFieldInScope( const String &name, ObjNameScope *inScope ) -{ - ObjFieldMapEl *objDefMapEl = inScope->objFieldMap->find( name ); - if ( objDefMapEl != 0 ) - return objDefMapEl->value; - if ( inScope->parentScope != 0 ) - return findFieldInScope( name, inScope->parentScope ); - return 0; -} - -ObjField *ObjectDef::checkRedecl( const String &name ) -{ - //cout << "looking for " << name << endl; - ObjFieldMapEl *objDefMapEl = scope->objFieldMap->find( name ); - if ( objDefMapEl != 0 ) - return objDefMapEl->value; - return 0; - -} - -/* 0-based. 
*/ -ObjField *ObjectDef::findFieldNum( long offset ) -{ - int fn = 0; - ObjFieldList::Iter field = *objFieldList; - while ( fn < offset ) { - fn++; - field++; - } - return field->value; -} - -ObjField *ObjectDef::findField( const String &name ) -{ - //cout << "looking for " << name << endl; - ObjField *objField = findFieldInScope( name, scope ); - if ( objField != 0 ) - return objField; - return 0; -} - -ObjMethod *ObjectDef::findMethod( const String &name ) -{ - ObjMethodMapEl *objMethodMapEl = objMethodMap->find( name ); - if ( objMethodMapEl != 0 ) - return objMethodMapEl->value; - return 0; -} - -long sizeOfField( UniqueType *fieldUT ) -{ - long size = 0; - if ( fieldUT->typeId == TYPE_ITER ) { - /* Select on the iterator type. */ - switch ( fieldUT->iterDef->type ) { - case IterDef::Tree: - case IterDef::Child: - case IterDef::Repeat: - case IterDef::RevRepeat: - size = sizeof(TreeIter) / sizeof(Word); - break; - case IterDef::RevChild: - size = sizeof(RevTreeIter) / sizeof(Word); - break; - - case IterDef::User: - /* User iterators are just a pointer to the UserIter struct. The - * struct needs to go right beneath the call to the user iterator - * so it can be found by a yield. It is therefore allocated on the - * stack right before the call. 
*/ - size = 1; - break; - } - } - else if ( fieldUT->typeId == TYPE_REF ) - size = 2; - else - size = 1; - - return size; -} - -void ObjectDef::referenceField( Compiler *pd, ObjField *field ) -{ - field->beenReferenced = true; - initField( pd, field ); -} - -void ObjectDef::initField( Compiler *pd, ObjField *field ) -{ - if ( !field->beenInitialized ) { - field->beenInitialized = true; - UniqueType *fieldUT = field->typeRef->uniqueType; - - if ( type == FrameType ) { - nextOffset += sizeOfField( fieldUT ); - field->offset = -nextOffset; - - pd->initLocalInstructions( field ); - } - else if ( field->isRhsGet ) { - field->useOffset = false; - field->inGetR = IN_GET_RHS_VAL_R; - field->inGetWC = IN_GET_RHS_VAL_WC; - field->inGetWV = IN_GET_RHS_VAL_WV; - field->inSetWC = IN_SET_RHS_VAL_WC; - field->inSetWV = IN_SET_RHS_VAL_WC; - } - else { - field->offset = nextOffset; - nextOffset += sizeOfField( fieldUT ); - - /* Initialize the instructions. */ - pd->initFieldInstructions( field ); - } - } -} - -UniqueType *LangVarRef::loadFieldInstr( Compiler *pd, CodeVect &code, - ObjectDef *inObject, ObjField *el, bool forWriting, bool revert ) const -{ - /* Ensure that the field is referenced. */ - inObject->referenceField( pd, el ); - - UniqueType *elUT = el->typeRef->uniqueType; - - /* If it's a reference then we load it read always. */ - if ( forWriting ) { - /* The instruction, depends on whether or not we are reverting. */ - if ( elUT->typeId == TYPE_ITER ) - code.append( elUT->iterDef->inGetCurWC ); - else if ( pd->revertOn && revert ) - code.append( el->inGetWV ); - else - code.append( el->inGetWC ); - } - else { - /* Loading something for writing */ - if ( elUT->typeId == TYPE_ITER ) - code.append( elUT->iterDef->inGetCurR ); - else - code.append( el->inGetR ); - } - - if ( el->useOffset ) { - /* Gets of locals and fields require offsets. Fake vars like token - * data and lhs don't require it. 
*/ - code.appendHalf( el->offset ); - } - else if ( el->isRhsGet ) { - /* Need to place the array computing the val. */ - code.append( el->rhsVal.length() ); - for ( Vector<RhsVal>::Iter rg = el->rhsVal; rg.lte(); rg++ ) { - code.append( rg->prodNum ); - code.append( rg->childNum ); - } - } - - /* If we are dealing with an iterator then dereference it. */ - if ( elUT->typeId == TYPE_ITER ) - elUT = el->typeRef->searchUniqueType; - - return elUT; -} - -ObjectDef *objDefFromUT( Compiler *pd, UniqueType *ut ) -{ - ObjectDef *objDef = 0; - if ( ut->typeId == TYPE_TREE || ut->typeId == TYPE_REF ) - objDef = ut->langEl->objectDef; - else { - /* This should have generated a compiler error. */ - assert(false); - } - return objDef; -} - -/* The qualification must start at a local frame. There cannot be any pointer. */ -long LangVarRef::loadQualificationRefs( Compiler *pd, CodeVect &code ) const -{ - long count = 0; - ObjectDef *rootObj = pd->curLocalFrame; - - /* Start the search from the root object. */ - ObjectDef *searchObjDef = rootObj; - - for ( QualItemVect::Iter qi = *qual; qi.lte(); qi++ ) { - /* Lookup the field in the current qualification. 
*/ - ObjField *el = searchObjDef->findField( qi->data ); - if ( el == 0 ) - error(qi->loc) << "cannot resolve qualification " << qi->data << endp; - - if ( qi.pos() > 0 ) { - code.append( IN_REF_FROM_QUAL_REF ); - code.appendHalf( 0 ); - code.appendHalf( el->offset ); - } - else if ( el->typeRef->iterDef != 0 ) { - code.append( el->typeRef->iterDef->inRefFromCur ); - code.appendHalf( el->offset ); - } - else if ( el->typeRef->type == TypeRef::Ref ) { - code.append( IN_REF_FROM_REF ); - code.appendHalf( el->offset ); - } - else { - code.append( IN_REF_FROM_LOCAL ); - code.appendHalf( el->offset ); - } - - UniqueType *elUT = el->typeRef->uniqueType; - if ( elUT->typeId == TYPE_ITER ) - elUT = el->typeRef->searchUniqueType; - - assert( qi->type == QualItem::Dot ); - - searchObjDef = objDefFromUT( pd, elUT ); - count += 1; - } - return count; -} - -void LangVarRef::loadQualification( Compiler *pd, CodeVect &code, - ObjectDef *rootObj, int lastPtrInQual, bool forWriting, bool revert ) const -{ - /* Start the search from the root object. */ - ObjectDef *searchObjDef = rootObj; - - for ( QualItemVect::Iter qi = *qual; qi.lte(); qi++ ) { - /* Lookup the field int the current qualification. */ - ObjField *el = searchObjDef->findField( qi->data ); - if ( el == 0 ) - error(qi->loc) << "cannot resolve qualification " << qi->data << endp; - - if ( forWriting && el->refActive ) - error(qi->loc) << "reference active, cannot write to object" << endp; - - bool lfForWriting = forWriting; - bool lfRevert = revert; - - /* If there is a pointer in the qualification, we need to compute - * forWriting and revert. */ - if ( lastPtrInQual >= 0 ) { - if ( qi.pos() <= lastPtrInQual ) { - /* If we are before or at the pointer we are strictly read - * only, regardless of the origin. */ - lfForWriting = false; - lfRevert = false; - } - else { - /* If we are past the pointer then we are always reverting - * because the object is global. Forwriting is as passed in. 
- * */ - lfRevert = true; - } - } - - UniqueType *qualUT = loadFieldInstr( pd, code, searchObjDef, - el, lfForWriting, lfRevert ); - - if ( qi->type == QualItem::Dot ) { - /* Cannot a reference. Iterator yes (access of the iterator not - * hte current) */ - if ( qualUT->typeId == TYPE_PTR ) - error(loc) << "dot cannot be used to access a pointer" << endp; - } - else if ( qi->type == QualItem::Arrow ) { - if ( qualUT->typeId == TYPE_PTR ) { - /* Always dereference references when used for qualification. If - * this is the last one then we must start with the reverse - * execution business. */ - if ( pd->revertOn && qi.pos() == lastPtrInQual && forWriting ) { - /* This is like a global load. */ - code.append( IN_PTR_DEREF_WV ); - } - else { - /* If reading or not yet the last in ref then we only need a - * reading deref. */ - code.append( IN_PTR_DEREF_R ); - } - - qualUT = pd->findUniqueType( TYPE_TREE, qualUT->langEl ); - } - else { - error(loc) << "arrow operator cannot be used to access this type" << endp; - } - } - - searchObjDef = objDefFromUT( pd, qualUT ); - } -} - -void LangVarRef::loadContextObj( Compiler *pd, CodeVect &code, - int lastPtrInQual, bool forWriting ) const -{ - /* Start the search in the global object. */ - ObjectDef *rootObj = pd->context->contextObjDef; - - if ( forWriting && lastPtrInQual < 0 ) { - /* If we are writing an no reference was found in the qualification - * then load the gloabl with a revert. */ - if ( pd->revertOn ) - code.append( IN_LOAD_CONTEXT_WV ); - else - code.append( IN_LOAD_CONTEXT_WC ); - } - else { - /* Either we are reading or we are loading a pointer that will be - * dereferenced. */ - code.append( IN_LOAD_CONTEXT_R ); - } - - loadQualification( pd, code, rootObj, lastPtrInQual, forWriting, true ); -} - -void LangVarRef::loadGlobalObj( Compiler *pd, CodeVect &code, - int lastPtrInQual, bool forWriting ) const -{ - /* Start the search in the global object. 
*/ - ObjectDef *rootObj = pd->globalObjectDef; - - if ( forWriting && lastPtrInQual < 0 ) { - /* If we are writing an no reference was found in the qualification - * then load the gloabl with a revert. */ - if ( pd->revertOn ) - code.append( IN_LOAD_GLOBAL_WV ); - else - code.append( IN_LOAD_GLOBAL_WC ); - } - else { - /* Either we are reading or we are loading a pointer that will be - * dereferenced. */ - code.append( IN_LOAD_GLOBAL_R ); - } - - loadQualification( pd, code, rootObj, lastPtrInQual, forWriting, true ); -} - -void LangVarRef::loadCustom( Compiler *pd, CodeVect &code, - int lastPtrInQual, bool forWriting ) const -{ - /* Start the search in the local frame. */ - loadQualification( pd, code, pd->curLocalFrame, lastPtrInQual, forWriting, true ); -} - -void LangVarRef::loadLocalObj( Compiler *pd, CodeVect &code, - int lastPtrInQual, bool forWriting ) const -{ - /* Start the search in the local frame. */ - loadQualification( pd, code, pd->curLocalFrame, lastPtrInQual, forWriting, false ); -} - -bool LangVarRef::isLocalRef( Compiler *pd ) const -{ - if ( qual->length() > 0 ) { - if ( pd->curLocalFrame->findField( qual->data[0].data ) != 0 ) - return true; - } - else if ( pd->curLocalFrame->findField( name ) != 0 ) - return true; - else if ( pd->curLocalFrame->findMethod( name ) != 0 ) - return true; - - return false; -} - -bool LangVarRef::isContextRef( Compiler *pd ) const -{ - if ( pd->context != 0 ) { - if ( qual->length() > 0 ) { - if ( pd->context->contextObjDef->findField( qual->data[0].data ) != 0 ) - return true; - } - else if ( pd->context->contextObjDef->findField( name ) != 0 ) - return true; - else if ( pd->context->contextObjDef->findMethod( name ) != 0 ) - return true; - } - - return false; -} - -bool LangVarRef::isCustom( Compiler *pd ) const -{ - if ( qual->length() > 0 ) { - ObjField *field = pd->curLocalFrame->findField( qual->data[0].data ); - if ( field != 0 && field->isCustom ) - return true; - } - else { - ObjField *field = 
pd->curLocalFrame->findField( name ); - if ( field != 0 ) { - if ( field->isCustom ) - return true; - } - else { - ObjMethod *method = pd->curLocalFrame->findMethod( name ); - if ( method != 0 && method->isCustom ) - return true; - } - - } - return false; -} - -void LangVarRef::loadObj( Compiler *pd, CodeVect &code, - int lastPtrInQual, bool forWriting ) const -{ - if ( isCustom( pd ) ) - loadCustom( pd, code, lastPtrInQual, forWriting ); - else if ( isLocalRef( pd ) ) - loadLocalObj( pd, code, lastPtrInQual, forWriting ); - else if ( isContextRef( pd ) ) - loadContextObj( pd, code, lastPtrInQual, forWriting ); - else - loadGlobalObj( pd, code, lastPtrInQual, forWriting ); -} - -VarRefLookup LangVarRef::lookupQualification( Compiler *pd, ObjectDef *rootDef ) const -{ - int lastPtrInQual = -1; - ObjectDef *searchObjDef = rootDef; - int firstConstPart = -1; - - for ( QualItemVect::Iter qi = *qual; qi.lte(); qi++ ) { - /* Lookup the field int the current qualification. */ - ObjField *el = searchObjDef->findField( qi->data ); - if ( el == 0 ) - error(qi->loc) << "cannot resolve qualification " << qi->data << endp; - - /* Lookup the type of the field. */ - UniqueType *qualUT = el->typeRef->uniqueType; - - /* If we are dealing with an iterator then dereference it. */ - if ( qualUT->typeId == TYPE_ITER ) - qualUT = el->typeRef->searchUniqueType; - - /* Is it const? */ - if ( firstConstPart < 0 && el->isConst ) - firstConstPart = qi.pos(); - - /* Check for references. When loop is done we will have the last one - * present, if any. */ - if ( qualUT->typeId == TYPE_PTR ) - lastPtrInQual = qi.pos(); - - if ( qi->type == QualItem::Dot ) { - /* Cannot dot a reference. 
Iterator yes (access of the iterator - * not the current) */ - if ( qualUT->typeId == TYPE_PTR ) - error(loc) << "dot cannot be used to access a pointer" << endp; - } - else if ( qi->type == QualItem::Arrow ) { - if ( qualUT->typeId == TYPE_ITER ) - qualUT = el->typeRef->searchUniqueType; - else if ( qualUT->typeId == TYPE_PTR ) - qualUT = pd->findUniqueType( TYPE_TREE, qualUT->langEl ); - } - - searchObjDef = objDefFromUT( pd, qualUT ); - } - - return VarRefLookup( lastPtrInQual, firstConstPart, searchObjDef ); -} - -VarRefLookup LangVarRef::lookupObj( Compiler *pd ) const -{ - ObjectDef *rootDef; - if ( isLocalRef( pd ) ) - rootDef = pd->curLocalFrame; - else if ( isContextRef( pd ) ) - rootDef = pd->context->contextObjDef; - else - rootDef = pd->globalObjectDef; - - return lookupQualification( pd, rootDef ); -} - -VarRefLookup LangVarRef::lookupField( Compiler *pd ) const -{ - /* Lookup the object that the field is in. */ - VarRefLookup lookup = lookupObj( pd ); - - /* Lookup the field. */ - ObjField *field = lookup.inObject->findField( name ); - if ( field == 0 ) - error(loc) << "cannot find name " << name << " in object" << endp; - - lookup.objField = field; - lookup.uniqueType = field->typeRef->uniqueType; - - if ( field->typeRef->searchUniqueType != 0 ) - lookup.iterSearchUT = field->typeRef->searchUniqueType; - - return lookup; -} - - -VarRefLookup LangVarRef::lookupMethod( Compiler *pd ) -{ - /* Lookup the object that the field is in. */ - VarRefLookup lookup = lookupObj( pd ); - - /* Find the method. */ - assert( lookup.inObject->objMethodMap != 0 ); - ObjMethod *method = lookup.inObject->findMethod( name ); - if ( method == 0 ) { - /* Not found as a method, try it as an object on which we will call a - * default function. */ - qual->append( QualItem( InputLoc(), name, QualItem::Dot ) ); - name = "finish"; - - /* Lookup the object that the field is in. */ - VarRefLookup lookup = lookupObj( pd ); - - /* Find the method. 
*/ - assert( lookup.inObject->objMethodMap != 0 ); - method = lookup.inObject->findMethod( name ); - if ( method == 0 ) - error(loc) << "cannot find " << name << "(...) in object" << endp; - } - - lookup.objMethod = method; - lookup.uniqueType = method->returnUT; - - return lookup; -} - -void LangVarRef::setFieldInstr( Compiler *pd, CodeVect &code, - ObjectDef *inObject, ObjField *el, UniqueType *exprUT, bool revert ) const -{ - /* Ensure that the field is referenced. */ - inObject->referenceField( pd, el ); - - if ( pd->revertOn && revert ) - code.append( el->inSetWV ); - else - code.append( el->inSetWC ); - - /* Maybe write out an offset. */ - if ( el->useOffset ) - code.appendHalf( el->offset ); -} - -bool castAssignment( Compiler *pd, CodeVect &code, UniqueType *destUT, - UniqueType *destSearchUT, UniqueType *srcUT ) -{ - if ( destUT == srcUT ) - return true; - - /* Casting trees to any. */ - if ( destUT->typeId == TYPE_TREE && destUT->langEl == pd->anyLangEl && - srcUT->typeId == TYPE_TREE ) - return true; - - /* Setting a reference from a tree. */ - if ( destUT->typeId == TYPE_REF && srcUT->typeId == TYPE_TREE && - destUT->langEl == srcUT->langEl ) - return true; - - /* Setting a tree from a reference. */ - if ( destUT->typeId == TYPE_TREE && srcUT->typeId == TYPE_REF && - destUT->langEl == srcUT->langEl ) - return true; - - /* Setting an iterator from a tree. */ - if ( destUT->typeId == TYPE_ITER && srcUT->typeId == TYPE_TREE && - destSearchUT->langEl == srcUT->langEl ) - return true; - - /* Assigning nil to a tree. */ - if ( destUT->typeId == TYPE_TREE && srcUT->typeId == TYPE_NIL ) - return true; - - /* Assigning nil to a pointer. 
*/ - if ( destUT->typeId == TYPE_PTR && srcUT->typeId == TYPE_NIL ) - return true; - - return false; -} - -void LangVarRef::setField( Compiler *pd, CodeVect &code, - ObjectDef *inObject, UniqueType *exprUT, bool revert ) const -{ - ObjField *el = inObject->findField( name ); - if ( el == 0 ) - error(loc) << "cannot find name " << name << " in object" << endp; - - setFieldInstr( pd, code, inObject, el, exprUT, revert ); -} - -void LangVarRef::setFieldIter( Compiler *pd, CodeVect &code, - ObjectDef *inObject, UniqueType *objUT, UniqueType *exprType, bool revert ) const -{ - ObjField *el = inObject->findField( name ); - if ( el == 0 ) - error(loc) << "cannot find name " << name << " in object" << endp; - - code.append( objUT->iterDef->inSetCurWC ); - code.appendHalf( el->offset ); -} - -UniqueType *LangVarRef::evaluate( Compiler *pd, CodeVect &code, bool forWriting ) const -{ - /* Lookup the loadObj. */ - VarRefLookup lookup = lookupField( pd ); - - /* Load the object, if any. */ - loadObj( pd, code, lookup.lastPtrInQual, forWriting ); - - /* Load the field. */ - UniqueType *ut = loadFieldInstr( pd, code, lookup.inObject, - lookup.objField, forWriting, false ); - - return ut; -} - -void LangVarRef::canTakeRef( Compiler *pd, VarRefLookup &lookup ) const -{ - bool canTake = false; - - /* If the var is not a local, it must be an attribute accessed - * via a local and attributes. */ - if ( lookup.inObject->type == ObjectDef::FrameType ) - canTake = true; - else if ( isLocalRef(pd) && lookup.lastPtrInQual < 0 && lookup.uniqueType->typeId != TYPE_PTR ) - canTake = true; - - if ( !canTake ) { - error(loc) << "can only take references of locals or " - "attributes accessed via a local" << endp; - } - - if ( lookup.objField->refActive ) - error(loc) << "reference currently active, cannot take another" << endp; -} - -/* Return the field referenced. 
*/ -ObjField *LangVarRef::preEvaluateRef( Compiler *pd, CodeVect &code ) const -{ - VarRefLookup lookup = lookupField( pd ); - - canTakeRef( pd, lookup ); - - loadQualificationRefs( pd, code ); - - return lookup.objField; -} - -/* Return the field referenced. */ -ObjField *LangVarRef::evaluateRef( Compiler *pd, CodeVect &code, long pushCount ) const -{ - VarRefLookup lookup = lookupField( pd ); - - canTakeRef( pd, lookup ); - - /* Ensure that the field is referenced. */ - lookup.inObject->referenceField( pd, lookup.objField ); - - /* Note that we could have modified children. */ - if ( qual->length() == 0 ) - lookup.objField->refActive = true; - - /* Whenever we take a reference we have to assume writing and that the - * tree is dirty. */ - lookup.objField->dirtyTree = true; - - if ( qual->length() > 0 ) { - code.append( IN_REF_FROM_QUAL_REF ); - code.appendHalf( pushCount ); - code.appendHalf( lookup.objField->offset ); - } - else if ( lookup.objField->typeRef->iterDef != 0 ) { - code.append( lookup.objField->typeRef->iterDef->inRefFromCur ); - code.appendHalf( lookup.objField->offset ); - } - else if ( lookup.objField->typeRef->type == TypeRef::Ref ) { - code.append( IN_REF_FROM_REF ); - code.appendHalf( lookup.objField->offset ); - } - else { - code.append( IN_REF_FROM_LOCAL ); - code.appendHalf( lookup.objField->offset ); - } - - return lookup.objField; -} - -ObjField **LangVarRef::evaluateArgs( Compiler *pd, CodeVect &code, - VarRefLookup &lookup, ExprVect *args ) const -{ - /* Parameter list is given only for user defined methods. Otherwise it - * will be null. */ - ParameterList *paramList = lookup.objMethod->paramList; - - /* Match the number of arguments. */ - int numArgs = args != 0 ? args->length() : 0; - if ( numArgs != lookup.objMethod->numParams ) - error(loc) << "wrong number of arguments" << endp; - - /* This is for storing the object fields used by references. 
*/ - ObjField **paramRefs = new ObjField*[numArgs]; - memset( paramRefs, 0, sizeof(ObjField*) * numArgs ); - - /* Evaluate and push the args. */ - if ( args != 0 ) { - /* We use this only if there is a paramter list. */ - ParameterList::Iter p; - long pushCount = 0; - - /* First pass we need to push object loads for reference parameters. */ - paramList != 0 && ( p = *paramList ); - for ( ExprVect::Iter pe = *args; pe.lte(); pe++ ) { - /* Get the expression and the UT for the arg. */ - LangExpr *expression = *pe; - UniqueType *paramUT = lookup.objMethod->paramUTs[pe.pos()]; - - if ( paramUT->typeId == TYPE_REF ) { - /* Make sure we are dealing with a variable reference. */ - if ( expression->type != LangExpr::TermType ) - error(loc) << "not a term: argument must be a local variable" << endp; - if ( expression->term->type != LangTerm::VarRefType ) - error(loc) << "not a variable: argument must be a local variable" << endp; - - /* Lookup the field. */ - LangVarRef *varRef = expression->term->varRef; - - ObjField *refOf = varRef->preEvaluateRef( pd, code ); - paramRefs[pe.pos()] = refOf; - - pushCount += varRef->qual->length() * 2; - } - - /* Advance the parameter list iterator if we have it. */ - paramList != 0 && p.increment(); - } - - paramList != 0 && ( p = *paramList ); - for ( ExprVect::Iter pe = *args; pe.lte(); pe++ ) { - /* Get the expression and the UT for the arg. */ - LangExpr *expression = *pe; - UniqueType *paramUT = lookup.objMethod->paramUTs[pe.pos()]; - - if ( paramUT->typeId == TYPE_REF ) { - - /* Make sure we are dealing with a variable reference. */ - if ( expression->type != LangExpr::TermType ) - error(loc) << "not a term: argument must be a local variable" << endp; - if ( expression->term->type != LangTerm::VarRefType ) - error(loc) << "not a variable: argument must be a local variable" << endp; - - /* Lookup the field. 
*/ - LangVarRef *varRef = expression->term->varRef; - - pushCount -= varRef->qual->length() * 2; - - ObjField *refOf = varRef->evaluateRef( pd, code, pushCount ); - paramRefs[pe.pos()] = refOf; - - pushCount += 2; - } - else { - UniqueType *exprUT = expression->evaluate( pd, code ); - - if ( !castAssignment( pd, code, paramUT, 0, exprUT ) ) - error(loc) << "arg " << pe.pos()+1 << " is of the wrong type" << endp; - - pushCount += 1; - } - - /* Advance the parameter list iterator if we have it. */ - paramList != 0 && p.increment(); - } - } - - return paramRefs; -} - -void LangVarRef::resetActiveRefs( Compiler *pd, VarRefLookup &lookup, ObjField **paramRefs ) const -{ - /* Parameter list is given only for user defined methods. Otherwise it - * will be null. */ - for ( long p = 0; p < lookup.objMethod->numParams; p++ ) { - if ( paramRefs[p] != 0 ) - paramRefs[p]->refActive = false; - } -} - - -void LangVarRef::callOperation( Compiler *pd, CodeVect &code, VarRefLookup &lookup ) const -{ - /* This is for writing if it is a non-const builtin. */ - bool forWriting = lookup.objMethod->func == 0 && - !lookup.objMethod->isConst; - - if ( lookup.objMethod->useCallObj ) { - /* Load the object, if any. */ - loadObj( pd, code, lookup.lastPtrInQual, forWriting ); - } - - /* Check if we need to revert the function. If it operates on a reference - * or if it is not local then we need to revert it. */ - bool revert = lookup.lastPtrInQual >= 0 || !isLocalRef(pd); - - /* The call instruction. 
*/ - if ( pd->revertOn && revert ) { - if ( lookup.objMethod->opcodeWV == IN_PARSE_FINISH_WV ) { - code.append( IN_PARSE_SAVE_STEPS ); - code.append( IN_PARSE_LOAD_START ); - code.append( IN_PARSE_FINISH_WV ); - code.appendHalf( 0 ); - code.append( IN_PCR_CALL ); - code.append( IN_PARSE_FINISH_WV3 ); - } - else { - code.append( lookup.objMethod->opcodeWV ); - } - } - else { - if ( lookup.objMethod->opcodeWC == IN_PARSE_FINISH_WC ) { - code.append( IN_PARSE_SAVE_STEPS ); - code.append( IN_PARSE_LOAD_START ); - code.append( IN_PARSE_FINISH_WC ); - code.appendHalf( 0 ); - code.append( IN_PCR_CALL ); - code.append( IN_PARSE_FINISH_WC3 ); - } - else { - code.append( lookup.objMethod->opcodeWC ); - } - } - - if ( lookup.objMethod->useFuncId ) - code.appendHalf( lookup.objMethod->funcId ); -} - -void LangVarRef::popRefQuals( Compiler *pd, CodeVect &code, - VarRefLookup &lookup, ExprVect *args ) const -{ - long popCount = 0; - - /* Evaluate and push the args. */ - if ( args != 0 ) { - /* We use this only if there is a paramter list. */ - for ( ExprVect::Iter pe = *args; pe.lte(); pe++ ) { - /* Get the expression and the UT for the arg. */ - LangExpr *expression = *pe; - UniqueType *paramUT = lookup.objMethod->paramUTs[pe.pos()]; - - if ( paramUT->typeId == TYPE_REF ) { - /* Lookup the field. */ - LangVarRef *varRef = expression->term->varRef; - popCount += varRef->qual->length() * 2; - } - } - if ( popCount > 0 ) { - code.append( IN_POP_N_WORDS ); - code.appendHalf( (short)popCount ); - } - } -} - -UniqueType *LangVarRef::evaluateCall( Compiler *pd, CodeVect &code, ExprVect *args ) -{ - /* Evaluate the object. */ - VarRefLookup lookup = lookupMethod( pd ); - - /* Evaluate and push the arguments. */ - ObjField **paramRefs = evaluateArgs( pd, code, lookup, args ); - - /* Write the call opcode. 
*/ - callOperation( pd, code, lookup ); - - popRefQuals( pd, code, lookup, args ); - - resetActiveRefs( pd, lookup, paramRefs); - delete[] paramRefs; - - /* Return the type to the expression. */ - return lookup.uniqueType; -} - -UniqueType *LangTerm::evaluateMatch( Compiler *pd, CodeVect &code ) const -{ - /* Add the vars bound by the pattern into the local scope. */ - for ( PatternItemList::Iter item = *pattern->list; item.lte(); item++ ) { - if ( item->varRef != 0 ) - item->bindId = pattern->nextBindId++; - } - - UniqueType *ut = varRef->evaluate( pd, code ); - if ( ut->typeId != TYPE_TREE ) - error(varRef->loc) << "expected match against a tree type" << endp; - - /* Store the language element type in the pattern. This is needed by - * the pattern parser. */ - pattern->langEl = ut->langEl; - - code.append( IN_MATCH ); - code.appendHalf( pattern->patRepId ); - - for ( PatternItemList::Iter item = pattern->list->last(); item.gtb(); item-- ) { - if ( item->varRef != 0 ) { - /* Compute the unique type. */ - UniqueType *exprType = pd->findUniqueType( TYPE_TREE, item->factor->langEl ); - - /* Get the type of the variable being assigned to. */ - VarRefLookup lookup = item->varRef->lookupField( pd ); - - item->varRef->loadObj( pd, code, lookup.lastPtrInQual, false ); - item->varRef->setField( pd, code, lookup.inObject, exprType, false ); - } - } - - return ut; -} - -UniqueType *LangTerm::evaluateNew( Compiler *pd, CodeVect &code ) const -{ - /* Evaluate the expression. */ - UniqueType *ut = expr->evaluate( pd, code ); - if ( ut->typeId != TYPE_TREE ) - error() << "new can only be applied to tree types" << endp; - - code.append( IN_TREE_NEW ); - return pd->findUniqueType( TYPE_PTR, ut->langEl ); -} - -void LangTerm::assignFieldArgs( Compiler *pd, CodeVect &code, UniqueType *replUT ) const -{ - /* Now assign the field initializations. Note that we need to do this in - * reverse because the last expression evaluated is at the top of the - * stack. 
*/ - if ( fieldInitArgs != 0 && fieldInitArgs->length() > 0 ) { - ObjectDef *objDef = objDefFromUT( pd, replUT ); - /* Note the reverse traversal. */ - for ( FieldInitVect::Iter pi = fieldInitArgs->last(); pi.gtb(); pi-- ) { - FieldInit *fieldInit = *pi; - ObjField *field = objDef->findFieldNum( pi.pos() ); - if ( field == 0 ) { - error(fieldInit->loc) << "failed to find init pos " << - pi.pos() << " in object" << endp; - } - - /* Lookup the type of the field and compare it to the type of the - * expression. */ - UniqueType *fieldUT = field->typeRef->uniqueType; - if ( !castAssignment( pd, code, fieldUT, 0, fieldInit->exprUT ) ) - error(fieldInit->loc) << "type mismatch in initialization" << endp; - - /* The set field instruction must leave the object on the top of - * the stack. */ - code.append( IN_SET_FIELD_LEAVE_WC ); - code.appendHalf( field->offset ); - } - } -} - -UniqueType *LangTerm::evaluateConstruct( Compiler *pd, CodeVect &code ) const -{ - /* Evaluate the initialization expressions. */ - if ( fieldInitArgs != 0 && fieldInitArgs->length() > 0 ) { - for ( FieldInitVect::Iter pi = *fieldInitArgs; pi.lte(); pi++ ) { - FieldInit *fieldInit = *pi; - fieldInit->exprUT = fieldInit->expr->evaluate( pd, code ); - } - } - - /* Assign bind ids to the variables in the replacement. */ - for ( ReplItemList::Iter item = *replacement->list; item.lte(); item++ ) { - if ( item->expr != 0 ) - item->bindId = replacement->nextBindId++; - } - - /* Evaluate variable references. */ - for ( ReplItemList::Iter item = replacement->list->last(); item.gtb(); item-- ) { - if ( item->type == ReplItem::ExprType ) { - UniqueType *ut = item->expr->evaluate( pd, code ); - - if ( ut->typeId != TYPE_TREE ) - error() << "variables used in replacements must be trees" << endp; - - item->langEl = ut->langEl; - } - } - - /* Construct the tree using the tree information stored in the compiled - * code. 
*/ - code.append( IN_CONSTRUCT ); - code.appendHalf( replacement->patRepId ); - - /* Lookup the type of the replacement and store it in the replacement - * object so that replacement parsing has a target. */ - UniqueType *replUT = typeRef->uniqueType; - if ( replUT->typeId != TYPE_TREE ) - error(loc) << "don't know how to construct this type" << endp; - - if ( replUT->langEl->generic != 0 && replUT->langEl->generic->typeId == GEN_PARSER ) { - code.append( IN_CONSTRUCT_INPUT ); - code.append( IN_DUP_TOP_OFF ); - code.appendHalf( 1 ); - code.append( IN_SET_INPUT ); - } - - replacement->langEl = replUT->langEl; - assignFieldArgs( pd, code, replUT ); - - if ( varRef != 0 ) { - code.append( IN_DUP_TOP ); - - /* Get the type of the variable being assigned to. */ - VarRefLookup lookup = varRef->lookupField( pd ); - - varRef->loadObj( pd, code, lookup.lastPtrInQual, false ); - varRef->setField( pd, code, lookup.inObject, replUT, false ); - } - - return replUT; -} - -UniqueType *LangTerm::evaluateParse( Compiler *pd, CodeVect &code, bool stop ) const -{ - UniqueType *ut = typeRef->uniqueType; - assert( ut != 0 ); - - if ( ut->typeId != TYPE_TREE ) - error(loc) << "can only parse trees" << endl; - - /* Should be one arg, a stream. */ - if ( args == 0 || ( args->length() != 1 && args->length() != 2 ) ) - error(loc) << "expecting one or two args" << endp; - - int context, input; - if ( ut->langEl->contextIn == 0 ) { - if ( args->length() != 1 ) - error(loc) << "parse command requires just input" << endp; - context = -1; - input = 0; - } - else { - if ( args->length() != 2 ) - error(loc) << "parse command requires context and input" << endp; - context = 0; - input = 1; - } - - /* - * Make the parser. - */ - code.append( IN_CONSTRUCT ); - code.appendHalf( replacement->patRepId ); - - /* Dup once for the context load, again for the argument load, again for - * the parse frag, leaving the original there for the finish. 
*/ - code.append( IN_DUP_TOP ); -// code.append( IN_DUP_TOP ); -// code.append( IN_DUP_TOP ); - - /* - * First load the context into the parser. - */ - if ( context < 0 ) { - code.append( IN_LOAD_NIL ); - } - else { - UniqueType *argUT = args->data[context]->evaluate( pd, code ); - if ( argUT != pd->uniqueTypeStream && argUT->typeId != TYPE_TREE ) - error(loc) << "context argument must be a stream or a tree" << endp; - } - - /* FIXME: need to select right one here. */ - code.append( IN_DUP_TOP_OFF ); - code.appendHalf( 1 ); - code.append( IN_SET_ACCUM_CTX_WC ); - - /* - * Evaluate the parse arg. - */ - - /* Evaluate the parse args. */ - UniqueType *argUT = args->data[input]->evaluate( pd, code ); - if ( argUT != pd->uniqueTypeStream && argUT->typeId != TYPE_TREE ) - error(loc) << "input argument must be a stream or a tree" << endp; - - /* Allocate a parser id. This will cause a parser to be built for - * the type. */ - if ( ut->langEl->parserId < 0 ) - ut->langEl->parserId = pd->nextParserId++; - - /* If this is a parse stop then we need to verify that the type is - * compatible with parse stop. */ - if ( stop ) - ut->langEl->parseStop = true; - - if ( argUT != pd->uniqueTypeInput ) { - code.append( IN_CONSTRUCT_INPUT ); - if ( pd->revertOn ) - code.append( IN_INPUT_APPEND_WV ); - else - code.append( IN_INPUT_APPEND_WC ); - } - - code.append( IN_DUP_TOP_OFF ); - code.appendHalf( 1 ); - code.append( IN_SET_INPUT ); - - int stopId = stop ? ut->langEl->id : 0; - - /* Parse instruction, dependent on whether or not we are producing revert - * or commit code. */ - if ( pd->revertOn ) { - code.append( IN_PARSE_SAVE_STEPS ); - code.append( IN_PARSE_LOAD_START ); - code.append( IN_PARSE_FRAG_WV ); - code.appendHalf( stopId ); - code.append( IN_PCR_CALL ); - code.append( IN_PARSE_FRAG_WV3 ); - - /* Finish immediately. 
*/ - code.append( IN_PARSE_SAVE_STEPS ); - code.append( IN_PARSE_LOAD_START ); - code.append( IN_PARSE_FINISH_WV ); - code.appendHalf( stopId ); - code.append( IN_PCR_CALL ); - code.append( IN_PARSE_FINISH_WV3 ); - } - else { - code.append( IN_PARSE_SAVE_STEPS ); - code.append( IN_PARSE_LOAD_START ); - code.append( IN_PARSE_FRAG_WC ); - code.appendHalf( stopId ); - code.append( IN_PCR_CALL ); - code.append( IN_PARSE_FRAG_WC3 ); - - /* Finish immediately. */ - code.append( IN_PARSE_SAVE_STEPS ); - code.append( IN_PARSE_LOAD_START ); - code.append( IN_PARSE_FINISH_WC ); - code.appendHalf( stopId ); - code.append( IN_PCR_CALL ); - code.append( IN_PARSE_FINISH_WC3 ); - } - - /* Lookup the type of the replacement and store it in the replacement - * object so that replacement parsing has a target. */ - replacement->langEl = generic->langEl; - - if ( varRef != 0 ) { - code.append( IN_DUP_TOP ); - - /* Get the type of the variable being assigned to. */ - VarRefLookup lookup = varRef->lookupField( pd ); - - varRef->loadObj( pd, code, lookup.lastPtrInQual, false ); - varRef->setField( pd, code, lookup.inObject, ut, false ); - } - - return ut; -} - -UniqueType *LangTerm::evaluateEmbedString( Compiler *pd, CodeVect &code ) const -{ - /* Assign bind ids to the variables in the replacement. */ - for ( ReplItemList::Iter item = *replItemList; item.lte(); item++ ) { - switch ( item->type ) { - case ReplItem::FactorType: { - String result; - bool unusedCI; - prepareLitString( result, unusedCI, - item->factor->typeRef->pdaLiteral->token.data, - item->factor->typeRef->pdaLiteral->token.loc ); - - /* Make sure we have this string. */ - StringMapEl *mapEl = 0; - if ( pd->literalStrings.insert( result, &mapEl ) ) - mapEl->value = pd->literalStrings.length()-1; - - code.append( IN_LOAD_STR ); - code.appendWord( mapEl->value ); - break; - } - case ReplItem::InputText: { - /* Make sure we have this string. 
*/ - StringMapEl *mapEl = 0; - if ( pd->literalStrings.insert( item->data, &mapEl ) ) - mapEl->value = pd->literalStrings.length()-1; - - code.append( IN_LOAD_STR ); - code.appendWord( mapEl->value ); - break; - } - case ReplItem::ExprType: - item->expr->evaluate( pd, code ); - break; - } - - } - - long items = replItemList->length(); - for ( long i = 0; i < items-1; i++ ) - code.append( IN_CONCAT_STR ); - - return pd->uniqueTypeStr; -} - -UniqueType *LangTerm::evaluate( Compiler *pd, CodeVect &code ) const -{ - switch ( type ) { - case VarRefType: - return varRef->evaluate( pd, code ); - case MethodCallType: - return varRef->evaluateCall( pd, code, args ); - case NilType: - code.append( IN_LOAD_NIL ); - return pd->uniqueTypeNil; - case TrueType: - code.append( IN_LOAD_TRUE ); - return pd->uniqueTypeBool; - case FalseType: - code.append( IN_LOAD_FALSE ); - return pd->uniqueTypeBool; - case MakeTokenType: - return evaluateMakeToken( pd, code ); - case MakeTreeType: - return evaluateMakeTree( pd, code ); - case NumberType: { - unsigned int n = atoi( data ); - code.append( IN_LOAD_INT ); - code.appendWord( n ); - return pd->uniqueTypeInt; - } - case StringType: { - String interp; - bool unused; - prepareLitString( interp, unused, data, InputLoc() ); - - /* Make sure we have this string. */ - StringMapEl *mapEl = 0; - if ( pd->literalStrings.insert( interp, &mapEl ) ) - mapEl->value = pd->literalStrings.length()-1; - - code.append( IN_LOAD_STR ); - code.appendWord( mapEl->value ); - return pd->uniqueTypeStr; - } - case MatchType: - return evaluateMatch( pd, code ); - case ParseType: - return evaluateParse( pd, code, false ); - case ParseStopType: - return evaluateParse( pd, code, true ); - case ConstructType: - return evaluateConstruct( pd, code ); - case NewType: - return evaluateNew( pd, code ); - case TypeIdType: { - /* Evaluate the expression. 
*/ - UniqueType *ut = typeRef->uniqueType; - if ( ut->typeId != TYPE_TREE ) - error() << "typeid can only be applied to tree types" << endp; - - code.append( IN_LOAD_INT ); - code.appendWord( ut->langEl->id ); - return pd->uniqueTypeInt; - } - case SearchType: { - /* Evaluate the expression. */ - UniqueType *ut = typeRef->uniqueType; - if ( ut->typeId != TYPE_TREE ) - error(loc) << "can only search for tree types" << endp; - - UniqueType *treeUT = varRef->evaluate( pd, code ); - if ( treeUT->typeId != TYPE_TREE ) - error(loc) << "search can be applied only to tree types" << endl; - - code.append( IN_TREE_SEARCH ); - code.appendWord( ut->langEl->id ); - return ut; - }; - case EmbedStringType: { - return evaluateEmbedString( pd, code ); - } - } - return 0; -} - -UniqueType *LangExpr::evaluate( Compiler *pd, CodeVect &code ) const -{ - switch ( type ) { - case BinaryType: { - switch ( op ) { - case '+': { - UniqueType *lt = left->evaluate( pd, code ); - UniqueType *rt = right->evaluate( pd, code ); - - if ( lt == pd->uniqueTypeInt && rt == pd->uniqueTypeInt ) { - code.append( IN_ADD_INT ); - return pd->uniqueTypeInt; - } - - if ( lt == pd->uniqueTypeStr && rt == pd->uniqueTypeStr ) { - code.append( IN_CONCAT_STR ); - return pd->uniqueTypeStr; - } - - error(loc) << "do not have an addition operator for these types" << endp; - break; - } - case '-': { - UniqueType *lt = left->evaluate( pd, code ); - UniqueType *rt = right->evaluate( pd, code ); - - if ( lt == pd->uniqueTypeInt && rt == pd->uniqueTypeInt ) { - code.append( IN_SUB_INT ); - return pd->uniqueTypeInt; - } - - error(loc) << "do not have an addition operator for these types" << endp; - break; - } - case '*': { - UniqueType *lt = left->evaluate( pd, code ); - UniqueType *rt = right->evaluate( pd, code ); - - if ( lt == pd->uniqueTypeInt && rt == pd->uniqueTypeInt ) { - code.append( IN_MULT_INT ); - return pd->uniqueTypeInt; - } - - error(loc) << "do not have an multiplication " - "operator for these types" << 
endp; - break; - } - case '/': { - UniqueType *lt = left->evaluate( pd, code ); - UniqueType *rt = right->evaluate( pd, code ); - - if ( lt == pd->uniqueTypeInt && rt == pd->uniqueTypeInt ) { - code.append( IN_DIV_INT ); - return pd->uniqueTypeInt; - } - - error(loc) << "do not have an division" - "operator for these types" << endp; - break; - } - case OP_DoubleEql: { - UniqueType *lt = left->evaluate( pd, code ); - UniqueType *rt = right->evaluate( pd, code ); - - if ( lt != rt ) - error(loc) << "comparison of different types" << endp; - - code.append( IN_TST_EQL ); - return pd->uniqueTypeBool; - } - case OP_NotEql: { - UniqueType *lt = left->evaluate( pd, code ); - UniqueType *rt = right->evaluate( pd, code ); - - if ( lt != rt ) - error(loc) << "comparison of different types" << endp; - - code.append( IN_TST_NOT_EQL ); - return pd->uniqueTypeBool; - } - case '<': { - left->evaluate( pd, code ); - right->evaluate( pd, code ); - - code.append( IN_TST_LESS ); - return pd->uniqueTypeBool; - } - case '>': { - left->evaluate( pd, code ); - right->evaluate( pd, code ); - - code.append( IN_TST_GRTR ); - return pd->uniqueTypeBool; - } - case OP_LessEql: { - left->evaluate( pd, code ); - right->evaluate( pd, code ); - - code.append( IN_TST_LESS_EQL ); - return pd->uniqueTypeBool; - } - case OP_GrtrEql: { - left->evaluate( pd, code ); - right->evaluate( pd, code ); - - code.append( IN_TST_GRTR_EQL ); - return pd->uniqueTypeBool; - } - case OP_LogicalAnd: { - /* Evaluate the left and duplicate it. */ - left->evaluate( pd, code ); - code.append( IN_DUP_TOP ); - - /* Jump over the right if false, leaving the original left - * result on the top of the stack. We don't know the - * distance yet so record the position of the jump. */ - long jump = code.length(); - code.append( IN_JMP_FALSE ); - code.appendHalf( 0 ); - - /* Evauluate the right, add the test. Store it separately. 
*/ - right->evaluate( pd, code ); - code.append( IN_TST_LOGICAL_AND ); - - /* Set the distance of the jump. */ - long distance = code.length() - jump - 3; - code.setHalf( jump+1, distance ); - - return pd->uniqueTypeInt; - } - case OP_LogicalOr: { - /* Evaluate the left and duplicate it. */ - left->evaluate( pd, code ); - code.append( IN_DUP_TOP ); - - /* Jump over the right if true, leaving the original left - * result on the top of the stack. We don't know the - * distance yet so record the position of the jump. */ - long jump = code.length(); - code.append( IN_JMP_TRUE ); - code.appendHalf( 0 ); - - /* Evauluate the right, add the test. */ - right->evaluate( pd, code ); - code.append( IN_TST_LOGICAL_OR ); - - /* Set the distance of the jump. */ - long distance = code.length() - jump - 3; - code.setHalf( jump+1, distance ); - - return pd->uniqueTypeInt; - } - } - - assert(false); - return 0; - } - case UnaryType: { - switch ( op ) { - case '!': { - /* Evaluate the left and duplicate it. */ - right->evaluate( pd, code ); - code.append( IN_NOT ); - return pd->uniqueTypeBool; - } - case '$': { - right->evaluate( pd, code ); - code.append( IN_TREE_TO_STR ); - return pd->uniqueTypeStr; - - } - case '%': { - right->evaluate( pd, code ); - code.append( IN_TREE_TO_STR_NOTRIM ); - return pd->uniqueTypeStr; - } - case '^': { - UniqueType *rt = right->evaluate( pd, code ); - code.append( IN_TREE_TRIM ); - return rt; - } - case OP_Deref: { - UniqueType *ut = right->evaluate( pd, code ); - if ( ut->typeId != TYPE_PTR ) - error(loc) << "can only dereference pointers" << endl; - - code.append( IN_PTR_DEREF_R ); - ut = pd->findUniqueType( TYPE_TREE, ut->langEl ); - return ut; - } - default: - assert(false); - } - return 0; - } - case TermType: { - return term->evaluate( pd, code ); - } - } - return 0; -} - -void LangVarRef::assignValue( Compiler *pd, CodeVect &code, - UniqueType *exprUT ) const -{ - /* Lookup the left hand side of the assignment. 
*/ - VarRefLookup lookup = lookupField( pd ); - - if ( lookup.objField->refActive ) - error(loc) << "reference active, cannot write to object" << endp; - - if ( lookup.firstConstPart >= 0 ) { - error(loc) << "left hand side qualification \"" << - qual->data[lookup.firstConstPart].data << "\" is const" << endp; - } - - if ( lookup.objField->isConst ) - error(loc) << "field \"" << name << "\" is const" << endp; - - /* Writing guarantees the field is dirty. tree is dirty. */ - lookup.objField->dirtyTree = true; - - /* Check the types of the assignment and possibly cast. */ - UniqueType *objUT = lookup.objField->typeRef->uniqueType; - assert( lookup.uniqueType == lookup.objField->typeRef->uniqueType ); - if ( !castAssignment( pd, code, objUT, lookup.iterSearchUT, exprUT ) ) - error(loc) << "type mismatch in assignment" << endp; - - /* Decide if we need to revert the assignment. */ - bool revert = lookup.lastPtrInQual >= 0 || !isLocalRef(pd); - - /* Load the object and generate the field setting code. */ - loadObj( pd, code, lookup.lastPtrInQual, true ); - - if ( lookup.uniqueType->typeId == TYPE_ITER ) - setFieldIter( pd, code, lookup.inObject, lookup.uniqueType, exprUT, false ); - else - setField( pd, code, lookup.inObject, exprUT, revert ); -} - -UniqueType *LangTerm::evaluateMakeToken( Compiler *pd, CodeVect &code ) const -{ -// if ( pd->compileContext != Compiler::CompileTranslation ) -// error(loc) << "make_token can be used only in a translation block" << endp; - - /* Match the number of arguments. */ - int numArgs = args != 0 ? args->length() : 0; - if ( numArgs < 2 ) - error(loc) << "need at least two arguments" << endp; - - for ( ExprVect::Iter pe = *args; pe.lte(); pe++ ) { - /* Evaluate. 
*/ - UniqueType *exprUT = (*pe)->evaluate( pd, code ); - - if ( pe.pos() == 0 && exprUT != pd->uniqueTypeInt ) - error(loc) << "first arg, id, must be an int" << endp; - - if ( pe.pos() == 1 && exprUT != pd->uniqueTypeStr ) - error(loc) << "second arg, length, must be a string" << endp; - } - - /* The token is now created, send it. */ - code.append( IN_MAKE_TOKEN ); - code.append( args->length() ); - - return pd->uniqueTypeAny; -} - -UniqueType *LangTerm::evaluateMakeTree( Compiler *pd, CodeVect &code ) const -{ - if ( pd->compileContext != Compiler::CompileTranslation ) - error(loc) << "make_tree can be used only in a translation block" << endp; - - /* Match the number of arguments. */ - int numArgs = args != 0 ? args->length() : 0; - if ( numArgs < 1 ) - error(loc) << "need at least one argument" << endp; - - for ( ExprVect::Iter pe = *args; pe.lte(); pe++ ) { - /* Evaluate. */ - UniqueType *exprUT = (*pe)->evaluate( pd, code ); - - if ( pe.pos() == 0 && exprUT != pd->uniqueTypeInt ) - error(loc) << "first arg, nonterm id, must be an int" << endp; - } - - /* The token is now created, send it. */ - code.append( IN_MAKE_TREE ); - code.append( args->length() ); - - return pd->uniqueTypeAny; -} - -void LangStmt::compileForIterBody( Compiler *pd, - CodeVect &code, UniqueType *iterUT ) const -{ - /* Remember the top of the loop. */ - long top = code.length(); - - /* Advance */ - code.append( iterUT->iterDef->inAdvance ); - code.appendHalf( objField->offset ); - - /* Test: jump past the while block if false. Note that we don't have the - * distance yet. */ - long jumpFalse = code.length(); - code.append( IN_JMP_FALSE ); - code.appendHalf( 0 ); - - /* - * Set up the loop cleanup code. - */ - - /* Set up the current loop cleanup. */ - CodeVect loopCleanup; - if ( pd->loopCleanup != 0 ) - loopCleanup.setAs( *pd->loopCleanup ); - - /* Add the cleanup for the current loop. 
*/ - loopCleanup.append( iterUT->iterDef->inDestroy ); - loopCleanup.appendHalf( objField->offset ); - - /* Push the loop cleanup. */ - CodeVect *oldLoopCleanup = pd->loopCleanup; - pd->loopCleanup = &loopCleanup; - - /* Compile the contents. */ - for ( StmtList::Iter stmt = *stmtList; stmt.lte(); stmt++ ) - stmt->compile( pd, code ); - - pd->loopCleanup = oldLoopCleanup; - - /* Jump back to the top to retest. */ - long retestDist = code.length() - top + 3; - code.append( IN_JMP ); - code.appendHalf( -retestDist ); - - /* Set the jump false distance. */ - long falseDist = code.length() - jumpFalse - 3; - code.setHalf( jumpFalse+1, falseDist ); - - /* Compute the jump distance for the break jumps. */ - for ( LongVect::Iter brk = pd->breakJumps; brk.lte(); brk++ ) { - long distance = code.length() - *brk - 3; - code.setHalf( *brk+1, distance ); - } - pd->breakJumps.empty(); - - /* Destroy the iterator. */ - code.append( iterUT->iterDef->inDestroy ); - code.appendHalf( objField->offset ); - - /* Clean up any prepush args. */ -} - -LangTerm *LangStmt::chooseDefaultIter( Compiler *pd, LangTerm *fromVarRef ) const -{ - /* Lookup the lang term and decide what iterator to use based - * on its type. */ - VarRefLookup lookup = fromVarRef->varRef->lookupField( pd ); - - if ( lookup.inObject->type != ObjectDef::FrameType ) - error(loc) << "root of iteration must be a local" << endp; - - LangVarRef *callVarRef = 0; - if ( lookup.uniqueType->typeId == TYPE_TREE || - lookup.uniqueType->typeId == TYPE_REF || - lookup.uniqueType->typeId == TYPE_ITER || - lookup.uniqueType->typeId == TYPE_PTR ) - { - /* The iterator name. */ - callVarRef = new LangVarRef( loc, new QualItemVect, "triter" ); - } - else { - error(loc) << "there is no default iterator for a " - "root of that type" << endp; - } - - /* The parameters. 
*/ - ExprVect *callExprVect = new ExprVect; - LangExpr *callExpr = new LangExpr( new LangTerm( - LangTerm::VarRefType, fromVarRef->varRef ) ); - callExprVect->append( callExpr ); - - LangTerm *callLangTerm = new LangTerm( callVarRef, callExprVect ); - - return callLangTerm; -} - -void LangStmt::compileForIter( Compiler *pd, CodeVect &code ) const -{ - pd->curLocalFrame->iterPushScope(); - - LangTerm *iterCallTerm = langTerm; - if ( iterCallTerm->type != LangTerm::MethodCallType ) - iterCallTerm = chooseDefaultIter( pd, langTerm ); - - /* The type we are searching for. */ - UniqueType *searchUT = typeRef->uniqueType; - - /* - * Declare the iterator variable. - */ - VarRefLookup lookup = iterCallTerm->varRef->lookupMethod( pd ); - if ( lookup.objMethod->iterDef == 0 ) { - error(loc) << "attempt to iterate using something " - "that is not an iterator" << endp; - } - - /* Now that we have done the iterator call lookup we can make the type - * reference for the object field. */ - UniqueType *iterUniqueType = pd->findUniqueType( TYPE_ITER, lookup.objMethod->iterDef ); - objField->typeRef = new TypeRef( loc, lookup.objMethod->iterDef, iterUniqueType, searchUT ); - - /* Also force the field to be initialized. */ - pd->curLocalFrame->initField( pd, objField ); - - /* - * Create the iterator from the local var. - */ - - UniqueType *iterUT = objField->typeRef->uniqueType; - - /* Evaluate and push the arguments. 
*/ - ObjField **paramRefs = iterCallTerm->varRef->evaluateArgs( - pd, code, lookup, iterCallTerm->args ); - - if ( pd->revertOn ) - code.append( iterUT->iterDef->inCreateWV ); - else - code.append( iterUT->iterDef->inCreateWC ); - - code.appendHalf( objField->offset ); - if ( lookup.objMethod->func != 0 ) - code.appendHalf( lookup.objMethod->func->funcId ); - - if ( iterUT->iterDef->useSearchUT ) { - if ( searchUT->typeId == TYPE_PTR ) - code.appendHalf( pd->uniqueTypePtr->langEl->id ); - else - code.appendHalf( searchUT->langEl->id ); - } - - compileForIterBody( pd, code, iterUT ); - - iterCallTerm->varRef->popRefQuals( pd, code, lookup, iterCallTerm->args ); - - iterCallTerm->varRef->resetActiveRefs( pd, lookup, paramRefs ); - delete[] paramRefs; - - pd->curLocalFrame->iterPopScope(); -} - -void LangStmt::compileWhile( Compiler *pd, CodeVect &code ) const -{ - pd->curLocalFrame->iterPushScope(); - - /* Generate code for the while test. Remember the top. */ - long top = code.length(); - expr->evaluate( pd, code ); - - /* Jump past the while block if false. Note that we don't have the - * distance yet. */ - long jumpFalse = code.length(); - code.append( IN_JMP_FALSE ); - code.appendHalf( 0 ); - - /* Compute the while block. */ - for ( StmtList::Iter stmt = *stmtList; stmt.lte(); stmt++ ) - stmt->compile( pd, code ); - - /* Jump back to the top to retest. */ - long retestDist = code.length() - top + 3; - code.append( IN_JMP ); - code.appendHalf( -retestDist ); - - /* Set the jump false distance. */ - long falseDist = code.length() - jumpFalse - 3; - code.setHalf( jumpFalse+1, falseDist ); - - /* Compute the jump distance for the break jumps. 
*/ - for ( LongVect::Iter brk = pd->breakJumps; brk.lte(); brk++ ) { - long distance = code.length() - *brk - 3; - code.setHalf( *brk+1, distance ); - } - pd->breakJumps.empty(); - - pd->curLocalFrame->iterPopScope(); -} - -void LangStmt::evaluateParserItems( Compiler *pd, CodeVect &code ) const -{ - varRef->evaluate( pd, code ); - - /* Assign bind ids to the variables in the replacement. */ - for ( ReplItemList::Iter item = *parserText->list; item.lte(); item++ ) { - switch ( item->type ) { - case ReplItem::FactorType: { - String result; - bool unusedCI; - prepareLitString( result, unusedCI, - item->factor->typeRef->pdaLiteral->token.data, - item->factor->typeRef->pdaLiteral->token.loc ); - - /* Make sure we have this string. */ - StringMapEl *mapEl = 0; - if ( pd->literalStrings.insert( result, &mapEl ) ) - mapEl->value = pd->literalStrings.length()-1; - - code.append( IN_LOAD_STR ); - code.appendWord( mapEl->value ); - break; - } - case ReplItem::InputText: { - /* Make sure we have this string. */ - StringMapEl *mapEl = 0; - if ( pd->literalStrings.insert( item->data, &mapEl ) ) - mapEl->value = pd->literalStrings.length()-1; - - code.append( IN_LOAD_STR ); - code.appendWord( mapEl->value ); - break; - } - case ReplItem::ExprType: - item->expr->evaluate( pd, code ); - break; - } - - code.append( IN_DUP_TOP_OFF ); - code.appendHalf( 1 ); - - /* Not a stream. Get the input first. */ - code.append( IN_GET_INPUT ); - if ( pd->revertOn ) - code.append( IN_INPUT_APPEND_WV ); - else - code.append( IN_INPUT_APPEND_WC ); - code.append( IN_POP ); - - code.append( IN_DUP_TOP ); - - /* Parse instruction, dependent on whether or not we are producing - * revert or commit code. 
*/ - if ( pd->revertOn ) { - code.append( IN_PARSE_SAVE_STEPS ); - code.append( IN_PARSE_LOAD_START ); - code.append( IN_PARSE_FRAG_WV ); - code.appendHalf( 0 ); - code.append( IN_PCR_CALL ); - code.append( IN_PARSE_FRAG_WV3 ); - } - else { - code.append( IN_PARSE_SAVE_STEPS ); - code.append( IN_PARSE_LOAD_START ); - code.append( IN_PARSE_FRAG_WC ); - code.appendHalf( 0 ); - code.append( IN_PCR_CALL ); - code.append( IN_PARSE_FRAG_WC3 ); - } - } - code.append( IN_POP ); -} - -void LangStmt::compile( Compiler *pd, CodeVect &code ) const -{ - switch ( type ) { - case PrintType: - case PrintXMLACType: - case PrintXMLType: - case PrintStreamType: { - UniqueType **types = new UniqueType*[exprPtrVect->length()]; - - /* Push the args backwards. */ - for ( ExprVect::Iter pex = exprPtrVect->last(); pex.gtb(); pex-- ) - types[pex.pos()] = (*pex)->evaluate( pd, code ); - - /* Run the printing forwards. */ - if ( type == PrintType ) { - code.append( IN_PRINT ); - code.append( exprPtrVect->length() ); - } - else if ( type == PrintXMLACType ) { - code.append( IN_PRINT_XML_AC ); - code.append( exprPtrVect->length() ); - } - else if ( type == PrintXMLType ) { - code.append( IN_PRINT_XML ); - code.append( exprPtrVect->length() ); - } - else if ( type == PrintStreamType ) { - /* Minus one because the first arg is the stream. */ - code.append( IN_PRINT_STREAM ); - code.append( exprPtrVect->length() - 1 ); - } - - delete[] types; - - break; - } - case ExprType: { - /* Evaluate the exrepssion, then pop it immediately. */ - expr->evaluate( pd, code ); - code.append( IN_POP ); - break; - } - case IfType: { - pd->curLocalFrame->iterPushScope(); - - long jumpFalse = 0, jumpPastElse = 0, distance = 0; - - /* Evaluate the test. */ - expr->evaluate( pd, code ); - - /* Jump past the if block if false. We don't know the distance - * yet so store the location of the jump. */ - jumpFalse = code.length(); - code.append( IN_JMP_FALSE ); - code.appendHalf( 0 ); - - /* Compile the if true branch. 
*/ - for ( StmtList::Iter stmt = *stmtList; stmt.lte(); stmt++ ) - stmt->compile( pd, code ); - - if ( elsePart != 0 ) { - /* Jump past the else code for the if true branch. */ - jumpPastElse = code.length(); - code.append( IN_JMP ); - code.appendHalf( 0 ); - } - - /* Set the distance for the jump false case. */ - distance = code.length() - jumpFalse - 3; - code.setHalf( jumpFalse+1, distance ); - - pd->curLocalFrame->iterPopScope(); - - if ( elsePart != 0 ) { - /* Compile the else branch. */ - elsePart->compile( pd, code ); - - /* Set the distance for jump over the else part. */ - distance = code.length() - jumpPastElse - 3; - code.setHalf( jumpPastElse+1, distance ); - } - - break; - } - case ElseType: { - pd->curLocalFrame->iterPushScope(); - - /* Compile the else branch. */ - for ( StmtList::Iter stmt = *stmtList; stmt.lte(); stmt++ ) - stmt->compile( pd, code ); - - pd->curLocalFrame->iterPopScope(); - break; - } - case RejectType: { - code.append( IN_REJECT ); - break; - } - case WhileType: { - compileWhile( pd, code ); - break; - } - case AssignType: { - /* Evaluate the exrepssion. */ - UniqueType *exprUT = expr->evaluate( pd, code ); - - /* Do the assignment. */ - varRef->assignValue( pd, code, exprUT ); - break; - } - case ForIterType: { - compileForIter( pd, code ); - break; - } - case ReturnType: { - /* Evaluate the exrepssion. */ - UniqueType *exprUT = expr->evaluate( pd, code ); - - if ( pd->curFunction == 0 ) { - /* In the main function */ - pd->mainReturnUT = exprUT; - } - else { - UniqueType *resUT = pd->curFunction->typeRef->uniqueType; - if ( !castAssignment( pd, code, resUT, 0, exprUT ) ) - error(loc) << "return value wrong type" << endp; - } - - code.append( IN_SAVE_RET ); - - /* The loop cleanup code. */ - if ( pd->loopCleanup != 0 ) - code.append( *pd->loopCleanup ); - - /* Jump to the return label. The distnacnce will be filled in - * later. 
*/ - pd->returnJumps.append( code.length() ); - code.append( IN_JMP ); - code.appendHalf( 0 ); - break; - } - case BreakType: { - pd->breakJumps.append( code.length() ); - code.append( IN_JMP ); - code.appendHalf( 0 ); - break; - } - case YieldType: { - /* take a reference and yield it. Immediately reset the referece. */ - varRef->preEvaluateRef( pd, code ); - ObjField *objField = varRef->evaluateRef( pd, code, 0 ); - code.append( IN_YIELD ); - - if ( varRef->qual->length() > 0 ) { - code.append( IN_POP_N_WORDS ); - code.appendHalf( (short)(varRef->qual->length()*2) ); - } - - objField->refActive = false; - break; - } - case ParserType: { - evaluateParserItems( pd, code ); - break; - } - } -} - -void CodeBlock::compile( Compiler *pd, CodeVect &code ) const -{ - for ( StmtList::Iter stmt = *stmtList; stmt.lte(); stmt++ ) - stmt->compile( pd, code ); -} - -void Compiler::addMatchLength( ObjectDef *frame, LangEl *lel ) -{ - /* Make the type ref. */ - TypeRef *typeRef = new TypeRef( InputLoc(), uniqueTypeInt ); - - /* Create the field and insert it into the map. */ - ObjField *el = new ObjField( InputLoc(), typeRef, "match_length" ); - el->beenReferenced = true; - el->beenInitialized = true; - el->isConst = true; - el->useOffset = false; - el->inGetR = IN_GET_MATCH_LENGTH_R; - frame->insertField( el->name, el ); -} - -void Compiler::addMatchText( ObjectDef *frame, LangEl *lel ) -{ - /* Make the type ref. */ - TypeRef *typeRef = new TypeRef( InputLoc(), uniqueTypeStr ); - - /* Create the field and insert it into the map. */ - ObjField *el = new ObjField( InputLoc(), typeRef, "match_text" ); - el->beenReferenced = true; - el->beenInitialized = true; - el->isConst = true; - el->useOffset = false; - el->inGetR = IN_GET_MATCH_TEXT_R; - frame->insertField( el->name, el ); -} - -void Compiler::addInput( ObjectDef *frame ) -{ - /* Make the type ref. */ - TypeRef *typeRef = new TypeRef( InputLoc(), uniqueTypeInput ); - - /* Create the field and insert it into the map. 
*/ - ObjField *el = new ObjField( InputLoc(), typeRef, "input" ); - el->beenReferenced = true; - el->beenInitialized = true; - el->isConst = false; - el->useOffset = false; - el->isCustom = true; - el->inGetR = IN_LOAD_INPUT_R; - el->inGetWV = IN_LOAD_INPUT_WV; - el->inGetWC = IN_LOAD_INPUT_WC; - frame->insertField( el->name, el ); -} - -void Compiler::addCtx( ObjectDef *frame ) -{ - /* Make the type ref. */ - TypeRef *typeRef = new TypeRef( InputLoc(), uniqueTypeStream ); - - /* Create the field and insert it into the map. */ - ObjField *el = new ObjField( InputLoc(), typeRef, "ctx" ); - el->beenReferenced = true; - el->beenInitialized = true; - el->isConst = false; - el->useOffset = false; - el->isCustom = true; - el->inGetR = IN_LOAD_CTX_R; - el->inGetWV = IN_LOAD_CTX_WV; - el->inGetWC = IN_LOAD_CTX_WC; - frame->insertField( el->name, el ); -} - -void Compiler::initFieldInstructions( ObjField *el ) -{ - el->inGetR = IN_GET_FIELD_R; - el->inGetWC = IN_GET_FIELD_WC; - el->inGetWV = IN_GET_FIELD_WV; - el->inSetWC = IN_SET_FIELD_WC; - el->inSetWV = IN_SET_FIELD_WV; -} - -void Compiler::initLocalInstructions( ObjField *el ) -{ - el->inGetR = IN_GET_LOCAL_R; - el->inGetWC = IN_GET_LOCAL_WC; - el->inSetWC = IN_SET_LOCAL_WC; -} - -void Compiler::initLocalRefInstructions( ObjField *el ) -{ - el->inGetR = IN_GET_LOCAL_REF_R; - el->inGetWC = IN_GET_LOCAL_REF_WC; - el->inSetWC = IN_SET_LOCAL_REF_WC; -} - -void Compiler::initIntObject( ) -{ - intObj = new ObjectDef( ObjectDef::BuiltinType, "int", nextObjectId++ ); - intLangEl->objectDef = intObj; - - initFunction( uniqueTypeStr, intObj, "to_string", IN_INT_TO_STR, IN_INT_TO_STR, true ); -} - -/* Add a constant length field to the object. - * Opcode supplied by the caller. */ -void Compiler::addLengthField( ObjectDef *objDef, Code getLength ) -{ - /* Create the "length" field. 
*/ - TypeRef *typeRef = new TypeRef( InputLoc(), uniqueTypeInt ); - ObjField *el = new ObjField( InputLoc(), typeRef, "length" ); - el->beenReferenced = true; - el->beenInitialized = true; - el->isConst = true; - el->useOffset = false; - el->inGetR = getLength; - - objDef->insertField( el->name, el ); -} - -void Compiler::initStrObject( ) -{ - strObj = new ObjectDef( ObjectDef::BuiltinType, "str", nextObjectId++ ); - strLangEl->objectDef = strObj; - - initFunction( uniqueTypeInt, strObj, "atoi", IN_STR_ATOI, IN_STR_ATOI, true ); - initFunction( uniqueTypeInt, strObj, "uord8", IN_STR_UORD8, IN_STR_UORD8, true ); - initFunction( uniqueTypeInt, strObj, "sord8", IN_STR_SORD8, IN_STR_SORD8, true ); - initFunction( uniqueTypeInt, strObj, "uord16", IN_STR_UORD16, IN_STR_UORD16, true ); - initFunction( uniqueTypeInt, strObj, "sord16", IN_STR_SORD16, IN_STR_SORD16, true ); - initFunction( uniqueTypeInt, strObj, "uord32", IN_STR_UORD32, IN_STR_UORD32, true ); - initFunction( uniqueTypeInt, strObj, "sord32", IN_STR_SORD32, IN_STR_SORD32, true ); - addLengthField( strObj, IN_STR_LENGTH ); - - initFunction( uniqueTypeStr, globalObjectDef, "sprintf", - IN_SPRINTF, IN_SPRINTF, uniqueTypeStr, uniqueTypeInt, true ); -} - -void Compiler::initStreamObject( ) -{ - streamObj = new ObjectDef( ObjectDef::BuiltinType, - "stream", nextObjectId++ ); - streamLangEl->objectDef = streamObj; -} - -void Compiler::initInputObject( ) -{ - inputObj = new ObjectDef( ObjectDef::BuiltinType, - "accum_stream", nextObjectId++ ); - inputLangEl->objectDef = inputObj; - - initFunction( uniqueTypeStr, inputObj, "pull", - IN_INPUT_PULL_WV, IN_INPUT_PULL_WV, uniqueTypeInt, false ); - initFunction( uniqueTypeStr, inputObj, "push", - IN_INPUT_PUSH_WV, IN_INPUT_PUSH_WV, uniqueTypeAny, false ); - initFunction( uniqueTypeStr, inputObj, "push_ignore", - IN_INPUT_PUSH_IGNORE_WV, IN_INPUT_PUSH_IGNORE_WV, uniqueTypeAny, false ); -} - -ObjField *Compiler::makeDataEl() -{ - /* Create the "data" field. 
*/ - TypeRef *typeRef = new TypeRef( InputLoc(), uniqueTypeStr ); - ObjField *el = new ObjField( InputLoc(), typeRef, "data" ); - - /* Setting beenReferenced to true prevents us from assigning instructions - * and an offset to the field. */ - - el->beenReferenced = true; - el->beenInitialized = true; - el->useOffset = false; - el->inGetR = IN_GET_TOKEN_DATA_R; - el->inSetWC = IN_SET_TOKEN_DATA_WC; - el->inSetWV = IN_SET_TOKEN_DATA_WV; - return el; -} - -ObjField *Compiler::makePosEl() -{ - /* Create the "data" field. */ - TypeRef *typeRef = new TypeRef( InputLoc(), uniqueTypeInt ); - ObjField *el = new ObjField( InputLoc(), typeRef, "pos" ); - - /* Setting beenReferenced to true prevents us from assigning instructions - * and an offset to the field. */ - - el->isConst = true; - el->beenReferenced = true; - el->beenInitialized = true; - el->useOffset = false; - el->inGetR = IN_GET_TOKEN_POS_R; - return el; -} - -ObjField *Compiler::makeLineEl() -{ - /* Create the "data" field. */ - TypeRef *typeRef = new TypeRef( InputLoc(), uniqueTypeInt ); - ObjField *el = new ObjField( InputLoc(), typeRef, "line" ); - - /* Setting beenReferenced to true prevents us from assigning instructions - * and an offset to the field. */ - - el->isConst = true; - el->beenReferenced = true; - el->beenInitialized = true; - el->useOffset = false; - el->inGetR = IN_GET_TOKEN_LINE_R; - return el; -} - -void Compiler::initTokenObjects( ) -{ - /* Make a default object Definition. */ - tokenObj = new ObjectDef( ObjectDef::BuiltinType, "token", nextObjectId++ ); - - ObjField *dataEl = makeDataEl(); - tokenObj->insertField( dataEl->name, dataEl ); - - ObjField *posEl = makePosEl(); - tokenObj->insertField( posEl->name, posEl ); - - ObjField *lineEl = makeLineEl(); - tokenObj->insertField( lineEl->name, lineEl ); - - /* Give all user terminals the token object type. 
*/ - for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) { - if ( lel->isUserTerm ) { - if ( lel->objectDef == 0 ) - lel->objectDef = tokenObj; - else { - /* Create the "data" field. */ - ObjField *dataEl = makeDataEl(); - lel->objectDef->insertField( dataEl->name, dataEl ); - - /* Create the "pos" field. */ - ObjField *posEl = makePosEl(); - lel->objectDef->insertField( posEl->name, posEl ); - - /* Create the "line" field. */ - ObjField *lineEl = makeLineEl(); - lel->objectDef->insertField( lineEl->name, lineEl ); - } - } - } -} - -void Compiler::findLocalTrees( CharSet &trees ) -{ - /* We exlcude "lhs" from being downrefed because we need to use if after - * the frame is is cleaned and so it must survive. */ - for ( ObjFieldList::Iter ol = *curLocalFrame->objFieldList; ol.lte(); ol++ ) { - ObjField *el = ol->value; - /* FIXME: This test needs to be improved. Match_text was getting - * through before useOffset was tested. What will? */ - if ( el->useOffset && !el->isLhsEl && ( el->beenReferenced || el->isParam ) ) { - UniqueType *ut = el->typeRef->uniqueType; - if ( ut->typeId == TYPE_TREE || ut->typeId == TYPE_PTR ) - trees.insert( el->offset ); - } - } -} - -void Compiler::makeProdCopies( Definition *prod ) -{ - int pos = 0; - for ( ProdElList::Iter pel = *prod->prodElList; pel.lte(); pel++, pos++) { - if ( pel->captureField != 0 ) { - prod->copy.append( pel->captureField->offset ); - prod->copy.append( pos ); - } - } -} - -void Compiler::compileReductionCode( Definition *prod ) -{ - CodeBlock *block = prod->redBlock; - - /* Init the compilation context. */ - compileContext = CompileReduction; - curLocalFrame = block->localFrame; - revertOn = true; - block->frameId = nextFrameId++; - - CodeVect &code = block->codeWV; - - /* Add the alloc frame opcode. We don't have the right - * frame size yet. We will fill it in later. */ - code.append( IN_INIT_LOCALS ); - code.appendHalf( 0 ); - long afterInit = code.length(); - - /* Compile the reduce block. 
*/ - block->compile( this, code ); - - /* We have the frame size now. Set in the alloc frame instruction. */ - long frameSize = curLocalFrame->size(); - code.setHalf( 1, frameSize ); - - /* Might need to load right hand side values. */ - addProdRHSLoads( prod, code, afterInit ); - - addProdLHSLoad( prod, code, afterInit ); - addPushBackLHS( prod, code, afterInit ); - - code.append( IN_PCR_RET ); - - /* Now that compilation is done variables are referenced. Make the local - * trees descriptor. */ - findLocalTrees( block->trees ); -} - -void Compiler::compileTranslateBlock( LangEl *langEl ) -{ - CodeBlock *block = langEl->transBlock; - - /* Set up compilation context. */ - compileContext = CompileTranslation; - curLocalFrame = block->localFrame; - revertOn = true; - block->frameId = nextFrameId++; - - /* References to the reduce item. */ - addMatchLength( curLocalFrame, langEl ); - addMatchText( curLocalFrame, langEl ); - addInput( curLocalFrame ); - addCtx( curLocalFrame ); - - CodeVect &code = block->codeWV; - - /* Add the alloc frame opcode. We don't have the right - * frame size yet. We will fill it in later. */ - code.append( IN_INIT_LOCALS ); - code.appendHalf( 0 ); - - if ( langEl->tokenDef->reCaptureVect.length() > 0 ) { - code.append( IN_INIT_CAPTURES ); - code.append( langEl->tokenDef->reCaptureVect.length() ); - - ObjFieldList::Iter f = *curLocalFrame->objFieldList; - for ( int i = 0; i < langEl->tokenDef->reCaptureVect.length(); i++, f++ ) - curLocalFrame->referenceField( this, f->value ); - } - - /* Set the local frame and compile the reduce block. */ - block->compile( this, code ); - - /* We have the frame size now. Set in the alloc frame instruction. */ - long frameSize = curLocalFrame->size(); - code.setHalf( 1, frameSize ); - - code.append( IN_PCR_RET ); - - /* Now that compilation is done variables are referenced. Make the local - * trees descriptor. 
*/ - findLocalTrees( block->trees ); -} - -void Compiler::compilePreEof( TokenRegion *region ) -{ - CodeBlock *block = region->preEofBlock; - - /* Set up compilation context. */ - compileContext = CompileTranslation; - curLocalFrame = region->preEofBlock->localFrame; - revertOn = true; - block->frameId = nextFrameId++; - - addInput( curLocalFrame ); - addCtx( curLocalFrame ); - - CodeVect &code = block->codeWV; - - /* Add the alloc frame opcode. We don't have the right - * frame size yet. We will fill it in later. */ - code.append( IN_INIT_LOCALS ); - code.appendHalf( 0 ); - - /* Set the local frame and compile the reduce block. */ - block->compile( this, code ); - - /* We have the frame size now. Set in the alloc frame instruction. */ - long frameSize = curLocalFrame->size(); - code.setHalf( 1, frameSize ); - - code.append( IN_PCR_RET ); - - /* Now that compilation is done variables are referenced. Make the local - * trees descriptor. */ - findLocalTrees( block->trees ); -} - -void Compiler::compileRootBlock( ) -{ - CodeBlock *block = rootCodeBlock; - - /* The root block never needs to be reverted. */ - - /* Set up the compile context. No locals are needed for the root code - * block, but we need an empty local frame for the compile. */ - compileContext = CompileRoot; - curLocalFrame = rootLocalFrame; - revertOn = false; - - /* The block needs a frame id. */ - block->frameId = nextFrameId++; - - /* The root block is not reverted. */ - CodeVect &code = block->codeWC; - - /* Add the alloc frame opcode. We don't have the right - * frame size yet. We will fill it in later. */ - code.append( IN_INIT_LOCALS ); - code.appendHalf( 0 ); - - code.append( IN_LOAD_ARGV ); - code.appendHalf( argvOffset() ); - - block->compile( this, code ); - - /* We have the frame size now. Store it in frame init. */ - long frameSize = curLocalFrame->size(); - code.setHalf( 1, frameSize ); - - code.append( IN_STOP ); - - /* Make the local trees descriptor. 
*/ - findLocalTrees( block->trees ); -} - -void Compiler::initAllLanguageObjects() -{ - /* Init all user object fields (need consistent size). */ - for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) { - ObjectDef *objDef = lel->objectDef; - if ( objDef != 0 ) { - /* Init all fields of the object. */ - for ( ObjFieldList::Iter f = *objDef->objFieldList; f.lte(); f++ ) - objDef->initField( this, f->value ); - } - } - - /* Init all fields of the global object. */ - for ( ObjFieldList::Iter f = *globalObjectDef->objFieldList; f.lte(); f++ ) - globalObjectDef->initField( this, f->value ); -} - -void Compiler::initMapFunctions( GenericType *gen ) -{ - addLengthField( gen->objDef, IN_MAP_LENGTH ); - initFunction( gen->utArg, gen->objDef, "find", - IN_MAP_FIND, IN_MAP_FIND, gen->keyUT, true ); - initFunction( uniqueTypeInt, gen->objDef, "insert", - IN_MAP_INSERT_WV, IN_MAP_INSERT_WC, gen->keyUT, gen->utArg, false ); - initFunction( uniqueTypeInt, gen->objDef, "store", - IN_MAP_STORE_WV, IN_MAP_STORE_WC, gen->keyUT, gen->utArg, false ); - initFunction( gen->utArg, gen->objDef, "remove", - IN_MAP_REMOVE_WV, IN_MAP_REMOVE_WC, gen->keyUT, false ); -} - -void Compiler::initListFunctions( GenericType *gen ) -{ - addLengthField( gen->objDef, IN_LIST_LENGTH ); - - initFunction( uniqueTypeInt, gen->objDef, "append", - IN_LIST_APPEND_WV, IN_LIST_APPEND_WC, gen->utArg, false ); - initFunction( uniqueTypeInt, gen->objDef, "push", - IN_LIST_APPEND_WV, IN_LIST_APPEND_WC, gen->utArg, false ); - - initFunction( gen->utArg, gen->objDef, "remove_end", - IN_LIST_REMOVE_END_WV, IN_LIST_REMOVE_END_WC, false ); - initFunction( gen->utArg, gen->objDef, "pop", - IN_LIST_REMOVE_END_WV, IN_LIST_REMOVE_END_WC, false ); -} - -void Compiler::initListField( GenericType *gen, const char *name, int offset ) -{ - /* Make the type ref and create the field. 
*/ - TypeRef *typeRef = new TypeRef( InputLoc(), gen->utArg ); - ObjField *el = new ObjField( InputLoc(), typeRef, name ); - - el->inGetR = IN_GET_LIST_MEM_R; - el->inGetWC = IN_GET_LIST_MEM_WC; - el->inGetWV = IN_GET_LIST_MEM_WV; - el->inSetWC = IN_SET_LIST_MEM_WC; - el->inSetWV = IN_SET_LIST_MEM_WV; - - gen->objDef->insertField( el->name, el ); - - el->useOffset = true; - el->beenReferenced = true; - el->beenInitialized = true; - - /* Zero for head, One for tail. */ - el->offset = offset; -} - -void Compiler::initListFields( GenericType *gen ) -{ - initListField( gen, "head", 0 ); - initListField( gen, "tail", 1 ); - initListField( gen, "top", 1 ); -} - -void Compiler::initVectorFunctions( GenericType *gen ) -{ - addLengthField( gen->objDef, IN_VECTOR_LENGTH ); - initFunction( uniqueTypeInt, gen->objDef, "append", - IN_VECTOR_APPEND_WV, IN_VECTOR_APPEND_WC, gen->utArg, false ); - initFunction( uniqueTypeInt, gen->objDef, "insert", - IN_VECTOR_INSERT_WV, IN_VECTOR_INSERT_WC, uniqueTypeInt, gen->utArg, false ); -} - -void Compiler::initParserFunctions( GenericType *gen ) -{ - initFunction( gen->utArg, gen->objDef, "finish", - IN_PARSE_FINISH_WV, IN_PARSE_FINISH_WC, true ); -} - -void Compiler::initCtxField( GenericType *gen ) -{ - LangEl *langEl = gen->utArg->langEl; - Context *context = langEl->contextIn; - - /* Make the type ref and create the field. 
*/ - UniqueType *ctxUT = findUniqueType( TYPE_TREE, context->lel ); - TypeRef *typeRef = new TypeRef( InputLoc(), ctxUT ); - ObjField *el = new ObjField( InputLoc(), typeRef, "ctx" ); - - el->inGetR = IN_GET_ACCUM_CTX_R; - el->inGetWC = IN_GET_ACCUM_CTX_WC; - el->inGetWV = IN_GET_ACCUM_CTX_WV; - el->inSetWC = IN_SET_ACCUM_CTX_WC; - el->inSetWV = IN_SET_ACCUM_CTX_WV; - - gen->objDef->insertField( el->name, el ); - - el->useOffset = false; - el->beenReferenced = true; - el->beenInitialized = true; -} - -void Compiler::initParserFields( GenericType *gen ) -{ - LangEl *langEl = gen->utArg->langEl; - if ( langEl->contextIn != 0 ) - initCtxField( gen ); -} - -void Compiler::initGenericTypes() -{ - for ( NamespaceList::Iter ns = namespaceList; ns.lte(); ns++ ) { - for ( GenericList::Iter gen = ns->genericList; gen.lte(); gen++ ) { - gen->utArg = gen->typeArg->uniqueType; - - if ( gen->typeId == GEN_MAP ) - gen->keyUT = gen->keyTypeArg->uniqueType; - - gen->objDef = new ObjectDef( ObjectDef::BuiltinType, - gen->name, nextObjectId++ ); - - switch ( gen->typeId ) { - case GEN_MAP: - initMapFunctions( gen ); - break; - case GEN_LIST: - initListFunctions( gen ); - initListFields( gen ); - break; - case GEN_VECTOR: - initVectorFunctions( gen ); - break; - case GEN_PARSER: - /* Need to generate a parser for the type. */ - gen->utArg->langEl->parserId = nextParserId++; - initParserFunctions( gen ); - initParserFields( gen ); - break; - } - - gen->langEl->objectDef = gen->objDef; - } - } -} - -void Compiler::makeFuncVisible( Function *func, bool isUserIter ) -{ - func->localFrame = func->codeBlock->localFrame; - - /* Set up the parameters. 
*/ - long paramPos = 0, paramListSize = 0; - UniqueType **paramUTs = new UniqueType*[func->paramList->length()]; - for ( ParameterList::Iter param = *func->paramList; param.lte(); param++ ) { - paramUTs[paramPos] = param->typeRef->uniqueType; - - if ( func->localFrame->findField( param->name ) != 0 ) - error(param->loc) << "parameter " << param->name << " redeclared" << endp; - - func->localFrame->insertField( param->name, param ); - param->beenInitialized = true; - param->pos = paramPos; - - /* Initialize the object field as a local variable. We also want trees - * downreffed. */ - if ( paramUTs[paramPos]->typeId == TYPE_REF ) - initLocalRefInstructions( param ); - else - initLocalInstructions( param ); - - paramListSize += sizeOfField( paramUTs[paramPos] ); - paramPos += 1; - } - - /* Param offset is relative to one past the last item in the array of - * words containing the args. */ - long paramOffset = 0; - for ( ParameterList::Iter param = *func->paramList; param.lte(); param++ ) { - /* Moving downward, and need the offset to point to the lower half of - * the argument. */ - paramOffset -= sizeOfField( paramUTs[param->pos] ); - - /* How much space do we need to make for call overhead. */ - long frameAfterArgs = isUserIter ? IFR_AA : FR_AA; - - /* Going up first we have the frame data, then maybe - * the user iterator, then the args from high to low. */ - param->offset = frameAfterArgs + - ( isUserIter ? ( sizeof(UserIter) / sizeof(Word) ) : 0 ) + - paramListSize + paramOffset; - } - - func->paramListSize = paramListSize; - func->paramUTs = paramUTs; - - /* Insert the function into the global function map. */ - UniqueType *returnUT = func->typeRef != 0 ? 
- func->typeRef->uniqueType : uniqueTypeInt; - ObjMethod *objMethod = new ObjMethod( returnUT, func->name, - IN_CALL_WV, IN_CALL_WC, - func->paramList->length(), paramUTs, func->paramList, false ); - objMethod->funcId = func->funcId; - objMethod->useFuncId = true; - objMethod->useCallObj = false; - objMethod->func = func; - - if ( isUserIter ) { - IterDef *uiter = findIterDef( IterDef::User, func ); - objMethod->iterDef = uiter; - } - - globalObjectDef->objMethodMap->insert( func->name, objMethod ); -} - -void Compiler::compileUserIter( Function *func, CodeVect &code ) -{ - CodeBlock *block = func->codeBlock; - - /* Add the alloc frame opcode. We don't have the right - * frame size yet. We will fill it in later. */ - code.append( IN_INIT_LOCALS ); - code.appendHalf( 0 ); - - /* Compile the block. */ - block->compile( this, code ); - - /* We have the frame size now. Set in the alloc frame instruction. */ - int frameSize = func->localFrame->size(); - code.setHalf( 1, frameSize ); - - /* Check for a return statement. */ - if ( block->stmtList->length() == 0 || - block->stmtList->tail->type != LangStmt::YieldType ) - { - /* Push the return value. */ - code.append( IN_LOAD_NIL ); - code.append( IN_YIELD ); - } -} - -void Compiler::compileUserIter( Function *func ) -{ - CodeBlock *block = func->codeBlock; - - /* Set up the context. */ - compileContext = CompileFunction; - curFunction = func; - block->frameId = nextFrameId++; - - /* Need an object for the local frame. */ - curLocalFrame = func->codeBlock->localFrame; - - /* Compile for revert and commit. */ - revertOn = true; - compileUserIter( func, block->codeWV ); - - revertOn = false; - compileUserIter( func, block->codeWC ); - - /* Now that compilation is done variables are referenced. Make the local - * trees descriptor. */ - findLocalTrees( block->trees ); - - /* FIXME: Need to deal with the freeing of local trees. */ -} - -/* Called for each type of function compile: revert and commit. 
*/ -void Compiler::compileFunction( Function *func, CodeVect &code ) -{ - CodeBlock *block = func->codeBlock; - - /* Add the alloc frame opcode. We don't have the right - * frame size yet. We will fill it in later. */ - code.append( IN_INIT_LOCALS ); - code.appendHalf( 0 ); - - /* Compile the block. */ - block->compile( this, code ); - - /* We have the frame size now. Set in the alloc frame instruction. */ - int frameSize = func->localFrame->size(); - code.setHalf( 1, frameSize ); - - /* Check for a return statement. */ - if ( block->stmtList->length() == 0 || - block->stmtList->tail->type != LangStmt::ReturnType ) - { - /* Push the return value. */ - code.append( IN_LOAD_NIL ); - code.append( IN_SAVE_RET ); - } - - /* Compute the jump distance for the return jumps. */ - for ( LongVect::Iter rj = returnJumps; rj.lte(); rj++ ) { - long distance = code.length() - *rj - 3; - code.setHalf( *rj+1, distance ); - } - - /* Reset the vector of return jumps. */ - returnJumps.empty(); - - /* Return cleans up the stack (including the args) and leaves the return - * value on the top. */ - code.append( IN_RET ); -} - -void Compiler::compileFunction( Function *func ) -{ - CodeBlock *block = func->codeBlock; - - /* Set up the compilation context. */ - compileContext = CompileFunction; - curFunction = func; - - /* Assign a frame Id. */ - block->frameId = nextFrameId++; - - /* Need an object for the local frame. */ - curLocalFrame = func->codeBlock->localFrame; - - /* Compile once for revert. */ - revertOn = true; - compileFunction( func, block->codeWV ); - - /* Compile once for commit. */ - revertOn = false; - compileFunction( func, block->codeWC ); - - /* Now that compilation is done variables are referenced. Make the local - * trees descriptor. */ - findLocalTrees( block->trees ); -} - -void Compiler::makeDefaultIterators() -{ - /* Tree iterator. 
*/ - { - UniqueType *anyRefUT = findUniqueType( TYPE_REF, anyLangEl ); - ObjMethod *objMethod = initFunction( uniqueTypeAny, globalObjectDef, - "triter", IN_HALT, IN_HALT, anyRefUT, true ); - - IterDef *triter = findIterDef( IterDef::Tree ); - objMethod->iterDef = triter; - } - - /* Child iterator. */ - { - UniqueType *anyRefUT = findUniqueType( TYPE_REF, anyLangEl ); - ObjMethod *objMethod = initFunction( uniqueTypeAny, globalObjectDef, - "child", IN_HALT, IN_HALT, anyRefUT, true ); - - IterDef *triter = findIterDef( IterDef::Child ); - objMethod->iterDef = triter; - } - - /* Reverse iterator. */ - { - UniqueType *anyRefUT = findUniqueType( TYPE_REF, anyLangEl ); - ObjMethod *objMethod = initFunction( uniqueTypeAny, globalObjectDef, - "rev_child", IN_HALT, IN_HALT, anyRefUT, true ); - - IterDef *triter = findIterDef( IterDef::RevChild ); - objMethod->iterDef = triter; - } - - /* Repeat iterator. */ - { - UniqueType *anyRefUT = findUniqueType( TYPE_REF, anyLangEl ); - ObjMethod *objMethod = initFunction( uniqueTypeAny, globalObjectDef, - "repeat", IN_HALT, IN_HALT, anyRefUT, true ); - - IterDef *triter = findIterDef( IterDef::Repeat ); - objMethod->iterDef = triter; - } - - /* Reverse repeat iterator. */ - { - UniqueType *anyRefUT = findUniqueType( TYPE_REF, anyLangEl ); - ObjMethod *objMethod = initFunction( uniqueTypeAny, globalObjectDef, - "rev_repeat", IN_HALT, IN_HALT, anyRefUT, true ); - - IterDef *triter = findIterDef( IterDef::RevRepeat ); - objMethod->iterDef = triter; - } -} - -void Compiler::addStdin() -{ - /* Make the type ref. */ - TypeRef *typeRef = new TypeRef( InputLoc(), uniqueTypeStream ); - - /* Create the field and insert it into the map. */ - ObjField *el = new ObjField( InputLoc(), typeRef, "stdin" ); - el->beenReferenced = true; - el->beenInitialized = true; - el->isConst = true; - el->useOffset = false; - el->inGetR = IN_GET_STDIN; - globalObjectDef->insertField( el->name, el ); -} - -void Compiler::addStdout() -{ - /* Make the type ref. 
*/ - TypeRef *typeRef = new TypeRef( InputLoc(), uniqueTypeStr ); - - /* Create the field and insert it into the map. */ - ObjField *el = new ObjField( InputLoc(), typeRef, "stout" ); - el->beenReferenced = true; - el->beenInitialized = true; - el->isConst = true; - el->useOffset = false; - el->inGetR = IN_GET_STDOUT; - globalObjectDef->insertField( el->name, el ); -} - -void Compiler::addStderr() -{ - /* Make the type ref. */ - TypeRef *typeRef = new TypeRef( InputLoc(), uniqueTypeStr ); - - /* Create the field and insert it into the map. */ - ObjField *el = new ObjField( InputLoc(), typeRef, "stderr" ); - el->beenReferenced = true; - el->beenInitialized = true; - el->isConst = true; - el->useOffset = false; - el->inGetR = IN_GET_STDERR; - globalObjectDef->insertField( el->name, el ); -} - -void Compiler::addArgv() -{ - /* Create the field and insert it into the map. */ - ObjField *el = new ObjField( InputLoc(), argvTypeRef, "argv" ); - el->isArgv = true; - el->isConst = true; - globalObjectDef->insertField( el->name, el ); -} - -int Compiler::argvOffset() -{ - for ( ObjFieldList::Iter field = *globalObjectDef->objFieldList; - field.lte(); field++ ) - { - if ( field->value->isArgv ) { - globalObjectDef->referenceField( this, field->value ); - return field->value->offset; - } - } - assert(false); -} - -void Compiler::initGlobalFunctions() -{ - ObjMethod *method; - - method = initFunction( uniqueTypeStream, globalObjectDef, "open", - IN_OPEN_FILE, IN_OPEN_FILE, uniqueTypeStr, uniqueTypeStr, true ); - method->useCallObj = false; - - method = initFunction( uniqueTypeStr, globalObjectDef, "tolower", - IN_TO_LOWER, IN_TO_LOWER, uniqueTypeStr, true ); - method->useCallObj = false; - - method = initFunction( uniqueTypeStr, globalObjectDef, "toupper", - IN_TO_UPPER, IN_TO_UPPER, uniqueTypeStr, true ); - method->useCallObj = false; - - method = initFunction( uniqueTypeInt, globalObjectDef, "exit", - IN_EXIT, IN_EXIT, uniqueTypeInt, true ); - - method = initFunction( 
uniqueTypeStr, globalObjectDef, "error", - IN_ERROR, IN_ERROR, true ); - - addStdin(); - addStdout(); - addStderr(); - addArgv(); -} - -void Compiler::removeNonUnparsableRepls() -{ - for ( ReplList::Iter repl = replList; repl.lte(); ) { - Replacement *maybeDel = repl++; - if ( !maybeDel->parse ) - replList.detach( maybeDel ); - } -} - -void Compiler::compileByteCode() -{ -// initUniqueTypes(); - initIntObject(); - initStrObject(); - initStreamObject(); - initInputObject(); - initTokenObjects(); - makeDefaultIterators(); - initAllLanguageObjects(); - initGenericTypes(); - - initGlobalFunctions(); - - for ( FunctionList::Iter f = functionList; f.lte(); f++ ) - makeFuncVisible( f, f->isUserIter ); - - /* This may be comment rot: The function info structure relies on functions - * being compiled first, then iterators. */ - - /* Compile functions. */ - for ( FunctionList::Iter f = functionList; f.lte(); f++ ) { - if ( f->inContext != 0 ) - context = f->inContext; - if ( f->isUserIter ) - compileUserIter( f ); - else - compileFunction( f ); - context = 0; - } - - /* Compile the reduction code. */ - for ( DefList::Iter prod = prodList; prod.lte(); prod++ ) { - makeProdCopies( prod ); - if ( prod->redBlock != 0 ) { - if ( prod->redBlock->context != 0 ) - context = prod->redBlock->context; - compileReductionCode( prod ); - context = 0; - } - } - - /* Compile the token translation code. */ - for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) { - if ( lel->transBlock != 0 ) { - if ( lel->transBlock->context != 0 ) - context = lel->transBlock->context; - compileTranslateBlock( lel ); - context = 0; - } - } - - /* Compile preeof blocks. 
*/ - for ( RegionList::Iter r = regionList; r.lte(); r++ ) { - if ( r->preEofBlock != 0 ) - compilePreEof( r ); - } - - /* Compile the init code */ - compileRootBlock( ); - removeNonUnparsableRepls(); -} diff --git a/colm/tree.c b/colm/tree.c deleted file mode 100644 index 66e5e025..00000000 --- a/colm/tree.c +++ /dev/null @@ -1,2484 +0,0 @@ -/* - * Copyright 2008-2012 Adrian Thurston <thurston@complang.org> - */ - -/* This file is part of Colm. - * - * Colm is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * Colm is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Colm; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include <colm/pdarun.h> -#include <colm/tree.h> -#include <colm/pool.h> -#include <colm/bytecode.h> -#include <colm/debug.h> -#include <colm/map.h> -#include <string.h> -#include <stdlib.h> -#include <assert.h> - -#define true 1 -#define false 0 - -#define BUFFER_INITIAL_SIZE 4096 - -void listPrepend( List *list, ListEl *new_el) { listAddBefore(list, list->head, new_el); } -void listAppend( List *list, ListEl *new_el) { listAddAfter(list, list->tail, new_el); } - -ListEl *listDetach( List *list, ListEl *el ); -ListEl *listDetachFirst(List *list ) { return listDetach(list, list->head); } -ListEl *listDetachLast(List *list ) { return listDetach(list, list->tail); } - -long listLength(List *list) - { return list->listLen; } - -void initTreeIter( TreeIter *treeIter, const Ref *rootRef, int searchId, Tree **stackRoot ) -{ - 
treeIter->rootRef = *rootRef; - treeIter->searchId = searchId; - treeIter->stackRoot = stackRoot; - treeIter->stackSize = 0; - treeIter->ref.kid = 0; - treeIter->ref.next = 0; -} - -void initRevTreeIter( RevTreeIter *revTriter, const Ref *rootRef, - int searchId, Tree **stackRoot, int children ) -{ - revTriter->rootRef = *rootRef; - revTriter->searchId = searchId; - revTriter->stackRoot = stackRoot; - revTriter->stackSize = children; - revTriter->kidAtYield = 0; - revTriter->children = children; - revTriter->ref.kid = 0; - revTriter->ref.next = 0; -} - -void initUserIter( UserIter *userIter, Tree **stackRoot, long argSize, long searchId ) -{ - userIter->stackRoot = stackRoot; - userIter->argSize = argSize; - userIter->stackSize = 0; - userIter->resume = 0; - userIter->frame = 0; - userIter->searchId = searchId; - - userIter->ref.kid = 0; - userIter->ref.next = 0; -} - -Kid *allocAttrs( Program *prg, long length ) -{ - Kid *cur = 0; - long i; - for ( i = 0; i < length; i++ ) { - Kid *next = cur; - cur = kidAllocate( prg ); - cur->next = next; - } - return cur; -} - -void freeAttrs( Program *prg, Kid *attrs ) -{ - Kid *cur = attrs; - while ( cur != 0 ) { - Kid *next = cur->next; - kidFree( prg, cur ); - cur = next; - } -} - -void freeKidList( Program *prg, Kid *kid ) -{ - while ( kid != 0 ) { - Kid *next = kid->next; - kidFree( prg, kid ); - kid = next; - } -} - -void setAttr( Tree *tree, long pos, Tree *val ) -{ - long i; - Kid *kid = tree->child; - - if ( tree->flags & AF_LEFT_IGNORE ) - kid = kid->next; - if ( tree->flags & AF_RIGHT_IGNORE ) - kid = kid->next; - - for ( i = 0; i < pos; i++ ) - kid = kid->next; - kid->tree = val; -} - -Tree *getGlobal( Program *prg, long pos ) - { return getAttr( prg->global, pos ); } - -Tree *getAttr( Tree *tree, long pos ) -{ - long i; - Kid *kid = tree->child; - - if ( tree->flags & AF_LEFT_IGNORE ) - kid = kid->next; - if ( tree->flags & AF_RIGHT_IGNORE ) - kid = kid->next; - - for ( i = 0; i < pos; i++ ) - kid = kid->next; - 
return kid->tree; -} - - -Tree *getRepeatNext( Tree *tree ) -{ - Kid *kid = tree->child; - - if ( tree->flags & AF_LEFT_IGNORE ) - kid = kid->next; - if ( tree->flags & AF_RIGHT_IGNORE ) - kid = kid->next; - - return kid->next->tree; -} - -Tree *getRepeatVal( Tree *tree ) -{ - Kid *kid = tree->child; - - if ( tree->flags & AF_LEFT_IGNORE ) - kid = kid->next; - if ( tree->flags & AF_RIGHT_IGNORE ) - kid = kid->next; - - return kid->tree; -} - -int repeatEnd( Tree *tree ) -{ - Kid *kid = tree->child; - - if ( tree->flags & AF_LEFT_IGNORE ) - kid = kid->next; - if ( tree->flags & AF_RIGHT_IGNORE ) - kid = kid->next; - - return kid == 0; -} - -int listLast( Tree *tree ) -{ - Kid *kid = tree->child; - - if ( tree->flags & AF_LEFT_IGNORE ) - kid = kid->next; - if ( tree->flags & AF_RIGHT_IGNORE ) - kid = kid->next; - - return kid->next == 0; -} - -Kid *getAttrKid( Tree *tree, long pos ) -{ - long i; - Kid *kid = tree->child; - - if ( tree->flags & AF_LEFT_IGNORE ) - kid = kid->next; - if ( tree->flags & AF_RIGHT_IGNORE ) - kid = kid->next; - - for ( i = 0; i < pos; i++ ) - kid = kid->next; - return kid; -} - -Kid *kidListConcat( Kid *list1, Kid *list2 ) -{ - if ( list1 == 0 ) - return list2; - else if ( list2 == 0 ) - return list1; - - Kid *dest = list1; - while ( dest->next != 0 ) - dest = dest->next; - dest->next = list2; - return list1; -} - - -Stream *openStreamFile( Program *prg, FILE *file ) -{ - Stream *res = (Stream*)mapElAllocate( prg ); - res->id = LEL_ID_STREAM; - res->file = file; - res->in = newSourceStreamFile( file ); - initSourceStream( res->in ); - return res; -} - -Stream *openStreamFd( Program *prg, long fd ) -{ - Stream *res = (Stream*)mapElAllocate( prg ); - res->id = LEL_ID_STREAM; - res->in = newSourceStreamFd( fd ); - initSourceStream( res->in ); - return res; -} - -Stream *openFile( Program *prg, Tree *name, Tree *mode ) -{ - Head *headName = ((Str*)name)->value; - Head *headMode = ((Str*)mode)->value; - - const char *givenMode = 
stringData(headMode); - const char *fopenMode = 0; - if ( memcmp( givenMode, "r", stringLength(headMode) ) == 0 ) - fopenMode = "rb"; - else if ( memcmp( givenMode, "w", stringLength(headMode) ) == 0 ) - fopenMode = "wb"; - else { - fatal( "unknown file open mode: %s\n", givenMode ); - } - - /* Need to make a C-string (null terminated). */ - char *fileName = (char*)malloc(stringLength(headName)+1); - memcpy( fileName, stringData(headName), stringLength(headName) ); - fileName[stringLength(headName)] = 0; - FILE *file = fopen( fileName, fopenMode ); - free(fileName); - return openStreamFile( prg, file ); -} - -Tree *constructInteger( Program *prg, long i ) -{ - Int *integer = (Int*) treeAllocate( prg ); - integer->id = LEL_ID_INT; - integer->value = i; - - return (Tree*)integer; -} - -Tree *constructString( Program *prg, Head *s ) -{ - Str *str = (Str*) treeAllocate( prg ); - str->id = LEL_ID_STR; - str->value = s; - - return (Tree*)str; -} - -Tree *constructPointer( Program *prg, Tree *tree ) -{ - Kid *kid = kidAllocate( prg ); - kid->tree = tree; - kid->next = prg->heap; - prg->heap = kid; - - Pointer *pointer = (Pointer*) treeAllocate( prg ); - pointer->id = LEL_ID_PTR; - pointer->value = kid; - - return (Tree*)pointer; -} - -Tree *constructTerm( Program *prg, Word id, Head *tokdata ) -{ - LangElInfo *lelInfo = prg->rtd->lelInfo; - - Tree *tree = treeAllocate( prg ); - tree->id = id; - tree->refs = 0; - tree->tokdata = tokdata; - - int objectLength = lelInfo[tree->id].objectLength; - tree->child = allocAttrs( prg, objectLength ); - - return tree; -} - -Tree *constructInput( Program *prg ) -{ - Input *input = inputAllocate( prg ); - input->refs = 0; - input->id = LEL_ID_INPUT; - input->in = malloc( sizeof(InputStream) ); - initInputStream( input->in ); - return (Tree*)input; -} - -Kid *constructReplacementKid( Tree **bindings, Program *prg, Kid *prev, long pat ); - -static Kid *constructIgnoreList( Program *prg, long ignoreInd ) -{ - PatReplNode *nodes = 
prg->rtd->patReplNodes; - - Kid *first = 0, *last = 0; - while ( ignoreInd >= 0 ) { - Head *ignoreData = stringAllocPointer( prg, nodes[ignoreInd].data, nodes[ignoreInd].length ); - - Tree *ignTree = treeAllocate( prg ); - ignTree->refs = 1; - ignTree->id = nodes[ignoreInd].id; - ignTree->tokdata = ignoreData; - - Kid *ignKid = kidAllocate( prg ); - ignKid->tree = ignTree; - ignKid->next = 0; - - if ( last == 0 ) - first = ignKid; - else - last->next = ignKid; - - ignoreInd = nodes[ignoreInd].next; - last = ignKid; - } - - return first; -} - -static Kid *constructLeftIgnoreList( Program *prg, long pat ) -{ - PatReplNode *nodes = prg->rtd->patReplNodes; - return constructIgnoreList( prg, nodes[pat].leftIgnore ); -} - -static Kid *constructRightIgnoreList( Program *prg, long pat ) -{ - PatReplNode *nodes = prg->rtd->patReplNodes; - return constructIgnoreList( prg, nodes[pat].rightIgnore ); -} - -static void insLeftIgnore( Program *prg, Tree *tree, Tree *ignoreList ) -{ - assert( ! (tree->flags & AF_LEFT_IGNORE) ); - - /* Allocate. */ - Kid *kid = kidAllocate( prg ); - kid->tree = ignoreList; - treeUpref( ignoreList ); - - /* Attach it. */ - kid->next = tree->child; - tree->child = kid; - - tree->flags |= AF_LEFT_IGNORE; -} - -static void insRightIgnore( Program *prg, Tree *tree, Tree *ignoreList ) -{ - assert( ! (tree->flags & AF_RIGHT_IGNORE) ); - - /* Insert an ignore head in the child list. */ - Kid *kid = kidAllocate( prg ); - kid->tree = ignoreList; - treeUpref( ignoreList ); - - /* Attach it. */ - if ( tree->flags & AF_LEFT_IGNORE ) { - kid->next = tree->child->next; - tree->child->next = kid; - } - else { - kid->next = tree->child; - tree->child = kid; - } - - tree->flags |= AF_RIGHT_IGNORE; -} - -Tree *pushRightIgnore( Program *prg, Tree *pushTo, Tree *rightIgnore ) -{ - /* About to alter the data tree. Split first. */ - pushTo = splitTree( prg, pushTo ); - - if ( pushTo->flags & AF_RIGHT_IGNORE ) { - /* The previous token already has a right ignore. 
Merge by - * attaching it as a left ignore of the new list. */ - Kid *curIgnore = treeRightIgnoreKid( prg, pushTo ); - insLeftIgnore( prg, rightIgnore, curIgnore->tree ); - - /* Replace the current ignore. Safe to access refs here because we just - * upreffed it in insLeftIgnore. */ - curIgnore->tree->refs -= 1; - curIgnore->tree = rightIgnore; - treeUpref( rightIgnore ); - } - else { - /* Attach The ignore list. */ - insRightIgnore( prg, pushTo, rightIgnore ); - } - - return pushTo; -} - -Tree *pushLeftIgnore( Program *prg, Tree *pushTo, Tree *leftIgnore ) -{ - pushTo = splitTree( prg, pushTo ); - - /* Attach as left ignore to the token we are sending. */ - if ( pushTo->flags & AF_LEFT_IGNORE ) { - /* The token already has a left-ignore. Merge by attaching it as a - * right ignore of the new list. */ - Kid *curIgnore = treeLeftIgnoreKid( prg, pushTo ); - insRightIgnore( prg, leftIgnore, curIgnore->tree ); - - /* Replace the current ignore. Safe to upref here because we just - * upreffed it in insRightIgnore. */ - curIgnore->tree->refs -= 1; - curIgnore->tree = leftIgnore; - treeUpref( leftIgnore ); - } - else { - /* Attach the ignore list. 
*/ - insLeftIgnore( prg, pushTo, leftIgnore ); - } - - return pushTo; -} - -static void remLeftIgnore( Program *prg, Tree **sp, Tree *tree ) -{ - assert( tree->flags & AF_LEFT_IGNORE ); - - Kid *next = tree->child->next; - treeDownref( prg, sp, tree->child->tree ); - kidFree( prg, tree->child ); - tree->child = next; - - tree->flags &= ~AF_LEFT_IGNORE; -} - -static void remRightIgnore( Program *prg, Tree **sp, Tree *tree ) -{ - assert( tree->flags & AF_RIGHT_IGNORE ); - - if ( tree->flags & AF_LEFT_IGNORE ) { - Kid *next = tree->child->next->next; - treeDownref( prg, sp, tree->child->next->tree ); - kidFree( prg, tree->child->next ); - tree->child->next = next; - } - else { - Kid *next = tree->child->next; - treeDownref( prg, sp, tree->child->tree ); - kidFree( prg, tree->child ); - tree->child = next; - } - - tree->flags &= ~AF_RIGHT_IGNORE; -} - -Tree *popRightIgnore( Program *prg, Tree **sp, Tree *popFrom, Tree **rightIgnore ) -{ - /* Modifying the tree we are detaching from. */ - popFrom = splitTree( prg, popFrom ); - - Kid *riKid = treeRightIgnoreKid( prg, popFrom ); - - /* If the right ignore has a left ignore, then that was the original - * right ignore. */ - Kid *li = treeLeftIgnoreKid( prg, riKid->tree ); - if ( li != 0 ) { - treeUpref( li->tree ); - remLeftIgnore( prg, sp, riKid->tree ); - *rightIgnore = riKid->tree; - treeUpref( *rightIgnore ); - riKid->tree = li->tree; - } - else { - *rightIgnore = riKid->tree; - treeUpref( *rightIgnore ); - remRightIgnore( prg, sp, popFrom ); - } - - return popFrom; -} - -Tree *popLeftIgnore( Program *prg, Tree **sp, Tree *popFrom, Tree **leftIgnore ) -{ - /* Modifying, make the write safe. */ - popFrom = splitTree( prg, popFrom ); - - Kid *liKid = treeLeftIgnoreKid( prg, popFrom ); - - /* If the left ignore has a right ignore, then that was the original - * left ignore. 
*/ - Kid *ri = treeRightIgnoreKid( prg, liKid->tree ); - if ( ri != 0 ) { - treeUpref( ri->tree ); - remRightIgnore( prg, sp, liKid->tree ); - *leftIgnore = liKid->tree; - treeUpref( *leftIgnore ); - liKid->tree = ri->tree; - } - else { - *leftIgnore = liKid->tree; - treeUpref( *leftIgnore ); - remLeftIgnore( prg, sp, popFrom ); - } - - return popFrom; -} - - -/* Returns an uprefed tree. Saves us having to downref and bindings to zero to - * return a zero-ref tree. */ -Tree *constructReplacementTree( Kid *kid, Tree **bindings, Program *prg, long pat ) -{ - PatReplNode *nodes = prg->rtd->patReplNodes; - LangElInfo *lelInfo = prg->rtd->lelInfo; - Tree *tree = 0; - - if ( nodes[pat].bindId > 0 ) { - /* All bindings have been uprefed. */ - tree = bindings[nodes[pat].bindId]; - - long ignore = nodes[pat].leftIgnore; - Tree *leftIgnore = 0; - if ( ignore >= 0 ) { - Kid *ignore = constructLeftIgnoreList( prg, pat ); - - leftIgnore = treeAllocate( prg ); - leftIgnore->id = LEL_ID_IGNORE; - leftIgnore->child = ignore; - - tree = pushLeftIgnore( prg, tree, leftIgnore ); - } - - ignore = nodes[pat].rightIgnore; - Tree *rightIgnore = 0; - if ( ignore >= 0 ) { - Kid *ignore = constructRightIgnoreList( prg, pat ); - - rightIgnore = treeAllocate( prg ); - rightIgnore->id = LEL_ID_IGNORE; - rightIgnore->child = ignore; - - tree = pushRightIgnore( prg, tree, rightIgnore ); - } - } - else { - tree = treeAllocate( prg ); - tree->id = nodes[pat].id; - tree->refs = 1; - tree->tokdata = nodes[pat].length == 0 ? 0 : - stringAllocPointer( prg, - nodes[pat].data, nodes[pat].length ); - - int objectLength = lelInfo[tree->id].objectLength; - - Kid *attrs = allocAttrs( prg, objectLength ); - Kid *child = constructReplacementKid( bindings, prg, - 0, nodes[pat].child ); - - tree->child = kidListConcat( attrs, child ); - - /* Right first, then left. 
*/ - Kid *ignore = constructRightIgnoreList( prg, pat ); - if ( ignore != 0 ) { - Tree *ignoreList = treeAllocate( prg ); - ignoreList->id = LEL_ID_IGNORE; - ignoreList->refs = 1; - ignoreList->child = ignore; - - Kid *ignoreHead = kidAllocate( prg ); - ignoreHead->tree = ignoreList; - ignoreHead->next = tree->child; - tree->child = ignoreHead; - - tree->flags |= AF_RIGHT_IGNORE; - } - - ignore = constructLeftIgnoreList( prg, pat ); - if ( ignore != 0 ) { - Tree *ignoreList = treeAllocate( prg ); - ignoreList->id = LEL_ID_IGNORE; - ignoreList->refs = 1; - ignoreList->child = ignore; - - Kid *ignoreHead = kidAllocate( prg ); - ignoreHead->tree = ignoreList; - ignoreHead->next = tree->child; - tree->child = ignoreHead; - - tree->flags |= AF_LEFT_IGNORE; - } - - int i; - for ( i = 0; i < lelInfo[tree->id].numCaptureAttr; i++ ) { - long ci = pat+1+i; - CaptureAttr *ca = prg->rtd->captureAttr + lelInfo[tree->id].captureAttr + i; - Tree *attr = treeAllocate( prg ); - attr->id = nodes[ci].id; - attr->refs = 1; - attr->tokdata = nodes[ci].length == 0 ? 0 : - stringAllocPointer( prg, - nodes[ci].data, nodes[ci].length ); - - setAttr( tree, ca->offset, attr ); - } - } - - return tree; -} - -Kid *constructReplacementKid( Tree **bindings, Program *prg, Kid *prev, long pat ) -{ - PatReplNode *nodes = prg->rtd->patReplNodes; - Kid *kid = 0; - - if ( pat != -1 ) { - kid = kidAllocate( prg ); - kid->tree = constructReplacementTree( kid, bindings, prg, pat ); - - /* Recurse down next. 
*/ - Kid *next = constructReplacementKid( bindings, prg, - kid, nodes[pat].next ); - - kid->next = next; - } - - return kid; -} - -Tree *constructToken( Program *prg, Tree **root, long nargs ) -{ - Tree **const sp = root; - Tree **base = vm_ptop() + nargs; - - Int *idInt = (Int*)base[-1]; - Str *textStr = (Str*)base[-2]; - - long id = idInt->value; - Head *tokdata = stringCopy( prg, textStr->value ); - - LangElInfo *lelInfo = prg->rtd->lelInfo; - Tree *tree; - - if ( lelInfo[id].ignore ) { - tree = treeAllocate( prg ); - tree->refs = 1; - tree->id = id; - tree->tokdata = tokdata; - } - else { - long objectLength = lelInfo[id].objectLength; - Kid *attrs = allocAttrs( prg, objectLength ); - - tree = treeAllocate( prg ); - tree->id = id; - tree->refs = 1; - tree->tokdata = tokdata; - - tree->child = attrs; - - assert( nargs-2 <= objectLength ); - long id; - for ( id = 0; id < nargs-2; id++ ) { - setAttr( tree, id, base[-3-id] ); - treeUpref( getAttr( tree, id) ); - } - } - return tree; -} - -Tree *makeTree( Program *prg, Tree **root, long nargs ) -{ - Tree **const sp = root; - Tree **base = vm_ptop() + nargs; - - Int *idInt = (Int*)base[-1]; - - long id = idInt->value; - LangElInfo *lelInfo = prg->rtd->lelInfo; - - Tree *tree = treeAllocate( prg ); - tree->id = id; - tree->refs = 1; - - long objectLength = lelInfo[id].objectLength; - Kid *attrs = allocAttrs( prg, objectLength ); - - Kid *last = 0, *child = 0; - for ( id = 0; id < nargs-1; id++ ) { - Kid *kid = kidAllocate( prg ); - kid->tree = base[-2-id]; - treeUpref( kid->tree ); - - if ( last == 0 ) - child = kid; - else - last->next = kid; - - last = kid; - } - - tree->child = kidListConcat( attrs, child ); - - return tree; -} - -int testFalse( Program *prg, Tree *tree ) -{ - int flse = ( - tree == 0 || - tree == prg->falseVal || - ( tree->id == LEL_ID_INT && ((Int*)tree)->value == 0 ) ); - return flse; -} - -Kid *copyIgnoreList( Program *prg, Kid *ignoreHeader ) -{ - Kid *newHeader = kidAllocate( prg ); - Kid 
*last = 0, *ic = (Kid*)ignoreHeader->tree; - while ( ic != 0 ) { - Kid *newIc = kidAllocate( prg ); - - newIc->tree = ic->tree; - newIc->tree->refs += 1; - - /* List pointers. */ - if ( last == 0 ) - newHeader->tree = (Tree*)newIc; - else - last->next = newIc; - - ic = ic->next; - last = newIc; - } - return newHeader; -} - -Kid *copyKidList( Program *prg, Kid *kidList ) -{ - Kid *newList = 0, *last = 0, *ic = kidList; - - while ( ic != 0 ) { - Kid *newIc = kidAllocate( prg ); - - newIc->tree = ic->tree; - treeUpref( newIc->tree ); - - /* List pointers. */ - if ( last == 0 ) - newList = newIc; - else - last->next = newIc; - - ic = ic->next; - last = newIc; - } - return newList; -} - -/* New tree has zero ref. */ -Tree *copyRealTree( Program *prg, Tree *tree, Kid *oldNextDown, Kid **newNextDown ) -{ - /* Need to keep a lookout for next down. If - * copying it, return the copy. */ - Tree *newTree = treeAllocate( prg ); - - newTree->id = tree->id; - newTree->tokdata = stringCopy( prg, tree->tokdata ); - - /* Copy the child list. Start with ignores, then the list. */ - Kid *child = tree->child, *last = 0; - - /* Left ignores. */ - if ( tree->flags & AF_LEFT_IGNORE ) { - newTree->flags |= AF_LEFT_IGNORE; -// Kid *newHeader = copyIgnoreList( prg, child ); -// -// /* Always the head. */ -// newTree->child = newHeader; -// -// child = child->next; -// last = newHeader; - } - - /* Right ignores. */ - if ( tree->flags & AF_RIGHT_IGNORE ) { - newTree->flags |= AF_RIGHT_IGNORE; -// Kid *newHeader = copyIgnoreList( prg, child ); -// if ( last == 0 ) -// newTree->child = newHeader; -// else -// last->next = newHeader; -// child = child->next; -// last = newHeader; - } - - /* Attributes and children. */ - while ( child != 0 ) { - Kid *newKid = kidAllocate( prg ); - - /* Watch out for next down. */ - if ( child == oldNextDown ) - *newNextDown = newKid; - - newKid->tree = child->tree; - newKid->next = 0; - - /* May be an attribute. 
*/ - if ( newKid->tree != 0 ) - newKid->tree->refs += 1; - - /* Store the first child. */ - if ( last == 0 ) - newTree->child = newKid; - else - last->next = newKid; - - child = child->next; - last = newKid; - } - - return newTree; -} - -List *copyList( Program *prg, List *list, Kid *oldNextDown, Kid **newNextDown ) -{ -// #ifdef COLM_LOG_BYTECODE -// if ( colm_log_bytecode ) { -// cerr << "splitting list: " << list << " refs: " << -// list->refs << endl; -// } -// #endif - - /* Not a need copy. */ - List *newList = (List*)mapElAllocate( prg ); - newList->id = list->genericInfo->langElId; - newList->genericInfo = list->genericInfo; - - ListEl *src = list->head; - while( src != 0 ) { - ListEl *newEl = listElAllocate( prg ); - newEl->value = src->value; - treeUpref( newEl->value ); - - listAppend( newList, newEl ); - - /* Watch out for next down. */ - if ( (Kid*)src == oldNextDown ) - *newNextDown = (Kid*)newEl; - - src = src->next; - } - - return newList; -} - -Map *copyMap( Program *prg, Map *map, Kid *oldNextDown, Kid **newNextDown ) -{ -// #ifdef COLM_LOG_BYTECODE -// if ( colm_log_bytecode ) { -// cerr << "splitting map: " << map << " refs: " << -// map->refs << endl; -// } -// #endif - - Map *newMap = (Map*)mapElAllocate( prg ); - newMap->id = map->genericInfo->langElId; - newMap->genericInfo = map->genericInfo; - newMap->treeSize = map->treeSize; - newMap->root = 0; - - /* If there is a root, copy the tree. 
*/ - if ( map->root != 0 ) { - newMap->root = mapCopyBranch( prg, newMap, map->root, - oldNextDown, newNextDown ); - } - MapEl *el; - for ( el = newMap->head; el != 0; el = el->next ) { - assert( map->genericInfo->typeArg == TYPE_TREE ); - treeUpref( el->tree ); - } - - return newMap; -} - -Tree *copyTree( Program *prg, Tree *tree, Kid *oldNextDown, Kid **newNextDown ) -{ - LangElInfo *lelInfo = prg->rtd->lelInfo; - long genericId = lelInfo[tree->id].genericId; - if ( genericId > 0 ) { - GenericInfo *generic = &prg->rtd->genericInfo[genericId]; - if ( generic->type == GEN_LIST ) - tree = (Tree*) copyList( prg, (List*) tree, oldNextDown, newNextDown ); - else if ( generic->type == GEN_MAP ) - tree = (Tree*) copyMap( prg, (Map*) tree, oldNextDown, newNextDown ); - else if ( generic->type == GEN_PARSER ) { - /* Need to figure out the semantics here. */ - fatal( "ATTEMPT TO COPY PARSER\n" ); - assert(false); - } - } - else if ( tree->id == LEL_ID_PTR ) - assert(false); - else if ( tree->id == LEL_ID_BOOL ) - assert(false); - else if ( tree->id == LEL_ID_INT ) - assert(false); - else if ( tree->id == LEL_ID_STR ) - assert(false); - else if ( tree->id == LEL_ID_STREAM ) - assert(false); - else { - tree = copyRealTree( prg, tree, oldNextDown, newNextDown ); - } - - assert( tree->refs == 0 ); - return tree; -} - -Tree *splitTree( Program *prg, Tree *tree ) -{ - if ( tree != 0 ) { - assert( tree->refs >= 1 ); - - if ( tree->refs > 1 ) { - Kid *oldNextDown = 0, *newNextDown = 0; - Tree *newTree = copyTree( prg, tree, oldNextDown, &newNextDown ); - treeUpref( newTree ); - - /* Downref the original. Don't need to consider freeing because - * refs were > 1. 
*/ - tree->refs -= 1; - - tree = newTree; - } - - assert( tree->refs == 1 ); - } - return tree; -} - -Tree *createGeneric( Program *prg, long genericId ) -{ - GenericInfo *genericInfo = &prg->rtd->genericInfo[genericId]; - Tree *newGeneric = 0; - switch ( genericInfo->type ) { - case GEN_MAP: { - Map *map = (Map*)mapElAllocate( prg ); - map->id = genericInfo->langElId; - map->genericInfo = genericInfo; - newGeneric = (Tree*) map; - break; - } - case GEN_LIST: { - List *list = (List*)mapElAllocate( prg ); - list->id = genericInfo->langElId; - list->genericInfo = genericInfo; - newGeneric = (Tree*) list; - break; - } - case GEN_PARSER: { - Parser *parser = (Parser*)mapElAllocate( prg ); - parser->id = genericInfo->langElId; - parser->genericInfo = genericInfo; - parser->fsmRun = malloc( sizeof(FsmRun) ); - parser->pdaRun = malloc( sizeof(PdaRun) ); - - /* Start off the parsing process. */ - initPdaRun( parser->pdaRun, prg, prg->rtd->pdaTables, - parser->fsmRun, genericInfo->parserId, false, false, 0 ); - initFsmRun( parser->fsmRun, prg ); - newToken( prg, parser->pdaRun, parser->fsmRun ); - - newGeneric = (Tree*) parser; - break; - } - default: - assert(false); - return 0; - } - - return newGeneric; -} - - -/* We can't make recursive calls here since the tree we are freeing may be - * very large. Need the VM stack. 
*/ -void treeFreeRec( Program *prg, Tree **sp, Tree *tree ) -{ - Tree **top = sp; - LangElInfo *lelInfo; - long genericId; - -free_tree: - lelInfo = prg->rtd->lelInfo; - genericId = lelInfo[tree->id].genericId; - if ( genericId > 0 ) { - GenericInfo *generic = &prg->rtd->genericInfo[genericId]; - if ( generic->type == GEN_LIST ) { - List *list = (List*) tree; - ListEl *el = list->head; - while ( el != 0 ) { - ListEl *next = el->next; - vm_push( el->value ); - listElFree( prg, el ); - el = next; - } - mapElFree( prg, (MapEl*)list ); - } - else if ( generic->type == GEN_MAP ) { - Map *map = (Map*)tree; - MapEl *el = map->head; - while ( el != 0 ) { - MapEl *next = el->next; - vm_push( el->key ); - vm_push( el->tree ); - mapElFree( prg, el ); - el = next; - } - mapElFree( prg, (MapEl*)map ); - } - else if ( generic->type == GEN_PARSER ) { - Parser *parser = (Parser*)tree; - clearFsmRun( prg, parser->fsmRun ); - clearPdaRun( prg, sp, parser->pdaRun ); - free( parser->pdaRun ); - free( parser->fsmRun ); - treeDownref( prg, sp, (Tree*)parser->input ); - mapElFree( prg, (MapEl*)parser ); - } - else { - assert(false); - } - } - else { - if ( tree->id == LEL_ID_STR ) { - Str *str = (Str*) tree; - stringFree( prg, str->value ); - treeFree( prg, tree ); - } - else if ( tree->id == LEL_ID_BOOL || tree->id == LEL_ID_INT ) - treeFree( prg, tree ); - else if ( tree->id == LEL_ID_PTR ) - treeFree( prg, tree ); - else if ( tree->id == LEL_ID_STREAM ) { - Stream *stream = (Stream*)tree; - clearSourceStream( prg, sp, stream->in ); - free( stream->in ); - if ( stream->file != 0 ) - fclose( stream->file ); - streamFree( prg, stream ); - } - else if ( tree->id == LEL_ID_INPUT ) { - Input *input = (Input*)tree; - clearInputStream( prg, sp, input->in ); - free( input->in ); - inputFree( prg, input ); - } - else { - if ( tree->id != LEL_ID_IGNORE ) - stringFree( prg, tree->tokdata ); - - /* Attributes and grammar-based children. 
*/ - Kid *child = tree->child; - while ( child != 0 ) { - Kid *next = child->next; - vm_push( child->tree ); - kidFree( prg, child ); - child = next; - } - - treeFree( prg, tree ); - } - } - - /* Any trees to downref? */ - while ( sp != top ) { - tree = vm_pop(); - if ( tree != 0 ) { - assert( tree->refs > 0 ); - tree->refs -= 1; - if ( tree->refs == 0 ) - goto free_tree; - } - } -} - -void treeUpref( Tree *tree ) -{ - if ( tree != 0 ) - tree->refs += 1; -} - -void treeDownref( Program *prg, Tree **sp, Tree *tree ) -{ - if ( tree != 0 ) { - assert( tree->refs > 0 ); - tree->refs -= 1; - if ( tree->refs == 0 ) - treeFreeRec( prg, sp, tree ); - } -} - -/* Find the first child of a tree. */ -Kid *treeChild( Program *prg, const Tree *tree ) -{ - LangElInfo *lelInfo = prg->rtd->lelInfo; - Kid *kid = tree->child; - - if ( tree->flags & AF_LEFT_IGNORE ) - kid = kid->next; - if ( tree->flags & AF_RIGHT_IGNORE ) - kid = kid->next; - - /* Skip over attributes. */ - long objectLength = lelInfo[tree->id].objectLength; - long a; - for ( a = 0; a < objectLength; a++ ) - kid = kid->next; - - return kid; -} - -/* Detach at the first real child of a tree. */ -Kid *treeExtractChild( Program *prg, Tree *tree ) -{ - LangElInfo *lelInfo = prg->rtd->lelInfo; - Kid *kid = tree->child, *last = 0; - - if ( tree->flags & AF_LEFT_IGNORE ) - kid = kid->next; - if ( tree->flags & AF_RIGHT_IGNORE ) - kid = kid->next; - - /* Skip over attributes. */ - long a, objectLength = lelInfo[tree->id].objectLength; - for ( a = 0; a < objectLength; a++ ) { - last = kid; - kid = kid->next; - } - - if ( last == 0 ) - tree->child = 0; - else - last->next = 0; - - return kid; -} - - -/* Find the first child of a tree. 
*/ -Kid *treeAttr( Program *prg, const Tree *tree ) -{ - Kid *kid = tree->child; - - if ( tree->flags & AF_LEFT_IGNORE ) - kid = kid->next; - if ( tree->flags & AF_RIGHT_IGNORE ) - kid = kid->next; - - return kid; -} - -Tree *treeLeftIgnore( Program *prg, Tree *tree ) -{ - if ( tree->flags & AF_LEFT_IGNORE ) - return tree->child->tree; - return 0; -} - -Tree *treeRightIgnore( Program *prg, Tree *tree ) -{ - if ( tree->flags & AF_RIGHT_IGNORE ) { - if ( tree->flags & AF_LEFT_IGNORE ) - return tree->child->next->tree; - else - return tree->child->tree; - } - return 0; -} - -Kid *treeLeftIgnoreKid( Program *prg, Tree *tree ) -{ - if ( tree->flags & AF_LEFT_IGNORE ) - return tree->child; - return 0; -} - -Kid *treeRightIgnoreKid( Program *prg, Tree *tree ) -{ - if ( tree->flags & AF_RIGHT_IGNORE ) { - if ( tree->flags & AF_LEFT_IGNORE ) - return tree->child->next; - else - return tree->child; - } - return 0; -} - -Tree *treeIterDerefCur( TreeIter *iter ) -{ - return iter->ref.kid == 0 ? 0 : iter->ref.kid->tree; -} - -void refSetValue( Ref *ref, Tree *v ) -{ - Kid *firstKid = ref->kid; - while ( ref != 0 && ref->kid == firstKid ) { - ref->kid->tree = v; - ref = ref->next; - } -} - -Tree *getRhsEl( Program *prg, Tree *lhs, long position ) -{ - Kid *pos = treeChild( prg, lhs ); - while ( position > 0 ) { - pos = pos->next; - position -= 1; - } - return pos->tree; -} - -Tree *getRhsVal( Program *prg, Tree *tree, int *a ) -{ - int i, len = a[0]; - for ( i = 0; i < len; i++ ) { - int prodNum = a[1 + i * 2]; - int childNum = a[1 + i * 2 + 1]; - if ( tree->prodNum == prodNum ) - return getRhsEl( prg, tree, childNum ); - } - return 0; -} - -void setField( Program *prg, Tree *tree, long field, Tree *value ) -{ - assert( tree->refs == 1 ); - if ( value != 0 ) - assert( value->refs >= 1 ); - setAttr( tree, field, value ); -} - -Tree *getField( Tree *tree, Word field ) -{ - return getAttr( tree, field ); -} - -Kid *getFieldKid( Tree *tree, Word field ) -{ - return getAttrKid( tree, 
field ); -} - -Tree *getFieldSplit( Program *prg, Tree *tree, Word field ) -{ - Tree *val = getAttr( tree, field ); - Tree *split = splitTree( prg, val ); - setAttr( tree, field, split ); - return split; -} - -void setUiterCur( Program *prg, UserIter *uiter, Tree *tree ) -{ - uiter->ref.kid->tree = tree; -} - -void setTriterCur( Program *prg, TreeIter *iter, Tree *tree ) -{ - iter->ref.kid->tree = tree; -} - -Tree *getPtrVal( Pointer *ptr ) -{ - return ptr->value->tree; -} - -Tree *getPtrValSplit( Program *prg, Pointer *ptr ) -{ - Tree *val = ptr->value->tree; - Tree *split = splitTree( prg, val ); - ptr->value->tree = split; - return split; -} - -/* This must traverse in the same order that the bindId assignments are done - * in. */ -int matchPattern( Tree **bindings, Program *prg, long pat, Kid *kid, int checkNext ) -{ - PatReplNode *nodes = prg->rtd->patReplNodes; - -// #ifdef COLM_LOG_MATCH -// if ( colm_log_match ) { -// LangElInfo *lelInfo = prg->rtd->lelInfo; -// cerr << "match pattern " << ( pat == -1 ? "NULL" : lelInfo[nodes[pat].id].name ) << -// " vs " << ( kid == 0 ? "NULL" : lelInfo[kid->tree->id].name ) << endl; -// } -// #endif - - /* match node, recurse on children. */ - if ( pat != -1 && kid != 0 ) { - if ( nodes[pat].id == kid->tree->id ) { - /* If the pattern node has data, then this means we need to match - * the data against the token data. */ - if ( nodes[pat].data != 0 ) { - /* Check the length of token text. */ - if ( nodes[pat].length != stringLength( kid->tree->tokdata ) ) - return false; - - /* Check the token text data. */ - if ( nodes[pat].length > 0 && memcmp( nodes[pat].data, - stringData( kid->tree->tokdata ), nodes[pat].length ) != 0 ) - return false; - } - - /* No failure, all okay. 
*/ - if ( nodes[pat].bindId > 0 ) { -// #ifdef COLM_LOG_MATCH -// if ( colm_log_match ) { -// cerr << "bindId: " << nodes[pat].bindId << endl; -// } -// #endif - bindings[nodes[pat].bindId] = kid->tree; - } - - /* If we didn't match a terminal duplicate of a nonterm then check - * down the children. */ - if ( !nodes[pat].stop ) { - /* Check for failure down child branch. */ - int childCheck = matchPattern( bindings, prg, - nodes[pat].child, treeChild( prg, kid->tree ), true ); - if ( ! childCheck ) - return false; - } - - /* If checking next, then look for failure there. */ - if ( checkNext ) { - int nextCheck = matchPattern( bindings, prg, - nodes[pat].next, kid->next, true ); - if ( ! nextCheck ) - return false; - } - - return true; - } - } - else if ( pat == -1 && kid == 0 ) { - /* Both null is a match. */ - return 1; - } - - return false; -} - - -long cmpTree( Program *prg, const Tree *tree1, const Tree *tree2 ) -{ - long cmpres = 0; - if ( tree1 == 0 ) { - if ( tree2 == 0 ) - return 0; - else - return -1; - } - else if ( tree2 == 0 ) - return 1; - else if ( tree1->id < tree2->id ) - return -1; - else if ( tree1->id > tree2->id ) - return 1; - else if ( tree1->id == LEL_ID_PTR ) { - if ( ((Pointer*)tree1)->value < ((Pointer*)tree2)->value ) - return -1; - else if ( ((Pointer*)tree1)->value > ((Pointer*)tree2)->value ) - return 1; - } - else if ( tree1->id == LEL_ID_INT ) { - if ( ((Int*)tree1)->value < ((Int*)tree2)->value ) - return -1; - else if ( ((Int*)tree1)->value > ((Int*)tree2)->value ) - return 1; - } - else if ( tree1->id == LEL_ID_STR ) { - cmpres = cmpString( ((Str*)tree1)->value, ((Str*)tree2)->value ); - if ( cmpres != 0 ) - return cmpres; - } - else { - if ( tree1->tokdata == 0 && tree2->tokdata != 0 ) - return -1; - else if ( tree1->tokdata != 0 && tree2->tokdata == 0 ) - return 1; - else if ( tree1->tokdata != 0 && tree2->tokdata != 0 ) { - cmpres = cmpString( tree1->tokdata, tree2->tokdata ); - if ( cmpres != 0 ) - return cmpres; - } - } - - 
Kid *kid1 = treeChild( prg, tree1 ); - Kid *kid2 = treeChild( prg, tree2 ); - - while ( true ) { - if ( kid1 == 0 && kid2 == 0 ) - return 0; - else if ( kid1 == 0 && kid2 != 0 ) - return -1; - else if ( kid1 != 0 && kid2 == 0 ) - return 1; - else { - cmpres = cmpTree( prg, kid1->tree, kid2->tree ); - if ( cmpres != 0 ) - return cmpres; - } - kid1 = kid1->next; - kid2 = kid2->next; - } -} - - -void splitRef( Program *prg, Tree ***psp, Ref *fromRef ) -{ - /* Go up the chain of kids, turing the pointers down. */ - Ref *last = 0, *ref = fromRef, *next = 0; - while ( ref->next != 0 ) { - next = ref->next; - ref->next = last; - last = ref; - ref = next; - } - ref->next = last; - - /* Now traverse the list, which goes down. */ - while ( ref != 0 ) { - if ( ref->kid->tree->refs > 1 ) { -// #ifdef COLM_LOG_BYTECODE -// if ( colm_log_bytecode ) { -// cerr << "splitting tree: " << ref->kid << " refs: " << -// ref->kid->tree->refs << endl; -// } -// #endif - - Ref *nextDown = ref->next; - while ( nextDown != 0 && nextDown->kid == ref->kid ) - nextDown = nextDown->next; - - Kid *oldNextKidDown = nextDown != 0 ? nextDown->kid : 0; - Kid *newNextKidDown = 0; - - Tree *newTree = copyTree( prg, ref->kid->tree, - oldNextKidDown, &newNextKidDown ); - treeUpref( newTree ); - - /* Downref the original. Don't need to consider freeing because - * refs were > 1. */ - ref->kid->tree->refs -= 1; - - while ( ref != 0 && ref != nextDown ) { - next = ref->next; - ref->next = 0; - - ref->kid->tree = newTree; - ref = next; - } - - /* Correct kid pointers down from ref. */ - while ( nextDown != 0 && nextDown->kid == oldNextKidDown ) { - nextDown->kid = newNextKidDown; - nextDown = nextDown->next; - } - } - else { - /* Reset the list as we go down. 
*/ - next = ref->next; - ref->next = 0; - ref = next; - } - } -} - -void splitIterCur( Program *prg, Tree ***psp, TreeIter *iter ) -{ - if ( iter->ref.kid == 0 ) - return; - - splitRef( prg, psp, &iter->ref ); -} - -Tree *setListMem( List *list, Half field, Tree *value ) -{ - assert( list->refs == 1 ); - if ( value != 0 ) - assert( value->refs >= 1 ); - - Tree *existing = 0; - switch ( field ) { - case 0: - existing = list->head->value; - list->head->value = value; - break; - case 1: - existing = list->tail->value; - list->tail->value = value; - break; - default: - assert( false ); - break; - } - return existing; -} - -TreePair mapRemove( Program *prg, Map *map, Tree *key ) -{ - MapEl *mapEl = mapImplFind( prg, map, key ); - TreePair result = { 0, 0 }; - if ( mapEl != 0 ) { - mapDetach( prg, map, mapEl ); - result.key = mapEl->key; - result.val = mapEl->tree; - mapElFree( prg, mapEl ); - } - - return result; -} - -Tree *mapUnstore( Program *prg, Map *map, Tree *key, Tree *existing ) -{ - Tree *stored = 0; - if ( existing == 0 ) { - MapEl *mapEl = mapDetachByKey( prg, map, key ); - stored = mapEl->tree; - mapElFree( prg, mapEl ); - } - else { - MapEl *mapEl = mapImplFind( prg, map, key ); - stored = mapEl->tree; - mapEl->tree = existing; - } - return stored; -} - -Tree *mapFind( Program *prg, Map *map, Tree *key ) -{ - MapEl *mapEl = mapImplFind( prg, map, key ); - return mapEl == 0 ? 
0 : mapEl->tree; -} - -long mapLength( Map *map ) -{ - return map->treeSize; -} - -void listAppend2( Program *prg, List *list, Tree *val ) -{ - assert( list->refs == 1 ); - if ( val != 0 ) - assert( val->refs >= 1 ); - ListEl *listEl = listElAllocate( prg ); - listEl->value = val; - listAppend( list, listEl ); -} - -Tree *listRemoveEnd( Program *prg, List *list ) -{ - Tree *tree = list->tail->value; - listElFree( prg, listDetachLast( list ) ); - return tree; -} - -Tree *getListMem( List *list, Word field ) -{ - Tree *result = 0; - switch ( field ) { - case 0: - result = list->head->value; - break; - case 1: - result = list->tail->value; - break; - default: - assert( false ); - break; - } - return result; -} - -Tree *getListMemSplit( Program *prg, List *list, Word field ) -{ - Tree *sv = 0; - switch ( field ) { - case 0: - sv = splitTree( prg, list->head->value ); - list->head->value = sv; - break; - case 1: - sv = splitTree( prg, list->tail->value ); - list->tail->value = sv; - break; - default: - assert( false ); - break; - } - return sv; -} - - -int mapInsert( Program *prg, Map *map, Tree *key, Tree *element ) -{ - MapEl *mapEl = mapInsertKey( prg, map, key, 0 ); - - if ( mapEl != 0 ) { - mapEl->tree = element; - return true; - } - - return false; -} - -void mapUnremove( Program *prg, Map *map, Tree *key, Tree *element ) -{ - MapEl *mapEl = mapInsertKey( prg, map, key, 0 ); - assert( mapEl != 0 ); - mapEl->tree = element; -} - -Tree *mapUninsert( Program *prg, Map *map, Tree *key ) -{ - MapEl *el = mapDetachByKey( prg, map, key ); - Tree *val = el->tree; - mapElFree( prg, el ); - return val; -} - -Tree *mapStore( Program *prg, Map *map, Tree *key, Tree *element ) -{ - Tree *oldTree = 0; - MapEl *elInTree = 0; - MapEl *mapEl = mapInsertKey( prg, map, key, &elInTree ); - - if ( mapEl != 0 ) - mapEl->tree = element; - else { - /* Element with key exists. Overwriting the value. 
*/ - oldTree = elInTree->tree; - elInTree->tree = element; - } - - return oldTree; -} - -void iterFind( Program *prg, Tree ***psp, TreeIter *iter, int tryFirst ) -{ - int anyTree = iter->searchId == prg->rtd->anyId; - Tree **top = iter->stackRoot; - Kid *child; - Tree **sp = *psp; - -rec_call: - if ( tryFirst && ( iter->ref.kid->tree->id == iter->searchId || anyTree ) ) { - *psp = sp; - return; - } - else { - child = treeChild( prg, iter->ref.kid->tree ); - if ( child != 0 ) { - vm_push( (SW) iter->ref.next ); - vm_push( (SW) iter->ref.kid ); - iter->ref.kid = child; - iter->ref.next = (Ref*)vm_ptop(); - while ( iter->ref.kid != 0 ) { - tryFirst = true; - goto rec_call; - rec_return: - iter->ref.kid = iter->ref.kid->next; - } - iter->ref.kid = (Kid*)vm_pop(); - iter->ref.next = (Ref*)vm_pop(); - } - } - - if ( top != vm_ptop() ) - goto rec_return; - - iter->ref.kid = 0; - *psp = sp; -} - -Tree *treeIterAdvance( Program *prg, Tree ***psp, TreeIter *iter ) -{ - assert( iter->stackSize == iter->stackRoot - *psp ); - - if ( iter->ref.kid == 0 ) { - /* Kid is zero, start from the root. */ - iter->ref = iter->rootRef; - iterFind( prg, psp, iter, true ); - } - else { - /* Have a previous item, continue searching from there. */ - iterFind( prg, psp, iter, false ); - } - - iter->stackSize = iter->stackRoot - *psp; - - return (iter->ref.kid ? prg->trueVal : prg->falseVal ); -} - -Tree *treeIterNextChild( Program *prg, Tree ***psp, TreeIter *iter ) -{ - Tree **sp = *psp; - assert( iter->stackSize == iter->stackRoot - vm_ptop() ); - Kid *kid = 0; - - if ( iter->ref.kid == 0 ) { - /* Kid is zero, start from the first child. */ - Kid *child = treeChild( prg, iter->rootRef.kid->tree ); - - if ( child == 0 ) - iter->ref.next = 0; - else { - /* Make a reference to the root. */ - vm_push( (SW) iter->rootRef.next ); - vm_push( (SW) iter->rootRef.kid ); - iter->ref.next = (Ref*)vm_ptop(); - - kid = child; - } - } - else { - /* Start at next. 
*/ - kid = iter->ref.kid->next; - } - - if ( iter->searchId != prg->rtd->anyId ) { - /* Have a previous item, go to the next sibling. */ - while ( kid != 0 && kid->tree->id != iter->searchId ) - kid = kid->next; - } - - iter->ref.kid = kid; - iter->stackSize = iter->stackRoot - vm_ptop(); - *psp = sp; - return ( iter->ref.kid ? prg->trueVal : prg->falseVal ); -} - -Tree *treeRevIterPrevChild( Program *prg, Tree ***psp, RevTreeIter *iter ) -{ - Tree **sp = *psp; - - assert( iter->stackSize == iter->stackRoot - vm_ptop() ); - - if ( iter->kidAtYield != iter->ref.kid ) { - /* Need to reload the kids. */ - Kid *kid = treeChild( prg, iter->rootRef.kid->tree ); - Kid **dst = (Kid**)iter->stackRoot - 1; - while ( kid != 0 ) { - *dst-- = kid; - kid = kid->next; - } - } - - if ( iter->ref.kid == 0 ) - iter->cur = (Kid**)iter->stackRoot - iter->children; - else - iter->cur += 1; - - if ( iter->searchId != prg->rtd->anyId ) { - /* Have a previous item, go to the next sibling. */ - while ( iter->cur != (Kid**)iter->stackRoot && (*iter->cur)->tree->id != iter->searchId ) - iter->cur += 1; - } - - if ( iter->cur == (Kid**)iter->stackRoot ) { - iter->ref.next = 0; - iter->ref.kid = 0; - } - else { - iter->ref.next = &iter->rootRef; - iter->ref.kid = *iter->cur; - } - - /* We will use this to detect a split above the iterated tree. */ - iter->kidAtYield = iter->ref.kid; - - iter->stackSize = iter->stackRoot - vm_ptop(); - - *psp = sp; - - return (iter->ref.kid ? 
prg->trueVal : prg->falseVal ); -} - -void iterFindRepeat( Program *prg, Tree ***psp, TreeIter *iter, int tryFirst ) -{ - Tree **sp = *psp; - int anyTree = iter->searchId == prg->rtd->anyId; - Tree **top = iter->stackRoot; - Kid *child; - -rec_call: - if ( tryFirst && ( iter->ref.kid->tree->id == iter->searchId || anyTree ) ) { - *psp = sp; - return; - } - else { - /* The repeat iterator is just like the normal top-down-left-right, - * execept it only goes into the children of a node if the node is the - * root of the iteration, or if does not have any neighbours to the - * right. */ - if ( top == vm_ptop() || iter->ref.kid->next == 0 ) { - child = treeChild( prg, iter->ref.kid->tree ); - if ( child != 0 ) { - vm_push( (SW) iter->ref.next ); - vm_push( (SW) iter->ref.kid ); - iter->ref.kid = child; - iter->ref.next = (Ref*)vm_ptop(); - while ( iter->ref.kid != 0 ) { - tryFirst = true; - goto rec_call; - rec_return: - iter->ref.kid = iter->ref.kid->next; - } - iter->ref.kid = (Kid*)vm_pop(); - iter->ref.next = (Ref*)vm_pop(); - } - } - } - - if ( top != vm_ptop() ) - goto rec_return; - - iter->ref.kid = 0; - *psp = sp; -} - -Tree *treeIterNextRepeat( Program *prg, Tree ***psp, TreeIter *iter ) -{ - assert( iter->stackSize == iter->stackRoot - *psp ); - - if ( iter->ref.kid == 0 ) { - /* Kid is zero, start from the root. */ - iter->ref = iter->rootRef; - iterFindRepeat( prg, psp, iter, true ); - } - else { - /* Have a previous item, continue searching from there. */ - iterFindRepeat( prg, psp, iter, false ); - } - - iter->stackSize = iter->stackRoot - *psp; - - return (iter->ref.kid ? 
prg->trueVal : prg->falseVal ); -} - -void iterFindRevRepeat( Program *prg, Tree ***psp, TreeIter *iter, int tryFirst ) -{ - Tree **sp = *psp; - int anyTree = iter->searchId == prg->rtd->anyId; - Tree **top = iter->stackRoot; - Kid *child; - - if ( tryFirst ) { - while ( true ) { - if ( top == vm_ptop() || iter->ref.kid->next == 0 ) { - child = treeChild( prg, iter->ref.kid->tree ); - - if ( child == 0 ) - break; - vm_push( (SW) iter->ref.next ); - vm_push( (SW) iter->ref.kid ); - iter->ref.kid = child; - iter->ref.next = (Ref*)vm_ptop(); - } - else { - /* Not the top and not there is a next, go over to it. */ - iter->ref.kid = iter->ref.kid->next; - } - } - - goto first; - } - - while ( true ) { - if ( top == vm_ptop() ) { - iter->ref.kid = 0; - return; - } - - if ( iter->ref.kid->next == 0 ) { - /* Go up one and then down. Remember we can't use iter->ref.next - * because the chain may have been split, setting it null (to - * prevent repeated walks up). */ - Ref *ref = (Ref*)vm_ptop(); - iter->ref.kid = treeChild( prg, ref->kid->tree ); - } - else { - iter->ref.kid = (Kid*)vm_pop(); - iter->ref.next = (Ref*)vm_pop(); - } -first: - if ( iter->ref.kid->tree->id == iter->searchId || anyTree ) { - *psp = sp; - return; - } - } - *psp = sp; - return; -} - - -Tree *treeIterPrevRepeat( Program *prg, Tree ***psp, TreeIter *iter ) -{ - assert( iter->stackSize == iter->stackRoot - *psp ); - - if ( iter->ref.kid == 0 ) { - /* Kid is zero, start from the root. */ - iter->ref = iter->rootRef; - iterFindRevRepeat( prg, psp, iter, true ); - } - else { - /* Have a previous item, continue searching from there. */ - iterFindRevRepeat( prg, psp, iter, false ); - } - - iter->stackSize = iter->stackRoot - *psp; - - return (iter->ref.kid ? prg->trueVal : prg->falseVal ); -} - -Tree *treeSearch( Program *prg, Kid *kid, long id ) -{ - /* This node the one? */ - if ( kid->tree->id == id ) - return kid->tree; - - Tree *res = 0; - - /* Search children. 
*/ - Kid *child = treeChild( prg, kid->tree ); - if ( child != 0 ) - res = treeSearch( prg, child, id ); - - /* Search siblings. */ - if ( res == 0 && kid->next != 0 ) - res = treeSearch( prg, kid->next, id ); - - return res; -} - -Tree *treeSearch2( Program *prg, Tree *tree, long id ) -{ - Tree *res = 0; - if ( tree->id == id ) - res = tree; - else { - Kid *child = treeChild( prg, tree ); - if ( child != 0 ) - res = treeSearch( prg, child, id ); - } - return res; -} - -void xmlEscapeData( struct ColmPrintArgs *printArgs, const char *data, long len ) -{ - int i; - for ( i = 0; i < len; i++ ) { - if ( data[i] == '<' ) - printArgs->out( printArgs, "<", 4 ); - else if ( data[i] == '>' ) - printArgs->out( printArgs, ">", 4 ); - else if ( data[i] == '&' ) - printArgs->out( printArgs, "&", 5 ); - else if ( (32 <= data[i] && data[i] <= 126) || data[i] == '\t' || data[i] == '\n' || data[i] == '\r' ) - printArgs->out( printArgs, &data[i], 1 ); - else { - char out[64]; - sprintf( out, "&#%u;", ((unsigned)data[i]) ); - printArgs->out( printArgs, out, strlen(out) ); - } - } -} - -void initStrCollect( StrCollect *collect ) -{ - collect->data = (char*) malloc( BUFFER_INITIAL_SIZE ); - collect->allocated = BUFFER_INITIAL_SIZE; - collect->length = 0; -} - -void strCollectDestroy( StrCollect *collect ) -{ - free( collect->data ); -} - -void strCollectAppend( StrCollect *collect, const char *data, long len ) -{ - long newLen = collect->length + len; - if ( newLen > collect->allocated ) { - collect->allocated *= newLen * 2; - collect->data = (char*) realloc( collect->data, collect->allocated ); - } - memcpy( collect->data + collect->length, data, len ); - collect->length += len; -} - -void strCollectClear( StrCollect *collect ) -{ - collect->length = 0; -} - -#define INT_SZ 32 - -void printStr( struct ColmPrintArgs *printArgs, Head *str ) -{ - printArgs->out( printArgs, (char*)(str->data), str->length ); -} - -void appendCollect( struct ColmPrintArgs *args, const char *data, int 
length ) -{ - strCollectAppend( (StrCollect*) args->arg, data, length ); -} - -void appendFile( struct ColmPrintArgs *args, const char *data, int length ) -{ - fwrite( data, length, 1, (FILE*)args->arg ); -} - -Tree *treeTrim( struct ColmProgram *prg, Tree **sp, Tree *tree ) -{ - debug( REALM_PARSE, "attaching left ignore\n" ); - - /* Make the ignore list for the left-ignore. */ - Tree *leftIgnore = treeAllocate( prg ); - leftIgnore->id = LEL_ID_IGNORE; - leftIgnore->flags |= AF_SUPPRESS_RIGHT; - - tree = pushLeftIgnore( prg, tree, leftIgnore ); - - debug( REALM_PARSE, "attaching ignore right\n" ); - - /* Copy the ignore list first if we need to attach it as a right - * ignore. */ - Tree *rightIgnore = 0; - rightIgnore = treeAllocate( prg ); - rightIgnore->id = LEL_ID_IGNORE; - rightIgnore->flags |= AF_SUPPRESS_LEFT; - - tree = pushRightIgnore( prg, tree, rightIgnore ); - - return tree; -} - -enum ReturnType -{ - Done = 1, - CollectIgnoreLeft, - CollectIgnoreRight, - RecIgnoreList, - ChildPrint -}; - -enum VisitType -{ - IgnoreWrapper, - IgnoreData, - Term, - NonTerm, -}; - -#define TF_TERM_SEEN 0x1 - -void printKid( Program *prg, Tree **sp, struct ColmPrintArgs *printArgs, Kid *kid ) -{ - enum ReturnType rt; - Kid *parent = 0; - Kid *leadingIgnore = 0; - enum VisitType visitType; - int flags = 0; - - /* Iterate the kids passed in. We are expecting a next, which will allow us - * to print the trailing ignore list. */ - while ( kid != 0 ) { - vm_push( (SW) Done ); - goto rec_call; - rec_return_top: - kid = kid->next; - } - - return; - -rec_call: - if ( kid->tree == 0 ) - goto skip_null; - - /* If not currently skipping ignore data, then print it. Ignore data can - * be associated with terminals and nonterminals. 
*/ - if ( kid->tree->flags & AF_LEFT_IGNORE ) { - vm_push( (SW)parent ); - vm_push( (SW)kid ); - parent = kid; - kid = treeLeftIgnoreKid( prg, kid->tree ); - vm_push( (SW) CollectIgnoreLeft ); - goto rec_call; - rec_return_ign_left: - kid = (Kid*)vm_pop(); - parent = (Kid*)vm_pop(); - } - - if ( kid->tree->id == LEL_ID_IGNORE ) - visitType = IgnoreWrapper; - else if ( parent != 0 && parent->tree->id == LEL_ID_IGNORE ) - visitType = IgnoreData; - else if ( kid->tree->id < prg->rtd->firstNonTermId ) - visitType = Term; - else - visitType = NonTerm; - - debug( REALM_PRINT, "visit type: %d\n", visitType ); - - if ( visitType == IgnoreData ) { - debug( REALM_PRINT, "putting %p on ignore list\n", kid->tree ); - Kid *newIgnore = kidAllocate( prg ); - newIgnore->next = leadingIgnore; - leadingIgnore = newIgnore; - leadingIgnore->tree = kid->tree; - goto skip_node; - } - - if ( visitType == IgnoreWrapper ) { - Kid *newIgnore = kidAllocate( prg ); - newIgnore->next = leadingIgnore; - leadingIgnore = newIgnore; - leadingIgnore->tree = kid->tree; - /* Don't skip. */ - } - - /* print leading ignore? Triggered by terminals. */ - if ( visitType == Term ) { - /* Reverse the leading ignore list. */ - if ( leadingIgnore != 0 ) { - Kid *ignore = 0, *last = 0; - - /* Reverse the list and take the opportunity to implement the - * suppress left. */ - while ( true ) { - Kid *next = leadingIgnore->next; - leadingIgnore->next = last; - - if ( leadingIgnore->tree->flags & AF_SUPPRESS_LEFT ) { - /* We are moving left. Chop off the tail. */ - debug( REALM_PRINT, "suppressing left\n" ); - freeKidList( prg, next ); - break; - } - - if ( next == 0 ) - break; - - last = leadingIgnore; - leadingIgnore = next; - } - - /* Print the leading ignore list. Also implement the suppress right - * in the process. 
*/ - if ( printArgs->comm && (!printArgs->trim || (flags & TF_TERM_SEEN && kid->tree->id > 0)) ) { - ignore = leadingIgnore; - while ( ignore != 0 ) { - if ( ignore->tree->flags & AF_SUPPRESS_RIGHT ) - break; - - if ( ignore->tree->id != LEL_ID_IGNORE ) { - vm_push( (SW)visitType ); - vm_push( (SW)leadingIgnore ); - vm_push( (SW)ignore ); - vm_push( (SW)parent ); - vm_push( (SW)kid ); - - leadingIgnore = 0; - kid = ignore; - parent = 0; - - debug( REALM_PRINT, "rec call on %p\n", kid->tree ); - vm_push( (SW) RecIgnoreList ); - goto rec_call; - rec_return_il: - - kid = (Kid*)vm_pop(); - parent = (Kid*)vm_pop(); - ignore = (Kid*)vm_pop(); - leadingIgnore = (Kid*)vm_pop(); - visitType = (enum VisitType)vm_pop(); - } - - ignore = ignore->next; - } - } - - /* Free the leading ignore list. */ - freeKidList( prg, leadingIgnore ); - leadingIgnore = 0; - } - } - - if ( visitType == Term || visitType == NonTerm ) { - /* Open the tree. */ - printArgs->openTree( prg, sp, printArgs, parent, kid ); - } - - if ( visitType == Term ) - flags |= TF_TERM_SEEN; - - if ( visitType == Term || visitType == IgnoreData ) { - /* Print contents. */ - if ( kid->tree->id < prg->rtd->firstNonTermId ) { - debug( REALM_PRINT, "printing terminal %p\n", kid->tree ); - if ( kid->tree->id != 0 ) - printArgs->printTerm( prg, sp, printArgs, kid ); - } - } - - /* Print children. */ - Kid *child = printArgs->attr ? - treeAttr( prg, kid->tree ) : - treeChild( prg, kid->tree ); - - if ( child != 0 ) { - vm_push( (SW)visitType ); - vm_push( (SW)parent ); - vm_push( (SW)kid ); - parent = kid; - kid = child; - while ( kid != 0 ) { - vm_push( (SW) ChildPrint ); - goto rec_call; - rec_return: - kid = kid->next; - } - kid = (Kid*)vm_pop(); - parent = (Kid*)vm_pop(); - visitType = (enum VisitType)vm_pop(); - } - - if ( visitType == Term || visitType == NonTerm ) { - /* close the tree. 
*/ - printArgs->closeTree( prg, sp, printArgs, parent, kid ); - } - -skip_node: - - /* If not currently skipping ignore data, then print it. Ignore data can - * be associated with terminals and nonterminals. */ - if ( kid->tree->flags & AF_RIGHT_IGNORE ) { - debug( REALM_PRINT, "right ignore\n" ); - vm_push( (SW)parent ); - vm_push( (SW)kid ); - parent = kid; - kid = treeRightIgnoreKid( prg, kid->tree ); - vm_push( (SW) CollectIgnoreRight ); - goto rec_call; - rec_return_ign_right: - kid = (Kid*)vm_pop(); - parent = (Kid*)vm_pop(); - } - -/* For skiping over content on null. */ -skip_null: - - rt = (enum ReturnType)vm_pop(); - switch ( rt ) { - case Done: - debug( REALM_PRINT, "return: done\n" ); - goto rec_return_top; - break; - case CollectIgnoreLeft: - debug( REALM_PRINT, "return: ignore left\n" ); - goto rec_return_ign_left; - case CollectIgnoreRight: - debug( REALM_PRINT, "return: ignore right\n" ); - goto rec_return_ign_right; - case RecIgnoreList: - debug( REALM_PRINT, "return: ignore list\n" ); - goto rec_return_il; - case ChildPrint: - debug( REALM_PRINT, "return: child print\n" ); - goto rec_return; - } -} - -void printTreeArgs( Program *prg, Tree **sp, struct ColmPrintArgs *printArgs, Tree *tree ) -{ - if ( tree == 0 ) - printArgs->out( printArgs, "NIL", 3 ); - else { - /* This term tree allows us to print trailing ignores. 
*/ - Tree termTree; - memset( &termTree, 0, sizeof(termTree) ); - - Kid kid, term; - term.tree = &termTree; - term.next = 0; - term.flags = 0; - - kid.tree = tree; - kid.next = &term; - kid.flags = 0; - - printKid( prg, sp, printArgs, &kid ); - } -} - -void printTermTree( Program *prg, Tree **sp, struct ColmPrintArgs *printArgs, Kid *kid ) -{ - debug( REALM_PRINT, "printing term %p\n", kid->tree ); - - if ( kid->tree->id == LEL_ID_INT ) { - char buf[INT_SZ]; - sprintf( buf, "%ld", ((Int*)kid->tree)->value ); - printArgs->out( printArgs, buf, strlen(buf) ); - } - else if ( kid->tree->id == LEL_ID_BOOL ) { - if ( ((Int*)kid->tree)->value ) - printArgs->out( printArgs, "true", 4 ); - else - printArgs->out( printArgs, "false", 5 ); - } - else if ( kid->tree->id == LEL_ID_PTR ) { - char buf[INT_SZ]; - printArgs->out( printArgs, "#", 1 ); - sprintf( buf, "%p", (void*) ((Pointer*)kid->tree)->value ); - printArgs->out( printArgs, buf, strlen(buf) ); - } - else if ( kid->tree->id == LEL_ID_STR ) { - printStr( printArgs, ((Str*)kid->tree)->value ); - } - else if ( kid->tree->id == LEL_ID_STREAM ) { - char buf[INT_SZ]; - printArgs->out( printArgs, "#", 1 ); - sprintf( buf, "%p", (void*) ((Stream*)kid->tree)->file ); - printArgs->out( printArgs, buf, strlen(buf) ); - } - else if ( kid->tree->tokdata != 0 && - stringLength( kid->tree->tokdata ) > 0 ) - { - printArgs->out( printArgs, stringData( kid->tree->tokdata ), - stringLength( kid->tree->tokdata ) ); - } -} - - -void printNull( Program *prg, Tree **sp, struct ColmPrintArgs *args, Kid *parent, Kid *kid ) -{ -} - -void openTreeXml( Program *prg, Tree **sp, struct ColmPrintArgs *args, Kid *parent, Kid *kid ) -{ - /* Skip the terminal that is for forcing trailing ignores out. */ - if ( kid->tree->id == 0 ) - return; - - LangElInfo *lelInfo = prg->rtd->lelInfo; - - /* List flattening: skip the repeats and lists that are a continuation of - * the list. 
*/ - if ( parent != 0 && parent->tree->id == kid->tree->id && kid->next == 0 && - ( lelInfo[parent->tree->id].repeat || lelInfo[parent->tree->id].list ) ) - { - return; - } - - const char *name = lelInfo[kid->tree->id].xmlTag; - args->out( args, "<", 1 ); - args->out( args, name, strlen( name ) ); - args->out( args, ">", 1 ); -} - -void printTermXml( Program *prg, Tree **sp, struct ColmPrintArgs *printArgs, Kid *kid ) -{ - //Kid *child; - - /*child = */ treeChild( prg, kid->tree ); - if ( kid->tree->id == LEL_ID_PTR ) { - char ptr[32]; - sprintf( ptr, "%p\n", (void*)((Pointer*)kid->tree)->value ); - printArgs->out( printArgs, ptr, strlen(ptr) ); - } - else if ( kid->tree->id == LEL_ID_BOOL ) { - if ( ((Int*)kid->tree)->value ) - printArgs->out( printArgs, "true", 4 ); - else - printArgs->out( printArgs, "false", 5 ); - } - else if ( kid->tree->id == LEL_ID_INT ) { - char ptr[32]; - sprintf( ptr, "%ld", ((Int*)kid->tree)->value ); - printArgs->out( printArgs, ptr, strlen(ptr) ); - } - else if ( kid->tree->id == LEL_ID_STR ) { - Head *head = (Head*) ((Str*)kid->tree)->value; - - xmlEscapeData( printArgs, (char*)(head->data), head->length ); - } - else if ( 0 < kid->tree->id && kid->tree->id < prg->rtd->firstNonTermId && - kid->tree->id != LEL_ID_IGNORE && - kid->tree->tokdata != 0 && - stringLength( kid->tree->tokdata ) > 0 ) - { - xmlEscapeData( printArgs, stringData( kid->tree->tokdata ), - stringLength( kid->tree->tokdata ) ); - } -} - - -void closeTreeXml( Program *prg, Tree **sp, struct ColmPrintArgs *args, Kid *parent, Kid *kid ) -{ - /* Skip the terminal that is for forcing trailing ignores out. */ - if ( kid->tree->id == 0 ) - return; - - LangElInfo *lelInfo = prg->rtd->lelInfo; - - /* List flattening: skip the repeats and lists that are a continuation of - * the list. 
*/ - if ( parent != 0 && parent->tree->id == kid->tree->id && kid->next == 0 && - ( lelInfo[parent->tree->id].repeat || lelInfo[parent->tree->id].list ) ) - { - return; - } - - const char *name = lelInfo[kid->tree->id].xmlTag; - args->out( args, "</", 2 ); - args->out( args, name, strlen( name ) ); - args->out( args, ">", 1 ); -} - -void printTreeCollect( Program *prg, Tree **sp, StrCollect *collect, Tree *tree, int trim ) -{ - struct ColmPrintArgs printArgs = { collect, true, false, trim, &appendCollect, - &printNull, &printTermTree, &printNull }; - printTreeArgs( prg, sp, &printArgs, tree ); -} - -void printTreeFile( Program *prg, Tree **sp, FILE *out, Tree *tree, int trim ) -{ - struct ColmPrintArgs printArgs = { out, true, false, trim, &appendFile, - &printNull, &printTermTree, &printNull }; - printTreeArgs( prg, sp, &printArgs, tree ); -} - -void printXmlStdout( Program *prg, Tree **sp, Tree *tree, int commAttr, int trim ) -{ - struct ColmPrintArgs printArgs = { stdout, commAttr, commAttr, trim, &appendFile, - &openTreeXml, &printTermXml, &closeTreeXml }; - printTreeArgs( prg, sp, &printArgs, tree ); -} - diff --git a/colm/tree.h b/colm/tree.h deleted file mode 100644 index 4425cfc5..00000000 --- a/colm/tree.h +++ /dev/null @@ -1,355 +0,0 @@ -/* - * Copyright 2010-2012 Adrian Thurston <thurston@complang.org> - */ - -/* This file is part of Colm. - * - * Colm is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * Colm is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with Colm; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#ifndef __COLM_TREE_H -#define __COLM_TREE_H - -#if defined(__cplusplus) -extern "C" { -#endif - -#include <colm/colm.h> - -typedef unsigned char Code; -typedef unsigned long Word; -typedef unsigned long Half; -struct Bindings; - -typedef struct _File -{ - struct _File *prev; - struct _File *next; -} File; - -typedef struct _Location -{ - File *file; - long line; - long column; - long byte; -} Location; - -/* Header located just before string data. */ -typedef struct _Head -{ - const char *data; - long length; - Location *location; -} Head; - -typedef struct ColmKid -{ - /* The tree needs to be first since pointers to kids are used to reference - * trees on the stack. A pointer to the word that is a Tree* is cast to - * a Kid*. */ - struct ColmTree *tree; - struct ColmKid *next; - unsigned char flags; -} Kid; - -typedef struct _Ref -{ - struct ColmKid *kid; - struct _Ref *next; -} Ref; - -typedef struct ColmTree -{ - /* First four will be overlaid in other structures. */ - short id; - unsigned short flags; - long refs; - Kid *child; - - Head *tokdata; - - /* FIXME: this needs to go somewhere else. Will do for now. */ - unsigned short prodNum; -} Tree; - - -typedef struct _TreePair -{ - Tree *key; - Tree *val; -} TreePair; - -typedef struct _ParseTree -{ - short id; - unsigned short flags; - - struct _ParseTree *child; - struct _ParseTree *next; - struct _ParseTree *leftIgnore; - struct _ParseTree *rightIgnore; - Kid *shadow; - - /* Parsing algorithm. */ - long state; - long region; - short causeReduce; - - /* FIXME: unify probably. */ - char retryLower; - char retryUpper; -} ParseTree; - -typedef struct _Int -{ - /* Must overlay Tree. 
*/ - short id; - unsigned short flags; - long refs; - Kid *child; - - long value; -} Int; - -typedef struct _Pointer -{ - /* Must overlay Tree. */ - short id; - unsigned short flags; - long refs; - Kid *child; - - Kid *value; -} Pointer; - -typedef struct _Str -{ - /* Must overlay Tree. */ - short id; - unsigned short flags; - long refs; - Kid *child; - - Head *value; -} Str; - -typedef struct _ListEl -{ - /* Must overlay kid. */ - Tree *value; - struct _ListEl *next; - struct _ListEl *prev; -} ListEl; - -/* - * Maps - */ -typedef struct _GenericInfo -{ - long type; - long typeArg; - long keyOffset; - long keyType; - long langElId; - long parserId; -} GenericInfo; - -typedef struct _List -{ - /* Must overlay Tree. */ - short id; - unsigned short flags; - long refs; - ListEl *head; - - ListEl *tail; - long listLen; - GenericInfo *genericInfo; - -} List; - -typedef struct _Stream -{ - /* Must overlay Tree. */ - short id; - unsigned short flags; - long refs; - Kid *child; - - FILE *file; - SourceStream *in; -} Stream; - -typedef struct _Input -{ - /* Must overlay Tree. */ - short id; - unsigned short flags; - long refs; - Kid *child; - - InputStream *in; -} Input; - -typedef struct _Parser -{ - /* Must overlay Tree. */ - short id; - unsigned short flags; - long refs; - Kid *child; - - GenericInfo *genericInfo; - - struct _PdaRun *pdaRun; - struct _FsmRun *fsmRun; - struct _Input *input; - Tree *result; -} Parser; - -typedef struct _TreeIter -{ - Ref rootRef; - Ref ref; - long searchId; - Tree **stackRoot; - long stackSize; -} TreeIter; - -/* This must overlay tree iter because some of the same bytecodes are used. */ -typedef struct _RevTreeIter -{ - Ref rootRef; - Ref ref; - long searchId; - Tree **stackRoot; - long stackSize; - - /* For detecting a split at the leaf. */ - Kid *kidAtYield; - long children; - Kid **cur; -} RevTreeIter; - - -typedef struct _UserIter -{ - /* The current item. 
*/ - Ref ref; - Tree **stackRoot; - long argSize; - long stackSize; - Code *resume; - Tree **frame; - long searchId; -} UserIter; - - -void treeUpref( Tree *tree ); -void treeDownref( struct ColmProgram *prg, Tree **sp, Tree *tree ); -long cmpTree( struct ColmProgram *prg, const Tree *tree1, const Tree *tree2 ); - -Tree *pushRightIgnore( struct ColmProgram *prg, Tree *pushTo, Tree *rightIgnore ); -Tree *pushLeftIgnore( struct ColmProgram *prg, Tree *pushTo, Tree *leftIgnore ); -Tree *popRightIgnore( struct ColmProgram *prg, Tree **sp, Tree *popFrom, Tree **rightIgnore ); -Tree *popLeftIgnore( struct ColmProgram *prg, Tree **sp, Tree *popFrom, Tree **leftIgnore ); -Tree *treeLeftIgnore( struct ColmProgram *prg, Tree *tree ); -Tree *treeRightIgnore( struct ColmProgram *prg, Tree *tree ); -Kid *treeLeftIgnoreKid( struct ColmProgram *prg, Tree *tree ); -Kid *treeRightIgnoreKid( struct ColmProgram *prg, Tree *tree ); -Kid *treeChild( struct ColmProgram *prg, const Tree *tree ); -Kid *treeAttr( struct ColmProgram *prg, const Tree *tree ); -Kid *kidListConcat( Kid *list1, Kid *list2 ); -Kid *treeExtractChild( struct ColmProgram *prg, Tree *tree ); -Kid *reverseKidList( Kid *kid ); - -Tree *constructInteger( struct ColmProgram *prg, long i ); -Tree *constructPointer( struct ColmProgram *prg, Tree *tree ); -Tree *constructTerm( struct ColmProgram *prg, Word id, Head *tokdata ); -Tree *constructReplacementTree( Kid *kid, Tree **bindings, struct ColmProgram *prg, long pat ); -Tree *createGeneric( struct ColmProgram *prg, long genericId ); -Tree *constructToken( struct ColmProgram *prg, Tree **root, long nargs ); -Tree *constructInput( struct ColmProgram *prg ); - - -int testFalse( struct ColmProgram *prg, Tree *tree ); -Tree *makeTree( struct ColmProgram *prg, Tree **root, long nargs ); -Stream *openFile( struct ColmProgram *prg, Tree *name, Tree *mode ); -Stream *openStreamFd( struct ColmProgram *prg, long fd ); -Kid *copyIgnoreList( struct ColmProgram *prg, Kid 
*ignoreHeader ); -Kid *copyKidList( struct ColmProgram *prg, Kid *kidList ); -void streamFree( struct ColmProgram *prg, Stream *s ); -Tree *copyTree( struct ColmProgram *prg, Tree *tree, Kid *oldNextDown, Kid **newNextDown ); - -Tree *getPtrVal( Pointer *ptr ); -Tree *getPtrValSplit( struct ColmProgram *prg, Pointer *ptr ); -Tree *getField( Tree *tree, Word field ); -Tree *getFieldSplit( struct ColmProgram *prg, Tree *tree, Word field ); -Tree *getRhsEl( struct ColmProgram *prg, Tree *lhs, long position ); -void setField( struct ColmProgram *prg, Tree *tree, long field, Tree *value ); - -void setTriterCur( struct ColmProgram *prg, TreeIter *iter, Tree *tree ); -void setUiterCur( struct ColmProgram *prg, UserIter *uiter, Tree *tree ); -void refSetValue( Ref *ref, Tree *v ); -Tree *treeSearch( struct ColmProgram *prg, Kid *kid, long id ); -Tree *treeSearch2( struct ColmProgram *prg, Tree *tree, long id ); - -int matchPattern( Tree **bindings, struct ColmProgram *prg, long pat, Kid *kid, int checkNext ); -Tree *treeIterDerefCur( TreeIter *iter ); - -/* For making references of attributes. 
*/ -Kid *getFieldKid( Tree *tree, Word field ); - -Tree *copyRealTree( struct ColmProgram *prg, Tree *tree, Kid *oldNextDown, Kid **newNextDown ); -void splitIterCur( struct ColmProgram *prg, Tree ***psp, TreeIter *iter ); -Tree *setListMem( List *list, Half field, Tree *value ); - -void listAppend2( struct ColmProgram *prg, List *list, Tree *val ); -Tree *listRemoveEnd( struct ColmProgram *prg, List *list ); -Tree *getListMem( List *list, Word field ); -Tree *getListMemSplit( struct ColmProgram *prg, List *list, Word field ); - -Tree *treeIterAdvance( struct ColmProgram *prg, Tree ***psp, TreeIter *iter ); -Tree *treeIterNextChild( struct ColmProgram *prg, Tree ***psp, TreeIter *iter ); -Tree *treeRevIterPrevChild( struct ColmProgram *prg, Tree ***psp, RevTreeIter *iter ); -Tree *treeIterNextRepeat( struct ColmProgram *prg, Tree ***psp, TreeIter *iter ); -Tree *treeIterPrevRepeat( struct ColmProgram *prg, Tree ***psp, TreeIter *iter ); - -/* An automatically grown buffer for collecting tokens. Always reuses space; - * never down resizes. */ -typedef struct _StrCollect -{ - char *data; - int allocated; - int length; -} StrCollect; - -void initStrCollect( StrCollect *collect ); -void strCollectDestroy( StrCollect *collect ); -void strCollectAppend( StrCollect *collect, const char *data, long len ); -void strCollectClear( StrCollect *collect ); -Tree *treeTrim( struct ColmProgram *prg, Tree **sp, Tree *tree ); - -void printTreeCollect( struct ColmProgram *prg, Tree **sp, StrCollect *collect, Tree *tree, int trim ); -void printTreeFile( struct ColmProgram *prg, Tree **sp, FILE *out, Tree *tree, int trim ); -void printXmlStdout( struct ColmProgram *prg, Tree **sp, Tree *tree, int commAttr, int trim ); - -#if defined(__cplusplus) -} -#endif - -#endif - |