diff options
author | Adrian Thurston <thurston@complang.org> | 2012-07-01 12:48:22 -0400 |
---|---|---|
committer | Adrian Thurston <thurston@complang.org> | 2012-07-01 12:48:22 -0400 |
commit | 247904a84430b8c9151fa6afb68f01b60afb92c9 (patch) | |
tree | 58d498f783a935b02255120c814c387745dc6e41 /src | |
parent | d8cdec468bb7efad768d25872147533312cffe91 (diff) | |
download | colm-247904a84430b8c9151fa6afb68f01b60afb92c9.tar.gz |
moved 'colm' dir to 'src'
Diffstat (limited to 'src')
-rw-r--r-- | src/.gitignore | 27 | ||||
-rw-r--r-- | src/Makefile.am | 183 | ||||
-rw-r--r-- | src/buffer.h | 55 | ||||
-rw-r--r-- | src/bytecode.c | 3579 | ||||
-rw-r--r-- | src/bytecode.h | 487 | ||||
-rw-r--r-- | src/closure.cc | 458 | ||||
-rw-r--r-- | src/codegen.cc | 50 | ||||
-rw-r--r-- | src/codevect.c | 183 | ||||
-rw-r--r-- | src/colm.h | 55 | ||||
-rw-r--r-- | src/compiler.cc | 1496 | ||||
-rw-r--r-- | src/ctinput.cc | 439 | ||||
-rw-r--r-- | src/debug.c | 78 | ||||
-rw-r--r-- | src/debug.h | 58 | ||||
-rw-r--r-- | src/declare.cc | 383 | ||||
-rw-r--r-- | src/defs.h.in | 49 | ||||
-rw-r--r-- | src/dotgen.cc | 113 | ||||
-rw-r--r-- | src/dotgen.h | 51 | ||||
-rw-r--r-- | src/exports.cc | 285 | ||||
-rw-r--r-- | src/fsmap.cc | 856 | ||||
-rw-r--r-- | src/fsmattach.cc | 425 | ||||
-rw-r--r-- | src/fsmbase.cc | 602 | ||||
-rw-r--r-- | src/fsmcodegen.cc | 1098 | ||||
-rw-r--r-- | src/fsmcodegen.h | 212 | ||||
-rw-r--r-- | src/fsmexec.cc | 208 | ||||
-rw-r--r-- | src/fsmgraph.cc | 1408 | ||||
-rw-r--r-- | src/fsmgraph.h | 1388 | ||||
-rw-r--r-- | src/fsmmin.cc | 732 | ||||
-rw-r--r-- | src/fsmrun.h | 36 | ||||
-rw-r--r-- | src/fsmstate.cc | 467 | ||||
-rw-r--r-- | src/global.h | 90 | ||||
-rw-r--r-- | src/input.c | 847 | ||||
-rw-r--r-- | src/input.h | 214 | ||||
-rw-r--r-- | src/keyops.h | 283 | ||||
-rw-r--r-- | src/list.c | 105 | ||||
-rw-r--r-- | src/lmparse.kh | 120 | ||||
-rw-r--r-- | src/lmparse.kl | 2677 | ||||
-rw-r--r-- | src/lmscan.h | 118 | ||||
-rw-r--r-- | src/lmscan.rl | 636 | ||||
-rw-r--r-- | src/main.cc | 623 | ||||
-rw-r--r-- | src/map.c | 763 | ||||
-rw-r--r-- | src/map.cc | 26 | ||||
-rw-r--r-- | src/map.h | 108 | ||||
-rw-r--r-- | src/parsedata.h | 1063 | ||||
-rw-r--r-- | src/parsetree.cc | 1776 | ||||
-rw-r--r-- | src/parsetree.h | 2253 | ||||
-rw-r--r-- | src/pcheck.cc | 154 | ||||
-rw-r--r-- | src/pcheck.h | 48 | ||||
-rw-r--r-- | src/pdabuild.cc | 2091 | ||||
-rw-r--r-- | src/pdacodegen.cc | 653 | ||||
-rw-r--r-- | src/pdacodegen.h | 106 | ||||
-rw-r--r-- | src/pdagraph.cc | 533 | ||||
-rw-r--r-- | src/pdagraph.h | 515 | ||||
-rw-r--r-- | src/pdarun.c | 2272 | ||||
-rw-r--r-- | src/pdarun.h | 473 | ||||
-rw-r--r-- | src/pool.c | 330 | ||||
-rw-r--r-- | src/pool.h | 86 | ||||
-rw-r--r-- | src/program.c | 254 | ||||
-rw-r--r-- | src/program.h | 128 | ||||
-rw-r--r-- | src/redbuild.cc | 650 | ||||
-rw-r--r-- | src/redbuild.h | 161 | ||||
-rw-r--r-- | src/redfsm.cc | 1112 | ||||
-rw-r--r-- | src/redfsm.h | 524 | ||||
-rw-r--r-- | src/resolve.cc | 805 | ||||
-rw-r--r-- | src/rtvector.h | 34 | ||||
-rw-r--r-- | src/string.c | 240 | ||||
-rw-r--r-- | src/synthesis.cc | 3277 | ||||
-rw-r--r-- | src/tree.c | 2484 | ||||
-rw-r--r-- | src/tree.h | 355 |
68 files changed, 44448 insertions, 0 deletions
diff --git a/src/.gitignore b/src/.gitignore new file mode 100644 index 00000000..c2e96f8e --- /dev/null +++ b/src/.gitignore @@ -0,0 +1,27 @@ +# Common testing files. +/tmp.lm +/tmp.c +/tmp.bin +/input +/out + +/*.o +/Makefile.in +/Makefile +/.*.d +/colm +/lmparse.h +/lmparse.cc +/lmscan.cc +/config.h.in +/config.h.in~ +/config.h +/defs.h +/version.h +/tags +/.deps +/libcolmd.a +/libcolmp.a +/.libs +/stamp-h1 +/stamp-h2 diff --git a/src/Makefile.am b/src/Makefile.am new file mode 100644 index 00000000..c4a3504a --- /dev/null +++ b/src/Makefile.am @@ -0,0 +1,183 @@ +# +# Copyright 2007-2012 Adrian Thurston <thurston@complang.org> +# + +# This file is part of Colm. +# +# Colm is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# Colm is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with Colm; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +INCLUDES = -I$(top_srcdir)/aapl + +bin_PROGRAMS = colm + +RUNTIME_SRC = \ + map.c pdarun.c list.c input.c debug.c \ + codevect.c pool.c string.c tree.c bytecode.c program.c + +RUNTIME_HDR = \ + bytecode.h config.h defs.h debug.h pool.h input.h \ + fsmrun.h pdarun.h map.h tree.h program.h colm.h + +lib_LIBRARIES = libcolmp.a libcolmd.a + +libcolmp_a_SOURCES = $(RUNTIME_SRC) +libcolmp_a_CFLAGS = -I.. + +libcolmd_a_SOURCES = $(RUNTIME_SRC) +libcolmd_a_CFLAGS = -I.. + +colm_CXXFLAGS = \ + -Wall \ + -DCOLM_LOG \ + -DPREFIX='"$(prefix)"' \ + -I.. 
+ +colm_LDADD = libcolmp.a + +# ADT DEFS_COLM += -DCOLM_LOG -DPREFIX='"$(prefix)"' + +colm_SOURCES = \ + buffer.h bytecode.h colm.h debug.h dotgen.h fsmcodegen.h fsmgraph.h \ + fsmrun.h input.h keyops.h lmparse.h lmscan.h map.h parsedata.h \ + parsetree.h pcheck.h pdacodegen.h pdagraph.h pdarun.h pool.h redbuild.h \ + redfsm.h rtvector.h tree.h version.h global.h colm.h \ + \ + resolve.cc synthesis.cc lmparse.cc lmscan.cc parsetree.cc \ + fsmstate.cc fsmbase.cc fsmattach.cc fsmmin.cc \ + fsmgraph.cc pdagraph.cc pdabuild.cc pdacodegen.cc fsmcodegen.cc \ + redfsm.cc fsmexec.cc main.cc redbuild.cc closure.cc fsmap.cc \ + dotgen.cc pcheck.cc ctinput.cc declare.cc codegen.cc \ + exports.cc compiler.cc + +colmincdir = $(includedir)/colm + +colminc_HEADERS = $(RUNTIME_HDR) + +BUILT_SOURCES = \ + version.h lmscan.cc lmparse.h lmparse.cc + +version.h: Makefile + echo '#define VERSION "$(PACKAGE_VERSION)"' > version.h + echo '#define PUBDATE "$(PUBDATE)"' >> version.h + +if BUILD_PARSERS + +lmparse.h: lmparse.kh + $(KELBT) -o $@ $< + +lmparse.cc: lmparse.kl lmparse.kh + $(KELBT) -o $@ $< + +lmscan.cc: lmparse.h + +lmscan.cc: lmscan.rl + $(RAGEL) -G2 -o $@ $< + +endif + +# ADT +# ADT # Logging: +# ADT # colm: rt on/off +# ADT # rt_prd: off +# ADT # rt_db: on +# ADT # rt_clm: rt on/off +# ADT +# ADT INCS += -I../aapl +# ADT +# ADT DEFS_COLM += -DCOLM_LOG -DPREFIX='"$(prefix)"' +# ADT DEFS_RT_P += +# ADT DEFS_RT_D += -DCOLM_LOG +# ADT +# ADT CFLAGS += -g -Wall -Wwrite-strings +# ADT LDFLAGS += +# ADT +# ADT # Files in ALL_SRC that are generated. +# ADT GEN_SRC = version.h lmscan.cc lmparse.h lmparse.cc +# ADT +# ADT RUNTIME_P = libcolmp.a +# ADT RUNTIME_D = libcolmd.a +# ADT +# ADT LIBS = +# ADT +# ADT #************************************* +# ADT +# ADT # Get the version info. 
+# ADT include ../version.mk +# ADT +# ADT prefix = @prefix@ +# ADT +# ADT BUILD_PARSERS = @BUILD_PARSERS@ +# ADT +# ADT # Programs +# ADT CXX = @CXX@ +# ADT CC = @CC@ +# ADT +# ADT # Get objects and dependencies from sources. +# ADT COLM_OBJ = $(COLM_SRC:%.cc=%.o) +# ADT RUNTIME_OBJ_P = $(RUNTIME_SRC:%.c=%_p.o) +# ADT RUNTIME_OBJ_D = $(RUNTIME_SRC:%.c=%_d.o) +# ADT +# ADT DEPS = $(COLM_SRC:%.cc=.%.d) $(RUNTIME_SRC:%.c=.%_p.d) $(RUNTIME_SRC:%.c=.%_d.d) +# ADT +# ADT # Rules. +# ADT all: colm $(RUNTIME_P) $(RUNTIME_D) +# ADT +# ADT colm: $(GEN_SRC) $(COLM_OBJ) $(RUNTIME_OBJ_D) $(RUNTIME_OBJ_C_D) $(LIBS) +# ADT $(CXX) $(LDFLAGS) -o $@ $(COLM_OBJ) $(RUNTIME_OBJ_D) $(RUNTIME_OBJ_C_D) $(LIBS) +# ADT +# ADT $(RUNTIME_P): $(RUNTIME_OBJ_P) $(RUNTIME_OBJ_C_P) +# ADT ar -cr $@ $^ +# ADT +# ADT $(RUNTIME_D): $(RUNTIME_OBJ_D) $(RUNTIME_OBJ_C_D) +# ADT ar -cr $@ $^ +# ADT +# ADT version.h: ../version.mk +# ADT echo '#define VERSION "$(VERSION)"' > version.h +# ADT echo '#define PUBDATE "$(PUBDATE)"' >> version.h +# ADT +# ADT +# ADT $(COLM_OBJ): %.o: %.cc +# ADT @$(CXX) -M $(DEFS_COLM) $(INCS) $< > .$*.d +# ADT $(CXX) -c $(CFLAGS) $(DEFS_COLM) $(INCS) -o $@ $< +# ADT +# ADT $(RUNTIME_OBJ_P): %_p.o: %.c +# ADT @$(CC) -M -MT $@ $(DEFS_RT_P) $< > .$*_p.d +# ADT $(CC) -c $(CFLAGS) $(DEFS_RT_P) -o $@ $< +# ADT +# ADT $(RUNTIME_OBJ_D): %_d.o: %.c +# ADT @$(CC) -M -MT $@ $(DEFS_RT_D) $< > .$*_d.d +# ADT $(CC) -c $(CFLAGS) $(DEFS_RT_D) -o $@ $< +# ADT +# ADT distclean: clean +# ADT rm -f Makefile config.h +# ADT +# ADT ifeq ($(BUILD_PARSERS),true) +# ADT EXTRA_CLEAN = $(GEN_SRC) +# ADT endif +# ADT +# ADT clean: +# ADT rm -f tags .*.d *.o colm $(EXTRA_CLEAN) $(RUNTIME_P) $(RUNTIME_D) +# ADT +# ADT install: all +# ADT install -d $(prefix)/bin +# ADT install -d $(prefix)/include +# ADT install -d $(prefix)/include/colm +# ADT install -d $(prefix)/lib +# ADT install -s colm $(prefix)/bin/colm +# ADT install libcolmp.a libcolmd.a $(prefix)/lib +# ADT install $(RUNTIME_HDR) 
$(prefix)/include/colm +# ADT +# ADT -include $(DEPS) + diff --git a/src/buffer.h b/src/buffer.h new file mode 100644 index 00000000..9039ad4b --- /dev/null +++ b/src/buffer.h @@ -0,0 +1,55 @@ +/* + * Copyright 2003 Adrian Thurston <thurston@complang.org> + */ + +/* This file is part of Colm. + * + * Colm is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Colm is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Colm; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _BUFFER_H +#define _BUFFER_H + +#define BUFFER_INITIAL_SIZE 4096 + +/* An automatically grown buffer for collecting tokens. Always reuses space; + * never down resizes. */ +struct Buffer +{ + Buffer() + { + data = (char*) malloc( BUFFER_INITIAL_SIZE ); + allocated = BUFFER_INITIAL_SIZE; + length = 0; + } + ~Buffer() { free(data); } + + void append( char p ) + { + if ( length == allocated ) { + allocated *= 2; + data = (char*) realloc( data, allocated ); + } + data[length++] = p; + } + + void clear() { length = 0; } + + char *data; + int allocated; + int length; +}; + +#endif /* _BUFFER_H */ diff --git a/src/bytecode.c b/src/bytecode.c new file mode 100644 index 00000000..2cfa598c --- /dev/null +++ b/src/bytecode.c @@ -0,0 +1,3579 @@ +/* + * Copyright 2006-2012 Adrian Thurston <thurston@complang.org> + */ + +/* This file is part of Colm. 
+ * + * Colm is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Colm is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Colm; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +//#define COLM_LOG + +#include <pdarun.h> +#include <fsmrun.h> +#include <tree.h> +#include <bytecode.h> +#include <pool.h> +#include <debug.h> +#include <config.h> + +#include <alloca.h> +#include <sys/mman.h> +#include <string.h> +#include <assert.h> +#include <stdlib.h> + +#if SIZEOF_LONG != 4 && SIZEOF_LONG != 8 + #error "SIZEOF_LONG contained an unexpected value" +#endif + +#define true 1 +#define false 0 + +/* More common macros are in bytecode.h. */ +#define vm_top_off(n) (sp[n]) +#define vm_popn(n) (sp += (n)) +#define vm_pushn(n) (sp -= (n)) +#define vm_local(o) (exec->framePtr[o]) +#define vm_plocal(o) (&exec->framePtr[o]) +#define vm_local_iframe(o) (exec->iframePtr[o]) +#define vm_plocal_iframe(o) (&exec->iframePtr[o]) + +#define read_byte( i ) do { \ + i = ((uchar) *instr++); \ +} while(0) + +#define consume_byte( ) do { \ + instr += 1; \ +} while(0) + + +#define read_word_p( i, p ) do { \ + i = ((Word) p[0]); \ + i |= ((Word) p[1]) << 8; \ + i |= ((Word) p[2]) << 16; \ + i |= ((Word) p[3]) << 24; \ +} while(0) + +/* There are better ways. 
*/ +#if SIZEOF_LONG == 4 + #define read_word( i ) do { \ + i = ((Word) *instr++); \ + i |= ((Word) *instr++) << 8; \ + i |= ((Word) *instr++) << 16; \ + i |= ((Word) *instr++) << 24; \ + } while(0) +#else + #define read_word( i ) do { \ + i = ((Word) *instr++); \ + i |= ((Word) *instr++) << 8; \ + i |= ((Word) *instr++) << 16; \ + i |= ((Word) *instr++) << 24; \ + i |= ((Word) *instr++) << 32; \ + i |= ((Word) *instr++) << 40; \ + i |= ((Word) *instr++) << 48; \ + i |= ((Word) *instr++) << 56; \ + } while(0) +#endif + +/* There are better ways. */ +#if SIZEOF_LONG == 4 + #define read_tree( i ) do { \ + Word w; \ + w = ((Word) *instr++); \ + w |= ((Word) *instr++) << 8; \ + w |= ((Word) *instr++) << 16; \ + w |= ((Word) *instr++) << 24; \ + i = (Tree*) w; \ + } while(0) + + #define read_word_type( Type, i ) do { \ + Word w; \ + w = ((Word) *instr++); \ + w |= ((Word) *instr++) << 8; \ + w |= ((Word) *instr++) << 16; \ + w |= ((Word) *instr++) << 24; \ + i = (Type) w; \ + } while(0) + + #define consume_word( ) do { \ + instr += 4; \ + } while(0) +#else + #define read_tree( i ) do { \ + Word w; \ + w = ((Word) *instr++); \ + w |= ((Word) *instr++) << 8; \ + w |= ((Word) *instr++) << 16; \ + w |= ((Word) *instr++) << 24; \ + w |= ((Word) *instr++) << 32; \ + w |= ((Word) *instr++) << 40; \ + w |= ((Word) *instr++) << 48; \ + w |= ((Word) *instr++) << 56; \ + i = (Tree*) w; \ + } while(0) + + #define read_word_type( Type, i ) do { \ + Word w; \ + w = ((Word) *instr++); \ + w |= ((Word) *instr++) << 8; \ + w |= ((Word) *instr++) << 16; \ + w |= ((Word) *instr++) << 24; \ + w |= ((Word) *instr++) << 32; \ + w |= ((Word) *instr++) << 40; \ + w |= ((Word) *instr++) << 48; \ + w |= ((Word) *instr++) << 56; \ + i = (Type) w; \ + } while(0) + + #define consume_word( ) do { \ + instr += 8; \ + } while(0) +#endif + +#define read_half( i ) do { \ + i = ((Word) *instr++); \ + i |= ((Word) *instr++) << 8; \ +} while(0) + +int colm_log_bytecode = 0; +int colm_log_parse = 0; +int 
colm_log_match = 0; +int colm_log_compile = 0; +int colm_log_conds = 0; + +void vm_grow( Program *prg ) +{ + debug( REALM_BYTECODE, "growing stack\n" ); +} + +void parserSetContext( Program *prg, Tree **sp, Parser *parser, Tree *val ) +{ + parser->pdaRun->context = splitTree( prg, val ); +} + +Head *treeToStr( Program *prg, Tree **sp, Tree *tree, int trim ) +{ + /* Collect the tree data. */ + StrCollect collect; + initStrCollect( &collect ); + + printTreeCollect( prg, sp, &collect, tree, trim ); + + /* Set up the input stream. */ + Head *ret = stringAllocFull( prg, collect.data, collect.length ); + + strCollectDestroy( &collect ); + + return ret; +} + +Word streamAppend( Program *prg, Tree **sp, Tree *input, InputStream *inputStream ) +{ + long length = 0; + + if ( input->id == LEL_ID_STR ) { + //assert(false); + /* Collect the tree data. */ + StrCollect collect; + initStrCollect( &collect ); + printTreeCollect( prg, sp, &collect, input, true ); + + /* Load it into the input. */ + appendData( inputStream, collect.data, collect.length ); + length = collect.length; + strCollectDestroy( &collect ); + } + else if ( input->id == LEL_ID_STREAM ) { + treeUpref( input ); + appendStream( inputStream, input ); + } + else { + treeUpref( input ); + appendTree( inputStream, input ); + } + + return length; +} + +long parseFrag( Program *prg, Tree **sp, Parser *parser, long stopId, long entry ) +{ +switch ( entry ) { +case PcrStart: + + if ( ! 
parser->pdaRun->parseError ) { + parser->pdaRun->stopTarget = stopId; + + long pcr = parseLoop( prg, sp, parser->pdaRun, parser->fsmRun, parser->input->in, entry ); + + while ( pcr != PcrDone ) { + +return pcr; +case PcrReduction: +case PcrGeneration: +case PcrPreEof: +case PcrReverse: + + pcr = parseLoop( prg, sp, parser->pdaRun, parser->fsmRun, parser->input->in, entry ); + } + } + +case PcrDone: +break; } + + return PcrDone; +} + +long parseFinish( Tree **result, Program *prg, Tree **sp, + Parser *parser, int revertOn, long entry ) +{ +switch ( entry ) { +case PcrStart: + + if ( parser->pdaRun->stopTarget <= 0 ) { + setEof( parser->input->in ); + + if ( ! parser->pdaRun->parseError ) { + long pcr = parseLoop( prg, sp, parser->pdaRun, parser->fsmRun, parser->input->in, entry ); + + while ( pcr != PcrDone ) { + +return pcr; +case PcrReduction: +case PcrGeneration: +case PcrPreEof: +case PcrReverse: + + pcr = parseLoop( prg, sp, parser->pdaRun, parser->fsmRun, parser->input->in, entry ); + } + } + } + + /* FIXME: need something here to check that we aren' stopped waiting for + * more data when we are actually expected to finish. This check doesn't + * work (at time of writing). 
*/ + //assert( (parser->pdaRun->stopTarget > 0 && parser->pdaRun->stopParsing) || parser->input->in->eofSent ); + + if ( !revertOn ) + commitFull( prg, sp, parser->pdaRun, 0 ); + + Tree *tree = getParsedRoot( parser->pdaRun, parser->pdaRun->stopTarget > 0 ); + treeUpref( tree ); + + *result = tree; + +case PcrDone: +break; } + + return PcrDone; +} + +long undoParseFrag( Program *prg, Tree **sp, Parser *parser, long steps, long entry ) +{ + InputStream *inputStream = parser->input->in; + FsmRun *fsmRun = parser->fsmRun; + PdaRun *pdaRun = parser->pdaRun; + + debug( REALM_PARSE, "undo parse frag, target steps: %ld, pdarun steps: %ld\n", steps, pdaRun->steps ); + + resetToken( fsmRun ); + +switch ( entry ) { +case PcrStart: + + if ( steps < pdaRun->steps ) { + /* Setup environment for going backwards until we reduced steps to + * what we want. */ + pdaRun->numRetry += 1; + pdaRun->targetSteps = steps; + pdaRun->triggerUndo = 1; + + /* The parse loop will recognise the situation. */ + long pcr = parseLoop( prg, sp, pdaRun, fsmRun, inputStream, entry ); + while ( pcr != PcrDone ) { + +return pcr; +case PcrReduction: +case PcrGeneration: +case PcrPreEof: +case PcrReverse: + + pcr = parseLoop( prg, sp, pdaRun, fsmRun, inputStream, entry ); + } + + /* Reset environment. 
*/ + pdaRun->triggerUndo = 0; + pdaRun->targetSteps = -1; + pdaRun->numRetry -= 1; + } + +case PcrDone: +break; } + + return PcrDone; +} + +Tree *streamPullBc( Program *prg, FsmRun *fsmRun, InputStream *in, Tree *length ) +{ + long len = ((Int*)length)->value; + Head *tokdata = streamPull( prg, fsmRun, in, len ); + return constructString( prg, tokdata ); +} + +void undoPull( Program *prg, FsmRun *fsmRun, InputStream *in, Tree *str ) +{ + const char *data = stringData( ( (Str*)str )->value ); + long length = stringLength( ( (Str*)str )->value ); + undoStreamPull( fsmRun, in, data, length ); +} + +long streamPush( Program *prg, Tree **sp, FsmRun *fsmRun, InputStream *in, Tree *tree, int ignore ) +{ + if ( tree->id == LEL_ID_STR ) { + /* This should become a compile error. If it's text, it's up to the + * scanner to decide. Want to force it then send a token. */ + assert( !ignore ); + + /* Collect the tree data. */ + StrCollect collect; + initStrCollect( &collect ); + printTreeCollect( prg, sp, &collect, tree, true ); + + streamPushText( fsmRun, in, collect.data, collect.length ); + long length = collect.length; + strCollectDestroy( &collect ); + + return length; + } + else { + treeUpref( tree ); + streamPushTree( fsmRun, in, tree, ignore ); + return -1; + } +} + +void setLocal( Tree **frame, long field, Tree *tree ) +{ + if ( tree != 0 ) + assert( tree->refs >= 1 ); + frame[field] = tree; +} + +Tree *getLocalSplit( Program *prg, Tree **frame, long field ) +{ + Tree *val = frame[field]; + Tree *split = splitTree( prg, val ); + frame[field] = split; + return split; +} + +void downrefLocalTrees( Program *prg, Tree **sp, Tree **frame, char *trees, long treesLen ) +{ + long i; + for ( i = 0; i < treesLen; i++ ) { + debug( REALM_BYTECODE, "local tree downref: %ld\n", (long)trees[i] ); + + treeDownref( prg, sp, frame[((long)trees[i])] ); + } +} + +UserIter *uiterCreate( Program *prg, Tree ***psp, FunctionInfo *fi, long searchId ) +{ + Tree **sp = *psp; + vm_pushn( 
sizeof(UserIter) / sizeof(Word) ); + void *mem = vm_ptop(); + + UserIter *uiter = mem; + initUserIter( uiter, vm_ptop(), fi->argSize, searchId ); + *psp = sp; + return uiter; +} + +void uiterInit( Program *prg, Tree **sp, UserIter *uiter, + FunctionInfo *fi, int revertOn ) +{ + /* Set up the first yeild so when we resume it starts at the beginning. */ + uiter->ref.kid = 0; + uiter->stackSize = uiter->stackRoot - vm_ptop(); + uiter->frame = &uiter->stackRoot[-IFR_AA]; + + if ( revertOn ) + uiter->resume = prg->rtd->frameInfo[fi->frameId].codeWV; + else + uiter->resume = prg->rtd->frameInfo[fi->frameId].codeWC; +} + +void treeIterDestroy( Tree ***psp, TreeIter *iter ) +{ + Tree **sp = *psp; + long curStackSize = iter->stackRoot - vm_ptop(); + assert( iter->stackSize == curStackSize ); + vm_popn( iter->stackSize ); + *psp = sp; +} + +void userIterDestroy( Tree ***psp, UserIter *uiter ) +{ + Tree **sp = *psp; + + /* We should always be coming from a yield. The current stack size will be + * nonzero and the stack size in the iterator will be correct. 
*/ + long curStackSize = uiter->stackRoot - vm_ptop(); + assert( uiter->stackSize == curStackSize ); + + long argSize = uiter->argSize; + + vm_popn( uiter->stackRoot - vm_ptop() ); + vm_popn( sizeof(UserIter) / sizeof(Word) ); + vm_popn( argSize ); + + *psp = sp; +} + +Tree *constructArgv( Program *prg, int argc, const char **argv ) +{ + Tree *list = createGeneric( prg, prg->rtd->argvGenericId ); + treeUpref( list ); + int i; + for ( i = 0; i < argc; i++ ) { + Head *head = stringAllocPointer( prg, argv[i], strlen(argv[i]) ); + Tree *arg = constructString( prg, head ); + treeUpref( arg ); + listAppend2( prg, (List*)list, arg ); + } + return list; +} + +/* + * Execution environment + */ + +void initExecution( Execution *exec, Parser *parser, PdaRun *pdaRun, FsmRun *fsmRun, InputStream *inputStream, int frameId ) +{ + exec->parser = parser; + exec->pdaRun = pdaRun; + exec->fsmRun = fsmRun; + exec->inputStream = inputStream; + exec->framePtr = 0; + exec->iframePtr = 0; + exec->frameId = frameId; + exec->rcodeUnitLen = 0; +} + +void rcodeDownrefAll( Program *prg, Tree **sp, RtCodeVect *rev ) +{ + while ( rev->tabLen > 0 ) { + /* Read the length */ + Code *prcode = rev->data + rev->tabLen - SIZEOF_WORD; + Word len; + read_word_p( len, prcode ); + + /* Find the start of block. */ + long start = rev->tabLen - len - SIZEOF_WORD; + prcode = rev->data + start; + + /* Execute it. */ + rcodeDownref( prg, sp, prcode ); + + /* Backup over it. 
*/ + rev->tabLen -= len + SIZEOF_WORD; + } +} + +void rcodeDownref( Program *prg, Tree **sp, Code *instr ) +{ +again: + switch ( *instr++ ) { + case IN_PARSE_LOAD_START: { + debug( REALM_BYTECODE, "IN_PARSE_LOAD_START\n" ); + break; + } + case IN_PARSE_SAVE_STEPS: { + debug( REALM_BYTECODE, "IN_PARSE_SAVE_STEPS\n" ); + break; + } + case IN_LOAD_TREE: { + Word w; + read_word( w ); + debug( REALM_BYTECODE, "IN_LOAD_TREE %p\n", (Tree*)w ); + treeDownref( prg, sp, (Tree*)w ); + break; + } + case IN_LOAD_WORD: { + Word w; + read_word( w ); + debug( REALM_BYTECODE, "IN_LOAD_WORD\n" ); + break; + } + case IN_RESTORE_LHS: { + Tree *restore; + read_tree( restore ); + debug( REALM_BYTECODE, "IN_RESTORE_LHS\n" ); + treeDownref( prg, sp, restore ); + break; + } + + case IN_PARSE_FRAG_BKT: { + Half stopId; + read_half( stopId ); + debug( REALM_BYTECODE, "IN_PARSE_FRAG_BKT\n" ); + break; + } + case IN_PARSE_FRAG_BKT3: { + debug( REALM_BYTECODE, "IN_PARSE_FRAG_BKT3\n" ); + break; + } + case IN_PARSE_FINISH_BKT: { + Half stopId; + read_half( stopId ); + debug( REALM_BYTECODE, "IN_PARSE_FINISH_BKT\n" ); + break; + } + case IN_PARSE_FINISH_BKT3: { + debug( REALM_BYTECODE, "IN_PARSE_FINISH_BKT3\n" ); + break; + } + case IN_PCR_CALL: { + debug( REALM_BYTECODE, "IN_PCR_CALL\n" ); + break; + } + case IN_PCR_RET: { + debug( REALM_BYTECODE, "IN_PCR_RET\n" ); + return; + } + case IN_PCR_END_DECK: { + debug( REALM_BYTECODE, "IN_PCR_END_DECK\n" ); + return; + } + case IN_INPUT_APPEND_BKT: { + Tree *parser; + Tree *input; + Word len; + read_tree( parser ); + read_tree( input ); + read_word( len ); + + debug( REALM_BYTECODE, "IN_INPUT_APPEND_BKT\n" ); + + treeDownref( prg, sp, parser ); + treeDownref( prg, sp, input ); + break; + } + case IN_INPUT_PULL_BKT: { + Word f; + Tree *string; + read_tree( string ); + read_word( f ); + + debug( REALM_BYTECODE, "IN_INPUT_PULL_BKT\n" ); + + treeDownref( prg, sp, string ); + break; + } + case IN_INPUT_PUSH_BKT: { + Word len; + read_word( len ); + + debug( 
REALM_BYTECODE, "IN_INPUT_PUSH_BKT\n" ); + break; + } + case IN_LOAD_GLOBAL_BKT: { + debug( REALM_BYTECODE, "IN_LOAD_GLOBAL_BKT\n" ); + break; + } + case IN_LOAD_CONTEXT_BKT: { + debug( REALM_BYTECODE, "IN_LOAD_CONTEXT_BKT\n" ); + break; + } + case IN_LOAD_ACCUM_BKT: { + /* Tree *parser; */ + consume_word(); + debug( REALM_BYTECODE, "IN_LOAD_ACCUM_BKT\n" ); + break; + } + case IN_LOAD_INPUT_BKT: { + /* Tree *input; */ + consume_word(); + debug( REALM_BYTECODE, "IN_LOAD_INPUT_BKT\n" ); + break; + } + case IN_GET_FIELD_BKT: { + short field; + read_half( field ); + + debug( REALM_BYTECODE, "IN_GET_FIELD_BKT %hd\n", field ); + break; + } + case IN_SET_FIELD_BKT: { + short field; + Tree *val; + read_half( field ); + read_tree( val ); + + debug( REALM_BYTECODE, "IN_SET_FIELD_BKT %hd\n", field ); + + treeDownref( prg, sp, val ); + break; + } + case IN_PTR_DEREF_BKT: { + Tree *ptr; + read_tree( ptr ); + + debug( REALM_BYTECODE, "IN_PTR_DEREF_BKT\n" ); + + treeDownref( prg, sp, ptr ); + break; + } + case IN_SET_TOKEN_DATA_BKT: { + Word oldval; + read_word( oldval ); + + debug( REALM_BYTECODE, "IN_SET_TOKEN_DATA_BKT\n" ); + + Head *head = (Head*)oldval; + stringFree( prg, head ); + break; + } + case IN_LIST_APPEND_BKT: { + debug( REALM_BYTECODE, "IN_LIST_APPEND_BKT\n" ); + break; + } + case IN_LIST_REMOVE_END_BKT: { + Tree *val; + read_tree( val ); + + debug( REALM_BYTECODE, "IN_LIST_REMOVE_END_BKT\n" ); + + treeDownref( prg, sp, val ); + break; + } + case IN_GET_LIST_MEM_BKT: { + short field; + read_half( field ); + + debug( REALM_BYTECODE, "IN_GET_LIST_MEM_BKT %hd\n", field ); + break; + } + case IN_SET_LIST_MEM_BKT: { + Half field; + Tree *val; + read_half( field ); + read_tree( val ); + + debug( REALM_BYTECODE, "IN_SET_LIST_MEM_BKT %hd\n", field ); + + treeDownref( prg, sp, val ); + break; + } + case IN_MAP_INSERT_BKT: { + /* uchar inserted; */ + Tree *key; + consume_byte(); + read_tree( key ); + + debug( REALM_BYTECODE, "IN_MAP_INSERT_BKT\n" ); + + treeDownref( prg, sp, 
key ); + break; + } + case IN_MAP_STORE_BKT: { + Tree *key, *val; + read_tree( key ); + read_tree( val ); + + debug( REALM_BYTECODE,"IN_MAP_STORE_BKT\n" ); + + treeDownref( prg, sp, key ); + treeDownref( prg, sp, val ); + break; + } + case IN_MAP_REMOVE_BKT: { + Tree *key, *val; + read_tree( key ); + read_tree( val ); + + debug( REALM_BYTECODE, "IN_MAP_REMOVE_BKT\n" ); + + treeDownref( prg, sp, key ); + treeDownref( prg, sp, val ); + break; + } + case IN_STOP: { + return; + } + default: { + fatal( "UNKNOWN INSTRUCTION 0x%2x: -- reverse code downref\n", *(instr-1)); + assert(false); + break; + } + } + goto again; +} + +void mainExecution( Program *prg, Execution *exec, Code *code ) +{ + Tree **sp = prg->vm_root; + + /* Set up the stack as if we have called. We allow a return value. */ + vm_push( 0 ); + vm_push( 0 ); + vm_push( 0 ); + vm_push( 0 ); + + /* Execution loop. */ + executeCode( prg, exec, sp, code ); + + vm_pop_ignore(); + vm_pop_ignore(); + prg->returnVal = vm_pop(); +} + +int makeReverseCode( PdaRun *pdaRun ) +{ + RtCodeVect *reverseCode = &pdaRun->reverseCode; + RtCodeVect *rcodeCollect = &pdaRun->rcodeCollect; + + /* Do we need to revert the left hand side? */ + + /* Check if there was anything generated. */ + if ( rcodeCollect->tabLen == 0 ) + return false; + + if ( pdaRun->rcBlockCount == 0 ) { + /* One reverse code run for the DECK terminator. */ + append( reverseCode, IN_PCR_END_DECK ); + append( reverseCode, IN_PCR_RET ); + appendWord( reverseCode, 2 ); + pdaRun->rcBlockCount += 1; + incrementSteps( pdaRun ); + } + + long startLength = reverseCode->tabLen; + + /* Go backwards, group by group, through the reverse code. Push each group + * to the global reverse code stack. */ + Code *p = rcodeCollect->data + rcodeCollect->tabLen; + while ( p != rcodeCollect->data ) { + p--; + long len = *p; + p = p - len; + append2( reverseCode, p, len ); + } + + /* Stop, then place a total length in the global stack. 
*/ + append( reverseCode, IN_PCR_RET ); + long length = reverseCode->tabLen - startLength; + appendWord( reverseCode, length ); + + /* Clear the revere code buffer. */ + rcodeCollect->tabLen = 0; + + pdaRun->rcBlockCount += 1; + incrementSteps( pdaRun ); + + return true; +} + +void transferReverseCode( PdaRun *pdaRun, ParseTree *parseTree ) +{ + if ( pdaRun->rcBlockCount > 0 ) { + debug( REALM_PARSE, "attaching reverse code to token\n" ); + parseTree->flags |= PF_HAS_RCODE; + pdaRun->rcBlockCount = 0; + } +} + +Code *popReverseCode( RtCodeVect *allRev ) +{ + /* Read the length */ + Code *prcode = allRev->data + allRev->tabLen - SIZEOF_WORD; + Word len; + read_word_p( len, prcode ); + + /* Find the start of block. */ + long start = allRev->tabLen - len - SIZEOF_WORD; + prcode = allRev->data + start; + + /* Backup over it. */ + allRev->tabLen -= len + SIZEOF_WORD; + return prcode; +} + +Tree **executeCode( Program *prg, Execution *exec, Tree **sp, Code *instr ) +{ + /* When we exit we are going to verify that we did not eat up any stack + * space. 
*/ + Tree **root = sp; + Code c; + +again: + c = *instr++; + //debug( REALM_BYTECODE, "--in 0x%x\n", c ); + + switch ( c ) { + case IN_RESTORE_LHS: { + Tree *restore; + read_tree( restore ); + + debug( REALM_BYTECODE, "IN_RESTORE_LHS\n" ); + treeDownref( prg, sp, exec->pdaRun->parseInput->shadow->tree ); + exec->pdaRun->parseInput->shadow->tree = restore; + break; + } + case IN_LOAD_NIL: { + debug( REALM_BYTECODE, "IN_LOAD_NIL\n" ); + vm_push( 0 ); + break; + } + case IN_LOAD_TREE: { + debug( REALM_BYTECODE, "IN_LOAD_TREE\n" ); + Tree *tree; + read_tree( tree ); + vm_push( tree ); + break; + } + case IN_LOAD_WORD: { + debug( REALM_BYTECODE, "IN_LOAD_WORD\n" ); + Word w; + read_word( w ); + vm_push( (SW)w ); + break; + } + case IN_LOAD_TRUE: { + debug( REALM_BYTECODE, "IN_LOAD_TRUE\n" ); + treeUpref( prg->trueVal ); + vm_push( prg->trueVal ); + break; + } + case IN_LOAD_FALSE: { + debug( REALM_BYTECODE, "IN_LOAD_FALSE\n" ); + treeUpref( prg->falseVal ); + vm_push( prg->falseVal ); + break; + } + case IN_LOAD_INT: { + Word i; + read_word( i ); + + debug( REALM_BYTECODE, "IN_LOAD_INT %d\n", i ); + + Tree *tree = constructInteger( prg, i ); + treeUpref( tree ); + vm_push( tree ); + break; + } + case IN_LOAD_STR: { + Word offset; + read_word( offset ); + + debug( REALM_BYTECODE, "IN_LOAD_STR %d\n", offset ); + + Head *lit = makeLiteral( prg, offset ); + Tree *tree = constructString( prg, lit ); + treeUpref( tree ); + vm_push( tree ); + break; + } + case IN_PRINT: { + int n; + read_byte( n ); + debug( REALM_BYTECODE, "IN_PRINT %d\n", n ); + + while ( n-- > 0 ) { + Tree *tree = vm_pop(); + printTreeFile( prg, sp, stdout, tree, true ); + treeDownref( prg, sp, tree ); + } + break; + } + case IN_PRINT_XML_AC: { + int n; + read_byte( n ); + + debug( REALM_BYTECODE, "IN_PRINT_XML_AC %d\n", n ); + + while ( n-- > 0 ) { + Tree *tree = vm_pop(); + printXmlStdout( prg, sp, tree, true, true ); + treeDownref( prg, sp, tree ); + } + break; + } + case IN_PRINT_XML: { + int n; + 
read_byte( n ); + debug( REALM_BYTECODE, "IN_PRINT_XML %d", n ); + + while ( n-- > 0 ) { + Tree *tree = vm_pop(); + printXmlStdout( prg, sp, tree, false, true ); + treeDownref( prg, sp, tree ); + } + break; + } + case IN_PRINT_STREAM: { + int n; + read_byte( n ); + debug( REALM_BYTECODE, "IN_PRINT_STREAM\n" ); + + Stream *stream = (Stream*)vm_pop(); + while ( n-- > 0 ) { + Tree *tree = vm_pop(); + printTreeFile( prg, sp, stream->file, tree, true ); + treeDownref( prg, sp, tree ); + } + treeDownref( prg, sp, (Tree*)stream ); + break; + } + case IN_LOAD_CONTEXT_R: { + debug( REALM_BYTECODE, "IN_LOAD_CONTEXT_R\n" ); + + treeUpref( exec->pdaRun->context ); + vm_push( exec->pdaRun->context ); + break; + } + case IN_LOAD_CONTEXT_WV: { + debug( REALM_BYTECODE, "IN_LOAD_CONTEXT_WV\n" ); + + treeUpref( exec->pdaRun->context ); + vm_push( exec->pdaRun->context ); + + /* Set up the reverse instruction. */ + append( &exec->pdaRun->rcodeCollect, IN_LOAD_CONTEXT_BKT ); + exec->rcodeUnitLen = SIZEOF_CODE; + break; + } + case IN_LOAD_CONTEXT_WC: { + debug( REALM_BYTECODE, "IN_LOAD_CONTEXT_WC\n" ); + + /* This is identical to the _R version, but using it for writing + * would be confusing. */ + treeUpref( exec->pdaRun->context ); + vm_push( exec->pdaRun->context ); + break; + } + case IN_LOAD_CONTEXT_BKT: { + debug( REALM_BYTECODE, "IN_LOAD_CONTEXT_BKT\n" ); + + treeUpref( exec->pdaRun->context ); + vm_push( exec->pdaRun->context ); + break; + } + case IN_LOAD_GLOBAL_R: { + debug( REALM_BYTECODE, "IN_LOAD_GLOBAL_R\n" ); + + treeUpref( prg->global ); + vm_push( prg->global ); + break; + } + case IN_LOAD_GLOBAL_WV: { + debug( REALM_BYTECODE, "IN_LOAD_GLOBAL_WV\n" ); + + treeUpref( prg->global ); + vm_push( prg->global ); + + /* Set up the reverse instruction. 
*/ + append( &exec->pdaRun->rcodeCollect, IN_LOAD_GLOBAL_BKT ); + exec->rcodeUnitLen = SIZEOF_CODE; + break; + } + case IN_LOAD_GLOBAL_WC: { + debug( REALM_BYTECODE, "IN_LOAD_GLOBAL_WC\n" ); + + /* This is identical to the _R version, but using it for writing + * would be confusing. */ + treeUpref( prg->global ); + vm_push( prg->global ); + break; + } + case IN_LOAD_GLOBAL_BKT: { + debug( REALM_BYTECODE, "IN_LOAD_GLOBAL_BKT\n" ); + + treeUpref( prg->global ); + vm_push( prg->global ); + break; + } + case IN_LOAD_ACCUM_R: { + debug( REALM_BYTECODE, "IN_LOAD_ACCUM_R\n" ); + + treeUpref( (Tree*)exec->parser ); + vm_push( (Tree*)exec->parser ); + assert( exec->parser != 0 ); + break; + } + case IN_LOAD_ACCUM_WV: { + debug( REALM_BYTECODE, "IN_LOAD_ACCUM_WV\n" ); + + treeUpref( (Tree*)exec->parser ); + vm_push( (Tree*)exec->parser ); + assert( exec->parser != 0 ); + + /* Set up the reverse instruction. */ + append( &exec->pdaRun->rcodeCollect, IN_LOAD_ACCUM_BKT ); + appendWord( &exec->pdaRun->rcodeCollect, (Word)exec->parser ); + exec->rcodeUnitLen = SIZEOF_CODE + SIZEOF_WORD; + break; + } + case IN_LOAD_ACCUM_WC: { + debug( REALM_BYTECODE, "IN_LOAD_ACCUM_WC\n" ); + + /* This is identical to the _R version, but using it for writing + * would be confusing. 
*/ + treeUpref( (Tree*)exec->parser ); + vm_push( (Tree*)exec->parser ); + assert( exec->parser != 0 ); + break; + } + case IN_LOAD_ACCUM_BKT: { + Tree *parser; + read_tree( parser ); + + debug( REALM_BYTECODE, "IN_LOAD_ACCUM_BKT\n" ); + + treeUpref( parser ); + vm_push( parser ); + break; + } + case IN_LOAD_INPUT_R: { + debug( REALM_BYTECODE, "IN_LOAD_INPUT_R\n" ); + + assert( exec->parser != 0 ); + treeUpref( (Tree*)exec->parser->input ); + vm_push( (Tree*)exec->parser->input ); + break; + } + case IN_LOAD_INPUT_WV: { + debug( REALM_BYTECODE, "IN_LOAD_INPUT_WV\n" ); + + assert( exec->parser != 0 ); + treeUpref( (Tree*)exec->parser->input ); + vm_push( (Tree*)exec->parser->input ); + + /* Set up the reverse instruction. */ + append( &exec->pdaRun->rcodeCollect, IN_LOAD_INPUT_BKT ); + appendWord( &exec->pdaRun->rcodeCollect, (Word)exec->parser->input ); + exec->rcodeUnitLen = SIZEOF_CODE + SIZEOF_WORD; + break; + } + case IN_LOAD_INPUT_WC: { + debug( REALM_BYTECODE, "IN_LOAD_INPUT_WC\n" ); + + /* This is identical to the _R version, but using it for writing + * would be confusing. */ + assert( exec->parser != 0 ); + treeUpref( (Tree*)exec->parser->input ); + vm_push( (Tree*)exec->parser->input ); + break; + } + case IN_LOAD_INPUT_BKT: { + Tree *accumStream; + read_tree( accumStream ); + + debug( REALM_BYTECODE, "IN_LOAD_INPUT_BKT\n" ); + + treeUpref( accumStream ); + vm_push( accumStream ); + break; + } + case IN_LOAD_CTX_R: { + debug( REALM_BYTECODE, "IN_LOAD_CTX_R\n" ); + + treeUpref( exec->pdaRun->context ); + vm_push( exec->pdaRun->context ); + break; + } + case IN_LOAD_CTX_WV: { + debug( REALM_BYTECODE, "IN_LOAD_CTX_WV\n" ); + + treeUpref( exec->pdaRun->context ); + vm_push( exec->pdaRun->context ); + + /* Set up the reverse instruction. 
*/ + append( &exec->pdaRun->rcodeCollect, IN_LOAD_ACCUM_BKT ); + appendWord( &exec->pdaRun->rcodeCollect, (Word)exec->parser ); + exec->rcodeUnitLen = SIZEOF_CODE + SIZEOF_WORD; + break; + } + case IN_LOAD_CTX_WC: { + debug( REALM_BYTECODE, "IN_LOAD_CTX_WC\n" ); + + /* This is identical to the _R version, but using it for writing + * would be confusing. */ + treeUpref( exec->pdaRun->context ); + vm_push( exec->pdaRun->context ); + break; + } + case IN_LOAD_CTX_BKT: { + debug( REALM_BYTECODE, "IN_LOAD_CTX_BKT\n" ); + + treeUpref( exec->pdaRun->context ); + vm_push( exec->pdaRun->context ); + break; + } + case IN_INIT_CAPTURES: { + /* uchar ncaps; */ + consume_byte(); + + debug( REALM_BYTECODE, "IN_INIT_CAPTURES\n" ); + + /* If there are captures (this is a translate block) then copy them into + * the local frame now. */ + LangElInfo *lelInfo = prg->rtd->lelInfo; + char **mark = exec->fsmRun->mark; + + int i; + for ( i = 0; i < lelInfo[exec->pdaRun->tokenId].numCaptureAttr; i++ ) { + CaptureAttr *ca = &prg->rtd->captureAttr[lelInfo[exec->pdaRun->tokenId].captureAttr + i]; + Head *data = stringAllocFull( prg, + mark[ca->mark_enter], mark[ca->mark_leave] - mark[ca->mark_enter] ); + Tree *string = constructString( prg, data ); + treeUpref( string ); + setLocal( exec->framePtr, -1 - i, string ); + } + break; + } + case IN_INIT_RHS_EL: { + Half position; + short field; + read_half( position ); + read_half( field ); + + debug( REALM_BYTECODE, "IN_INIT_RHS_EL %hd\n", field ); + + Tree *val = getRhsEl( prg, exec->pdaRun->redLel->shadow->tree, position ); + treeUpref( val ); + vm_local(field) = val; + break; + } + + case IN_INIT_LHS_EL: { + short field; + read_half( field ); + + debug( REALM_BYTECODE, "IN_INIT_LHS_EL %hd\n", field ); + + /* We transfer it to to the local field. Possibly take a copy. */ + Tree *val = exec->pdaRun->redLel->shadow->tree; + + /* Save it. 
*/ + treeUpref( val ); + exec->pdaRun->parsed = val; + + exec->pdaRun->redLel->shadow->tree = 0; + vm_local(field) = val; + break; + } + case IN_STORE_LHS_EL: { + short field; + read_half( field ); + + debug( REALM_BYTECODE, "IN_STORE_LHS_EL %hd\n", field ); + + Tree *val = vm_local(field); + vm_local(field) = 0; + exec->pdaRun->redLel->shadow->tree = val; + break; + } + case IN_UITER_ADVANCE: { + short field; + read_half( field ); + + debug( REALM_BYTECODE, "IN_UITER_ADVANCE\n" ); + + /* Get the iterator. */ + UserIter *uiter = (UserIter*) vm_local(field); + + long stackSize = uiter->stackRoot - vm_ptop(); + assert( uiter->stackSize == stackSize ); + + /* Fix the return instruction pointer. */ + uiter->stackRoot[-IFR_AA + IFR_RIN] = (SW)instr; + + instr = uiter->resume; + exec->framePtr = uiter->frame; + exec->iframePtr = &uiter->stackRoot[-IFR_AA]; + break; + } + case IN_UITER_GET_CUR_R: { + short field; + read_half( field ); + + debug( REALM_BYTECODE, "IN_UITER_GET_CUR_R\n" ); + + UserIter *uiter = (UserIter*) vm_local(field); + Tree *val = uiter->ref.kid->tree; + treeUpref( val ); + vm_push( val ); + break; + } + case IN_UITER_GET_CUR_WC: { + short field; + read_half( field ); + + debug( REALM_BYTECODE, "IN_UITER_GET_CUR_WC\n" ); + + UserIter *uiter = (UserIter*) vm_local(field); + splitRef( prg, &sp, &uiter->ref ); + Tree *split = uiter->ref.kid->tree; + treeUpref( split ); + vm_push( split ); + break; + } + case IN_UITER_SET_CUR_WC: { + short field; + read_half( field ); + + debug( REALM_BYTECODE, "IN_UITER_SET_CUR_WC\n" ); + + Tree *t = vm_pop(); + UserIter *uiter = (UserIter*) vm_local(field); + splitRef( prg, &sp, &uiter->ref ); + Tree *old = uiter->ref.kid->tree; + setUiterCur( prg, uiter, t ); + treeDownref( prg, sp, old ); + break; + } + case IN_GET_LOCAL_R: { + short field; + read_half( field ); + + debug( REALM_BYTECODE, "IN_GET_LOCAL_R\n" ); + + Tree *val = vm_local(field); + treeUpref( val ); + vm_push( val ); + break; + } + case IN_GET_LOCAL_WC: { 
+ short field; + read_half( field ); + + debug( REALM_BYTECODE, "IN_GET_LOCAL_WC\n" ); + + Tree *split = getLocalSplit( prg, exec->framePtr, field ); + treeUpref( split ); + vm_push( split ); + break; + } + case IN_SET_LOCAL_WC: { + short field; + read_half( field ); + + debug( REALM_BYTECODE, "IN_SET_LOCAL_WC %d\n", field ); + + Tree *val = vm_pop(); + treeDownref( prg, sp, vm_local(field) ); + setLocal( exec->framePtr, field, val ); + break; + } + case IN_SAVE_RET: { + debug( REALM_BYTECODE, "IN_SAVE_RET\n" ); + + Tree *val = vm_pop(); + vm_local(FR_RV) = val; + break; + } + case IN_GET_LOCAL_REF_R: { + short field; + read_half( field ); + + debug( REALM_BYTECODE, "IN_GET_LOCAL_REF_R\n" ); + + Ref *ref = (Ref*) vm_plocal(field); + Tree *val = ref->kid->tree; + treeUpref( val ); + vm_push( val ); + break; + } + case IN_GET_LOCAL_REF_WC: { + short field; + read_half( field ); + + debug( REALM_BYTECODE, "IN_GET_LOCAL_REF_WC\n" ); + + Ref *ref = (Ref*) vm_plocal(field); + splitRef( prg, &sp, ref ); + Tree *val = ref->kid->tree; + treeUpref( val ); + vm_push( val ); + break; + } + case IN_SET_LOCAL_REF_WC: { + short field; + read_half( field ); + + debug( REALM_BYTECODE, "IN_SET_LOCAL_REF_WC\n" ); + + Tree *val = vm_pop(); + Ref *ref = (Ref*) vm_plocal(field); + splitRef( prg, &sp, ref ); + refSetValue( ref, val ); + break; + } + case IN_GET_FIELD_R: { + short field; + read_half( field ); + + debug( REALM_BYTECODE, "IN_GET_FIELD_R %d\n", field ); + + Tree *obj = vm_pop(); + treeDownref( prg, sp, obj ); + + Tree *val = getField( obj, field ); + treeUpref( val ); + vm_push( val ); + break; + } + case IN_GET_FIELD_WC: { + short field; + read_half( field ); + + debug( REALM_BYTECODE, "IN_GET_FIELD_WC %d\n", field ); + + Tree *obj = vm_pop(); + treeDownref( prg, sp, obj ); + + Tree *split = getFieldSplit( prg, obj, field ); + treeUpref( split ); + vm_push( split ); + break; + } + case IN_GET_FIELD_WV: { + short field; + read_half( field ); + + debug( REALM_BYTECODE, 
"IN_GET_FIELD_WV\n" ); + + Tree *obj = vm_pop(); + treeDownref( prg, sp, obj ); + + Tree *split = getFieldSplit( prg, obj, field ); + treeUpref( split ); + vm_push( split ); + + /* Set up the reverse instruction. */ + append( &exec->pdaRun->rcodeCollect, IN_GET_FIELD_BKT ); + appendHalf( &exec->pdaRun->rcodeCollect, field ); + exec->rcodeUnitLen += SIZEOF_CODE + SIZEOF_HALF; + break; + } + case IN_GET_FIELD_BKT: { + short field; + read_half( field ); + + debug( REALM_BYTECODE, "IN_GET_FIELD_BKT\n" ); + + Tree *obj = vm_pop(); + treeDownref( prg, sp, obj ); + + Tree *split = getFieldSplit( prg, obj, field ); + treeUpref( split ); + vm_push( split ); + break; + } + case IN_SET_FIELD_WC: { + short field; + read_half( field ); + + debug( REALM_BYTECODE, "IN_SET_FIELD_WC %d\n", field ); + + Tree *obj = vm_pop(); + Tree *val = vm_pop(); + treeDownref( prg, sp, obj ); + + /* Downref the old value. */ + Tree *prev = getField( obj, field ); + treeDownref( prg, sp, prev ); + + setField( prg, obj, field, val ); + break; + } + case IN_SET_FIELD_WV: { + short field; + read_half( field ); + + debug( REALM_BYTECODE, "IN_SET_FIELD_WV %d\n", field ); + + Tree *obj = vm_pop(); + Tree *val = vm_pop(); + treeDownref( prg, sp, obj ); + + /* Save the old value, then set the field. */ + Tree *prev = getField( obj, field ); + setField( prg, obj, field, val ); + + /* Set up the reverse instruction. */ + append( &exec->pdaRun->rcodeCollect, IN_SET_FIELD_BKT ); + appendHalf( &exec->pdaRun->rcodeCollect, field ); + appendWord( &exec->pdaRun->rcodeCollect, (Word)prev ); + exec->rcodeUnitLen += SIZEOF_CODE + SIZEOF_HALF + SIZEOF_WORD; + append( &exec->pdaRun->rcodeCollect, exec->rcodeUnitLen ); + /* FLUSH */ + break; + } + case IN_SET_FIELD_BKT: { + short field; + Tree *val; + read_half( field ); + read_tree( val ); + + debug( REALM_BYTECODE, "IN_SET_FIELD_BKT\n" ); + + Tree *obj = vm_pop(); + treeDownref( prg, sp, obj ); + + /* Downref the old value. 
*/ + Tree *prev = getField( obj, field ); + treeDownref( prg, sp, prev ); + + setField( prg, obj, field, val ); + break; + } + case IN_SET_FIELD_LEAVE_WC: { + short field; + read_half( field ); + + debug( REALM_BYTECODE, "IN_SET_FIELD_LEAVE_WC\n" ); + + /* Note that we don't downref the object here because we are + * leaving it on the stack. */ + Tree *obj = vm_pop(); + Tree *val = vm_pop(); + + /* Downref the old value. */ + Tree *prev = getField( obj, field ); + treeDownref( prg, sp, prev ); + + /* Set the field. */ + setField( prg, obj, field, val ); + + /* Leave the object on the top of the stack. */ + vm_push( obj ); + break; + } + case IN_GET_RHS_VAL_R: { + debug( REALM_BYTECODE, "IN_GET_RHS_VAL_R\n" ); + int i, done = 0; + uchar len; + + Tree *obj = vm_pop(), *val = 0; + treeDownref( prg, sp, obj ); + + read_byte( len ); + for ( i = 0; i < len; i++ ) { + uchar prodNum, childNum; + read_byte( prodNum ); + read_byte( childNum ); + if ( !done && obj->prodNum == prodNum ) { + val = getRhsEl( prg, obj, childNum ); + done = 1; + } + } + + treeUpref( val ); + vm_push( val ); + break; + } + case IN_POP: { + debug( REALM_BYTECODE, "IN_POP\n" ); + + Tree *val = vm_pop(); + treeDownref( prg, sp, val ); + break; + } + case IN_POP_N_WORDS: { + short n; + read_half( n ); + + debug( REALM_BYTECODE, "IN_POP_N_WORDS\n" ); + + vm_popn( n ); + break; + } + case IN_SPRINTF: { + debug( REALM_BYTECODE, "IN_SPRINTF\n" ); + + Tree *f = vm_pop(); + f++; + Tree *integer = vm_pop(); + Tree *format = vm_pop(); + Head *res = stringSprintf( prg, (Str*)format, (Int*)integer ); + Tree *str = constructString( prg, res ); + treeUpref( str ); + vm_push( str ); + treeDownref( prg, sp, integer ); + treeDownref( prg, sp, format ); + break; + } + case IN_STR_ATOI: { + debug( REALM_BYTECODE, "IN_STR_ATOI\n" ); + + Str *str = (Str*)vm_pop(); + Word res = strAtoi( str->value ); + Tree *integer = constructInteger( prg, res ); + treeUpref( integer ); + vm_push( integer ); + treeDownref( prg, sp, 
(Tree*)str ); + break; + } + case IN_INT_TO_STR: { + debug( REALM_BYTECODE, "IN_INT_TO_STR\n" ); + + Int *i = (Int*)vm_pop(); + Head *res = intToStr( prg, i->value ); + Tree *str = constructString( prg, res ); + treeUpref( str ); + vm_push( str ); + treeDownref( prg, sp, (Tree*) i ); + break; + } + case IN_TREE_TO_STR: { + debug( REALM_BYTECODE, "IN_TREE_TO_STR\n" ); + + Tree *tree = vm_pop(); + Head *res = treeToStr( prg, sp, tree, true ); + Tree *str = constructString( prg, res ); + treeUpref( str ); + vm_push( str ); + treeDownref( prg, sp, tree ); + break; + } + case IN_TREE_TO_STR_NOTRIM: { + debug( REALM_BYTECODE, "IN_TREE_TO_STR_NOTRIM\n" ); + + Tree *tree = vm_pop(); + Head *res = treeToStr( prg, sp, tree, false ); + Tree *str = constructString( prg, res ); + treeUpref( str ); + vm_push( str ); + treeDownref( prg, sp, tree ); + break; + } + case IN_TREE_TRIM: { + debug( REALM_BYTECODE, "IN_TREE_TRIM\n" ); + + Tree *tree = vm_pop(); + Tree *trimmed = treeTrim( prg, sp, tree ); + vm_push( trimmed ); + break; + } + case IN_CONCAT_STR: { + debug( REALM_BYTECODE, "IN_CONCAT_STR\n" ); + + Str *s2 = (Str*)vm_pop(); + Str *s1 = (Str*)vm_pop(); + Head *res = concatStr( s1->value, s2->value ); + Tree *str = constructString( prg, res ); + treeUpref( str ); + treeDownref( prg, sp, (Tree*)s1 ); + treeDownref( prg, sp, (Tree*)s2 ); + vm_push( str ); + break; + } + case IN_STR_UORD8: { + debug( REALM_BYTECODE, "IN_STR_UORD8\n" ); + + Str *str = (Str*)vm_pop(); + Word res = strUord8( str->value ); + Tree *tree = constructInteger( prg, res ); + treeUpref( tree ); + vm_push( tree ); + treeDownref( prg, sp, (Tree*)str ); + break; + } + case IN_STR_UORD16: { + debug( REALM_BYTECODE, "IN_STR_UORD16\n" ); + + Str *str = (Str*)vm_pop(); + Word res = strUord16( str->value ); + Tree *tree = constructInteger( prg, res ); + treeUpref( tree ); + vm_push( tree ); + treeDownref( prg, sp, (Tree*)str ); + break; + } + + case IN_STR_LENGTH: { + debug( REALM_BYTECODE, "IN_STR_LENGTH\n" ); + 
+ Str *str = (Str*)vm_pop(); + long len = stringLength( str->value ); + Tree *res = constructInteger( prg, len ); + treeUpref( res ); + vm_push( res ); + treeDownref( prg, sp, (Tree*)str ); + break; + } + case IN_JMP_FALSE: { + short dist; + read_half( dist ); + + debug( REALM_BYTECODE, "IN_JMP_FALSE %d\n", dist ); + + Tree *tree = vm_pop(); + if ( testFalse( prg, tree ) ) + instr += dist; + treeDownref( prg, sp, tree ); + break; + } + case IN_JMP_TRUE: { + short dist; + read_half( dist ); + + debug( REALM_BYTECODE, "IN_JMP_TRUE %d\n", dist ); + + Tree *tree = vm_pop(); + if ( !testFalse( prg, tree ) ) + instr += dist; + treeDownref( prg, sp, tree ); + break; + } + case IN_JMP: { + short dist; + read_half( dist ); + + debug( REALM_BYTECODE, "IN_JMP\n" ); + + instr += dist; + break; + } + case IN_REJECT: { + debug( REALM_BYTECODE, "IN_REJECT\n" ); + exec->pdaRun->reject = true; + break; + } + + /* + * Binary comparison operators. + */ + case IN_TST_EQL: { + debug( REALM_BYTECODE, "IN_TST_EQL\n" ); + + Tree *o2 = vm_pop(); + Tree *o1 = vm_pop(); + long r = cmpTree( prg, o1, o2 ); + Tree *val = r ? prg->falseVal : prg->trueVal; + treeUpref( val ); + vm_push( val ); + treeDownref( prg, sp, o1 ); + treeDownref( prg, sp, o2 ); + break; + } + case IN_TST_NOT_EQL: { + debug( REALM_BYTECODE, "IN_TST_NOT_EQL\n" ); + + Tree *o2 = vm_pop(); + Tree *o1 = vm_pop(); + long r = cmpTree( prg, o1, o2 ); + Tree *val = r ? prg->trueVal : prg->falseVal; + treeUpref( val ); + vm_push( val ); + treeDownref( prg, sp, o1 ); + treeDownref( prg, sp, o2 ); + break; + } + case IN_TST_LESS: { + debug( REALM_BYTECODE, "IN_TST_LESS\n" ); + + Tree *o2 = vm_pop(); + Tree *o1 = vm_pop(); + long r = cmpTree( prg, o1, o2 ); + Tree *val = r < 0 ? 
prg->trueVal : prg->falseVal; + treeUpref( val ); + vm_push( val ); + treeDownref( prg, sp, o1 ); + treeDownref( prg, sp, o2 ); + break; + } + case IN_TST_LESS_EQL: { + debug( REALM_BYTECODE, "IN_TST_LESS_EQL\n" ); + + Tree *o2 = vm_pop(); + Tree *o1 = vm_pop(); + long r = cmpTree( prg, o1, o2 ); + Tree *val = r <= 0 ? prg->trueVal : prg->falseVal; + treeUpref( val ); + vm_push( val ); + treeDownref( prg, sp, o1 ); + treeDownref( prg, sp, o2 ); + } + case IN_TST_GRTR: { + debug( REALM_BYTECODE, "IN_TST_GRTR\n" ); + + Tree *o2 = vm_pop(); + Tree *o1 = vm_pop(); + long r = cmpTree( prg, o1, o2 ); + Tree *val = r > 0 ? prg->trueVal : prg->falseVal; + treeUpref( val ); + vm_push( val ); + treeDownref( prg, sp, o1 ); + treeDownref( prg, sp, o2 ); + break; + } + case IN_TST_GRTR_EQL: { + debug( REALM_BYTECODE, "IN_TST_GRTR_EQL\n" ); + + Tree *o2 = (Tree*)vm_pop(); + Tree *o1 = (Tree*)vm_pop(); + long r = cmpTree( prg, o1, o2 ); + Tree *val = r >= 0 ? prg->trueVal : prg->falseVal; + treeUpref( val ); + vm_push( val ); + treeDownref( prg, sp, o1 ); + treeDownref( prg, sp, o2 ); + break; + } + case IN_TST_LOGICAL_AND: { + debug( REALM_BYTECODE, "IN_TST_LOGICAL_AND\n" ); + + Tree *o2 = vm_pop(); + Tree *o1 = vm_pop(); + long v2 = !testFalse( prg, o2 ); + long v1 = !testFalse( prg, o1 ); + Word r = v1 && v2; + Tree *val = r ? prg->trueVal : prg->falseVal; + treeUpref( val ); + vm_push( val ); + treeDownref( prg, sp, o1 ); + treeDownref( prg, sp, o2 ); + break; + } + case IN_TST_LOGICAL_OR: { + debug( REALM_BYTECODE, "IN_TST_LOGICAL_OR\n" ); + + Tree *o2 = vm_pop(); + Tree *o1 = vm_pop(); + long v2 = !testFalse( prg, o2 ); + long v1 = !testFalse( prg, o1 ); + Word r = v1 || v2; + Tree *val = r ? prg->trueVal : prg->falseVal; + treeUpref( val ); + vm_push( val ); + treeDownref( prg, sp, o1 ); + treeDownref( prg, sp, o2 ); + break; + } + case IN_NOT: { + debug( REALM_BYTECODE, "IN_NOT\n" ); + + Tree *tree = (Tree*)vm_pop(); + long r = testFalse( prg, tree ); + Tree *val = r ? 
prg->trueVal : prg->falseVal; + treeUpref( val ); + vm_push( val ); + treeDownref( prg, sp, tree ); + break; + } + + case IN_ADD_INT: { + debug( REALM_BYTECODE, "IN_ADD_INT\n" ); + + Int *o2 = (Int*)vm_pop(); + Int *o1 = (Int*)vm_pop(); + long r = o1->value + o2->value; + Tree *tree = constructInteger( prg, r ); + treeUpref( tree ); + vm_push( tree ); + treeDownref( prg, sp, (Tree*)o1 ); + treeDownref( prg, sp, (Tree*)o2 ); + break; + } + case IN_MULT_INT: { + debug( REALM_BYTECODE, "IN_MULT_INT\n" ); + + Int *o2 = (Int*)vm_pop(); + Int *o1 = (Int*)vm_pop(); + long r = o1->value * o2->value; + Tree *tree = constructInteger( prg, r ); + treeUpref( tree ); + vm_push( tree ); + treeDownref( prg, sp, (Tree*)o1 ); + treeDownref( prg, sp, (Tree*)o2 ); + break; + } + case IN_DIV_INT: { + debug( REALM_BYTECODE, "IN_DIV_INT\n" ); + + Int *o2 = (Int*)vm_pop(); + Int *o1 = (Int*)vm_pop(); + long r = o1->value / o2->value; + Tree *tree = constructInteger( prg, r ); + treeUpref( tree ); + vm_push( tree ); + treeDownref( prg, sp, (Tree*)o1 ); + treeDownref( prg, sp, (Tree*)o2 ); + break; + } + case IN_SUB_INT: { + debug( REALM_BYTECODE, "IN_SUB_INT\n" ); + + Int *o2 = (Int*)vm_pop(); + Int *o1 = (Int*)vm_pop(); + long r = o1->value - o2->value; + Tree *tree = constructInteger( prg, r ); + treeUpref( tree ); + vm_push( tree ); + treeDownref( prg, sp, (Tree*)o1 ); + treeDownref( prg, sp, (Tree*)o2 ); + break; + } + case IN_DUP_TOP_OFF: { + short off; + read_half( off ); + + debug( REALM_BYTECODE, "IN_DUP_TOP_OFF %hd\n", off ); + + Tree *val = vm_top_off(off); + treeUpref( val ); + vm_push( val ); + break; + } + case IN_DUP_TOP: { + debug( REALM_BYTECODE, "IN_DUP_TOP\n" ); + + Tree *val = vm_top(); + treeUpref( val ); + vm_push( val ); + break; + } + case IN_TRITER_FROM_REF: { + short field; + Half searchTypeId; + read_half( field ); + read_half( searchTypeId ); + + debug( REALM_BYTECODE, "IN_TRITER_FROM_REF\n" ); + + Ref rootRef; + rootRef.kid = (Kid*)vm_pop(); + rootRef.next = 
(Ref*)vm_pop(); + void *mem = vm_plocal(field); + initTreeIter( (TreeIter*)mem, &rootRef, searchTypeId, vm_ptop() ); + break; + } + case IN_TRITER_DESTROY: { + short field; + read_half( field ); + + debug( REALM_BYTECODE, "IN_TRITER_DESTROY\n" ); + + TreeIter *iter = (TreeIter*) vm_plocal(field); + treeIterDestroy( &sp, iter ); + break; + } + case IN_REV_TRITER_FROM_REF: { + short field; + Half searchTypeId; + read_half( field ); + read_half( searchTypeId ); + + debug( REALM_BYTECODE, "IN_REV_TRITER_FROM_REF\n" ); + + Ref rootRef; + rootRef.kid = (Kid*)vm_pop(); + rootRef.next = (Ref*)vm_pop(); + + Tree **stackRoot = vm_ptop(); + + int children = 0; + Kid *kid = treeChild( prg, rootRef.kid->tree ); + while ( kid != 0 ) { + children++; + vm_push( (SW) kid ); + kid = kid->next; + } + + void *mem = vm_plocal(field); + initRevTreeIter( (RevTreeIter*)mem, &rootRef, searchTypeId, stackRoot, children ); + break; + } + case IN_REV_TRITER_DESTROY: { + short field; + read_half( field ); + + debug( REALM_BYTECODE, "IN_REV_TRITER_DESTROY\n" ); + + RevTreeIter *iter = (RevTreeIter*) vm_plocal(field); + long curStackSize = iter->stackRoot - vm_ptop(); + assert( iter->stackSize == curStackSize ); + vm_popn( iter->stackSize ); + break; + } + case IN_TREE_SEARCH: { + Word id; + read_word( id ); + + debug( REALM_BYTECODE, "IN_TREE_SEARCH\n" ); + + Tree *tree = vm_pop(); + Tree *res = treeSearch2( prg, tree, id ); + treeUpref( res ); + vm_push( res ); + treeDownref( prg, sp, tree ); + break; + } + case IN_TRITER_ADVANCE: { + short field; + read_half( field ); + + debug( REALM_BYTECODE, "IN_TRITER_ADVANCE\n" ); + + TreeIter *iter = (TreeIter*) vm_plocal(field); + Tree *res = treeIterAdvance( prg, &sp, iter ); + treeUpref( res ); + vm_push( res ); + break; + } + case IN_TRITER_NEXT_CHILD: { + short field; + read_half( field ); + + debug( REALM_BYTECODE, "IN_TRITER_NEXT_CHILD\n" ); + + TreeIter *iter = (TreeIter*) vm_plocal(field); + Tree *res = treeIterNextChild( prg, &sp, iter ); + 
treeUpref( res ); + vm_push( res ); + break; + } + case IN_REV_TRITER_PREV_CHILD: { + short field; + read_half( field ); + + debug( REALM_BYTECODE, "IN_REV_TRITER_PREV_CHILD\n" ); + + RevTreeIter *iter = (RevTreeIter*) vm_plocal(field); + Tree *res = treeRevIterPrevChild( prg, &sp, iter ); + treeUpref( res ); + vm_push( res ); + break; + } + case IN_TRITER_NEXT_REPEAT: { + short field; + read_half( field ); + + debug( REALM_BYTECODE, "IN_TRITER_NEXT_REPEAT\n" ); + + TreeIter *iter = (TreeIter*) vm_plocal(field); + Tree *res = treeIterNextRepeat( prg, &sp, iter ); + treeUpref( res ); + vm_push( res ); + break; + } + case IN_TRITER_PREV_REPEAT: { + short field; + read_half( field ); + + debug( REALM_BYTECODE, "IN_TRITER_PREV_REPEAT\n" ); + + TreeIter *iter = (TreeIter*) vm_plocal(field); + Tree *res = treeIterPrevRepeat( prg, &sp, iter ); + treeUpref( res ); + vm_push( res ); + break; + } + case IN_TRITER_GET_CUR_R: { + short field; + read_half( field ); + + debug( REALM_BYTECODE, "IN_TRITER_GET_CUR_R\n" ); + + TreeIter *iter = (TreeIter*) vm_plocal(field); + Tree *tree = treeIterDerefCur( iter ); + treeUpref( tree ); + vm_push( tree ); + break; + } + case IN_TRITER_GET_CUR_WC: { + short field; + read_half( field ); + + debug( REALM_BYTECODE, "IN_TRITER_GET_CUR_WC\n" ); + + TreeIter *iter = (TreeIter*) vm_plocal(field); + splitIterCur( prg, &sp, iter ); + Tree *tree = treeIterDerefCur( iter ); + treeUpref( tree ); + vm_push( tree ); + break; + } + case IN_TRITER_SET_CUR_WC: { + short field; + read_half( field ); + + debug( REALM_BYTECODE, "IN_TRITER_SET_CUR_WC\n" ); + + Tree *tree = vm_pop(); + TreeIter *iter = (TreeIter*) vm_plocal(field); + splitIterCur( prg, &sp, iter ); + Tree *old = treeIterDerefCur( iter ); + setTriterCur( prg, iter, tree ); + treeDownref( prg, sp, old ); + break; + } + case IN_MATCH: { + Half patternId; + read_half( patternId ); + + debug( REALM_BYTECODE, "IN_MATCH\n" ); + + Tree *tree = vm_pop(); + + /* Run the match, push the result. 
*/ + int rootNode = prg->rtd->patReplInfo[patternId].offset; + + /* Bindings are indexed starting at 1. Zero bindId to represent no + * binding. We make a space for it here rather than do math at + * access them. */ + long numBindings = prg->rtd->patReplInfo[patternId].numBindings; + Tree *bindings[1+numBindings]; + memset( bindings, 0, sizeof(Tree*)*(1+numBindings) ); + + Kid kid; + kid.tree = tree; + kid.next = 0; + int matched = matchPattern( bindings, prg, rootNode, &kid, false ); + + if ( !matched ) + memset( bindings, 0, sizeof(Tree*)*(1+numBindings) ); + else { + int b; + for ( b = 1; b <= numBindings; b++ ) + assert( bindings[b] != 0 ); + } + + Tree *result = matched ? tree : 0; + treeUpref( result ); + vm_push( result ? tree : 0 ); + int b; + for ( b = 1; b <= numBindings; b++ ) { + treeUpref( bindings[b] ); + vm_push( bindings[b] ); + } + + treeDownref( prg, sp, tree ); + break; + } + + case IN_GET_ACCUM_CTX_R: { + debug( REALM_BYTECODE, "IN_GET_ACCUM_CTX_R\n" ); + + Tree *obj = vm_pop(); + Tree *ctx = ((Parser*)obj)->pdaRun->context; + treeUpref( ctx ); + vm_push( ctx ); + treeDownref( prg, sp, obj ); + break; + } + + case IN_SET_ACCUM_CTX_WC: { + debug( REALM_BYTECODE, "IN_SET_ACCUM_CTX_WC\n" ); + + Tree *parser = vm_pop(); + Tree *val = vm_pop(); + parserSetContext( prg, sp, (Parser*)parser, val ); + treeDownref( prg, sp, parser ); + break; + } + +// case IN_GET_ACCUM_CTX_WC: +// case IN_GET_ACCUM_CTX_WV: +// case IN_SET_ACCUM_CTX_WC: +// case IN_SET_ACCUM_CTX_WV: +// break; + + case IN_INPUT_APPEND_WC: { + debug( REALM_BYTECODE, "IN_INPUT_APPEND_WC \n" ); + + Input *accumStream = (Input*)vm_pop(); + Tree *input = vm_pop(); + streamAppend( prg, sp, input, accumStream->in ); + + vm_push( (Tree*)accumStream ); + treeDownref( prg, sp, input ); + break; + } + case IN_INPUT_APPEND_WV: { + debug( REALM_BYTECODE, "IN_INPUT_APPEND_WV \n" ); + + Input *accumStream = (Input*)vm_pop(); + Tree *input = vm_pop(); + Word len = streamAppend( prg, sp, input, 
accumStream->in ); + + treeUpref( (Tree*)accumStream ); + vm_push( (Tree*)accumStream ); + + append( &exec->pdaRun->rcodeCollect, IN_INPUT_APPEND_BKT ); + appendWord( &exec->pdaRun->rcodeCollect, (Word) accumStream ); + appendWord( &exec->pdaRun->rcodeCollect, (Word) input ); + appendWord( &exec->pdaRun->rcodeCollect, (Word) len ); + append( &exec->pdaRun->rcodeCollect, SIZEOF_CODE + 3 * SIZEOF_WORD ); + break; + } + + case IN_INPUT_APPEND_BKT: { + Tree *accumStream; + Tree *input; + Word len; + read_tree( accumStream ); + read_tree( input ); + read_word( len ); + + debug( REALM_BYTECODE, "IN_INPUT_APPEND_BKT\n" ); + + undoStreamAppend( prg, sp, 0, ((Input*)accumStream)->in, input, len ); + treeDownref( prg, sp, accumStream ); + treeDownref( prg, sp, input ); + break; + } + + case IN_PARSE_LOAD_START: { + debug( REALM_BYTECODE, "IN_PARSE_LOAD_START\n" ); + vm_push( (SW) PcrStart ); + break; + } + + case IN_PARSE_SAVE_STEPS: { + debug( REALM_BYTECODE, "IN_PARSE_SAVE_STEPS\n" ); + + Parser *parser = (Parser*)vm_pop(); + long steps = parser->pdaRun->steps; + + vm_push( (SW)steps ); + vm_push( (SW)parser ); + break; + } + + case IN_PCR_CALL: { + debug( REALM_BYTECODE, "IN_PCR_CALL\n" ); + + long pcr = (long)vm_pop(); + Parser *parser = (Parser*)vm_pop(); + long steps = (long)vm_pop(); + + vm_push( (SW)steps ); + vm_push( (SW)parser ); + vm_push( (SW)pcr ); + + vm_push( (SW)exec->parser ); + vm_push( (SW)exec->pdaRun ); + vm_push( (SW)exec->fsmRun ); + vm_push( (SW)exec->inputStream ); + vm_push( (SW)exec->framePtr ); + vm_push( (SW)exec->iframePtr ); + vm_push( (SW)exec->frameId ); + vm_push( (SW)exec->rcodeUnitLen ); + + Code *returnTo = instr - ( SIZEOF_CODE + SIZEOF_CODE + SIZEOF_HALF ); + vm_push( (SW)returnTo ); + + initExecution( exec, parser, parser->pdaRun, parser->fsmRun, parser->input->in, parser->pdaRun->frameId ); + instr = parser->pdaRun->code; + break; + } + + case IN_PCR_RET: { + debug( REALM_BYTECODE, "IN_PCR_RET\n" ); + + FrameInfo *fi = 
&prg->rtd->frameInfo[exec->frameId]; + downrefLocalTrees( prg, sp, exec->framePtr, fi->trees, fi->treesLen ); + vm_popn( fi->frameSize ); + + instr = (Code*) vm_pop(); + exec->rcodeUnitLen = ( long ) vm_pop(); + exec->frameId = ( long ) vm_pop(); + exec->iframePtr = ( Tree ** ) vm_pop(); + exec->framePtr = ( Tree ** ) vm_pop(); + exec->inputStream = ( InputStream * ) vm_pop(); + exec->fsmRun = ( FsmRun * ) vm_pop(); + exec->pdaRun = ( PdaRun * ) vm_pop(); + exec->parser = ( Parser * ) vm_pop(); + + if ( instr == 0 ) { + fflush( stdout ); + goto out; + } + break; + } + + case IN_PCR_END_DECK: { + debug( REALM_BYTECODE, "IN_PCR_END_DECK\n" ); + exec->pdaRun->onDeck = false; + break; + } + + case IN_PARSE_FRAG_WC: { + debug( REALM_BYTECODE, "IN_PARSE_FRAG_WC\n" ); + + Half stopId; + read_half( stopId ); + + long pcr = (long)vm_pop(); + Parser *parser = (Parser*)vm_pop(); + long steps = (long)vm_pop(); + + pcr = parseFrag( prg, sp, parser, stopId, pcr ); + + vm_push( (SW)steps ); + vm_push( (SW)parser ); + vm_push( (SW)pcr ); + + /* If done, jump to the terminating instruction, otherwise fall + * through to call some code, then jump back here. */ + if ( pcr == PcrDone ) + instr += SIZEOF_CODE; + break; + } + + case IN_PARSE_FRAG_WC3: { + debug( REALM_BYTECODE, "IN_PARSE_FRAG_WC3\n" ); + + vm_pop_ignore(); + Parser *parser = (Parser*)vm_pop(); + vm_pop_ignore(); + + treeDownref( prg, sp, (Tree*)parser ); + + if ( prg->induceExit ) + goto out; + + break; + } + + case IN_PARSE_FRAG_WV: { + Half stopId; + read_half( stopId ); + + debug( REALM_BYTECODE, "IN_PARSE_FRAG_WV\n" ); + + long pcr = (long)vm_pop(); + Parser *parser = (Parser*)vm_pop(); + long steps = (long)vm_pop(); + + pcr = parseFrag( prg, sp, parser, stopId, pcr ); + + vm_push( (SW)steps ); + vm_push( (SW)parser ); + vm_push( (SW)pcr ); + + /* If done, jump to the terminating instruction, otherwise fall + * through to call some code, then jump back here. 
*/ + if ( pcr == PcrDone ) + instr += SIZEOF_CODE; + break; + } + + case IN_PARSE_FRAG_WV3: { + debug( REALM_BYTECODE, "IN_PARSE_FRAG_WV3 \n" ); + + vm_pop_ignore(); + Parser *parser = (Parser*)vm_pop(); + long steps = (long)vm_pop(); + + append( &exec->pdaRun->rcodeCollect, IN_LOAD_WORD ); + appendWord( &exec->pdaRun->rcodeCollect, steps ); + append( &exec->pdaRun->rcodeCollect, IN_LOAD_TREE ); + appendWord( &exec->pdaRun->rcodeCollect, (Word)parser ); + append( &exec->pdaRun->rcodeCollect, IN_PARSE_LOAD_START ); + append( &exec->pdaRun->rcodeCollect, IN_PARSE_FRAG_BKT ); + appendHalf( &exec->pdaRun->rcodeCollect, 0 ); + append( &exec->pdaRun->rcodeCollect, IN_PCR_CALL ); + append( &exec->pdaRun->rcodeCollect, IN_PARSE_FRAG_BKT3 ); + append( &exec->pdaRun->rcodeCollect, 6 * SIZEOF_CODE + 2 * SIZEOF_WORD + SIZEOF_HALF ); + + if ( prg->induceExit ) + goto out; + break; + } + + case IN_PARSE_FRAG_BKT: { + Half stopId; + read_half( stopId ); + + debug( REALM_BYTECODE, "IN_PARSE_FRAG_BKT\n" ); + + long pcr = (long)vm_pop(); + Parser *parser = (Parser*)vm_pop(); + long steps = (long)vm_pop(); + + pcr = undoParseFrag( prg, sp, parser, steps, pcr ); + + vm_push( (SW)steps ); + vm_push( (SW)parser ); + vm_push( (SW)pcr ); + + if ( pcr == PcrDone ) + instr += SIZEOF_CODE; + break; + } + + case IN_PARSE_FRAG_BKT3: { + debug( REALM_BYTECODE, "IN_PARSE_FRAG_BKT3\n" ); + + vm_pop_ignore(); + Parser *parser = (Parser*)vm_pop(); + vm_pop_ignore(); + + treeDownref( prg, sp, (Tree*)parser ); + break; + } + + case IN_PARSE_FINISH_WC: { + Half stopId; + read_half( stopId ); + + debug( REALM_BYTECODE, "IN_PARSE_FINISH_WC\n" ); + + long pcr = (long)vm_pop(); + Parser *parser = (Parser*)vm_pop(); + long steps = (long)vm_pop(); + + parser->result = 0; + pcr = parseFinish( &parser->result, prg, sp, parser, false, pcr ); + + vm_push( (SW)steps ); + vm_push( (SW)parser ); + vm_push( (SW)pcr ); + + /* If done, jump to the terminating instruction, otherwise fall + * through to call some code, 
then jump back here. */ + if ( pcr == PcrDone ) + instr += SIZEOF_CODE; + break; + } + + case IN_PARSE_FINISH_WC3: { + debug( REALM_BYTECODE, "IN_PARSE_FINISH_WC3\n" ); + + vm_pop_ignore(); + Parser *parser = (Parser*)vm_pop(); + vm_pop_ignore(); + + vm_push( parser->result ); + debug( REALM_BYTECODE, "parser refs: %d\n", parser->refs ); + treeDownref( prg, sp, (Tree*)parser ); + if ( prg->induceExit ) + goto out; + + break; + } + + case IN_PARSE_FINISH_WV: { + Half stopId; + read_half( stopId ); + + debug( REALM_BYTECODE, "IN_PARSE_FINISH_WV\n" ); + + long pcr = (long)vm_pop(); + Parser *parser = (Parser*)vm_pop(); + long steps = (long)vm_pop(); + + parser->result = 0; + pcr = parseFinish( &parser->result, prg, sp, parser, true, pcr ); + + vm_push( (SW)steps ); + vm_push( (SW)parser ); + vm_push( (SW)pcr ); + + if ( pcr == PcrDone ) + instr += SIZEOF_CODE; + break; + } + + case IN_PARSE_FINISH_WV3: { + debug( REALM_BYTECODE, "IN_PARSE_FINISH_WV3\n" ); + + vm_pop_ignore(); + Parser *parser = (Parser*)vm_pop(); + long steps = (long)vm_pop(); + + vm_push( parser->result ); + + append( &exec->pdaRun->rcodeCollect, IN_LOAD_WORD ); + appendWord( &exec->pdaRun->rcodeCollect, steps ); + append( &exec->pdaRun->rcodeCollect, IN_LOAD_TREE ); + appendWord( &exec->pdaRun->rcodeCollect, (Word)parser ); + append( &exec->pdaRun->rcodeCollect, IN_PARSE_LOAD_START ); + append( &exec->pdaRun->rcodeCollect, IN_PARSE_FINISH_BKT ); + appendHalf( &exec->pdaRun->rcodeCollect, 0 ); + append( &exec->pdaRun->rcodeCollect, IN_PCR_CALL ); + append( &exec->pdaRun->rcodeCollect, IN_PARSE_FINISH_BKT3 ); + append( &exec->pdaRun->rcodeCollect, 6 * SIZEOF_CODE + 2 * SIZEOF_WORD + SIZEOF_HALF ); + + if ( prg->induceExit ) + goto out; + + break; + } + + case IN_PARSE_FINISH_BKT: { + Half stopId; + read_half( stopId ); + + debug( REALM_BYTECODE, "IN_PARSE_FINISH_BKT\n" ); + + long pcr = (long)vm_pop(); + Parser *parser = (Parser*)vm_pop(); + long steps = (long)vm_pop(); + + pcr = undoParseFrag( prg, 
sp, parser, steps, pcr ); + + vm_push( (SW)steps ); + vm_push( (SW)parser ); + vm_push( (SW)pcr ); + + if ( pcr == PcrDone ) + instr += SIZEOF_CODE; + break; + } + + case IN_PARSE_FINISH_BKT3: { + debug( REALM_BYTECODE, "IN_PARSE_FINISH_BKT3\n" ); + + vm_pop_ignore(); + Parser *parser = (Parser*)vm_pop(); + vm_pop_ignore(); + + unsetEof( parser->input->in ); + treeDownref( prg, sp, (Tree*)parser ); + break; + } + + case IN_INPUT_PULL_WV: { + debug( REALM_BYTECODE, "IN_INPUT_PULL_WV\n" ); + + Input *accumStream = (Input*)vm_pop(); + Tree *len = vm_pop(); + Tree *string = streamPullBc( prg, exec->fsmRun, accumStream->in, len ); + treeUpref( string ); + vm_push( string ); + + /* Single unit. */ + treeUpref( string ); + append( &exec->pdaRun->rcodeCollect, IN_INPUT_PULL_BKT ); + appendWord( &exec->pdaRun->rcodeCollect, (Word) string ); + appendWord( &exec->pdaRun->rcodeCollect, (Word) exec->fsmRun ); + exec->rcodeUnitLen += SIZEOF_CODE + 2 *SIZEOF_WORD; + append( &exec->pdaRun->rcodeCollect, exec->rcodeUnitLen ); + + treeDownref( prg, sp, (Tree*)accumStream ); + treeDownref( prg, sp, len ); + break; + } + case IN_INPUT_PULL_BKT: { + Word f; + Tree *string; + read_tree( string ); + read_word( f ); + FsmRun *fsmRun = (FsmRun*)f; + + Tree *accumStream = vm_pop(); + + debug( REALM_BYTECODE, "IN_INPUT_PULL_BKT\n" ); + + undoPull( prg, fsmRun, ((Input*)accumStream)->in, string ); + treeDownref( prg, sp, accumStream ); + treeDownref( prg, sp, string ); + break; + } + case IN_INPUT_PUSH_WV: { + debug( REALM_BYTECODE, "IN_INPUT_PUSH_WV\n" ); + + Input *input = (Input*)vm_pop(); + Tree *tree = vm_pop(); + long len = streamPush( prg, sp, 0, input->in, tree, false ); + vm_push( 0 ); + + /* Single unit. 
*/ + append( &exec->pdaRun->rcodeCollect, IN_INPUT_PUSH_BKT ); + appendWord( &exec->pdaRun->rcodeCollect, len ); + exec->rcodeUnitLen += SIZEOF_CODE + SIZEOF_WORD; + append( &exec->pdaRun->rcodeCollect, exec->rcodeUnitLen ); + + treeDownref( prg, sp, (Tree*)input ); + treeDownref( prg, sp, tree ); + break; + } + case IN_INPUT_PUSH_IGNORE_WV: { + debug( REALM_BYTECODE, "IN_INPUT_PUSH_IGNORE_WV\n" ); + + Input *input = (Input*)vm_pop(); + Tree *tree = vm_pop(); + long len = streamPush( prg, sp, 0, input->in, tree, true ); + vm_push( 0 ); + + /* Single unit. */ + append( &exec->pdaRun->rcodeCollect, IN_INPUT_PUSH_BKT ); + appendWord( &exec->pdaRun->rcodeCollect, len ); + exec->rcodeUnitLen += SIZEOF_CODE + SIZEOF_WORD; + append( &exec->pdaRun->rcodeCollect, exec->rcodeUnitLen ); + + treeDownref( prg, sp, (Tree*)input ); + treeDownref( prg, sp, tree ); + break; + } + case IN_INPUT_PUSH_BKT: { + Word len; + read_word( len ); + + Input *input = (Input*)vm_pop(); + + debug( REALM_BYTECODE, "IN_INPUT_PUSH_BKT\n" ); + + undoStreamPush( prg, sp, 0, input->in, len ); + treeDownref( prg, sp, (Tree*)input ); + break; + } + case IN_CONSTRUCT: { + Half patternId; + read_half( patternId ); + + debug( REALM_BYTECODE, "IN_CONSTRUCT\n" ); + + int rootNode = prg->rtd->patReplInfo[patternId].offset; + + /* Note that bindIds are indexed at one. Add one spot for them. 
*/ + int numBindings = prg->rtd->patReplInfo[patternId].numBindings; + Tree *bindings[1+numBindings]; + + int b; + for ( b = 1; b <= numBindings; b++ ) { + bindings[b] = vm_pop(); + assert( bindings[b] != 0 ); + } + + Tree *replTree = 0; + PatReplNode *nodes = prg->rtd->patReplNodes; + LangElInfo *lelInfo = prg->rtd->lelInfo; + long genericId = lelInfo[nodes[rootNode].id].genericId; + if ( genericId > 0 ) { + replTree = createGeneric( prg, genericId ); + treeUpref( replTree ); + } + else { + replTree = constructReplacementTree( 0, bindings, + prg, rootNode ); + } + + vm_push( replTree ); + break; + } + case IN_CONSTRUCT_INPUT: { + debug( REALM_BYTECODE, "IN_CONSTRUCT_INPUT\n" ); + + Tree *input = constructInput( prg ); + treeUpref( input ); + vm_push( input ); + break; + } + case IN_GET_INPUT: { + debug( REALM_BYTECODE, "IN_GET_INPUT\n" ); + + Parser *parser = (Parser*)vm_pop(); + treeUpref( (Tree*)parser->input ); + vm_push( (Tree*)parser->input ); + treeDownref( prg, sp, (Tree*)parser ); + break; + } + case IN_SET_INPUT: { + debug( REALM_BYTECODE, "IN_SET_INPUT\n" ); + + Parser *parser = (Parser*)vm_pop(); + Input *accumStream = (Input*)vm_pop(); + parser->input = accumStream; + treeUpref( (Tree*)accumStream ); + treeDownref( prg, sp, (Tree*)parser ); + treeDownref( prg, sp, (Tree*)accumStream ); + break; + } + case IN_CONSTRUCT_TERM: { + Half tokenId; + read_half( tokenId ); + + debug( REALM_BYTECODE, "IN_CONSTRUCT_TERM\n" ); + + /* Pop the string we are constructing the token from. 
*/ + Str *str = (Str*)vm_pop(); + Tree *res = constructTerm( prg, tokenId, str->value ); + treeUpref( res ); + vm_push( res ); + break; + } + case IN_MAKE_TOKEN: { + uchar nargs; + read_byte( nargs ); + + debug( REALM_BYTECODE, "IN_MAKE_TOKEN\n" ); + + Tree *result = constructToken( prg, sp, nargs ); + long i; + for ( i = 0; i < nargs; i++ ) { + Tree *arg = vm_pop(); + treeDownref( prg, sp, arg ); + } + vm_push( result ); + break; + } + case IN_MAKE_TREE: { + uchar nargs; + read_byte( nargs ); + + debug( REALM_BYTECODE, "IN_MAKE_TREE\n" ); + + Tree *result = makeTree( prg, sp, nargs ); + long i; + for ( i = 0; i < nargs; i++ ) { + Tree *arg = vm_pop(); + treeDownref( prg, sp, arg ); + } + vm_push( result ); + break; + } + case IN_TREE_NEW: { + debug( REALM_BYTECODE, "IN_TREE_NEW \n" ); + + Tree *tree = vm_pop(); + Tree *res = constructPointer( prg, tree ); + treeUpref( res ); + vm_push( res ); + break; + } + case IN_PTR_DEREF_R: { + debug( REALM_BYTECODE, "IN_PTR_DEREF_R\n" ); + + Pointer *ptr = (Pointer*)vm_pop(); + treeDownref( prg, sp, (Tree*)ptr ); + + Tree *dval = getPtrVal( ptr ); + treeUpref( dval ); + vm_push( dval ); + break; + } + case IN_PTR_DEREF_WC: { + debug( REALM_BYTECODE, "IN_PTR_DEREF_WC\n" ); + + Pointer *ptr = (Pointer*)vm_pop(); + treeDownref( prg, sp, (Tree*)ptr ); + + Tree *dval = getPtrValSplit( prg, ptr ); + treeUpref( dval ); + vm_push( dval ); + break; + } + case IN_PTR_DEREF_WV: { + debug( REALM_BYTECODE, "IN_PTR_DEREF_WV\n" ); + + Pointer *ptr = (Pointer*)vm_pop(); + /* Don't downref the pointer since it is going into the reverse + * instruction. */ + + Tree *dval = getPtrValSplit( prg, ptr ); + treeUpref( dval ); + vm_push( dval ); + + /* This is an initial global load. Need to reverse execute it. 
*/ + append( &exec->pdaRun->rcodeCollect, IN_PTR_DEREF_BKT ); + appendWord( &exec->pdaRun->rcodeCollect, (Word) ptr ); + exec->rcodeUnitLen = SIZEOF_CODE + SIZEOF_WORD; + break; + } + case IN_PTR_DEREF_BKT: { + Word p; + read_word( p ); + + debug( REALM_BYTECODE, "IN_PTR_DEREF_BKT\n" ); + + Pointer *ptr = (Pointer*)p; + + Tree *dval = getPtrValSplit( prg, ptr ); + treeUpref( dval ); + vm_push( dval ); + + treeDownref( prg, sp, (Tree*)ptr ); + break; + } + case IN_REF_FROM_LOCAL: { + short int field; + read_half( field ); + + debug( REALM_BYTECODE, "IN_REF_FROM_LOCAL\n" ); + + /* First push the null next pointer, then the kid pointer. */ + Tree **ptr = vm_plocal(field); + vm_push( 0 ); + vm_push( (SW)ptr ); + break; + } + case IN_REF_FROM_REF: { + short int field; + read_half( field ); + + debug( REALM_BYTECODE, "IN_REF_FROM_REF\n" ); + + Ref *ref = (Ref*)vm_plocal(field); + vm_push( (SW)ref ); + vm_push( (SW)ref->kid ); + break; + } + case IN_REF_FROM_QUAL_REF: { + short int back; + short int field; + read_half( back ); + read_half( field ); + + debug( REALM_BYTECODE, "IN_REF_FROM_QUAL_REF\n" ); + + Ref *ref = (Ref*)(sp + back); + + Tree *obj = ref->kid->tree; + Kid *attr_kid = getFieldKid( obj, field ); + + vm_push( (SW)ref ); + vm_push( (SW)attr_kid ); + break; + } + case IN_TRITER_REF_FROM_CUR: { + short int field; + read_half( field ); + + debug( REALM_BYTECODE, "IN_TRITER_REF_FROM_CUR\n" ); + + /* Push the next pointer first, then the kid. */ + TreeIter *iter = (TreeIter*) vm_plocal(field); + Ref *ref = &iter->ref; + vm_push( (SW)ref ); + vm_push( (SW)iter->ref.kid ); + break; + } + case IN_UITER_REF_FROM_CUR: { + short int field; + read_half( field ); + + debug( REALM_BYTECODE, "IN_UITER_REF_FROM_CUR\n" ); + + /* Push the next pointer first, then the kid. 
*/ + UserIter *uiter = (UserIter*) vm_local(field); + vm_push( (SW)uiter->ref.next ); + vm_push( (SW)uiter->ref.kid ); + break; + } + case IN_GET_TOKEN_DATA_R: { + debug( REALM_BYTECODE, "IN_GET_TOKEN_DATA_R\n" ); + + Tree *tree = (Tree*) vm_pop(); + Head *data = stringCopy( prg, tree->tokdata ); + Tree *str = constructString( prg, data ); + treeUpref( str ); + vm_push( str ); + treeDownref( prg, sp, tree ); + break; + } + case IN_SET_TOKEN_DATA_WC: { + debug( REALM_BYTECODE, "IN_SET_TOKEN_DATA_WC\n" ); + + Tree *tree = vm_pop(); + Tree *val = vm_pop(); + Head *head = stringCopy( prg, ((Str*)val)->value ); + stringFree( prg, tree->tokdata ); + tree->tokdata = head; + + treeDownref( prg, sp, tree ); + treeDownref( prg, sp, val ); + break; + } + case IN_SET_TOKEN_DATA_WV: { + debug( REALM_BYTECODE, "IN_SET_TOKEN_DATA_WV\n" ); + + Tree *tree = vm_pop(); + Tree *val = vm_pop(); + + Head *oldval = tree->tokdata; + Head *head = stringCopy( prg, ((Str*)val)->value ); + tree->tokdata = head; + + /* Set up reverse code. Needs no args. 
*/ + append( &exec->pdaRun->rcodeCollect, IN_SET_TOKEN_DATA_BKT ); + appendWord( &exec->pdaRun->rcodeCollect, (Word)oldval ); + exec->rcodeUnitLen += SIZEOF_CODE + SIZEOF_WORD; + append( &exec->pdaRun->rcodeCollect, exec->rcodeUnitLen ); + + treeDownref( prg, sp, tree ); + treeDownref( prg, sp, val ); + break; + } + case IN_SET_TOKEN_DATA_BKT: { + debug( REALM_BYTECODE, "IN_SET_TOKEN_DATA_BKT \n" ); + + Word oldval; + read_word( oldval ); + + Tree *tree = vm_pop(); + Head *head = (Head*)oldval; + stringFree( prg, tree->tokdata ); + tree->tokdata = head; + treeDownref( prg, sp, tree ); + break; + } + case IN_GET_TOKEN_POS_R: { + debug( REALM_BYTECODE, "IN_GET_TOKEN_POS_R\n" ); + + Tree *tree = (Tree*) vm_pop(); + Tree *integer = 0; + if ( tree->tokdata->location ) { + integer = constructInteger( prg, tree->tokdata->location->byte ); + treeUpref( integer ); + } + vm_push( integer ); + treeDownref( prg, sp, tree ); + break; + } + case IN_GET_TOKEN_LINE_R: { + debug( REALM_BYTECODE, "IN_GET_TOKEN_LINE_R\n" ); + + Tree *tree = (Tree*) vm_pop(); + Tree *integer = 0; + if ( tree->tokdata->location ) { + integer = constructInteger( prg, tree->tokdata->location->line ); + treeUpref( integer ); + } + vm_push( integer ); + treeDownref( prg, sp, tree ); + break; + } + case IN_GET_MATCH_LENGTH_R: { + debug( REALM_BYTECODE, "IN_GET_MATCH_LENGTH_R\n" ); + + Tree *integer = constructInteger( prg, stringLength(exec->pdaRun->tokdata) ); + treeUpref( integer ); + vm_push( integer ); + break; + } + case IN_GET_MATCH_TEXT_R: { + debug( REALM_BYTECODE, "IN_GET_MATCH_TEXT_R\n" ); + + Head *s = stringCopy( prg, exec->pdaRun->tokdata ); + Tree *tree = constructString( prg, s ); + treeUpref( tree ); + vm_push( tree ); + break; + } + case IN_LIST_LENGTH: { + debug( REALM_BYTECODE, "IN_LIST_LENGTH\n" ); + + List *list = (List*) vm_pop(); + long len = listLength( list ); + Tree *res = constructInteger( prg, len ); + treeDownref( prg, sp, (Tree*)list ); + treeUpref( res ); + vm_push( res ); + 
break; + } + case IN_LIST_APPEND_WV: { + debug( REALM_BYTECODE, "IN_LIST_APPEND_WV\n" ); + + Tree *obj = vm_pop(); + Tree *val = vm_pop(); + + treeDownref( prg, sp, obj ); + + listAppend2( prg, (List*)obj, val ); + treeUpref( prg->trueVal ); + vm_push( prg->trueVal ); + + /* Set up reverse code. Needs no args. */ + append( &exec->pdaRun->rcodeCollect, IN_LIST_APPEND_BKT ); + exec->rcodeUnitLen += SIZEOF_CODE; + append( &exec->pdaRun->rcodeCollect, exec->rcodeUnitLen ); + /* FLUSH */ + break; + } + case IN_LIST_APPEND_WC: { + debug( REALM_BYTECODE, "IN_LIST_APPEND_WC\n" ); + + Tree *obj = vm_pop(); + Tree *val = vm_pop(); + + treeDownref( prg, sp, obj ); + + listAppend2( prg, (List*)obj, val ); + treeUpref( prg->trueVal ); + vm_push( prg->trueVal ); + break; + } + case IN_LIST_APPEND_BKT: { + debug( REALM_BYTECODE, "IN_LIST_APPEND_BKT\n" ); + + Tree *obj = vm_pop(); + treeDownref( prg, sp, obj ); + + Tree *tree = listRemoveEnd( prg, (List*)obj ); + treeDownref( prg, sp, tree ); + break; + } + case IN_LIST_REMOVE_END_WC: { + debug( REALM_BYTECODE, "IN_LIST_REMOVE_END_WC\n" ); + + Tree *obj = vm_pop(); + treeDownref( prg, sp, obj ); + + Tree *end = listRemoveEnd( prg, (List*)obj ); + vm_push( end ); + break; + } + case IN_LIST_REMOVE_END_WV: { + debug( REALM_BYTECODE, "IN_LIST_REMOVE_END_WV\n" ); + + Tree *obj = vm_pop(); + treeDownref( prg, sp, obj ); + + Tree *end = listRemoveEnd( prg, (List*)obj ); + vm_push( end ); + + /* Set up reverse. The result comes off the list downrefed. + * Need it up referenced for the reverse code too. 
*/ + treeUpref( end ); + append( &exec->pdaRun->rcodeCollect, IN_LIST_REMOVE_END_BKT ); + appendWord( &exec->pdaRun->rcodeCollect, (Word)end ); + exec->rcodeUnitLen += SIZEOF_CODE + SIZEOF_WORD; + append( &exec->pdaRun->rcodeCollect, exec->rcodeUnitLen ); + /* FLUSH */ + break; + } + case IN_LIST_REMOVE_END_BKT: { + debug( REALM_BYTECODE, "IN_LIST_REMOVE_END_BKT\n" ); + + Tree *val; + read_tree( val ); + + Tree *obj = vm_pop(); + treeDownref( prg, sp, obj ); + + listAppend2( prg, (List*)obj, val ); + break; + } + case IN_GET_LIST_MEM_R: { + short field; + read_half( field ); + + debug( REALM_BYTECODE, "IN_GET_LIST_MEM_R\n" ); + + Tree *obj = vm_pop(); + treeDownref( prg, sp, obj ); + + Tree *val = getListMem( (List*)obj, field ); + treeUpref( val ); + vm_push( val ); + break; + } + case IN_GET_LIST_MEM_WC: { + short field; + read_half( field ); + + debug( REALM_BYTECODE, "IN_GET_LIST_MEM_WC\n" ); + + Tree *obj = vm_pop(); + treeDownref( prg, sp, obj ); + + Tree *val = getListMemSplit( prg, (List*)obj, field ); + treeUpref( val ); + vm_push( val ); + break; + } + case IN_GET_LIST_MEM_WV: { + short field; + read_half( field ); + + debug( REALM_BYTECODE, "IN_GET_LIST_MEM_WV\n" ); + + Tree *obj = vm_pop(); + treeDownref( prg, sp, obj ); + + Tree *val = getListMemSplit( prg, (List*)obj, field ); + treeUpref( val ); + vm_push( val ); + + /* Set up the reverse instruction. 
*/ + append( &exec->pdaRun->rcodeCollect, IN_GET_LIST_MEM_BKT ); + appendHalf( &exec->pdaRun->rcodeCollect, field ); + exec->rcodeUnitLen += SIZEOF_CODE + SIZEOF_HALF; + break; + } + case IN_GET_LIST_MEM_BKT: { + short field; + read_half( field ); + + debug( REALM_BYTECODE, "IN_GET_LIST_MEM_BKT\n" ); + + Tree *obj = vm_pop(); + treeDownref( prg, sp, obj ); + + Tree *res = getListMemSplit( prg, (List*)obj, field ); + treeUpref( res ); + vm_push( res ); + break; + } + case IN_SET_LIST_MEM_WC: { + Half field; + read_half( field ); + + debug( REALM_BYTECODE, "IN_SET_LIST_MEM_WC\n" ); + + Tree *obj = vm_pop(); + treeDownref( prg, sp, obj ); + + Tree *val = vm_pop(); + Tree *existing = setListMem( (List*)obj, field, val ); + treeDownref( prg, sp, existing ); + break; + } + case IN_SET_LIST_MEM_WV: { + Half field; + read_half( field ); + + debug( REALM_BYTECODE, "IN_SET_LIST_MEM_WV\n" ); + + Tree *obj = vm_pop(); + treeDownref( prg, sp, obj ); + + Tree *val = vm_pop(); + Tree *existing = setListMem( (List*)obj, field, val ); + + /* Set up the reverse instruction. */ + append( &exec->pdaRun->rcodeCollect, IN_SET_LIST_MEM_BKT ); + appendHalf( &exec->pdaRun->rcodeCollect, field ); + appendWord( &exec->pdaRun->rcodeCollect, (Word)existing ); + exec->rcodeUnitLen += SIZEOF_CODE + SIZEOF_HALF + SIZEOF_WORD; + append( &exec->pdaRun->rcodeCollect, exec->rcodeUnitLen ); + /* FLUSH */ + break; + } + case IN_SET_LIST_MEM_BKT: { + Half field; + Tree *val; + read_half( field ); + read_tree( val ); + + debug( REALM_BYTECODE, "IN_SET_LIST_MEM_BKT\n" ); + + Tree *obj = vm_pop(); + treeDownref( prg, sp, obj ); + + Tree *undid = setListMem( (List*)obj, field, val ); + treeDownref( prg, sp, undid ); + break; + } + case IN_MAP_INSERT_WV: { + debug( REALM_BYTECODE, "IN_MAP_INSERT_WV\n" ); + + Tree *obj = vm_pop(); + Tree *val = vm_pop(); + Tree *key = vm_pop(); + + treeDownref( prg, sp, obj ); + + int inserted = mapInsert( prg, (Map*)obj, key, val ); + Tree *result = inserted ? 
prg->trueVal : prg->falseVal; + treeUpref( result ); + vm_push( result ); + + /* Set up the reverse instruction. If the insert fails still need + * to pop the loaded map object. Just use the reverse instruction + * since it's nice to see it in the logs. */ + + /* Need to upref key for storage in reverse code. */ + treeUpref( key ); + append( &exec->pdaRun->rcodeCollect, IN_MAP_INSERT_BKT ); + append( &exec->pdaRun->rcodeCollect, inserted ); + appendWord( &exec->pdaRun->rcodeCollect, (Word)key ); + exec->rcodeUnitLen += SIZEOF_CODE + SIZEOF_CODE + SIZEOF_WORD; + append( &exec->pdaRun->rcodeCollect, exec->rcodeUnitLen ); + + if ( ! inserted ) { + treeDownref( prg, sp, key ); + treeDownref( prg, sp, val ); + } + break; + } + case IN_MAP_INSERT_WC: { + debug( REALM_BYTECODE, "IN_MAP_INSERT_WC\n" ); + + Tree *obj = vm_pop(); + Tree *val = vm_pop(); + Tree *key = vm_pop(); + + treeDownref( prg, sp, obj ); + + int inserted = mapInsert( prg, (Map*)obj, key, val ); + Tree *result = inserted ? prg->trueVal : prg->falseVal; + treeUpref( result ); + vm_push( result ); + + if ( ! inserted ) { + treeDownref( prg, sp, key ); + treeDownref( prg, sp, val ); + } + break; + } + case IN_MAP_INSERT_BKT: { + uchar inserted; + Tree *key; + read_byte( inserted ); + read_tree( key ); + + debug( REALM_BYTECODE, "IN_MAP_INSERT_BKT\n" ); + + Tree *obj = vm_pop(); + if ( inserted ) { + Tree *val = mapUninsert( prg, (Map*)obj, key ); + treeDownref( prg, sp, key ); + treeDownref( prg, sp, val ); + } + + treeDownref( prg, sp, obj ); + treeDownref( prg, sp, key ); + break; + } + case IN_MAP_STORE_WC: { + debug( REALM_BYTECODE, "IN_MAP_STORE_WC\n" ); + + Tree *obj = vm_pop(); + Tree *element = vm_pop(); + Tree *key = vm_pop(); + + Tree *existing = mapStore( prg, (Map*)obj, key, element ); + Tree *result = existing == 0 ? 
prg->trueVal : prg->falseVal; + treeUpref( result ); + vm_push( result ); + + treeDownref( prg, sp, obj ); + if ( existing != 0 ) { + treeDownref( prg, sp, key ); + treeDownref( prg, sp, existing ); + } + break; + } + case IN_MAP_STORE_WV: { + debug( REALM_BYTECODE, "IN_MAP_STORE_WV\n" ); + + Tree *obj = vm_pop(); + Tree *element = vm_pop(); + Tree *key = vm_pop(); + + Tree *existing = mapStore( prg, (Map*)obj, key, element ); + Tree *result = existing == 0 ? prg->trueVal : prg->falseVal; + treeUpref( result ); + vm_push( result ); + + /* Set up the reverse instruction. */ + treeUpref( key ); + treeUpref( existing ); + append( &exec->pdaRun->rcodeCollect, IN_MAP_STORE_BKT ); + appendWord( &exec->pdaRun->rcodeCollect, (Word)key ); + appendWord( &exec->pdaRun->rcodeCollect, (Word)existing ); + exec->rcodeUnitLen += SIZEOF_CODE + SIZEOF_WORD + SIZEOF_WORD; + append( &exec->pdaRun->rcodeCollect, exec->rcodeUnitLen ); + /* FLUSH */ + + treeDownref( prg, sp, obj ); + if ( existing != 0 ) { + treeDownref( prg, sp, key ); + treeDownref( prg, sp, existing ); + } + break; + } + case IN_MAP_STORE_BKT: { + Tree *key, *val; + read_tree( key ); + read_tree( val ); + + debug( REALM_BYTECODE, "IN_MAP_STORE_BKT\n" ); + + Tree *obj = vm_pop(); + Tree *stored = mapUnstore( prg, (Map*)obj, key, val ); + + treeDownref( prg, sp, stored ); + if ( val == 0 ) + treeDownref( prg, sp, key ); + + treeDownref( prg, sp, obj ); + treeDownref( prg, sp, key ); + break; + } + case IN_MAP_REMOVE_WC: { + debug( REALM_BYTECODE, "IN_MAP_REMOVE_WC\n" ); + + Tree *obj = vm_pop(); + Tree *key = vm_pop(); + TreePair pair = mapRemove( prg, (Map*)obj, key ); + + vm_push( pair.val ); + + treeDownref( prg, sp, obj ); + treeDownref( prg, sp, key ); + treeDownref( prg, sp, pair.key ); + break; + } + case IN_MAP_REMOVE_WV: { + debug( REALM_BYTECODE, "IN_MAP_REMOVE_WV\n" ); + + Tree *obj = vm_pop(); + Tree *key = vm_pop(); + TreePair pair = mapRemove( prg, (Map*)obj, key ); + + treeUpref( pair.val ); + vm_push( 
pair.val ); + + /* Reverse instruction. */ + append( &exec->pdaRun->rcodeCollect, IN_MAP_REMOVE_BKT ); + appendWord( &exec->pdaRun->rcodeCollect, (Word)pair.key ); + appendWord( &exec->pdaRun->rcodeCollect, (Word)pair.val ); + exec->rcodeUnitLen += SIZEOF_CODE + SIZEOF_WORD + SIZEOF_WORD; + append( &exec->pdaRun->rcodeCollect, exec->rcodeUnitLen ); + + treeDownref( prg, sp, obj ); + treeDownref( prg, sp, key ); + break; + } + case IN_MAP_REMOVE_BKT: { + Tree *key, *val; + read_tree( key ); + read_tree( val ); + + debug( REALM_BYTECODE, "IN_MAP_REMOVE_BKT\n" ); + + /* Either both or neither. */ + assert( ( key == 0 ) ^ ( val != 0 ) ); + + Tree *obj = vm_pop(); + if ( key != 0 ) + mapUnremove( prg, (Map*)obj, key, val ); + + treeDownref( prg, sp, obj ); + break; + } + case IN_MAP_LENGTH: { + debug( REALM_BYTECODE, "IN_MAP_LENGTH\n" ); + + Tree *obj = vm_pop(); + long len = mapLength( (Map*)obj ); + Tree *res = constructInteger( prg, len ); + treeUpref( res ); + vm_push( res ); + + treeDownref( prg, sp, obj ); + break; + } + case IN_MAP_FIND: { + debug( REALM_BYTECODE, "IN_MAP_FIND\n" ); + + Tree *obj = vm_pop(); + Tree *key = vm_pop(); + Tree *result = mapFind( prg, (Map*)obj, key ); + treeUpref( result ); + vm_push( result ); + + treeDownref( prg, sp, obj ); + treeDownref( prg, sp, key ); + break; + } + case IN_INIT_LOCALS: { + Half size; + read_half( size ); + + debug( REALM_BYTECODE, "IN_INIT_LOCALS\n" ); + + exec->framePtr = vm_ptop(); + vm_pushn( size ); + memset( vm_ptop(), 0, sizeof(Word) * size ); + break; + } + case IN_CALL_WV: { + Half funcId; + read_half( funcId ); + + FunctionInfo *fi = &prg->rtd->functionInfo[funcId]; + + debug( REALM_BYTECODE, "IN_CALL_WV %ld\n", fi->name ); + + vm_push( 0 ); /* Return value. 
*/ + vm_push( (SW)instr ); + vm_push( (SW)exec->framePtr ); + vm_push( (SW)exec->frameId ); + + instr = prg->rtd->frameInfo[fi->frameId].codeWV; + exec->framePtr = vm_ptop(); + exec->frameId = fi->frameId; + break; + } + case IN_CALL_WC: { + Half funcId; + read_half( funcId ); + + FunctionInfo *fi = &prg->rtd->functionInfo[funcId]; + + debug( REALM_BYTECODE, "IN_CALL_WC %ld\n", fi->name ); + + vm_push( 0 ); /* Return value. */ + vm_push( (SW)instr ); + vm_push( (SW)exec->framePtr ); + vm_push( (SW)exec->frameId ); + + instr = prg->rtd->frameInfo[fi->frameId].codeWC; + exec->framePtr = vm_ptop(); + exec->frameId = fi->frameId; + break; + } + case IN_YIELD: { + debug( REALM_BYTECODE, "IN_YIELD\n" ); + + Kid *kid = (Kid*)vm_pop(); + Ref *next = (Ref*)vm_pop(); + UserIter *uiter = (UserIter*) vm_plocal_iframe( IFR_AA ); + + if ( kid == 0 || kid->tree == 0 || + kid->tree->id == uiter->searchId || + uiter->searchId == prg->rtd->anyId ) + { + /* Store the yeilded value. */ + uiter->ref.kid = kid; + uiter->ref.next = next; + uiter->stackSize = uiter->stackRoot - vm_ptop(); + uiter->resume = instr; + uiter->frame = exec->framePtr; + + /* Restore the instruction and frame pointer. */ + instr = (Code*) vm_local_iframe(IFR_RIN); + exec->framePtr = (Tree**) vm_local_iframe(IFR_RFR); + exec->iframePtr = (Tree**) vm_local_iframe(IFR_RIF); + + /* Return the yield result on the top of the stack. */ + Tree *result = uiter->ref.kid != 0 ? prg->trueVal : prg->falseVal; + treeUpref( result ); + vm_push( result ); + } + break; + } + case IN_UITER_CREATE_WV: { + short field; + Half funcId, searchId; + read_half( field ); + read_half( funcId ); + read_half( searchId ); + + debug( REALM_BYTECODE, "IN_UITER_CREATE_WV\n" ); + + FunctionInfo *fi = prg->rtd->functionInfo + funcId; + UserIter *uiter = uiterCreate( prg, &sp, fi, searchId ); + vm_local(field) = (SW) uiter; + + /* This is a setup similar to as a call, only the frame structure + * is slightly different for user iterators. 
We aren't going to do + * the call. We don't need to set up the return ip because the + * uiter advance will set it. The frame we need to do because it + * is set once for the lifetime of the iterator. */ + vm_push( 0 ); /* Return instruction pointer, */ + vm_push( (SW)exec->iframePtr ); /* Return iframe. */ + vm_push( (SW)exec->framePtr ); /* Return frame. */ + + uiterInit( prg, sp, uiter, fi, true ); + break; + } + case IN_UITER_CREATE_WC: { + short field; + Half funcId, searchId; + read_half( field ); + read_half( funcId ); + read_half( searchId ); + + debug( REALM_BYTECODE, "IN_UITER_CREATE_WC\n" ); + + FunctionInfo *fi = prg->rtd->functionInfo + funcId; + UserIter *uiter = uiterCreate( prg, &sp, fi, searchId ); + vm_local(field) = (SW) uiter; + + /* This is a setup similar to as a call, only the frame structure + * is slightly different for user iterators. We aren't going to do + * the call. We don't need to set up the return ip because the + * uiter advance will set it. The frame we need to do because it + * is set once for the lifetime of the iterator. */ + vm_push( 0 ); /* Return instruction pointer, */ + vm_push( (SW)exec->iframePtr ); /* Return iframe. */ + vm_push( (SW)exec->framePtr ); /* Return frame. 
*/ + + uiterInit( prg, sp, uiter, fi, false ); + break; + } + case IN_UITER_DESTROY: { + short field; + read_half( field ); + + debug( REALM_BYTECODE, "IN_UITER_DESTROY\n" ); + + UserIter *uiter = (UserIter*) vm_local(field); + userIterDestroy( &sp, uiter ); + break; + } + case IN_RET: { + debug( REALM_BYTECODE, "IN_RET\n" ); + + FrameInfo *fi = &prg->rtd->frameInfo[exec->frameId]; + downrefLocalTrees( prg, sp, exec->framePtr, fi->trees, fi->treesLen ); + vm_popn( fi->frameSize ); + + exec->frameId = (long) vm_pop(); + exec->framePtr = (Tree**) vm_pop(); + instr = (Code*) vm_pop(); + Tree *retVal = vm_pop(); + vm_popn( fi->argSize ); + vm_push( retVal ); + break; + } + case IN_TO_UPPER: { + debug( REALM_BYTECODE, "IN_TO_UPPER\n" ); + + Tree *in = vm_pop(); + Head *head = stringToUpper( in->tokdata ); + Tree *upper = constructString( prg, head ); + treeUpref( upper ); + vm_push( upper ); + treeDownref( prg, sp, in ); + break; + } + case IN_TO_LOWER: { + debug( REALM_BYTECODE, "IN_TO_LOWER\n" ); + + Tree *in = vm_pop(); + Head *head = stringToLower( in->tokdata ); + Tree *lower = constructString( prg, head ); + treeUpref( lower ); + vm_push( lower ); + treeDownref( prg, sp, in ); + break; + } + case IN_ERROR: { + debug( REALM_BYTECODE, "IN_ERROR\n" ); + + /* Pop the global. */ + Tree *global = vm_pop(); + treeDownref( prg, sp, global ); + treeUpref( prg->lastParseError ); + vm_push( prg->lastParseError ); + break; + } + case IN_OPEN_FILE: { + debug( REALM_BYTECODE, "IN_OPEN_FILE\n" ); + + Tree *mode = vm_pop(); + Tree *name = vm_pop(); + Tree *res = (Tree*)openFile( prg, name, mode ); + treeUpref( res ); + vm_push( res ); + treeDownref( prg, sp, name ); + treeDownref( prg, sp, mode ); + break; + } + case IN_GET_STDIN: { + debug( REALM_BYTECODE, "IN_GET_STDIN\n" ); + + /* Pop the root object. 
*/ + Tree *obj = vm_pop(); + treeDownref( prg, sp, obj ); + if ( prg->stdinVal == 0 ) { + prg->stdinVal = openStreamFd( prg, 0 ); + treeUpref( (Tree*)prg->stdinVal ); + } + + treeUpref( (Tree*)prg->stdinVal ); + vm_push( (Tree*)prg->stdinVal ); + break; + } + case IN_LOAD_ARGV: { + Half field; + read_half( field ); + debug( REALM_BYTECODE, "IN_LOAD_ARGV %lu\n", field ); + + /* Tree comes back upreffed. */ + Tree *tree = constructArgv( prg, prg->argc, prg->argv ); + setField( prg, prg->global, field, tree ); + break; + } + + case IN_EXIT: { + debug( REALM_BYTECODE, "IN_EXIT\n" ); + + Tree *global = vm_pop(); + Int *status = (Int*)vm_pop(); + prg->exitStatus = status->value; + prg->induceExit = 1; + treeDownref( prg, sp, global ); + treeDownref( prg, sp, (Tree*)status ); + + while ( true ) { + FrameInfo *fi = &prg->rtd->frameInfo[exec->frameId]; + downrefLocalTrees( prg, sp, exec->framePtr, fi->trees, fi->treesLen ); + vm_popn( fi->frameSize ); + + /* Call layout. */ + exec->frameId = (long) vm_pop(); + exec->framePtr = (Tree**) vm_pop(); + instr = (Code*) vm_pop(); + Tree *retVal = vm_pop(); + vm_popn( fi->argSize ); + + treeDownref( prg, sp, retVal ); + + /* We stop on the root, which doesn't have the full function + * stack layout. */ + if ( exec->frameId == prg->rtd->rootFrameId ) + break; + } + + goto out; + } + + case IN_STOP: { + debug( REALM_BYTECODE, "IN_STOP\n" ); + + FrameInfo *fi = &prg->rtd->frameInfo[exec->frameId]; + downrefLocalTrees( prg, sp, exec->framePtr, fi->trees, fi->treesLen ); + vm_popn( fi->frameSize ); + + fflush( stdout ); + goto out; + } + + /* Halt is a default instruction given by the compiler when it is + * asked to generate and instruction it doesn't have. It is deliberate + * and can represent "not implemented" or "compiler error" because a + * variable holding instructions was not properly initialize. 
*/ + case IN_HALT: { + fatal( "IN_HALT -- compiler did something wrong\n" ); + exit(1); + break; + } + default: { + fatal( "UNKNOWN INSTRUCTION: 0x%2x -- something is wrong\n", *(instr-1) ); + assert(false); + break; + } + } + goto again; + +out: + if ( ! prg->induceExit ) + assert( sp == root ); + return sp; +} + diff --git a/src/bytecode.h b/src/bytecode.h new file mode 100644 index 00000000..8e626ef1 --- /dev/null +++ b/src/bytecode.h @@ -0,0 +1,487 @@ +/* + * Copyright 2007-2012 Adrian Thurston <thurston@complang.org> + */ + +/* This file is part of Colm. + * + * Colm is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Colm is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with Colm; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _BYTECODE_H +#define _BYTECODE_H + +#include <pdarun.h> +#include <tree.h> + +#ifdef __cplusplus +extern "C" { +#endif + +#if SIZEOF_LONG != 4 && SIZEOF_LONG != 8 + #error "SIZEOF_LONG contained an unexpected value" +#endif + +typedef unsigned long ulong; +typedef unsigned char uchar; + +#define IN_LOAD_INT 0x02 +#define IN_LOAD_STR 0x03 +#define IN_LOAD_NIL 0x04 +#define IN_LOAD_TRUE 0x05 +#define IN_LOAD_FALSE 0x06 +#define IN_LOAD_TREE 0xf4 +#define IN_LOAD_WORD 0xf5 + +#define IN_ADD_INT 0x07 +#define IN_SUB_INT 0x08 +#define IN_MULT_INT 0x09 +#define IN_DIV_INT 0xd0 + +#define IN_TST_EQL 0x0a +#define IN_TST_NOT_EQL 0x0b +#define IN_TST_LESS 0x0c +#define IN_TST_GRTR 0x0d +#define IN_TST_LESS_EQL 0x0e +#define IN_TST_GRTR_EQL 0x0f +#define IN_TST_LOGICAL_AND 0x10 +#define IN_TST_LOGICAL_OR 0x11 + +#define IN_NOT 0x12 + +#define IN_JMP 0x13 +#define IN_JMP_FALSE 0x14 +#define IN_JMP_TRUE 0x15 + +#define IN_STR_ATOI 0x16 +#define IN_STR_LENGTH 0x17 +#define IN_CONCAT_STR 0x18 +#define IN_TREE_TRIM 0xfc + +#define IN_INIT_LOCALS 0x19 +#define IN_POP 0x1b +#define IN_POP_N_WORDS 0x1c +#define IN_DUP_TOP 0x1d +#define IN_DUP_TOP_OFF 0xbc +#define IN_REJECT 0x1e +#define IN_MATCH 0x1f +#define IN_CONSTRUCT 0x20 +#define IN_TREE_NEW 0x21 + +#define IN_GET_LOCAL_R 0x22 +#define IN_GET_LOCAL_WC 0x23 +#define IN_SET_LOCAL_WC 0x24 + +#define IN_GET_LOCAL_REF_R 0x25 +#define IN_GET_LOCAL_REF_WC 0x26 +#define IN_SET_LOCAL_REF_WC 0x27 + +#define IN_SAVE_RET 0x28 + +#define IN_GET_FIELD_R 0x29 +#define IN_GET_FIELD_WC 0x2a +#define IN_GET_FIELD_WV 0x2b +#define IN_GET_FIELD_BKT 0x2c + +#define IN_SET_FIELD_WV 0x2d +#define IN_SET_FIELD_WC 0x2e +#define IN_SET_FIELD_BKT 0x2f +#define IN_SET_FIELD_LEAVE_WC 0x30 + +#define IN_GET_MATCH_LENGTH_R 0x31 +#define 
IN_GET_MATCH_TEXT_R 0x32 + +#define IN_GET_TOKEN_DATA_R 0x33 +#define IN_SET_TOKEN_DATA_WC 0x34 +#define IN_SET_TOKEN_DATA_WV 0x35 +#define IN_SET_TOKEN_DATA_BKT 0x36 + +#define IN_GET_TOKEN_POS_R 0x37 +#define IN_GET_TOKEN_LINE_R 0xf6 + +#define IN_INIT_RHS_EL 0x38 +#define IN_INIT_LHS_EL 0xef +#define IN_INIT_CAPTURES 0x39 +#define IN_STORE_LHS_EL 0xf0 +#define IN_RESTORE_LHS 0x01 + +#define IN_TRITER_FROM_REF 0x3a +#define IN_TRITER_ADVANCE 0x3b +#define IN_TRITER_NEXT_CHILD 0x3c +#define IN_TRITER_GET_CUR_R 0x3d +#define IN_TRITER_GET_CUR_WC 0x3e +#define IN_TRITER_SET_CUR_WC 0x3f +#define IN_TRITER_DESTROY 0x40 +#define IN_TRITER_NEXT_REPEAT 0x41 +#define IN_TRITER_PREV_REPEAT 0x42 + +#define IN_REV_TRITER_FROM_REF 0x43 +#define IN_REV_TRITER_DESTROY 0x44 +#define IN_REV_TRITER_PREV_CHILD 0x45 + +#define IN_UITER_DESTROY 0x46 +#define IN_UITER_CREATE_WV 0x47 +#define IN_UITER_CREATE_WC 0x48 +#define IN_UITER_ADVANCE 0x49 +#define IN_UITER_GET_CUR_R 0x4a +#define IN_UITER_GET_CUR_WC 0x4b +#define IN_UITER_SET_CUR_WC 0x4c + +#define IN_TREE_SEARCH 0x4d + +#define IN_LOAD_GLOBAL_R 0x4e +#define IN_LOAD_GLOBAL_WV 0x4f +#define IN_LOAD_GLOBAL_WC 0x50 +#define IN_LOAD_GLOBAL_BKT 0x51 + +#define IN_PTR_DEREF_R 0x52 +#define IN_PTR_DEREF_WV 0x53 +#define IN_PTR_DEREF_WC 0x54 +#define IN_PTR_DEREF_BKT 0x55 + +#define IN_REF_FROM_LOCAL 0x56 +#define IN_REF_FROM_REF 0x57 +#define IN_REF_FROM_QUAL_REF 0x58 +#define IN_TRITER_REF_FROM_CUR 0x59 +#define IN_UITER_REF_FROM_CUR 0x5a + +#define IN_MAP_LENGTH 0x5b +#define IN_MAP_FIND 0x5c +#define IN_MAP_INSERT_WV 0x5d +#define IN_MAP_INSERT_WC 0x5e +#define IN_MAP_INSERT_BKT 0x5f +#define IN_MAP_STORE_WV 0x60 +#define IN_MAP_STORE_WC 0x61 +#define IN_MAP_STORE_BKT 0x62 +#define IN_MAP_REMOVE_WV 0x63 +#define IN_MAP_REMOVE_WC 0x64 +#define IN_MAP_REMOVE_BKT 0x65 + +#define IN_LIST_LENGTH 0x66 +#define IN_LIST_APPEND_WV 0x67 +#define IN_LIST_APPEND_WC 0x68 +#define IN_LIST_APPEND_BKT 0x69 +#define IN_LIST_REMOVE_END_WV 0x6a 
+#define IN_LIST_REMOVE_END_WC 0x6b +#define IN_LIST_REMOVE_END_BKT 0x6c + +#define IN_GET_LIST_MEM_R 0x6d +#define IN_GET_LIST_MEM_WC 0x6e +#define IN_GET_LIST_MEM_WV 0x6f +#define IN_GET_LIST_MEM_BKT 0x70 +#define IN_SET_LIST_MEM_WV 0x71 +#define IN_SET_LIST_MEM_WC 0x72 +#define IN_SET_LIST_MEM_BKT 0x73 + +#define IN_VECTOR_LENGTH 0x74 +#define IN_VECTOR_APPEND_WV 0x75 +#define IN_VECTOR_APPEND_WC 0x76 +#define IN_VECTOR_APPEND_BKT 0x77 +#define IN_VECTOR_INSERT_WV 0x78 +#define IN_VECTOR_INSERT_WC 0x79 +#define IN_VECTOR_INSERT_BKT 0x7a + +#define IN_PRINT 0x7b +#define IN_PRINT_XML_AC 0x7c +#define IN_PRINT_XML 0x7d +#define IN_PRINT_STREAM 0x7e + +#define IN_HALT 0x7f + +#define IN_CALL_WC 0x80 +#define IN_CALL_WV 0x81 +#define IN_RET 0x82 +#define IN_YIELD 0x83 +#define IN_STOP 0x84 + +#define IN_STR_UORD8 0x85 +#define IN_STR_SORD8 0x86 +#define IN_STR_UORD16 0x87 +#define IN_STR_SORD16 0x88 +#define IN_STR_UORD32 0x89 +#define IN_STR_SORD32 0x8a + +#define IN_INT_TO_STR 0x8b +#define IN_TREE_TO_STR 0x8c +#define IN_TREE_TO_STR_NOTRIM 0xfd + +#define IN_CREATE_TOKEN 0x8d +#define IN_MAKE_TOKEN 0x8e +#define IN_MAKE_TREE 0x8f +#define IN_CONSTRUCT_TERM 0x90 + +#define IN_INPUT_PULL_WV 0xf7 +#define IN_INPUT_PULL_BKT 0xf8 + +#define IN_PARSE_LOAD_START 0xf2 +#define IN_PARSE_SAVE_STEPS 0xf3 +#define IN_PARSE_FRAG_WC 0xc0 +#define IN_PARSE_FRAG_WC3 0xe1 + +#define IN_PARSE_FRAG_WV 0xc1 +#define IN_PARSE_FRAG_WV3 0xe4 + +#define IN_PARSE_FRAG_BKT 0xc2 +#define IN_PARSE_FRAG_BKT3 0xe6 + +#define IN_INPUT_APPEND_WC 0x91 +#define IN_INPUT_APPEND_WV 0x92 +#define IN_INPUT_APPEND_BKT 0x93 + +#define IN_PARSE_FINISH_WC 0x9d +#define IN_PARSE_FINISH_WC3 0xea + +#define IN_PARSE_FINISH_WV 0xbd +#define IN_PARSE_FINISH_WV3 0xeb + +#define IN_PARSE_FINISH_BKT 0xbf +#define IN_PARSE_FINISH_BKT3 0xec + +#define IN_PCR_CALL 0xe0 +#define IN_PCR_RET 0xe3 +#define IN_PCR_END_DECK 0xed + +#define IN_PARSE_EXTRACT_INPUT + +#define IN_OPEN_FILE 0x9e +#define IN_GET_STDIN 0x9f 
+#define IN_GET_STDOUT 0xa0 +#define IN_GET_STDERR 0xa1 +#define IN_LOAD_ARGV 0xa2 +#define IN_TO_UPPER 0xa3 +#define IN_TO_LOWER 0xa4 +#define IN_EXIT 0xa5 +#define IN_ERROR 0xa6 + +#define IN_LOAD_ACCUM_R 0xa8 +#define IN_LOAD_ACCUM_WV 0xa9 +#define IN_LOAD_ACCUM_WC 0xaa +#define IN_LOAD_ACCUM_BKT 0xab + +#define IN_LOAD_INPUT_R 0x98 +#define IN_LOAD_INPUT_WV 0x99 +#define IN_LOAD_INPUT_WC 0x9a +#define IN_LOAD_INPUT_BKT 0x9b + +#define IN_INPUT_PUSH_WV 0xf9 +#define IN_INPUT_PUSH_BKT 0xfa +#define IN_INPUT_PUSH_IGNORE_WV 0xfb + +#define IN_LOAD_CONTEXT_R 0xac +#define IN_LOAD_CONTEXT_WV 0xad +#define IN_LOAD_CONTEXT_WC 0xae +#define IN_LOAD_CONTEXT_BKT 0xaf + +#define IN_GET_ACCUM_CTX_R 0xb0 +#define IN_GET_ACCUM_CTX_WC 0xb1 +#define IN_GET_ACCUM_CTX_WV 0xb2 +#define IN_SET_ACCUM_CTX_WC 0xb3 +#define IN_SET_ACCUM_CTX_WV 0xb4 + +#define IN_LOAD_CTX_R 0xb5 +#define IN_LOAD_CTX_WC 0xb6 +#define IN_LOAD_CTX_WV 0xb7 +#define IN_LOAD_CTX_BKT 0xb8 + +#define IN_SPRINTF 0xcf + +#define IN_GET_RHS_VAL_R 0xd1 +#define IN_GET_RHS_VAL_WC 0xd2 +#define IN_GET_RHS_VAL_WV 0xd3 +#define IN_GET_RHS_VAL_BKT 0xd4 +#define IN_SET_RHS_VAL_WC 0xd5 +#define IN_SET_RHS_VAL_WV 0xd6 +#define IN_SET_RHS_VAL_BKT 0xd7 + +#define IN_CONSTRUCT_INPUT 0x9c +#define IN_SET_INPUT 0xa7 +#define IN_GET_INPUT 0xb9 + +/* Types */ +#define TYPE_NIL 0x01 +#define TYPE_TREE 0x02 +#define TYPE_REF 0x03 +#define TYPE_PTR 0x04 +#define TYPE_ITER 0x05 +#define TYPE_IGNORE_LIST 0x06 + +/* Types of Generics. */ +#define GEN_LIST 0x10 +#define GEN_MAP 0x11 +#define GEN_VECTOR 0x12 +#define GEN_PARSER 0x13 + +/* Virtual machine stack size, number of pointers. + * This will be mmapped. */ +#define VM_STACK_SIZE (SIZEOF_WORD*1024ll*1024ll) + +/* Known language element ids. */ +#define LEL_ID_PTR 1 +#define LEL_ID_BOOL 2 +#define LEL_ID_INT 3 +#define LEL_ID_STR 4 +#define LEL_ID_STREAM 5 +#define LEL_ID_INPUT 6 +#define LEL_ID_IGNORE 7 + +/* + * Flags + */ + +/* A tree that has been generated by a termDup. 
*/ +#define PF_TERM_DUP 0x0001 + +/* Has been processed by the commit function. All children have also been + * processed. */ +#define PF_COMMITTED 0x0002 + +/* Created by a token generation action, not made from the input. */ +#define PF_ARTIFICIAL 0x0004 + +/* Named node from a pattern or constructor. */ +#define PF_NAMED 0x0008 + +/* There is reverse code associated with this tree node. */ +#define PF_HAS_RCODE 0x0010 + +#define PF_RIGHT_IGNORE 0x0020 + +#define PF_LEFT_IL_ATTACHED 0x0400 +#define PF_RIGHT_IL_ATTACHED 0x0800 + +#define AF_LEFT_IGNORE 0x0100 +#define AF_RIGHT_IGNORE 0x0200 + +#define AF_SUPPRESS_LEFT 0x4000 +#define AF_SUPPRESS_RIGHT 0x8000 + +/* + * Call stack. + */ + +/* Number of spots in the frame, after the args. */ +#define FR_AA 4 + +/* Positions relative to the frame pointer. */ +#define FR_RV 3 /* return value */ +#define FR_RI 2 /* return instruction */ +#define FR_RFP 1 /* return frame pointer */ +#define FR_RFD 0 /* return frame id. */ + +/* + * Calling Convention: + * a1 + * a2 + * a3 + * ... + * return value FR_RV + * return instr FR_RI + * return frame ptr FR_RFP + * return frame id FR_RFD + */ + +/* + * User iterator call stack. + * Adds an iframe pointer, removes the return value. + */ + +/* Number of spots in the frame, after the args. */ +#define IFR_AA 3 + +/* Positions relative to the frame pointer. */ +#define IFR_RIN 2 /* return instruction */ +#define IFR_RIF 1 /* return iframe pointer */ +#define IFR_RFR 0 /* return frame pointer */ + +/* Exported to modules other than bytecode.c */ +#define vm_push(i) /*if ( sp == prg->se ) vm_grow( prg ); */ (*(--sp) = (i)) +#define vm_pop() (*sp++) +#define vm_top() (*sp) +#define vm_ptop() (sp) +#define vm_pop_ignore() (sp++) + +void vm_grow( struct ColmProgram * ); + +typedef Tree *SW; +typedef Tree **StackPtr; + + +/* Can't use sizeof() because we have used types that are bigger than the + * serial representation. 
*/ +#define SIZEOF_CODE 1 +#define SIZEOF_HALF 2 +#define SIZEOF_WORD sizeof(Word) + +typedef struct _Execution +{ + Parser *parser; + PdaRun *pdaRun; + FsmRun *fsmRun; + InputStream *inputStream; + Tree **framePtr; + Tree **iframePtr; + long frameId; + long rcodeUnitLen; +} Execution; + +long stringLength( Head *str ); +const char *stringData( Head *str ); +Head *stringAllocFull( struct ColmProgram *prg, const char *data, long length ); +Head *stringCopy( struct ColmProgram *prg, Head *head ); +void stringFree( struct ColmProgram *prg, Head *head ); +void stringShorten( Head *tokdata, long newlen ); +Head *concatStr( Head *s1, Head *s2 ); +Word strAtoi( Head *str ); +Word strUord16( Head *head ); +Word strUord8( Head *head ); +Word cmpString( Head *s1, Head *s2 ); +Head *stringToUpper( Head *s ); +Head *stringToLower( Head *s ); +Head *stringSprintf( struct ColmProgram *prg, Str *format, Int *integer ); + +Head *makeLiteral( struct ColmProgram *prg, long litoffset ); +Head *intToStr( struct ColmProgram *prg, Word i ); + +Tree *constructString( struct ColmProgram *prg, Head *s ); + +void initExecution( Execution *exec, Parser *parser, PdaRun *pdaRun, FsmRun *fsmRun, InputStream *inputStream, int frameId ); + +void mainExecution( struct ColmProgram *prg, Execution *exec, Code *code ); +void reductionExecution( Execution *exec, Tree **sp ); +void generationExecution( Execution *exec, Tree **sp ); +void reverseExecution( Execution *exec, Tree **sp, RtCodeVect *allRev ); + +Kid *allocAttrs( struct ColmProgram *prg, long length ); +void freeAttrs( struct ColmProgram *prg, Kid *attrs ); +void setAttr( Tree *tree, long pos, Tree *val ); +Kid *getAttrKid( Tree *tree, long pos ); + +Tree *splitTree( struct ColmProgram *prg, Tree *t ); +void rcodeDownrefAll( struct ColmProgram *prg, Tree **sp, RtCodeVect *cv ); +void commitFull( struct ColmProgram *prg, Tree **sp, PdaRun *pdaRun, long commitReduce ); +Tree *getParsedRoot( PdaRun *pdaRun, int stop ); +void splitRef( struct 
ColmProgram *prg, Tree ***sp, Ref *fromRef ); + +void allocGlobal( struct ColmProgram *prg ); +Tree **executeCode( struct ColmProgram *prg, Execution *exec, Tree **sp, Code *instr ); +void rcodeDownref( struct ColmProgram *prg, Tree **sp, Code *instr ); +Tree **stackAlloc(); +Code *popReverseCode( RtCodeVect *allRev ); +void sendBackBuffered( FsmRun *fsmRun, InputStream *inputStream ); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/closure.cc b/src/closure.cc new file mode 100644 index 00000000..37b0e259 --- /dev/null +++ b/src/closure.cc @@ -0,0 +1,458 @@ +/* + * Copyright 2005-2012 Adrian Thurston <thurston@complang.org> + */ + +/* This file is part of Colm. + * + * Colm is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Colm is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Colm; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "global.h" +#include "parsedata.h" + +#include "vector.h" +#include <assert.h> +#include <string.h> +#include <iostream> + +using std::endl; +using std::cerr; + +void Compiler::lr0BringInItem( PdaGraph *pdaGraph, PdaState *dest, PdaState *prodState, + PdaTrans *expandFrom, Definition *prod ) +{ + /* We use dot sets for finding unique states. In the future, should merge + * dots sets with the stateSet pointer (only need one of these). */ + assert( dest != prodState ); + dest->dotSet.insert( prodState->dotSet ); + + /* Get the epsilons, context, out priorities. 
*/ + dest->pendingCommits.insert( prodState->pendingCommits ); + //if ( prodState->pendingCommits.length() > 0 ) + // cerr << "THERE ARE PENDING COMMITS DRAWN IN" << endl; + + if ( prodState->transMap.length() > 0 ) { + assert( prodState->transMap.length() == 1 ); + PdaTrans *srcTrans = prodState->transMap[0].value; + + /* Look for the source in the destination. */ + TransMapEl *destTel = dest->transMap.find( srcTrans->lowKey ); + if ( destTel == 0 ) { + /* Make a new state and transition to it. */ + PdaState *newState = pdaGraph->addState(); + PdaTrans *newTrans = new PdaTrans(); + + /* Attach the new transition to the new state. */ + newTrans->lowKey = srcTrans->lowKey; + pdaGraph->attachTrans( dest, newState, newTrans ); + pdaGraph->addInTrans( newTrans, srcTrans ); + + /* The transitions we make during lr0 closure are all shifts. */ + assert( newTrans->isShift ); + assert( srcTrans->isShift ); + + /* The new state must have its state set setup. */ + newState->stateSet = new PdaStateSet; + newState->stateSet->insert( srcTrans->toState ); + + /* Insert the transition into the map. Be sure to set destTel, it + * is needed below. */ + dest->transMap.insert( srcTrans->lowKey, newTrans, &destTel ); + + /* If the item is a non-term, queue it for closure. */ + LangEl *langEl = langElIndex[srcTrans->lowKey]; + if ( langEl != 0 && langEl->type == LangEl::NonTerm ) { + pdaGraph->transClosureQueue.append( newTrans ); + //cerr << "put to trans closure queue" << endl; + } + } + else { + //cerr << "merging transitions" << endl; + destTel->value->toState->stateSet->insert( srcTrans->toState ); + pdaGraph->addInTrans( destTel->value, srcTrans ); + } + + /* If this is an expansion then we may need to bring in commits. 
*/ + if ( expandFrom != 0 && expandFrom->commits.length() > 0 ) { + //cerr << "SETTING COMMIT ON CLOSURE ROUND" << endl; + destTel->value->commits.insert( expandFrom->commits ); + + expandFrom->commits.empty(); + } + } + else { + /* ProdState does not have any transitions out. It is at the end of a + * production. */ + if ( expandFrom != 0 && expandFrom->commits.length() > 0 ) { + //cerr << "SETTING COMMIT IN PENDING LOOKAHEAD" << endl; + for ( LongSet::Iter len = expandFrom->commits; len.lte(); len++ ) + dest->pendingCommits.insert( ProdIdPair( prod->prodId, *len ) ); + + expandFrom->commits.empty(); + } + } +} + +void Compiler::lr0InvokeClosure( PdaGraph *pdaGraph, PdaState *state ) +{ + /* State should not already be closed. */ + assert( !state->inClosedMap ); + + /* This is used each time we invoke closure, it must be cleared. */ + pdaGraph->transClosureQueue.abandon(); + + /* Drag in the core items. */ + for ( PdaStateSet::Iter ssi = *state->stateSet; ssi.lte(); ssi++ ) + lr0BringInItem( pdaGraph, state, *ssi, 0, 0 ); + + /* Now bring in the derived items. */ + while ( pdaGraph->transClosureQueue.length() > 0 ) { + PdaTrans *toClose = pdaGraph->transClosureQueue.detachFirst(); + //cerr << "have a transition to derive" << endl; + + /* Get the langEl. */ + LangEl *langEl = langElIndex[toClose->lowKey]; + + /* Make graphs for all of the productions that the non + * terminal goes to that are not already in the state's dotSet. */ + for ( LelDefList::Iter prod = langEl->defList; prod.lte(); prod++ ) { + /* Bring in the start state of the production. */ + lr0BringInItem( pdaGraph, state, prod->fsm->startState, toClose, prod ); + } + } + + /* Try and insert into the closed dict. */ + DotSetMapEl *lastFound; + if ( pdaGraph->closedMap.insert( state, &lastFound ) ) { + /* Insertion into closed dict succeeded. There is no state with the + * same dot set. The state is now closed. 
It is guaranteed a spot in + * the closed dict and it will never go away (states never deleted + * during closure). */ + pdaGraph->stateClosedList.append( state ); + state->inClosedMap = true; + + /* Add all of the states in the out transitions to the closure queue. + * This will give us a depth first search of the graph. */ + for ( TransMap::Iter trans = state->transMap; trans.lte(); trans++ ) { + /* Get the state the transEl goes to. */ + PdaState *targ = trans->value->toState; + + /* If the state on this tranisition has not already been slated + * for closure, then add it to the queue. */ + if ( !targ->onClosureQueue && !targ->inClosedMap ) { + pdaGraph->stateClosureQueue.append( targ ); + targ->onClosureQueue = true; + } + } + } + else { + /* Insertion into closed dict failed. There is an existing state + * with the same dot set. Get the existing state. */ + pdaGraph->inTransMove( lastFound, state ); + for ( TransMap::Iter tel = state->transMap; tel.lte(); tel++ ) { + pdaGraph->stateList.detach( tel->value->toState ); + delete tel->value->toState; + delete tel->value; + } + pdaGraph->stateList.detach( state ); + delete state; + } +} + +/* Invoke cloure on the graph. We use a queue here to achieve a breadth + * first search of the tree we build. Note, there are back edges in this + * tree. They are the edges made when upon closure, a dot set exists + * already. */ +void Compiler::lr0CloseAllStates( PdaGraph *pdaGraph ) +{ + /* While there are items on the closure queue. */ + while ( pdaGraph->stateClosureQueue.length() > 0 ) { + /* Pop the first item off. */ + PdaState *state = pdaGraph->stateClosureQueue.detachFirst(); + state->onClosureQueue = false; + + /* Invoke closure upon the state. 
*/ + lr0InvokeClosure( pdaGraph, state ); + } +} + +void Compiler::transferCommits( PdaGraph *pdaGraph, PdaTrans *trans, + PdaState *state, long prodId ) +{ + ProdIdPairSet &pendingCommits = state->pendingCommits; + for ( ProdIdPairSet::Iter pi = pendingCommits; pi.lte(); pi++ ) { + if ( pi->onReduce == prodId ) + trans->commits.insert( pi->length ); + } +} + +void Compiler::lalr1AddFollow2( PdaGraph *pdaGraph, PdaTrans *trans, FollowToAdd &followKeys ) +{ + for ( ExpandToSet::Iter ets = trans->expandTo; ets.lte(); ets++ ) { + int prodId = ets->prodId; + PdaState *expandTo = ets->state; + + for ( FollowToAdd::Iter fkey = followKeys; fkey.lte(); fkey++ ) { + TransMapEl *transEl = expandTo->transMap.find( fkey->key ); + + if ( transEl != 0 ) { + /* Set up the follow transition. */ + PdaTrans *destTrans = transEl->value; + + transferCommits( pdaGraph, destTrans, expandTo, prodId ); + + pdaGraph->addInReduction( destTrans, prodId, fkey->value ); + } + else { + /* Set up the follow transition. */ + PdaTrans *followTrans = new PdaTrans; + followTrans->lowKey = fkey->key; + followTrans->isShift = false; + followTrans->reductions.insert( prodId, fkey->value ); + + transferCommits( pdaGraph, followTrans, expandTo, prodId ); + + pdaGraph->attachTrans( expandTo, actionDestState, followTrans ); + expandTo->transMap.insert( followTrans->lowKey, followTrans ); + pdaGraph->transClosureQueue.append( followTrans ); + } + } + } +} + +long PdaTrans::maxPrior() +{ + long prior = LONG_MIN; + if ( isShift && shiftPrior > prior ) + prior = shiftPrior; + for ( ReductionMap::Iter red = reductions; red.lte(); red++ ) { + if ( red->value > prior ) + prior = red->value; + } + return prior; +} + +void Compiler::lalr1AddFollow1( PdaGraph *pdaGraph, PdaState *state ) +{ + /* Finding non-terminals into the state. 
*/ + for ( PdaTransInList::Iter in = state->inRange; in.lte(); in++ ) { + long key = in->lowKey; + LangEl *langEl = langElIndex[key]; + if ( langEl != 0 && langEl->type == LangEl::NonTerm ) { + /* Finding the following transitions. */ + FollowToAdd followKeys; + for ( TransMap::Iter fout = state->transMap; fout.lte(); fout++ ) { + int fkey = fout->key; + LangEl *flel = langElIndex[fkey]; + if ( flel == 0 || flel->type == LangEl::Term ) { + long prior = fout->value->maxPrior(); + followKeys.insert( fkey, prior ); + } + } + + if ( followKeys.length() > 0 ) + lalr1AddFollow2( pdaGraph, in, followKeys ); + } + } +} + +void Compiler::lalr1AddFollow2( PdaGraph *pdaGraph, PdaTrans *trans, + long followKey, long prior ) +{ + for ( ExpandToSet::Iter ets = trans->expandTo; ets.lte(); ets++ ) { + int prodId = ets->prodId; + PdaState *expandTo = ets->state; + + TransMapEl *transEl = expandTo->transMap.find( followKey ); + if ( transEl != 0 ) { + /* Add in the reductions, or in the shift. */ + PdaTrans *destTrans = transEl->value; + + transferCommits( pdaGraph, destTrans, expandTo, prodId ); + + pdaGraph->addInReduction( destTrans, prodId, prior ); + } + else { + /* Set up the follow transition. */ + PdaTrans *followTrans = new PdaTrans; + followTrans->lowKey = followKey; + followTrans->isShift = false; + followTrans->reductions.insert( prodId, prior ); + + transferCommits( pdaGraph, followTrans, expandTo, prodId ); + + pdaGraph->attachTrans( expandTo, actionDestState, followTrans ); + expandTo->transMap.insert( followTrans->lowKey, followTrans ); + pdaGraph->transClosureQueue.append( followTrans ); + } + } +} + +void Compiler::lalr1AddFollow1( PdaGraph *pdaGraph, PdaTrans *trans ) +{ + PdaState *state = trans->fromState; + int fkey = trans->lowKey; + LangEl *flel = langElIndex[fkey]; + if ( flel == 0 || flel->type == LangEl::Term ) { + /* Finding non-terminals into the state. 
*/ + for ( PdaTransInList::Iter in = state->inRange; in.lte(); in++ ) { + long key = in->lowKey; + LangEl *langEl = langElIndex[key]; + if ( langEl != 0 && langEl->type == LangEl::NonTerm ) { + //cerr << "FOLLOW PRIOR TRANSFER 2: " << prior << endl; + long prior = trans->maxPrior(); + lalr1AddFollow2( pdaGraph, in, fkey, prior ); + } + } + } +} + +/* Add follow sets to an LR(0) graph to make it LALR(1). */ +void Compiler::lalr1AddFollowSets( PdaGraph *pdaGraph, LangElSet &parserEls ) +{ + /* Make the state that all reduction actions go to. Since a reduction pops + * states of the stack and sets the new target state, this state is + * actually never reached. Just here to link the trans to. */ + actionDestState = pdaGraph->addState(); + pdaGraph->setFinState( actionDestState ); + + for ( LangElSet::Iter pe = parserEls; pe.lte(); pe++ ) { + /* Get the entry into the graph and traverse over start. */ + PdaState *overStart = pdaGraph->followFsm( (*pe)->startState, (*pe)->rootDef->fsm ); + + /* Add _eof after the initial _start. */ + PdaTrans *eofTrans = pdaGraph->insertNewTrans( overStart, actionDestState, + (*pe)->eofLel->id, (*pe)->eofLel->id ); + eofTrans->isShift = true; + } + + /* This was used during lr0 table construction. */ + pdaGraph->transClosureQueue.abandon(); + + /* Need to pass over every state initially. */ + for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) + lalr1AddFollow1( pdaGraph, state ); + + /* While the closure queue has items, pop them off and add follow + * characters. */ + while ( pdaGraph->transClosureQueue.length() > 0 ) { + /* Pop the first item off and add Follow for it . */ + PdaTrans *trans = pdaGraph->transClosureQueue.detachFirst(); + lalr1AddFollow1( pdaGraph, trans ); + } +} + +void Compiler::linkExpansions( PdaGraph *pdaGraph ) +{ + pdaGraph->setStateNumbers(); + for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) { + /* Find transitions out on non terminals. 
*/ + for ( TransMap::Iter trans = state->transMap; trans.lte(); trans++ ) { + long key = trans->key; + LangEl *langEl = langElIndex[key]; + if ( langEl != 0 && langEl->type == LangEl::NonTerm ) { + /* For each production that the non terminal expand to ... */ + for ( LelDefList::Iter prod = langEl->defList; prod.lte(); prod++ ) { + /* Follow the production and add to the trans's expand to set. */ + PdaState *followRes = pdaGraph->followFsm( state, prod->fsm ); + + //LangEl *lel = langElIndex[key]; + //cerr << state->stateNum << ", "; + //if ( lel != 0 ) + // cerr << lel->data; + //else + // cerr << (char)key; + //cerr << " -> " << (*fto)->stateNum << " on " << + // prod->data << " (fss = " << fin.pos() << ")" << endl; + trans->value->expandTo.insert( ExpandToEl( followRes, prod->prodId ) ); + } + } + } + } +} + +/* Add terminal versions of all nonterminal transitions. */ +void Compiler::addDupTerms( PdaGraph *pdaGraph ) +{ + for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) { + PdaTransList newTranitions; + for ( TransMap::Iter trans = state->transMap; trans.lte(); trans++ ) { + LangEl *lel = langElIndex[trans->value->lowKey]; + if ( lel->type == LangEl::NonTerm ) { + PdaTrans *dupTrans = new PdaTrans; + dupTrans->lowKey = lel->termDup->id; + dupTrans->isShift = true; + + /* Save the target state in to state. In the next loop when we + * attach the transition we must clear this because the + * attaching code requires the transition to be unattached. */ + dupTrans->toState = trans->value->toState; + newTranitions.append( dupTrans ); + + /* Commit code used? */ + //transferCommits( pdaGraph, followTrans, expandTo, prodId ); + } + } + + for ( PdaTrans *dup = newTranitions.head; dup != 0; ) { + PdaTrans *next = dup->next; + PdaState *toState = dup->toState; + dup->toState = 0; + pdaGraph->attachTrans( state, toState, dup ); + state->transMap.insert( dup->lowKey, dup ); + dup = next; + } + } +} + +/* Generate a LALR(1) graph. 
*/ +void Compiler::lalr1GenerateParser( PdaGraph *pdaGraph, LangElSet &parserEls ) +{ + /* Make the intial graph. */ + pdaGraph->langElIndex = langElIndex; + + for ( Vector<LangEl*>::Iter r = parserEls; r.lte(); r++ ) { + /* Create the entry point. */ + PdaState *rs = pdaGraph->addState(); + pdaGraph->entryStateSet.insert( rs ); + + /* State set of just one state. */ + rs->stateSet = new PdaStateSet; + rs->stateSet->insert( (*r)->rootDef->fsm->startState ); + + /* Queue the start state for closure. */ + rs->onClosureQueue = true; + pdaGraph->stateClosureQueue.append( rs ); + + (*r)->startState = rs; + } + + /* Run the lr0 closure. */ + lr0CloseAllStates( pdaGraph ); + + /* Add terminal versions of all nonterminal transitions. */ + addDupTerms( pdaGraph ); + + /* Link production expansions to the place they expand to. */ + linkExpansions( pdaGraph ); + + /* Walk the graph adding follow sets to the LR(0) graph. */ + lalr1AddFollowSets( pdaGraph, parserEls ); + +// /* Set the commit on the final eof shift. */ +// PdaTrans *overStart = pdaGraph->startState->findTrans( rootEl->id ); +// PdaTrans *eofTrans = overStart->toState->findTrans( eofLangEl->id ); +// eofTrans->afterShiftCommits.insert( 2 ); +} diff --git a/src/codegen.cc b/src/codegen.cc new file mode 100644 index 00000000..4403cf8f --- /dev/null +++ b/src/codegen.cc @@ -0,0 +1,50 @@ +/* + * Copyright 2006-2012 Adrian Thurston <thurston@complang.org> + */ + +/* This file is part of Colm. + * + * Colm is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Colm is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with Colm; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "parsedata.h" +#include "fsmcodegen.h" +#include "redfsm.h" +#include "bstmap.h" +#include "fsmrun.h" +#include "debug.h" +#include <sstream> +#include <string> + + +void FsmCodeGen::writeMain() +{ + out << + "int main( int argc, const char **argv )\n" + "{\n" + " struct ColmProgram *prg;\n" + " int exitStatus;\n" + " colmInit( " << colmActiveRealm << " );\n" + " prg = colmNewProgram( &main_runtimeData, argc, argv );\n" + " colmRunProgram( prg );\n" + " exitStatus = colmDeleteProgram( prg );\n" + " return exitStatus;\n" + "}\n" + "\n"; + + out.flush(); +} + + diff --git a/src/codevect.c b/src/codevect.c new file mode 100644 index 00000000..f8997423 --- /dev/null +++ b/src/codevect.c @@ -0,0 +1,183 @@ +/* + * Copyright 2010-2012 Adrian Thurston <thurston@complang.org> + */ + +/* This file is part of Aapl. + * + * Aapl is free software; you can redistribute it and/or modify it under the + * terms of the GNU Lesser General Public License as published by the Free + * Software Foundation; either version 2.1 of the License, or (at your option) + * any later version. + * + * Aapl is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for + * more details. 
+ * + * You should have received a copy of the GNU Lesser General Public License + * along with Aapl; if not, write to the Free Software Foundation, Inc., 59 + * Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <rtvector.h> +#include <pdarun.h> + +#include <string.h> +#include <stdlib.h> +#include <assert.h> + + +void initRtCodeVect( RtCodeVect *vect ) +{ + vect->data = 0; + vect->tabLen = 0; + vect->allocLen = 0; +} + +static long newSizeUp( long existing, long needed ) +{ + return needed > existing ? (needed<<1) : existing; +} + +static long newSizeDown( long existing, long needed ) +{ + return needed < (existing>>2) ? (needed<<1) : existing; +} + +/* Up resize the data for len elements using Resize::upResize to tell us the + * new tabLen. Reads and writes allocLen. Does not read or write tabLen. */ +static void upResize( RtCodeVect *vect, long len ) +{ + /* Ask the resizer what the new tabLen will be. */ + long newLen = newSizeUp(vect->allocLen, len); + + /* Did the data grow? */ + if ( newLen > vect->allocLen ) { + vect->allocLen = newLen; + if ( vect->data != 0 ) { + /* Table exists already, resize it up. */ + vect->data = (Code*) realloc( vect->data, sizeof(Code) * newLen ); + //if ( vect->data == 0 ) + // throw std::bad_alloc(); + } + else { + /* Create the data. */ + vect->data = (Code*) malloc( sizeof(Code) * newLen ); + //if ( vect->data == 0 ) + // throw std::bad_alloc(); + } + } +} + +/* Down resize the data for len elements using Resize::downResize to determine + * the new tabLen. Reads and writes allocLen. Does not read or write tabLen. */ +static void downResize( RtCodeVect *vect, long len) +{ + /* Ask the resizer what the new tabLen will be. */ + long newLen = newSizeDown( vect->allocLen, len ); + + /* Did the data shrink? */ + if ( newLen < vect->allocLen ) { + vect->allocLen = newLen; + if ( newLen == 0 ) { + /* Simply free the data. 
*/ + free( vect->data ); + vect->data = 0; + } + else { + /* Not shrinking to size zero, realloc it to the smaller size. */ + vect->data = (Code*) realloc( vect->data, sizeof(Code) * newLen ); + //if ( vect->data == 0 ) + // throw std::bad_alloc(); + } + } +} + + +void rtCodeVectEmpty( RtCodeVect *vect ) +{ + if ( vect->data != 0 ) { + /* Free the data space. */ + free( vect->data ); + vect->data = 0; + vect->tabLen = vect->allocLen = 0; + } +} + +void rtCodeVectReplace( RtCodeVect *vect, long pos, const Code *val, long len ) +{ + long endPos, i; + //Code *item; + + /* If we are given a negative position to replace at then + * treat it as a position relative to the length. */ + if ( pos < 0 ) + pos = vect->tabLen + pos; + + /* The end is the one past the last item that we want + * to write to. */ + endPos = pos + len; + + /* Make sure we have enough space. */ + if ( endPos > vect->tabLen ) { + upResize( vect, endPos ); + + /* Delete any objects we need to delete. */ + //item = vect->data + pos; + //for ( i = pos; i < vect->tabLen; i++, item++ ) + // item->~Code(); + + /* We are extending the vector, set the new data length. */ + vect->tabLen = endPos; + } + else { + /* Delete any objects we need to delete. */ + //item = vect->data + pos; + //for ( i = pos; i < endPos; i++, item++ ) + // item->~Code(); + } + + /* Copy data in using copy constructor. */ + Code *dst = vect->data + pos; + const Code *src = val; + for ( i = 0; i < len; i++, dst++, src++ ) + *dst = *src; +} + +void rtCodeVectRemove( RtCodeVect *vect, long pos, long len ) +{ + long newLen, lenToSlideOver, endPos; + Code *dst;//, *item; + + /* If we are given a negative position to remove at then + * treat it as a position relative to the length. */ + if ( pos < 0 ) + pos = vect->tabLen + pos; + + /* The first position after the last item deleted. */ + endPos = pos + len; + + /* The new data length. */ + newLen = vect->tabLen - len; + + /* The place in the data we are deleting at. 
*/ + dst = vect->data + pos; + + /* Call Destructors. */ + //item = dst; + //for ( long i = 0; i < len; i += 1, item += 1 ) + // item->~Code(); + + /* Shift data over if necessary. */ + lenToSlideOver = vect->tabLen - endPos; + if ( len > 0 && lenToSlideOver > 0 ) + memmove(dst, dst + len, sizeof(Code)*lenToSlideOver); + + /* Shrink the data if necessary. */ + downResize( vect, newLen ); + + /* Set the new data length. */ + vect->tabLen = newLen; +} + + diff --git a/src/colm.h b/src/colm.h new file mode 100644 index 00000000..4f169254 --- /dev/null +++ b/src/colm.h @@ -0,0 +1,55 @@ +#ifndef __COLM_COLM_H +#define __COLM_COLM_H + +#ifdef __cplusplus +extern "C" { +#endif + +struct ColmTree; +struct ColmKid; +struct ColmProgram; +struct ColmRuntimeData; + +void colmInit( long debugRealm ); +struct ColmProgram *colmNewProgram( struct ColmRuntimeData *rtd, int argc, const char **argv ); +void colmRunProgram( struct ColmProgram *prg ); +int colmDeleteProgram( struct ColmProgram *prg ); + +struct ColmPrintArgs +{ + void *arg; + int comm; + int attr; + int trim; + void (*out)( struct ColmPrintArgs *args, const char *data, int length ); + void (*openTree)( struct ColmProgram *prg, struct ColmTree **sp, + struct ColmPrintArgs *args, struct ColmKid *parent, struct ColmKid *kid ); + void (*printTerm)( struct ColmProgram *prg, struct ColmTree **sp, + struct ColmPrintArgs *args, struct ColmKid *kid ); + void (*closeTree)( struct ColmProgram *prg, struct ColmTree **sp, + struct ColmPrintArgs *args, struct ColmKid *parent, struct ColmKid *kid ); +}; + +void printNull( struct ColmProgram *prg, struct ColmTree **sp, + struct ColmPrintArgs *args, struct ColmKid *parent, struct ColmKid *kid ); +void printTermTree( struct ColmProgram *prg, struct ColmTree **sp, + struct ColmPrintArgs *printArgs, struct ColmKid *kid ); + +struct ColmTree **vm_root( struct ColmProgram *prg ); +struct ColmTree *returnVal( struct ColmProgram *prg ); +void printTreeArgs( struct ColmProgram *prg, struct 
ColmTree **sp, + struct ColmPrintArgs *printArgs, struct ColmTree *tree ); + +int repeatEnd( struct ColmTree *tree ); +int listLast( struct ColmTree *tree ); +struct ColmTree *getRhsVal( struct ColmProgram *prg, struct ColmTree *tree, int *a ); +struct ColmTree *getAttr( struct ColmTree *tree, long pos ); +struct ColmTree *getGlobal( struct ColmProgram *prg, long pos ); +struct ColmTree *getRepeatNext( struct ColmTree *tree ); +struct ColmTree *getRepeatVal( struct ColmTree *tree ); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/compiler.cc b/src/compiler.cc new file mode 100644 index 00000000..c1e775f2 --- /dev/null +++ b/src/compiler.cc @@ -0,0 +1,1496 @@ +/* + * Copyright 2006-2012 Adrian Thurston <thurston@complang.org> + */ + +/* This file is part of Colm. + * + * Colm is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Colm is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with Colm; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <iostream> +#include <iomanip> +#include <errno.h> +#include <stdlib.h> +#include <limits.h> +#include <sstream> + +#include "global.h" +#include "lmparse.h" +#include "parsedata.h" +#include "parsetree.h" +#include "mergesort.h" +#include "redbuild.h" +#include "pdacodegen.h" +#include "fsmcodegen.h" +#include "fsmrun.h" +#include "pdarun.h" +#include "colm.h" +#include "pool.h" + +using namespace std; +using std::ostringstream; + +char machineMain[] = "main"; +exit_object endp; +void operator<<( ostream &out, exit_object & ) +{ + out << endl; + exit(1); +} + +/* Perform minimization after an operation according + * to the command line args. */ +void afterOpMinimize( FsmGraph *fsm, bool lastInSeq ) +{ + /* Switch on the prefered minimization algorithm. */ + if ( lastInSeq ) { + /* First clean up the graph. FsmGraph operations may leave these + * lying around. There should be no dead end states. The subtract + * intersection operators are the only places where they may be + * created and those operators clean them up. */ + fsm->removeUnreachableStates(); + fsm->minimizePartition2(); + } +} + +/* Count the transitions in the fsm by walking the state list. */ +int countTransitions( FsmGraph *fsm ) +{ + int numTrans = 0; + FsmState *state = fsm->stateList.head; + while ( state != 0 ) { + numTrans += state->outList.length(); + state = state->next; + } + return numTrans; +} + +Key makeFsmKeyHex( char *str, const InputLoc &loc, Compiler *pd ) +{ + /* Reset errno so we can check for overflow or underflow. In the event of + * an error, sets the return val to the upper or lower bound being tested + * against. 
*/ + errno = 0; + unsigned int size = keyOps->alphType->size; + bool unusedBits = size < sizeof(unsigned long); + + unsigned long ul = strtoul( str, 0, 16 ); + + if ( errno == ERANGE || (unusedBits && ul >> (size * 8)) ) { + error(loc) << "literal " << str << " overflows the alphabet type" << endl; + ul = 1 << (size * 8); + } + + if ( unusedBits && keyOps->alphType->isSigned && ul >> (size * 8 - 1) ) + ul |= (0xffffffff >> (size*8 ) ) << (size*8); + + return Key( (long)ul ); +} + +Key makeFsmKeyDec( char *str, const InputLoc &loc, Compiler *pd ) +{ + /* Convert the number to a decimal. First reset errno so we can check + * for overflow or underflow. */ + errno = 0; + long long minVal = keyOps->alphType->minVal; + long long maxVal = keyOps->alphType->maxVal; + + long long ll = strtoll( str, 0, 10 ); + + /* Check for underflow. */ + if ( (errno == ERANGE && ll < 0) || ll < minVal) { + error(loc) << "literal " << str << " underflows the alphabet type" << endl; + ll = minVal; + } + /* Check for overflow. */ + else if ( (errno == ERANGE && ll > 0) || ll > maxVal ) { + error(loc) << "literal " << str << " overflows the alphabet type" << endl; + ll = maxVal; + } + + if ( keyOps->alphType->isSigned ) + return Key( (long)ll ); + else + return Key( (unsigned long)ll ); +} + +/* Make an fsm key in int format (what the fsm graph uses) from an alphabet + * number returned by the parser. Validates that the number doesn't overflow + * the alphabet type. */ +Key makeFsmKeyNum( char *str, const InputLoc &loc, Compiler *pd ) +{ + /* Switch on hex/decimal format. */ + if ( str[0] == '0' && str[1] == 'x' ) + return makeFsmKeyHex( str, loc, pd ); + else + return makeFsmKeyDec( str, loc, pd ); +} + +/* Make an fsm int format (what the fsm graph uses) from a single character. + * Performs proper conversion depending on signed/unsigned property of the + * alphabet. */ +Key makeFsmKeyChar( char c, Compiler *pd ) +{ + if ( keyOps->isSigned ) { + /* Copy from a char type. 
*/ + return Key( c ); + } + else { + /* Copy from an unsigned byte type. */ + return Key( (unsigned char)c ); + } +} + +/* Make an fsm key array in int format (what the fsm graph uses) from a string + * of characters. Performs proper conversion depending on signed/unsigned + * property of the alphabet. */ +void makeFsmKeyArray( Key *result, char *data, int len, Compiler *pd ) +{ + if ( keyOps->isSigned ) { + /* Copy from a char star type. */ + char *src = data; + for ( int i = 0; i < len; i++ ) + result[i] = Key(src[i]); + } + else { + /* Copy from an unsigned byte ptr type. */ + unsigned char *src = (unsigned char*) data; + for ( int i = 0; i < len; i++ ) + result[i] = Key(src[i]); + } +} + +/* Like makeFsmKeyArray except the result has only unique keys. They ordering + * will be changed. */ +void makeFsmUniqueKeyArray( KeySet &result, char *data, int len, + bool caseInsensitive, Compiler *pd ) +{ + /* Use a transitions list for getting unique keys. */ + if ( keyOps->isSigned ) { + /* Copy from a char star type. */ + char *src = data; + for ( int si = 0; si < len; si++ ) { + Key key( src[si] ); + result.insert( key ); + if ( caseInsensitive ) { + if ( key.isLower() ) + result.insert( key.toUpper() ); + else if ( key.isUpper() ) + result.insert( key.toLower() ); + } + } + } + else { + /* Copy from an unsigned byte ptr type. */ + unsigned char *src = (unsigned char*) data; + for ( int si = 0; si < len; si++ ) { + Key key( src[si] ); + result.insert( key ); + if ( caseInsensitive ) { + if ( key.isLower() ) + result.insert( key.toUpper() ); + else if ( key.isUpper() ) + result.insert( key.toLower() ); + } + } + } +} + +FsmGraph *dotFsm( Compiler *pd ) +{ + FsmGraph *retFsm = new FsmGraph(); + retFsm->rangeFsm( keyOps->minKey, keyOps->maxKey ); + return retFsm; +} + +FsmGraph *dotStarFsm( Compiler *pd ) +{ + FsmGraph *retFsm = new FsmGraph(); + retFsm->rangeStarFsm( keyOps->minKey, keyOps->maxKey ); + return retFsm; +} + +/* Make a builtin type. 
Depends on the signed nature of the alphabet type. */ +FsmGraph *makeBuiltin( BuiltinMachine builtin, Compiler *pd ) +{ + /* FsmGraph created to return. */ + FsmGraph *retFsm = 0; + bool isSigned = keyOps->isSigned; + + switch ( builtin ) { + case BT_Any: { + /* All characters. */ + retFsm = dotFsm( pd ); + break; + } + case BT_Ascii: { + /* Ascii characters 0 to 127. */ + retFsm = new FsmGraph(); + retFsm->rangeFsm( 0, 127 ); + break; + } + case BT_Extend: { + /* Ascii extended characters. This is the full byte range. Dependent + * on signed, vs no signed. If the alphabet is one byte then just use + * dot fsm. */ + if ( isSigned ) { + retFsm = new FsmGraph(); + retFsm->rangeFsm( -128, 127 ); + } + else { + retFsm = new FsmGraph(); + retFsm->rangeFsm( 0, 255 ); + } + break; + } + case BT_Alpha: { + /* Alpha [A-Za-z]. */ + FsmGraph *upper = new FsmGraph(), *lower = new FsmGraph(); + upper->rangeFsm( 'A', 'Z' ); + lower->rangeFsm( 'a', 'z' ); + upper->unionOp( lower ); + upper->minimizePartition2(); + retFsm = upper; + break; + } + case BT_Digit: { + /* Digits [0-9]. */ + retFsm = new FsmGraph(); + retFsm->rangeFsm( '0', '9' ); + break; + } + case BT_Alnum: { + /* Alpha numerics [0-9A-Za-z]. */ + FsmGraph *digit = new FsmGraph(), *lower = new FsmGraph(); + FsmGraph *upper = new FsmGraph(); + digit->rangeFsm( '0', '9' ); + upper->rangeFsm( 'A', 'Z' ); + lower->rangeFsm( 'a', 'z' ); + digit->unionOp( upper ); + digit->unionOp( lower ); + digit->minimizePartition2(); + retFsm = digit; + break; + } + case BT_Lower: { + /* Lower case characters. */ + retFsm = new FsmGraph(); + retFsm->rangeFsm( 'a', 'z' ); + break; + } + case BT_Upper: { + /* Upper case characters. */ + retFsm = new FsmGraph(); + retFsm->rangeFsm( 'A', 'Z' ); + break; + } + case BT_Cntrl: { + /* Control characters. 
*/ + FsmGraph *cntrl = new FsmGraph(); + FsmGraph *highChar = new FsmGraph(); + cntrl->rangeFsm( 0, 31 ); + highChar->concatFsm( 127 ); + cntrl->unionOp( highChar ); + cntrl->minimizePartition2(); + retFsm = cntrl; + break; + } + case BT_Graph: { + /* Graphical ascii characters [!-~]. */ + retFsm = new FsmGraph(); + retFsm->rangeFsm( '!', '~' ); + break; + } + case BT_Print: { + /* Printable characters. Same as graph except includes space. */ + retFsm = new FsmGraph(); + retFsm->rangeFsm( ' ', '~' ); + break; + } + case BT_Punct: { + /* Punctuation. */ + FsmGraph *range1 = new FsmGraph(); + FsmGraph *range2 = new FsmGraph(); + FsmGraph *range3 = new FsmGraph(); + FsmGraph *range4 = new FsmGraph(); + range1->rangeFsm( '!', '/' ); + range2->rangeFsm( ':', '@' ); + range3->rangeFsm( '[', '`' ); + range4->rangeFsm( '{', '~' ); + range1->unionOp( range2 ); + range1->unionOp( range3 ); + range1->unionOp( range4 ); + range1->minimizePartition2(); + retFsm = range1; + break; + } + case BT_Space: { + /* Whitespace: [\t\v\f\n\r ]. */ + FsmGraph *cntrl = new FsmGraph(); + FsmGraph *space = new FsmGraph(); + cntrl->rangeFsm( '\t', '\r' ); + space->concatFsm( ' ' ); + cntrl->unionOp( space ); + cntrl->minimizePartition2(); + retFsm = cntrl; + break; + } + case BT_Xdigit: { + /* Hex digits [0-9A-Fa-f]. */ + FsmGraph *digit = new FsmGraph(); + FsmGraph *upper = new FsmGraph(); + FsmGraph *lower = new FsmGraph(); + digit->rangeFsm( '0', '9' ); + upper->rangeFsm( 'A', 'F' ); + lower->rangeFsm( 'a', 'f' ); + digit->unionOp( upper ); + digit->unionOp( lower ); + digit->minimizePartition2(); + retFsm = digit; + break; + } + case BT_Lambda: { + retFsm = new FsmGraph(); + retFsm->lambdaFsm(); + break; + } + case BT_Empty: { + retFsm = new FsmGraph(); + retFsm->emptyFsm(); + break; + }} + + return retFsm; +} + +/* Check if this name inst or any name inst below is referenced. */ +bool NameInst::anyRefsRec() +{ + if ( numRefs > 0 ) + return true; + + /* Recurse on children until true. 
*/ + for ( NameVect::Iter ch = childVect; ch.lte(); ch++ ) { + if ( (*ch)->anyRefsRec() ) + return true; + } + + return false; +} + +/* + * Compiler + */ + +/* Initialize the structure that will collect info during the parse of a + * machine. */ +Compiler::Compiler( const String &fileName, const String §ionName, + const InputLoc §ionLoc, ostream &out ) +: + nextPriorKey(0), + nextLocalErrKey(1), /* 0 is reserved for global error actions. */ + nextNameId(0), + alphTypeSet(false), + getKeyExpr(0), + accessExpr(0), + curStateExpr(0), + lowerNum(0), + upperNum(0), + fileName(fileName), + sectionName(sectionName), + sectionLoc(sectionLoc), + errorCount(0), + curActionOrd(0), + curPriorOrd(0), + nextEpsilonResolvedLink(0), + nextTokenId(1), + rootCodeBlock(0), + mainReturnUT(0), + parserName(sectionName), + out(out), + access(0), + tokenStruct(0), + rootLangEl(0), + eofLangEl(0), + errorLangEl(0), + defaultCharLangEl(0), + rootRegion(0), + defaultRegion(0), + firstNonTermId(0), + prodIdIndex(0), + nextPatReplId(0), + nextGenericId(1), + nextFuncId(0), + loopCleanup(0), + nextObjectId(1), /* 0 is reserved for no object. */ + nextFrameId(0), + nextParserId(0), + nextLabelId(0), + revertOn(true), + predValue(0), + nextMatchEndNum(0), + argvTypeRef(0), + context(0) +{ +} + +/* Clean up the data collected during a parse. */ +Compiler::~Compiler() +{ + /* Delete all the nodes in the action list. Will cause all the + * string data that represents the actions to be deallocated. */ + actionList.empty(); +} + +/* Make a name id in the current name instantiation scope if it is not + * already there. */ +NameInst *Compiler::addNameInst( const InputLoc &loc, char *data, bool isLabel ) +{ + /* Create the name instantitaion object and insert it. 
*/ + NameInst *newNameInst = new NameInst( loc, curNameInst, data, nextNameId++, isLabel ); + curNameInst->childVect.append( newNameInst ); + if ( data != 0 ) + curNameInst->children.insertMulti( data, newNameInst ); + return newNameInst; +} + +void Compiler::initNameWalk( NameInst *rootName ) +{ + curNameInst = rootName; + curNameChild = 0; +} + +/* Goes into the next child scope. The number of the child is already set up. + * We need this for the syncronous name tree and parse tree walk to work + * properly. It is reset on entry into a scope and advanced on poping of a + * scope. A call to enterNameScope should be accompanied by a corresponding + * popNameScope. */ +NameFrame Compiler::enterNameScope( bool isLocal, int numScopes ) +{ + /* Save off the current data. */ + NameFrame retFrame; + retFrame.prevNameInst = curNameInst; + retFrame.prevNameChild = curNameChild; + retFrame.prevLocalScope = localNameScope; + + /* Enter into the new name scope. */ + for ( int i = 0; i < numScopes; i++ ) { + curNameInst = curNameInst->childVect[curNameChild]; + curNameChild = 0; + } + + if ( isLocal ) + localNameScope = curNameInst; + + return retFrame; +} + +/* Return from a child scope to a parent. The parent info must be specified as + * an argument and is obtained from the corresponding call to enterNameScope. + * */ +void Compiler::popNameScope( const NameFrame &frame ) +{ + /* Pop the name scope. */ + curNameInst = frame.prevNameInst; + curNameChild = frame.prevNameChild+1; + localNameScope = frame.prevLocalScope; +} + +void Compiler::resetNameScope( const NameFrame &frame ) +{ + /* Pop the name scope. */ + curNameInst = frame.prevNameInst; + curNameChild = frame.prevNameChild; + localNameScope = frame.prevLocalScope; +} + + +void Compiler::unsetObsoleteEntries( FsmGraph *graph ) +{ + /* Loop the reference names and increment the usage. Names that are no + * longer needed will be unset in graph. 
*/ + for ( NameVect::Iter ref = curNameInst->referencedNames; ref.lte(); ref++ ) { + /* Get the name. */ + NameInst *name = *ref; + name->numUses += 1; + + /* If the name is no longer needed unset its corresponding entry. */ + if ( name->numUses == name->numRefs ) { + assert( graph->entryPoints.find( name->id ) != 0 ); + graph->unsetEntry( name->id ); + } + } +} + +NameSet Compiler::resolvePart( NameInst *refFrom, const char *data, bool recLabelsOnly ) +{ + /* Queue needed for breadth-first search, load it with the start node. */ + NameInstList nameQueue; + nameQueue.append( refFrom ); + + NameSet result; + while ( nameQueue.length() > 0 ) { + /* Pull the next from location off the queue. */ + NameInst *from = nameQueue.detachFirst(); + + /* Look for the name. */ + NameMapEl *low, *high; + if ( from->children.findMulti( data, low, high ) ) { + /* Record all instances of the name. */ + for ( ; low <= high; low++ ) + result.insert( low->value ); + } + + /* Name not there, do breadth-first operation of appending all + * childrent to the processing queue. */ + for ( NameVect::Iter name = from->childVect; name.lte(); name++ ) { + if ( !recLabelsOnly || (*name)->isLabel ) + nameQueue.append( *name ); + } + } + + /* Queue exhausted and name never found. */ + return result; +} + +void Compiler::resolveFrom( NameSet &result, NameInst *refFrom, + const NameRef &nameRef, int namePos ) +{ + /* Look for the name in the owning scope of the factor with aug. */ + NameSet partResult = resolvePart( refFrom, nameRef[namePos], false ); + + /* If there are more parts to the name then continue on. */ + if ( ++namePos < nameRef.length() ) { + /* There are more components to the name, search using all the part + * results as the base. */ + for ( NameSet::Iter name = partResult; name.lte(); name++ ) + resolveFrom( result, *name, nameRef, namePos ); + } + else { + /* This is the last component, append the part results to the final + * results. 
*/ + result.insert( partResult ); + } +} + +ostream &operator<<( ostream &out, const Token &token ) +{ + out << token.data; + return out; +} + +/* Write out a name reference. */ +ostream &operator<<( ostream &out, const NameRef &nameRef ) +{ + int pos = 0; + if ( nameRef[pos] == 0 ) { + out << "::"; + pos += 1; + } + out << nameRef[pos++]; + for ( ; pos < nameRef.length(); pos++ ) + out << "::" << nameRef[pos]; + return out; +} + +ostream &operator<<( ostream &out, const NameInst &nameInst ) +{ + /* Count the number fully qualified name parts. */ + int numParents = 0; + NameInst *curParent = nameInst.parent; + while ( curParent != 0 ) { + numParents += 1; + curParent = curParent->parent; + } + + /* Make an array and fill it in. */ + curParent = nameInst.parent; + NameInst **parents = new NameInst*[numParents]; + for ( int p = numParents-1; p >= 0; p-- ) { + parents[p] = curParent; + curParent = curParent->parent; + } + + /* Write the parents out, skip the root. */ + for ( int p = 1; p < numParents; p++ ) + out << "::" << ( parents[p]->name != 0 ? parents[p]->name : "<ANON>" ); + + /* Write the name and cleanup. */ + out << "::" << ( nameInst.name != 0 ? 
nameInst.name : "<ANON>" ); + delete[] parents; + return out; +} + +struct CmpNameInstLoc +{ + static int compare( const NameInst *ni1, const NameInst *ni2 ) + { + if ( ni1->loc.line < ni2->loc.line ) + return -1; + else if ( ni1->loc.line > ni2->loc.line ) + return 1; + else if ( ni1->loc.col < ni2->loc.col ) + return -1; + else if ( ni1->loc.col > ni2->loc.col ) + return 1; + return 0; + } +}; + +void errorStateLabels( const NameSet &resolved ) +{ + MergeSort<NameInst*, CmpNameInstLoc> mergeSort; + mergeSort.sort( resolved.data, resolved.length() ); + for ( NameSet::Iter res = resolved; res.lte(); res++ ) + error((*res)->loc) << " -> " << **res << endl; +} + + +void Compiler::referenceRegions( NameInst *rootName ) +{ + for ( NameVect::Iter inst = rootName->childVect; inst.lte(); inst++ ) { + /* Inc the reference in the name. This will cause the entry point to + * survive to the end of the graph generating walk. */ + (*inst)->numRefs += 1; + } +} + +/* Walk a name tree starting at from and fill the name index. */ +void Compiler::fillNameIndex( NameInst **nameIndex, NameInst *from ) +{ + /* Fill the value for from in the name index. */ + nameIndex[from->id] = from; + + /* Recurse on the implicit final state and then all children. */ + if ( from->final != 0 ) + fillNameIndex( nameIndex, from->final ); + for ( NameVect::Iter name = from->childVect; name.lte(); name++ ) + fillNameIndex( nameIndex, *name ); +} + +NameInst **Compiler::makeNameIndex( NameInst *rootName ) +{ + /* The number of nodes in the tree can now be given by nextNameId. Put a + * null pointer on the end of the list to terminate it. 
*/ + NameInst **nameIndex = new NameInst*[nextNameId+1]; + memset( nameIndex, 0, sizeof(NameInst*)*(nextNameId+1) ); + fillNameIndex( nameIndex, rootName ); + return nameIndex; +} + +void Compiler::createBuiltin( const char *name, BuiltinMachine builtin ) +{ + Expression *expression = new Expression( builtin ); + Join *join = new Join( expression ); + VarDef *varDef = new VarDef( name, join ); + GraphDictEl *graphDictEl = new GraphDictEl( name, varDef ); + rootNamespace->rlMap.insert( graphDictEl ); +} + +/* Initialize the graph dict with builtin types. */ +void Compiler::initGraphDict( ) +{ + createBuiltin( "any", BT_Any ); + createBuiltin( "ascii", BT_Ascii ); + createBuiltin( "extend", BT_Extend ); + createBuiltin( "alpha", BT_Alpha ); + createBuiltin( "digit", BT_Digit ); + createBuiltin( "alnum", BT_Alnum ); + createBuiltin( "lower", BT_Lower ); + createBuiltin( "upper", BT_Upper ); + createBuiltin( "cntrl", BT_Cntrl ); + createBuiltin( "graph", BT_Graph ); + createBuiltin( "print", BT_Print ); + createBuiltin( "punct", BT_Punct ); + createBuiltin( "space", BT_Space ); + createBuiltin( "xdigit", BT_Xdigit ); + createBuiltin( "null", BT_Lambda ); + createBuiltin( "zlen", BT_Lambda ); + createBuiltin( "empty", BT_Empty ); +} + +/* Initialize the key operators object that will be referenced by all fsms + * created. */ +void Compiler::initKeyOps( ) +{ + /* Signedness and bounds. */ + HostType *alphType = alphTypeSet ? userAlphType : hostLang->defaultAlphType; + thisKeyOps.setAlphType( alphType ); + + if ( lowerNum != 0 ) { + /* If ranges are given then interpret the alphabet type. */ + thisKeyOps.minKey = makeFsmKeyNum( lowerNum, rangeLowLoc, this ); + thisKeyOps.maxKey = makeFsmKeyNum( upperNum, rangeHighLoc, this ); + } + + thisCondData.nextCondKey = thisKeyOps.maxKey; + thisCondData.nextCondKey.increment(); +} + +void Compiler::printNameInst( NameInst *nameInst, int level ) +{ + for ( int i = 0; i < level; i++ ) + cerr << " "; + cerr << (nameInst->name != 0 ? 
nameInst->name : "<ANON>") << + " id: " << nameInst->id << + " refs: " << nameInst->numRefs << endl; + for ( NameVect::Iter name = nameInst->childVect; name.lte(); name++ ) + printNameInst( *name, level+1 ); +} + +/* Remove duplicates of unique actions from an action table. */ +void Compiler::removeDups( ActionTable &table ) +{ + /* Scan through the table looking for unique actions to + * remove duplicates of. */ + for ( int i = 0; i < table.length(); i++ ) { + /* Remove any duplicates ahead of i. */ + for ( int r = i+1; r < table.length(); ) { + if ( table[r].value == table[i].value ) + table.vremove(r); + else + r += 1; + } + } +} + +/* Remove duplicates from action lists. This operates only on transition and + * eof action lists and so should be called once all actions have been + * transfered to their final resting place. */ +void Compiler::removeActionDups( FsmGraph *graph ) +{ + /* Loop all states. */ + for ( StateList::Iter state = graph->stateList; state.lte(); state++ ) { + /* Loop all transitions. */ + for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) + removeDups( trans->actionTable ); + removeDups( state->toStateActionTable ); + removeDups( state->fromStateActionTable ); + removeDups( state->eofActionTable ); + } +} + +Action *Compiler::newAction( const String &name, InlineList *inlineList ) +{ + InputLoc loc; + loc.line = 1; + loc.col = 1; + loc.fileName = 0; + + Action *action = new Action( loc, name, inlineList ); + actionList.append( action ); + return action; +} + +void Compiler::initLongestMatchData() +{ + if ( regionList.length() > 0 ) { + /* The initActId action gives act a default value. */ + InlineList *il4 = new InlineList; + il4->append( new InlineItem( InputLoc(), InlineItem::LmInitAct ) ); + initActId = newAction( "initact", il4 ); + initActId->isLmAction = true; + + /* The setTokStart action sets tokstart. 
*/ + InlineList *il5 = new InlineList; + il5->append( new InlineItem( InputLoc(), InlineItem::LmSetTokStart ) ); + setTokStart = newAction( "tokstart", il5 ); + setTokStart->isLmAction = true; + + /* The setTokEnd action sets tokend. */ + InlineList *il3 = new InlineList; + il3->append( new InlineItem( InputLoc(), InlineItem::LmSetTokEnd ) ); + setTokEnd = newAction( "tokend", il3 ); + setTokEnd->isLmAction = true; + + /* The action will also need an ordering: ahead of all user action + * embeddings. */ + initActIdOrd = curActionOrd++; + setTokStartOrd = curActionOrd++; + setTokEndOrd = curActionOrd++; + } +} + +void Compiler::finishGraphBuild( FsmGraph *graph ) +{ + /* Resolve any labels that point to multiple states. Any labels that are + * still around are referenced only by gotos and calls and they need to be + * made into deterministic entry points. */ + graph->deterministicEntry(); + + /* + * All state construction is now complete. + */ + + /* Transfer global error actions. */ + for ( StateList::Iter state = graph->stateList; state.lte(); state++ ) + graph->transferErrorActions( state, 0 ); + + removeActionDups( graph ); + + /* Remove unreachable states. There should be no dead end states. The + * subtract and intersection operators are the only places where they may + * be created and those operators clean them up. */ + graph->removeUnreachableStates(); + + /* No more fsm operations are to be done. Action ordering numbers are + * no longer of use and will just hinder minimization. Clear them. */ + graph->nullActionKeys(); + + /* Transition priorities are no longer of use. We can clear them + * because they will just hinder minimization as well. Clear them. */ + graph->clearAllPriorities(); + + /* Minimize here even if we minimized at every op. Now that function + * keys have been cleared we may get a more minimal fsm. 
*/ + graph->minimizePartition2(); + graph->compressTransitions(); +} + +void Compiler::printNameTree( NameInst *rootName ) +{ + /* Print the name instance map. */ + cerr << "name tree:" << endl; + for ( NameVect::Iter name = rootName->childVect; name.lte(); name++ ) + printNameInst( *name, 0 ); +} + +void Compiler::printNameIndex( NameInst **nameIndex ) +{ + /* The name index is terminated with a null pointer. */ + cerr << "name index:" << endl; + for ( int ni = 0; nameIndex[ni]; ni++ ) { + cerr << ni << ": "; + char *name = nameIndex[ni]->name; + cerr << ( name != 0 ? name : "<ANON>" ) << endl; + } +} + + +/* Build the name tree and supporting data structures. */ +NameInst *Compiler::makeNameTree() +{ + /* Create the root name. */ + nextNameId = 0; + NameInst *rootName = new NameInst( InputLoc(), 0, 0, nextNameId++, false ); + + /* First make the name tree. */ + initNameWalk( rootName ); + for ( RegionGraphList::Iter glel = instanceList; glel.lte(); glel++ ) { + /* Recurse on the instance. */ + glel->value->makeNameTree( glel->loc, this ); + } + + return rootName; +} + +FsmGraph *Compiler::makeAllRegions() +{ + /* Build the name tree and supporting data structures. */ + NameInst *rootName = makeNameTree( ); + NameInst **nameIndex = makeNameIndex( rootName ); + + /* Resovle the implicit name references to the nfa instantiations. */ + referenceRegions( rootName ); + + int numGraphs = 0; + FsmGraph **graphs = new FsmGraph*[instanceList.length()]; + + /* Make all the instantiations, we know that main exists in this list. */ + initNameWalk( rootName ); + for ( RegionGraphList::Iter glel = instanceList; glel.lte(); glel++ ) { + /* Build the graph from a walk of the parse tree. */ + FsmGraph *newGraph = glel->value->walk( this ); + + /* Wrap up the construction. */ + finishGraphBuild( newGraph ); + + /* Save off the new graph. 
*/ + graphs[numGraphs++] = newGraph; + } + + /* NOTE: If putting in minimization here we need to include eofTarget + * into the minimization algorithm. It is currently set by the longest + * match operator and not considered anywhere else. */ + + /* Add all the other graphs into the first. */ + FsmGraph *all = graphs[0]; + all->globOp( graphs+1, numGraphs-1 ); + delete[] graphs; + + /* Go through all the token regions and check for lmRequiresErrorState. */ + for ( RegionList::Iter reg = regionList; reg.lte(); reg++ ) { + if ( reg->lmSwitchHandlesError ) + all->lmRequiresErrorState = true; + } + + all->rootName = rootName; + all->nameIndex = nameIndex; + + return all; +} + +void Compiler::analyzeAction( Action *action, InlineList *inlineList ) +{ + /* FIXME: Actions used as conditions should be very constrained. */ + for ( InlineList::Iter item = *inlineList; item.lte(); item++ ) { + //if ( item->type == InlineItem::Call || item->type == InlineItem::CallExpr ) + // action->anyCall = true; + + /* Need to recurse into longest match items. */ + if ( item->type == InlineItem::LmSwitch ) { + TokenRegion *lm = item->tokenRegion; + for ( TokenDefListReg::Iter lmi = lm->tokenDefList; lmi.lte(); lmi++ ) { + if ( lmi->action != 0 ) + analyzeAction( action, lmi->action->inlineList ); + } + } + + if ( item->type == InlineItem::LmOnLast || + item->type == InlineItem::LmOnNext || + item->type == InlineItem::LmOnLagBehind ) + { + TokenDef *lmi = item->longestMatchPart; + if ( lmi->action != 0 ) + analyzeAction( action, lmi->action->inlineList ); + } + + if ( item->children != 0 ) + analyzeAction( action, item->children ); + } +} + +void Compiler::analyzeGraph( FsmGraph *graph ) +{ + for ( ActionList::Iter act = actionList; act.lte(); act++ ) + analyzeAction( act, act->inlineList ); + + for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) { + /* The transition list. 
*/ + for ( TransList::Iter trans = st->outList; trans.lte(); trans++ ) { + for ( ActionTable::Iter at = trans->actionTable; at.lte(); at++ ) + at->value->numTransRefs += 1; + } + + for ( ActionTable::Iter at = st->toStateActionTable; at.lte(); at++ ) + at->value->numToStateRefs += 1; + + for ( ActionTable::Iter at = st->fromStateActionTable; at.lte(); at++ ) + at->value->numFromStateRefs += 1; + + for ( ActionTable::Iter at = st->eofActionTable; at.lte(); at++ ) + at->value->numEofRefs += 1; + + for ( StateCondList::Iter sc = st->stateCondList; sc.lte(); sc++ ) { + for ( CondSet::Iter sci = sc->condSpace->condSet; sci.lte(); sci++ ) + (*sci)->numCondRefs += 1; + } + } +} + +FsmGraph *Compiler::makeScanner() +{ + /* Make the graph, do minimization. */ + FsmGraph *fsmGraph = makeAllRegions(); + + /* If any errors have occured in the input file then don't write anything. */ + if ( gblErrorCount > 0 ) + return 0; + + analyzeGraph( fsmGraph ); + + /* Decide if an error state is necessary. + * 1. There is an error transition + * 2. There is a gap in the transitions + * 3. The longest match operator requires it. */ + if ( fsmGraph->lmRequiresErrorState || fsmGraph->hasErrorTrans() ) + fsmGraph->errState = fsmGraph->addState(); + + /* State numbers need to be assigned such that all final states have a + * larger state id number than all non-final states. This enables the + * first_final mechanism to function correctly. We also want states to be + * ordered in a predictable fashion. So we first apply a depth-first + * search, then do a stable sort by final state status, then assign + * numbers. */ + + fsmGraph->depthFirstOrdering(); + fsmGraph->sortStatesByFinal(); + fsmGraph->setStateNumbers( 0 ); + + return fsmGraph; +} + +void Compiler::createDefaultScanner() +{ + InputLoc loc = { 0, 0, 0 }; + + const char *name = "___DEFAULT_SCANNER"; + + /* Create the default namespace. 
*/ + defaultNamespace = new Namespace( InputLoc(), name, + namespaceList.length(), 0 ); + namespaceList.append( defaultNamespace ); + + /* Create a scanner which will be used when no other scanner can be + * figured out. It returns single characters. */ + defaultRegion = new TokenRegion( InputLoc(), name, + regionList.length(), 0 ); + regionList.append( defaultRegion ); + + /* Insert the machine definition into the graph dictionary. */ + RegionGraphDictEl *newEl = rootNamespace->graphDict.insert( name ); + assert( newEl != 0 ); + newEl->value = new RegionDef( name, defaultRegion ); + newEl->isInstance = true; + instanceList.append( newEl ); + + Join *join = new Join( new Expression( BT_Any ) ); + + TokenDef *tokenDef = new TokenDef( name, String(), false, false, + join, 0, loc, nextTokenId++, + rootNamespace, defaultRegion, 0, 0, 0 ); + + defaultRegion->tokenDefList.append( tokenDef ); + + /* Now create the one and only token -> "<chr>" / any / */ + name = "___DEFAULT_SCANNER_CHR"; + defaultCharLangEl = addLangEl( this, defaultNamespace, name, LangEl::Term ); + + tokenDef->tdLangEl = defaultCharLangEl; + defaultCharLangEl->tokenDef = tokenDef; +} + +LangEl *Compiler::makeRepeatProd( Namespace *nspace, const String &repeatName, + NamespaceQual *nspaceQual, const String &name ) +{ + LangEl *prodName = addLangEl( this, nspace, repeatName, LangEl::NonTerm ); + prodName->isRepeat = true; + + ProdElList *prodElList1 = new ProdElList; + + /* Build the first production of the repeat. 
*/ + TypeRef *typeRef1 = new TypeRef( InputLoc(), nspaceQual, name ); + ProdEl *factor1 = new ProdEl( ProdEl::ReferenceType, InputLoc(), 0, false, typeRef1, 0 ); + + UniqueType *prodNameUT = findUniqueType( TYPE_TREE, prodName ); + TypeRef *typeRef2 = new TypeRef( InputLoc(), prodNameUT ); + ProdEl *factor2 = new ProdEl( ProdEl::ReferenceType, InputLoc(), 0, false, typeRef2, 0 ); + + prodElList1->append( factor1 ); + prodElList1->append( factor2 ); + + Definition *newDef1 = new Definition( InputLoc(), + prodName, prodElList1, false, 0, + prodList.length(), prodName->defList.length(), + Definition::Production ); + + prodName->defList.append( newDef1 ); + prodList.append( newDef1 ); + + /* Build the second production of the repeat. */ + ProdElList *prodElList2 = new ProdElList; + + Definition *newDef2 = new Definition( InputLoc(), + prodName, prodElList2, false, 0, + prodList.length(), prodName->defList.length(), + Definition::Production ); + + prodName->defList.append( newDef2 ); + prodList.append( newDef2 ); + + return prodName; +} + +LangEl *Compiler::makeListProd( Namespace *nspace, const String &listName, NamespaceQual *nspaceQual, const String &name ) +{ + LangEl *prodName = addLangEl( this, nspace, listName, LangEl::NonTerm ); + prodName->isList = true; + + /* Build the first production of the list. 
*/ + TypeRef *typeRef1 = new TypeRef( InputLoc(), nspaceQual, name ); + ProdEl *factor1 = new ProdEl( ProdEl::ReferenceType, InputLoc(), 0, false, typeRef1, 0 ); + + UniqueType *prodNameUT = findUniqueType( TYPE_TREE, prodName ); + TypeRef *typeRef2 = new TypeRef( InputLoc(), prodNameUT ); + ProdEl *factor2 = new ProdEl( ProdEl::ReferenceType, InputLoc(), 0, false, typeRef2, 0 ); + + ProdElList *prodElList1 = new ProdElList; + prodElList1->append( factor1 ); + prodElList1->append( factor2 ); + + Definition *newDef1 = new Definition( InputLoc(), + prodName, prodElList1, false, 0, + prodList.length(), prodName->defList.length(), + Definition::Production ); + + prodName->defList.append( newDef1 ); + prodList.append( newDef1 ); + + /* Build the second production of the list. */ + TypeRef *typeRef3 = new TypeRef( InputLoc(), nspaceQual, name ); + ProdEl *factor3 = new ProdEl( ProdEl::ReferenceType, InputLoc(), 0, false, typeRef3, 0 ); + + ProdElList *prodElList2 = new ProdElList; + prodElList2->append( factor3 ); + + Definition *newDef2 = new Definition( InputLoc(), + prodName, prodElList2, false, 0, + prodList.length(), prodName->defList.length(), + Definition::Production ); + + prodName->defList.append( newDef2 ); + prodList.append( newDef2 ); + + return prodName; +} + +LangEl *Compiler::makeOptProd( Namespace *nspace, const String &optName, NamespaceQual *nspaceQual, const String &name ) +{ + LangEl *prodName = addLangEl( this, nspace, optName, LangEl::NonTerm ); + prodName->isOpt = true; + + ProdElList *prodElList1 = new ProdElList; + + /* Build the first production of the repeat. 
*/ + TypeRef *typeRef1 = new TypeRef( InputLoc(), nspaceQual, name ); + ProdEl *factor1 = new ProdEl( ProdEl::ReferenceType, InputLoc(), 0, false, typeRef1, 0 ); + prodElList1->append( factor1 ); + + Definition *newDef1 = new Definition( InputLoc(), + prodName, prodElList1, false, 0, + prodList.length(), prodName->defList.length(), + Definition::Production ); + + prodName->defList.append( newDef1 ); + prodList.append( newDef1 ); + + /* Build the second production of the repeat. */ + ProdElList *prodElList2 = new ProdElList; + + Definition *newDef2 = new Definition( InputLoc(), + prodName, prodElList2, false, 0, + prodList.length(), prodName->defList.length(), + Definition::Production ); + + prodName->defList.append( newDef2 ); + prodList.append( newDef2 ); + + return prodName; +} + +Namespace *Namespace::findNamespace( const String &name ) +{ + for ( NamespaceVect::Iter c = childNamespaces; c.lte(); c++ ) { + if ( strcmp( name, (*c)->name ) == 0 ) + return *c; + } + return 0; +} + +/* Search from a previously resolved qualification. (name 1+ in a qual list). */ +Namespace *NamespaceQual::searchFrom( Namespace *from, StringVect::Iter &qualPart ) +{ + /* While there are still parts in the qualification. */ + while ( qualPart.lte() ) { + Namespace *child = from->findNamespace( *qualPart ); + if ( child == 0 ) + return 0; + + from = child; + qualPart.increment(); + } + + return from; +} + +Namespace *NamespaceQual::getQual( Compiler *pd ) +{ + /* Do the search only once. */ + if ( cachedNspaceQual != 0 ) + return cachedNspaceQual; + + if ( qualNames.length() == 0 ) { + /* No qualification, use the region the qualification was + * declared in. */ + cachedNspaceQual = declInNspace; + } + else if ( strcmp( qualNames[0], "root" ) == 0 ) { + /* First item is "root." Start the downward search from there. 
*/ + StringVect::Iter qualPart = qualNames; + qualPart.increment(); + cachedNspaceQual = searchFrom( pd->rootNamespace, qualPart ); + return cachedNspaceQual; + } + else { + /* Have a qualification. Move upwards through the declared + * regions looking for the first part. */ + StringVect::Iter qualPart = qualNames; + Namespace *parentNamespace = declInNspace; + while ( parentNamespace != 0 ) { + /* Search for the first part underneath the current parent. */ + Namespace *child = parentNamespace->findNamespace( *qualPart ); + + if ( child != 0 ) { + /* Found the first part. Start going below the result. */ + qualPart.increment(); + cachedNspaceQual = searchFrom( child, qualPart ); + return cachedNspaceQual; + } + + /* Not found, move up to the parent. */ + parentNamespace = parentNamespace->parentNamespace; + } + + /* Failed to find the place to start from. */ + cachedNspaceQual = 0; + } + + return cachedNspaceQual; +} + +void Compiler::initEmptyScanners() +{ + for ( RegionList::Iter reg = regionList; reg.lte(); reg++ ) { + if ( reg->tokenDefList.length() == 0 ) { + reg->wasEmpty = true; + + static int def = 1; + InputLoc loc = { 0, 0, 0 }; + String name( reg->name.length() + 16, "__%s_DEF_PAT_%d", reg->name.data, def++ ); + + Join *join = new Join( new Expression( BT_Any ) ); + + TokenDef *tokenDef = new TokenDef( name, String(), false, false, join, + 0, loc, nextTokenId++, rootNamespace, reg, 0, 0, 0 ); + reg->tokenDefList.append( tokenDef ); + + /* These do not go in the namespace so so they cannot get declared + * in the declare pass. */ + LangEl *lel = addLangEl( this, rootNamespace, name, LangEl::Term ); + + tokenDef->tdLangEl = lel; + lel->tokenDef = tokenDef; + } + } +} + + +void Compiler::parsePatterns() +{ + Program *prg = colmNewProgram( runtimeData, 0, 0 ); + + /* Turn off context-dependent parsing. 
*/ + prg->ctxDepParsing = 0; + + Tree **vm_stack = stackAlloc(); + Tree **root = &vm_stack[VM_STACK_SIZE]; + + for ( ReplList::Iter repl = replList; repl.lte(); repl++ ) { + if ( colm_log_compile ) { + cerr << "parsing replacement at " << + repl->loc.line << ' ' << repl->loc.col << endl; + } + + InputStream *in = new InputStream; + FsmRun *fsmRun = new FsmRun; + repl->pdaRun = new PdaRun; + + initInputStream( in ); + initPdaRun( repl->pdaRun, prg, pdaTables, fsmRun, repl->langEl->parserId, 0, false, 0 ); + initFsmRun( fsmRun, prg ); + + Stream *res = streamAllocate( prg ); + res->id = LEL_ID_STREAM; + res->in = newSourceStreamRepl( repl ); + appendStream( in, (Tree*)res ); + setEof( in ); + + newToken( prg, repl->pdaRun, fsmRun ); + long pcr = parseLoop( prg, root, repl->pdaRun, fsmRun, in, PcrStart ); + assert( pcr == PcrDone ); + if ( repl->pdaRun->parseError ) + cout << "parse error" << endp; + } + + for ( PatternList::Iter pat = patternList; pat.lte(); pat++ ) { + if ( colm_log_compile ) { + cerr << "parsing pattern at " << + pat->loc.line << ' ' << pat->loc.col << endl; + } + + InputStream *in = new InputStream; + FsmRun *fsmRun = new FsmRun; + pat->pdaRun = new PdaRun; + + initInputStream( in ); + initPdaRun( pat->pdaRun, prg, pdaTables, fsmRun, pat->langEl->parserId, 0, false, 0 ); + initFsmRun( fsmRun, prg ); + + Stream *res = streamAllocate( prg ); + res->id = LEL_ID_STREAM; + res->in = newSourceStreamPattern( pat ); + appendStream( in, (Tree*)res ); + setEof( in ); + + newToken( prg, pat->pdaRun, fsmRun ); + long pcr = parseLoop( prg, root, pat->pdaRun, fsmRun, in, PcrStart ); + assert( pcr == PcrDone ); + if ( pat->pdaRun->parseError ) + cout << "parse error" << endp; + } + + fillInPatterns( prg ); +} + +void Compiler::collectParserEls( BstSet<LangEl*> &parserEls ) +{ + for ( PatternList::Iter pat = patternList; pat.lte(); pat++ ) { + /* We assume the reduction action compilation phase was run before + * pattern parsing and it decorated the pattern with 
the target type. */ + assert( pat->langEl != 0 ); + if ( pat->langEl->type != LangEl::NonTerm ) + error(pat->loc) << "pattern type is not a non-terminal" << endp; + + if ( pat->langEl->parserId < 0 ) { + /* Make a parser for the language element. */ + parserEls.insert( pat->langEl ); + pat->langEl->parserId = nextParserId++; + } + } + + for ( ReplList::Iter repl = replList; repl.lte(); repl++ ) { + /* We assume the reduction action compilation phase was run before + * replacement parsing decorated the replacement with the target type. */ + assert( repl->langEl != 0 ); + + if ( repl->langEl->parserId < 0 ) { + /* Make a parser for the language element. */ + parserEls.insert( repl->langEl ); + repl->langEl->parserId = nextParserId++; + } + } + + /* Make parsers that we need. */ + for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) { + if ( lel->parserId >= 0 ) + parserEls.insert( lel ); + } +} + + +void Compiler::generateOutput() +{ + FsmCodeGen *fsmGen = new FsmCodeGen("<INPUT>", sectionName, + *outStream, redFsm, fsmTables ); + + PdaCodeGen *pdaGen = new PdaCodeGen( outputFileName, "parser", this, *outStream ); + + fsmGen->writeIncludes(); + pdaGen->defineRuntime(); + fsmGen->writeCode(); + + /* Make parsers that we need. */ + pdaGen->writeParserData( 0, pdaTables ); + + /* Write the runtime data. */ + pdaGen->writeRuntimeData( runtimeData, pdaTables ); + + if ( !gblLibrary ) + fsmGen->writeMain(); + + outStream->flush(); +} + + +void Compiler::prepGrammar() +{ + /* This will create language elements. */ + wrapNonTerminals(); + + makeLangElIds(); + makeLangElNames(); + makeDefinitionNames(); + noUndefindLangEls(); + + /* Put the language elements in an index by language element id. */ + langElIndex = new LangEl*[nextSymbolId+1]; + memset( langElIndex, 0, sizeof(LangEl*)*(nextSymbolId+1) ); + for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) + langElIndex[lel->id] = lel; + + makeProdFsms(); + + /* Allocate the Runtime data now. 
Every PdaTable that we make + * will reference it, but it will be filled in after all the tables are + * built. */ + runtimeData = new RuntimeData; +} + +void Compiler::compile() +{ + beginProcessing(); + initKeyOps(); + + + /* Type declaration. */ + typeDeclaration(); + + /* Type resolving. */ + typeResolve(); + + makeTerminalWrappers(); + makeEofElements(); + + /* + * Parsers + */ + + /* Init the longest match data */ + initLongestMatchData(); + FsmGraph *fsmGraph = makeScanner(); + + if ( colm_log_compile ) { + printNameTree( fsmGraph->rootName ); + printNameIndex( fsmGraph->nameIndex ); + } + + prepGrammar(); + + /* Compile bytecode. */ + compileByteCode(); + + /* Make the reduced fsm. */ + RedFsmBuild reduce( sectionName, this, fsmGraph ); + redFsm = reduce.reduceMachine(); + + BstSet<LangEl*> parserEls; + collectParserEls( parserEls ); + + makeParser( parserEls ); + + /* Make the scanner tables. */ + fsmTables = redFsm->makeFsmTables(); + + /* Now that all parsers are built, make the global runtimeData. */ + makeRuntimeData(); + + /* + * All compilation is now complete. + */ + + /* Parse patterns and replacements. */ + parsePatterns(); +} + diff --git a/src/ctinput.cc b/src/ctinput.cc new file mode 100644 index 00000000..b5086268 --- /dev/null +++ b/src/ctinput.cc @@ -0,0 +1,439 @@ +/* + * Copyright 2007-2012 Adrian Thurston <thurston@complang.org> + */ + +/* This file is part of Colm. + * + * Colm is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Colm is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with Colm; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "parsedata.h" +#include "parsetree.h" +#include "input.h" +#include "fsmrun.h" +#include "debug.h" +#include "pool.h" + +#include <iostream> + +using std::cerr; +using std::endl; + +SourceFuncs patternFuncs; +SourceFuncs replFuncs; + +/* + * Pattern + */ + +SourceStream *newSourceStreamPattern( Pattern *pattern ) +{ + SourceStream *is = (SourceStream*)malloc(sizeof(SourceStream)); + memset( is, 0, sizeof(SourceStream) ); + is->handlesLine = true; + is->pattern = pattern; + is->patItem = pattern->list->head; + is->funcs = &patternFuncs; + return is; +} + +LangEl *inputStreamPatternGetLangEl( SourceStream *is, long *bindId, char **data, long *length ) +{ + LangEl *klangEl = is->patItem->factor->langEl; + *bindId = is->patItem->bindId; + *data = 0; + *length = 0; + is->line = is->patItem->loc.line; + + is->patItem = is->patItem->next; + is->offset = 0; + return klangEl; +} + +int inputStreamPatternGetData( SourceStream *is, int skip, char *dest, int length, int *copied ) +{ + *copied = 0; + + PatternItem *buf = is->patItem; + int offset = is->offset; + + while ( true ) { + if ( buf == 0 ) + return INPUT_EOD; + + if ( buf->type == PatternItem::FactorType ) + return INPUT_LANG_EL; + + if ( offset == 0 ) + is->line = buf->loc.line; + + assert ( buf->type == PatternItem::InputText ); + int avail = buf->data.length() - offset; + + if ( avail > 0 ) { + /* The source data from the current buffer. */ + char *src = &buf->data[offset]; + int slen = avail <= length ? avail : length; + + /* Need to skip? */ + if ( skip > 0 && slen <= skip ) { + /* Skipping the the whole source. */ + skip -= slen; + } + else { + /* Either skip is zero, or less than slen. Skip goes to zero. + * Some data left over, copy it. 
*/ + src += skip; + slen -= skip; + skip = 0; + + memcpy( dest, src, slen ) ; + *copied += slen; + break; + } + } + + buf = buf->next; + offset = 0; + } + + return INPUT_DATA; +} + +void inputStreamPatternBackup( SourceStream *is ) +{ + if ( is->patItem == 0 ) + is->patItem = is->pattern->list->tail; + else + is->patItem = is->patItem->prev; +} + +void inputStreamPatternPushBackBuf( SourceStream *is, RunBuf *runBuf ) +{ + char *data = runBuf->data + runBuf->offset; + long length = runBuf->length; + + if ( length == 0 ) + return; + + /* While pushing back past the current pattern item start. */ + while ( length > is->offset ) { + length -= is->offset; + if ( is->offset > 0 ) + assert( memcmp( is->patItem->data, data-length, is->offset ) == 0 ); + inputStreamPatternBackup( is ); + is->offset = is->patItem->data.length(); + } + + is->offset -= length; + assert( memcmp( &is->patItem->data[is->offset], data, length ) == 0 ); +} + +void inputStreamPatternUndoConsumeLangEl( SourceStream *is ) +{ + inputStreamPatternBackup( is ); + is->offset = is->patItem->data.length(); +} + +int inputStreamPatternConsumeData( SourceStream *is, int length ) +{ + debug( REALM_INPUT, "consuming %ld bytes\n", length ); + + int consumed = 0; + + while ( true ) { + if ( is->patItem == 0 ) + break; + + int avail = is->patItem->data.length() - is->offset; + + if ( length >= avail ) { + /* Read up to the end of the data. Advance the + * pattern item. 
*/ + is->patItem = is->patItem->next; + is->offset = 0; + + length -= avail; + consumed += avail; + + if ( length == 0 ) + break; + } + else { + is->offset += length; + consumed += length; + break; + } + } + + return consumed; +} + +int inputStreamPatternUndoConsumeData( SourceStream *is, const char *data, int length ) +{ + is->offset -= length; + return length; +} + +extern "C" void initPatternFuncs() +{ + memset( &patternFuncs, 0, sizeof(SourceFuncs) ); + + patternFuncs.getData = &inputStreamPatternGetData; + patternFuncs.consumeData = &inputStreamPatternConsumeData; + patternFuncs.undoConsumeData = &inputStreamPatternUndoConsumeData; + + patternFuncs.consumeLangEl = &inputStreamPatternGetLangEl; + patternFuncs.undoConsumeLangEl = &inputStreamPatternUndoConsumeLangEl; +} + + +/* + * Replacement + */ + +SourceStream *newSourceStreamRepl( Replacement *replacement ) +{ + SourceStream *is = (SourceStream*)malloc(sizeof(SourceStream)); + memset( is, 0, sizeof(SourceStream) ); + is->handlesLine = true; + is->replacement = replacement; + is->replItem = replacement->list->head; + is->funcs = &replFuncs; + return is; +} + +LangEl *inputStreamReplGetLangEl( SourceStream *is, long *bindId, char **data, long *length ) +{ + LangEl *klangEl = is->replItem->type == ReplItem::ExprType ? 
+ is->replItem->langEl : is->replItem->factor->langEl; + *bindId = is->replItem->bindId; + + *data = 0; + *length = 0; + is->line = is->replItem->loc.line; + + if ( is->replItem->type == ReplItem::FactorType ) { + if ( is->replItem->factor->typeRef->pdaLiteral != 0 ) { + bool unusedCI; + prepareLitString( is->replItem->data, unusedCI, + is->replItem->factor->typeRef->pdaLiteral->token.data, + is->replItem->factor->typeRef->pdaLiteral->token.loc ); + + *data = is->replItem->data; + *length = is->replItem->data.length(); + } + } + + is->replItem = is->replItem->next; + is->offset = 0; + return klangEl; +} + +int inputStreamReplGetData( SourceStream *is, int skip, char *dest, int length, int *copied ) +{ + *copied = 0; + + ReplItem *buf = is->replItem; + int offset = is->offset; + + while ( true ) { + if ( buf == 0 ) + return INPUT_EOD; + + if ( buf->type == ReplItem::ExprType || buf->type == ReplItem::FactorType ) + return INPUT_LANG_EL; + + if ( offset == 0 ) + is->line = buf->loc.line; + + assert ( buf->type == ReplItem::InputText ); + int avail = buf->data.length() - offset; + + if ( avail > 0 ) { + /* The source data from the current buffer. */ + char *src = &buf->data[offset]; + int slen = avail <= length ? avail : length; + + /* Need to skip? */ + if ( skip > 0 && slen <= skip ) { + /* Skipping the the whole source. */ + skip -= slen; + } + else { + /* Either skip is zero, or less than slen. Skip goes to zero. + * Some data left over, copy it. 
*/ + src += skip; + slen -= skip; + skip = 0; + + memcpy( dest, src, slen ) ; + *copied += slen; + break; + } + } + + buf = buf->next; + offset = 0; + } + + return INPUT_DATA; +} + +void inputStreamReplBackup( SourceStream *is ) +{ + if ( is->replItem == 0 ) + is->replItem = is->replacement->list->tail; + else + is->replItem = is->replItem->prev; +} + +void inputStreamReplPushBackBuf( SourceStream *is, RunBuf *runBuf ) +{ + char *data = runBuf->data + runBuf->offset; + long length = runBuf->length; + + if ( colm_log_parse ) { + cerr << "push back data: "; + cerr.write( data, length ); + cerr << endl; + } + + if ( length == 0 ) + return; + + /* While pushing back past the current pattern item start. */ + while ( length > is->offset ) { + length -= is->offset; + if ( is->offset > 0 ) + assert( memcmp( is->replItem->data, data-length, is->offset ) == 0 ); + inputStreamReplBackup( is ); + is->offset = is->replItem->data.length(); + } + + is->offset -= length; + assert( memcmp( &is->replItem->data[is->offset], data, length ) == 0 ); +} + +void inputStreamReplUndoConsumeLangEl( SourceStream *is ) +{ + inputStreamReplBackup( is ); + is->offset = is->replItem->data.length(); +} + +int inputStreamReplConsumeData( SourceStream *is, int length ) +{ + int consumed = 0; + + while ( true ) { + if ( is->replItem == 0 ) + break; + + int avail = is->replItem->data.length() - is->offset; + + if ( length >= avail ) { + /* Read up to the end of the data. Advance the + * pattern item. 
*/ + is->replItem = is->replItem->next; + is->offset = 0; + + length -= avail; + consumed += avail; + + if ( length == 0 ) + break; + } + else { + is->offset += length; + consumed += length; + break; + } + } + + return consumed; +} + +int inputStreamReplUndoConsumeData( SourceStream *is, const char *data, int length ) +{ + is->offset -= length; + return length; +} + +extern "C" void initReplFuncs() +{ + memset( &replFuncs, 0, sizeof(SourceFuncs) ); + + replFuncs.getData = &inputStreamReplGetData; + replFuncs.consumeData = &inputStreamReplConsumeData; + replFuncs.undoConsumeData = &inputStreamReplUndoConsumeData; + + replFuncs.consumeLangEl = &inputStreamReplGetLangEl; + replFuncs.undoConsumeLangEl = &inputStreamReplUndoConsumeLangEl; +} + +void sendNamedLangEl( Program *prg, Tree **sp, PdaRun *pdaRun, FsmRun *fsmRun, InputStream *inputStream ) +{ + /* All three set by consumeLangEl. */ + long bindId; + char *data; + long length; + + LangEl *klangEl = consumeLangEl( inputStream, &bindId, &data, &length ); + + #ifdef COLM_LOG_PARSE + if ( colm_log_parse ) { + cerr << "named langEl: " << prg->rtd->lelInfo[klangEl->id].name << endl; + } + #endif + + /* Copy the token data. */ + Head *tokdata = 0; + if ( data != 0 ) + tokdata = stringAllocFull( prg, data, length ); + + Kid *input = makeTokenWithData( prg, pdaRun, fsmRun, inputStream, klangEl->id, tokdata ); + + incrementSteps( pdaRun ); + + ParseTree *parseTree = parseTreeAllocate( prg ); + parseTree->id = input->tree->id; + parseTree->flags |= PF_NAMED; + parseTree->shadow = input; + + if ( bindId > 0 ) + pushBinding( pdaRun, parseTree ); + + pdaRun->parseInput = parseTree; +} + +void initBindings( PdaRun *pdaRun ) +{ + /* Bindings are indexed at 1. Need a no-binding. */ + pdaRun->bindings = new Bindings; + pdaRun->bindings->push(0); +} + +void pushBinding( PdaRun *pdaRun, ParseTree *parseTree ) +{ + /* If the item is bound then store it in the bindings array. 
*/ + pdaRun->bindings->push( parseTree ); +} + +void popBinding( PdaRun *pdaRun, ParseTree *parseTree ) +{ + ParseTree *lastBound = pdaRun->bindings->top(); + if ( lastBound == parseTree ) + pdaRun->bindings->pop(); +} diff --git a/src/debug.c b/src/debug.c new file mode 100644 index 00000000..8efaf510 --- /dev/null +++ b/src/debug.c @@ -0,0 +1,78 @@ +/* + * Copyright 2010-2012 Adrian Thurston <thurston@complang.org> + */ + +/* This file is part of Colm. + * + * Colm is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Colm is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Colm; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <debug.h> +#include <stdarg.h> +#include <stdio.h> +#include <stdlib.h> + +long colmActiveRealm = 0; +const char *colmRealmNames[REALMS] = + { + "BYTECODE", + "PARSE", + "MATCH", + "COMPILE", + "POOL", + "PRINT", + "INPUT", + "SCAN", + }; + +int _debug( long realm, const char *fmt, ... ) +{ + int result = 0; + if ( colmActiveRealm & realm ) { + /* Compute the index by shifting. */ + int ind = 0; + while ( (realm & 0x1) != 0x1 ) { + realm >>= 1; + ind += 1; + } + + fprintf( stderr, "%s: ", colmRealmNames[ind] ); + va_list args; + va_start( args, fmt ); + result = vfprintf( stderr, fmt, args ); + va_end( args ); + } + + return result; +} + +void fatal( const char *fmt, ... 
) +{ + va_list args; + fprintf( stderr, "fatal: " ); + va_start( args, fmt ); + vfprintf( stderr, fmt, args ); + va_end( args ); + exit(1); +} + +void message( const char *fmt, ... ) +{ + va_list args; + fprintf( stderr, "message: " ); + va_start( args, fmt ); + vfprintf( stderr, fmt, args ); + va_end( args ); +} diff --git a/src/debug.h b/src/debug.h new file mode 100644 index 00000000..3fd9bb8e --- /dev/null +++ b/src/debug.h @@ -0,0 +1,58 @@ +/* + * Copyright 2010-2012 Adrian Thurston <thurston@complang.org> + */ + +/* This file is part of Colm. + * + * Colm is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Colm is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Colm; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifdef __cplusplus +extern "C" { +#endif + +#include "config.h" + +void fatal( const char *fmt, ... ); + +#ifdef DEBUG +#define debug( realm, ... ) _debug( realm, __VA_ARGS__ ) +#define check_realm( realm ) _check_realm( realm ) +#else +#define debug( realm, ... ) +#define check_realm( realm ) +#endif + +int _debug( long realm, const char *fmt, ... ); + +void message( const char *fmt, ... 
); + +#define REALM_BYTECODE 0x00000001 +#define REALM_PARSE 0x00000002 +#define REALM_MATCH 0x00000004 +#define REALM_COMPILE 0x00000008 +#define REALM_POOL 0x00000010 +#define REALM_PRINT 0x00000020 +#define REALM_INPUT 0x00000040 +#define REALM_SCAN 0x00000080 + +#define REALMS 32 + +extern long colmActiveRealm; +extern const char *colmRealmNames[REALMS]; + +#ifdef __cplusplus +} +#endif diff --git a/src/declare.cc b/src/declare.cc new file mode 100644 index 00000000..167fe050 --- /dev/null +++ b/src/declare.cc @@ -0,0 +1,383 @@ +/* + * Copyright 2012 Adrian Thurston <thurston@complang.org> + */ + +/* This file is part of Colm. + * + * Colm is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Colm is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Colm; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "bytecode.h" +#include "parsedata.h" +#include "fsmrun.h" +#include <iostream> +#include <assert.h> + +LangEl *declareLangEl( Compiler *pd, Namespace *nspace, const String &data, LangEl::Type type ) +{ + /* If the id is already in the dict, it will be placed in last found. If + * it is not there then it will be inserted and last found will be set to it. */ + TypeMapEl *inDict = nspace->typeMap.find( data ); + if ( inDict != 0 ) + error() << "'" << data << "' already defined as something else" << endp; + + /* Language element not there. Make the new lang el and insert.. 
*/ + LangEl *langEl = new LangEl( nspace, data, type ); + TypeMapEl *typeMapEl = new TypeMapEl( data, langEl ); + nspace->typeMap.insert( typeMapEl ); + pd->langEls.append( langEl ); + + return langEl; +} + +/* Does not map the new language element. */ +LangEl *addLangEl( Compiler *pd, Namespace *nspace, const String &data, LangEl::Type type ) +{ + LangEl *langEl = new LangEl( nspace, data, type ); + pd->langEls.append( langEl ); + return langEl; +} + +void declareTypeAlias( Compiler *pd, Namespace *nspace, const String &data, TypeRef *typeRef ) +{ + /* If the id is already in the dict, it will be placed in last found. If + * it is not there then it will be inserted and last found will be set to it. */ + TypeMapEl *inDict = nspace->typeMap.find( data ); + if ( inDict != 0 ) + error() << "'" << data << "' already defined as something else" << endp; + + /* Language element not there. Make the new lang el and insert.. */ + TypeMapEl *typeMapEl = new TypeMapEl( data, typeRef ); + nspace->typeMap.insert( typeMapEl ); +} + +LangEl *findType( Compiler *pd, Namespace *nspace, const String &data ) +{ + /* If the id is already in the dict, it will be placed in last found. If + * it is not there then it will be inserted and last found will be set to it. */ + TypeMapEl *inDict = nspace->typeMap.find( data ); + + if ( inDict == 0 ) + error() << "'" << data << "' not declared as anything" << endp; + + return inDict->value; +} + + +void Compiler::declareBaseLangEls() +{ + /* Order here is important because we make assumptions about the inbuild + * language elements in the runtime. Note tokens are have identifiers set + * in an initial pass. */ + + /* Make a "_notoken" language element. This element is used when a + * generation action fails to generate anything, but there is reverse code + * that needs to be associated with a language element. This allows us to + * always associate reverse code with the first language element produced + * after a generation action. 
*/ + noTokenLangEl = declareLangEl( this, rootNamespace, "_notoken", LangEl::Term ); + noTokenLangEl->ignore = true; + + /* Make the "stream" language element */ + ptrLangEl = declareLangEl( this, rootNamespace, "ptr", LangEl::Term ); + boolLangEl = declareLangEl( this, rootNamespace, "bool", LangEl::Term ); + intLangEl = declareLangEl( this, rootNamespace, "int", LangEl::Term ); + strLangEl = declareLangEl( this, rootNamespace, "str", LangEl::Term ); + streamLangEl = declareLangEl( this, rootNamespace, "stream", LangEl::Term ); + inputLangEl = declareLangEl( this, rootNamespace, "accum_stream", LangEl::Term ); + ignoreLangEl = declareLangEl( this, rootNamespace, "il", LangEl::Term ); + + /* Make the EOF language element. */ + eofLangEl = 0; + + /* Make the "any" language element */ + anyLangEl = declareLangEl( this, rootNamespace, "any", LangEl::NonTerm ); +} + + +void Compiler::addProdRedObjectVar( ObjectDef *localFrame, LangEl *nonTerm ) +{ + UniqueType *prodNameUT = findUniqueType( TYPE_TREE, nonTerm ); + TypeRef *typeRef = new TypeRef( InputLoc(), prodNameUT ); + ObjField *el = new ObjField( InputLoc(), typeRef, "lhs" ); + + el->isLhsEl = true; + + initLocalInstructions( el ); + + localFrame->insertField( el->name, el ); +} + +void Compiler::addProdLHSLoad( Definition *prod, CodeVect &code, long &insertPos ) +{ + ObjField *lhsField = prod->redBlock->localFrame->findField("lhs"); + assert( lhsField != 0 ); + + CodeVect loads; + if ( lhsField->beenReferenced ) { + loads.append( IN_INIT_LHS_EL ); + loads.appendHalf( lhsField->offset ); + } + + code.insert( insertPos, loads ); + insertPos += loads.length(); +} + +void Compiler::addPushBackLHS( Definition *prod, CodeVect &code, long &insertPos ) +{ + CodeBlock *block = prod->redBlock; + + /* If the lhs tree is dirty then we will need to save off the old lhs + * before it gets modified. We want to avoid this for attribute + * modifications. The computation of dirtyTree should deal with this for + * us. 
*/ + ObjField *lhsField = block->localFrame->findField("lhs"); + assert( lhsField != 0 ); + + if ( lhsField->beenReferenced ) { + code.append( IN_STORE_LHS_EL ); + code.appendHalf( lhsField->offset ); + } +} + +void Compiler::addProdRHSVars( ObjectDef *localFrame, ProdElList *prodElList ) +{ + long position = 1; + for ( ProdElList::Iter rhsEl = *prodElList; rhsEl.lte(); rhsEl++, position++ ) { + if ( rhsEl->type == ProdEl::ReferenceType ) { + /* Use an offset of zero. For frame objects we compute the offset on + * demand. */ + String name( 8, "r%d", position ); + ObjField *el = new ObjField( InputLoc(), rhsEl->typeRef, name ); + rhsEl->objField = el; + + /* Right hand side elements are constant. */ + el->isConst = true; + el->isRhsEl = true; + + /* Only ever fetch for reading since they are constant. */ + el->inGetR = IN_GET_LOCAL_R; + + localFrame->insertField( el->name, el ); + } + } +} + +void Compiler::addProdRHSLoads( Definition *prod, CodeVect &code, long &insertPos ) +{ + CodeVect loads; + long elPos = 0; + for ( ProdElList::Iter rhsEl = *prod->prodElList; rhsEl.lte(); rhsEl++, elPos++ ) { + if ( rhsEl->type == ProdEl::ReferenceType ) { + if ( rhsEl->objField->beenReferenced ) { + loads.append ( IN_INIT_RHS_EL ); + loads.appendHalf( elPos ); + loads.appendHalf( rhsEl->objField->offset ); + } + } + } + + /* Insert and update the insert position. */ + code.insert( insertPos, loads ); + insertPos += loads.length(); +} + +void GenericType::declare( Compiler *pd, Namespace *nspace ) +{ + //std::cout << "generic " << g->name << std::endl; + + LangEl *langEl = declareLangEl( pd, nspace, name, LangEl::NonTerm ); + + /* Add one empty production. 
*/ + ProdElList *emptyList = new ProdElList; + //addProduction( g->loc, langEl, emptyList, false, 0, 0 ); + + { + LangEl *prodName = langEl; + assert( prodName->type == LangEl::NonTerm ); + + Definition *newDef = new Definition( InputLoc(), prodName, + emptyList, false, 0, + pd->prodList.length(), prodName->defList.length(), + Definition::Production ); + + prodName->defList.append( newDef ); + pd->prodList.append( newDef ); + newDef->predOf = 0; + } + + langEl->generic = this; + this->langEl = langEl; +} + +void Namespace::declare( Compiler *pd ) +{ + for ( GenericList::Iter g = genericList; g.lte(); g++ ) + g->declare( pd, this ); + + for ( LiteralDict::Iter l = literalDict; l.lte(); l++ ) { + if ( l->value->dupOf != 0 ) { + /* Duplicate of another. Use the lang el of that token. */ + assert( l->value->dupOf->tdLangEl != 0 ); + l->value->tdLangEl = l->value->dupOf->tdLangEl; + } + else { + if ( l->value->isZero ) { + l->value->tdLangEl = l->value->tokenRegion->ciLel; + assert( l->value->tokenRegion->ciLel != 0 ); + } + else { + /* Original. Create a token for the literal. 
*/ + LangEl *newLangEl = declareLangEl( pd, this, l->value->name, LangEl::Term ); + + newLangEl->lit = l->value->literal; + newLangEl->isLiteral = true; + newLangEl->tokenDef = l->value; + + l->value->tdLangEl = newLangEl; + + if ( l->value->noPreIgnore ) + newLangEl->noPreIgnore = true; + if ( l->value->noPostIgnore ) + newLangEl->noPostIgnore = true; + } + } + } + + for ( ContextDefList::Iter c = contextDefList; c.lte(); c++ ) { + LangEl *lel = declareLangEl( pd, this, c->name, LangEl::NonTerm ); + ProdElList *emptyList = new ProdElList; + //addProduction( c->context->loc, c->name, emptyList, false, 0, 0 ); + + { + LangEl *prodName = lel; + assert( prodName->type == LangEl::NonTerm ); + + Definition *newDef = new Definition( loc, prodName, + emptyList, false, 0, + pd->prodList.length(), prodName->defList.length(), + Definition::Production ); + + prodName->defList.append( newDef ); + pd->prodList.append( newDef ); + newDef->predOf = 0; + + /* If the token has the same name as the region it is in, then also + * insert it into the symbol map for the parent region. */ + if ( strcmp( c->name, this->name ) == 0 ) { + /* Insert the name into the top of the region stack after popping the + * region just created. We need it in the parent. */ + TypeMapEl *typeMapEl = new TypeMapEl( c->name, prodName ); + this->parentNamespace->typeMap.insert( typeMapEl ); + } + } + + c->context->lel = lel; + lel->contextDef = c->context; + lel->objectDef = c->context->contextObjDef; + } + + for ( TokenDefListNs::Iter t = tokenDefList; t.lte(); t++ ) { + /* Literals already taken care of. */ + if ( ! t->isLiteral ) { + if ( t->dupOf != 0 ) { + /* Duplicate of another. Use the lang el of that token. */ + assert( t->dupOf->tdLangEl != 0 ); + t->tdLangEl = t->dupOf->tdLangEl; + } + else { + /* Create the token. 
*/ + LangEl *tokEl = declareLangEl( pd, this, t->name, LangEl::Term ); + tokEl->ignore = t->ignore; + tokEl->transBlock = t->codeBlock; + tokEl->objectDef = t->objectDef; + tokEl->contextIn = t->contextIn; + tokEl->tokenDef = t; + + if ( t->noPreIgnore ) + tokEl->noPreIgnore = true; + if ( t->noPostIgnore ) + tokEl->noPostIgnore = true; + + t->tdLangEl = tokEl; + } + } + } + + for ( NtDefList::Iter n = ntDefList; n.lte(); n++ ) { + /* Get the language element. */ + LangEl *langEl = declareLangEl( pd, this, n->name, LangEl::NonTerm ); + //$$->langEl = langEl; + + /* Get the language element. */ + langEl->objectDef = n->objectDef; + langEl->reduceFirst = n->reduceFirst; + langEl->contextIn = n->contextIn; + langEl->defList.transfer( *n->defList ); + + for ( LelDefList::Iter d = langEl->defList; d.lte(); d++ ) { + d->prodName = langEl; + + if ( d->redBlock != 0 ) { + pd->addProdRedObjectVar( d->redBlock->localFrame, langEl ); + pd->addProdRHSVars( d->redBlock->localFrame, d->prodElList ); + } + + /* References to the reduce item. */ + } + } + + for ( TypeAliasList::Iter ta = typeAliasList; ta.lte(); ta++ ) + declareTypeAlias( pd, this, ta->name, ta->typeRef ); + + /* Go into child aliases. */ + for ( NamespaceVect::Iter c = childNamespaces; c.lte(); c++ ) + (*c)->declare( pd ); +} + +void Compiler::setPrecedence() +{ + for ( PredDeclList::Iter predDecl = predDeclList; predDecl != 0; predDecl++ ) { + predDecl->typeRef->lookupType( this ); + + LangEl *langEl = predDecl->typeRef->uniqueType->langEl; + langEl->predType = predDecl->predType; + langEl->predValue = predDecl->predValue; + } +} + +/* + * Type Declaration Root. + */ +void Compiler::typeDeclaration() +{ + /* These must be declared first, since the runtime assumes their identifiers. */ + declareBaseLangEls(); + + makeIgnoreCollectors(); + + rootNamespace->declare( this ); + + /* Fill any empty scanners with a default token. 
*/ + initEmptyScanners(); + + /* Create the default scanner which will return single characters for us + * when we have no other scanner */ + createDefaultScanner(); + + initUniqueTypes(); + + setPrecedence(); +} diff --git a/src/defs.h.in b/src/defs.h.in new file mode 100644 index 00000000..06a3f9df --- /dev/null +++ b/src/defs.h.in @@ -0,0 +1,49 @@ +/* + * Copyright 2001 Adrian Thurston <thurston@complang.org> + */ + +/* This file is part of Ragel. + * + * Ragel is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Ragel is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Ragel; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _CONFIG_H +#define _CONFIG_H + +/* Configuration */ +#undef COLM_LOG +#undef COLM_LOG_BYTECODE +#undef COLM_LOG_PARSE +#undef COLM_LOG_MATCH +#undef COLM_LOG_COMPILE + +/* If COLM_LOG is defined then turn on all logging options. */ +#ifdef COLM_LOG +#define COLM_LOG_BYTECODE 1 +#define COLM_LOG_PARSE 1 +#define COLM_LOG_MATCH 1 +#define COLM_LOG_COMPILE 1 +#endif + +extern int colm_log_bytecode; +extern int colm_log_parse; +extern int colm_log_match; +extern int colm_log_compile; +extern int colm_log_conds; + +/* The size of `long', as computed by sizeof. 
*/ +#undef SIZEOF_LONG + +#endif /* _CONFIG_H */ diff --git a/src/dotgen.cc b/src/dotgen.cc new file mode 100644 index 00000000..e4474958 --- /dev/null +++ b/src/dotgen.cc @@ -0,0 +1,113 @@ +/* + * Copyright 2006-2012 Adrian Thurston <thurston@complang.org> + */ + +/* This file is part of Colm. + * + * Colm is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Colm is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Colm; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + + +#include "global.h" +#include "parsedata.h" + +using namespace std; + + +void Compiler::writeTransList( PdaState *state ) +{ + for ( TransMap::Iter trans = state->transMap; trans.lte(); trans++ ) { + /* Write out the from and to states. */ + out << "\t" << state->stateNum << " -> " << trans->value->toState->stateNum; + + /* Begin the label. 
*/ + out << " [ label = \""; + long key = trans->key; + LangEl *lel = langElIndex[key]; + if ( lel != 0 ) + out << lel->name; + else + out << (char)key; + + if ( trans->value->actions.length() > 0 ) { + out << " / "; + for ( ActDataList::Iter act = trans->value->actions; act.lte(); act++ ) { + switch ( *act & 0x3 ) { + case 1: + out << "S(" << trans->value->actOrds[act.pos()] << ")"; + break; + case 2: { + out << "R(" << prodIdIndex[(*act >> 2)]->data << + ", " << trans->value->actOrds[act.pos()] << ")"; + break; + } + case 3: { + out << "SR(" << prodIdIndex[(*act >> 2)]->data << + ", " << trans->value->actOrds[act.pos()] << ")"; + break; + }} + if ( ! act.last() ) + out << ", "; + } + } + + out << "\" ];\n"; + } +} + +void Compiler::writeDotFile( PdaGraph *graph ) +{ + out << + "digraph " << parserName << " {\n" + " rankdir=LR;\n" + " ranksep=\"0\"\n" + " nodesep=\"0.25\"\n" + "\n"; + + /* Define the psuedo states. Transitions will be done after the states + * have been defined as either final or not final. */ + out << + " node [ shape = point ];\n"; + + for ( int i = 0; i < graph->entryStateSet.length(); i++ ) + out << "\tENTRY" << i << " [ label = \"\" ];\n"; + + out << + "\n" + " node [ shape = circle, fixedsize = true, height = 0.6 ];\n"; + + /* Walk the states. */ + for ( PdaStateList::Iter st = graph->stateList; st.lte(); st++ ) + out << " " << st->stateNum << " [ label = \"" << st->stateNum << "\" ];\n"; + + out << "\n"; + + /* Walk the states. */ + for ( PdaStateList::Iter st = graph->stateList; st.lte(); st++ ) + writeTransList( st ); + + /* Start state and other entry points. 
*/ + for ( PdaStateSet::Iter st = graph->entryStateSet; st.lte(); st++ ) + out << "\tENTRY" << st.pos() << " -> " << (*st)->stateNum << " [ label = \"\" ];\n"; + + out << + "}\n"; +} + +void Compiler::writeDotFile() +{ + writeDotFile( pdaGraph ); +} + diff --git a/src/dotgen.h b/src/dotgen.h new file mode 100644 index 00000000..d05a2410 --- /dev/null +++ b/src/dotgen.h @@ -0,0 +1,51 @@ +/* + * Copyright 2001-2012 Adrian Thurston <thurston@complang.org> + */ + +/* This file is part of Colm. + * + * Colm is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Colm is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Colm; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _GVDOTGEN_H +#define _GVDOTGEN_H + +#include <iostream> + +#if 0 + +class GraphvizDotGen : public CodeGenData +{ +public: + GraphvizDotGen( ostream &out ) : CodeGenData(out) { } + + /* Print an fsm to out stream. */ + void writeTransList( RedState *state ); + void writeDotFile( ); + + virtual void finishRagelDef(); + +private: + /* Writing labels and actions. 
*/ + std::ostream &ONCHAR( Key lowKey, Key highKey ); + std::ostream &TRANS_ACTION( RedState *fromState, RedTrans *trans ); + std::ostream &ACTION( RedAction *action ); + std::ostream &KEY( Key key ); +}; + +#endif + + +#endif /* _GVDOTGEN_H */ diff --git a/src/exports.cc b/src/exports.cc new file mode 100644 index 00000000..f5153330 --- /dev/null +++ b/src/exports.cc @@ -0,0 +1,285 @@ +/* + * Copyright 2006-2012 Adrian Thurston <thurston@complang.org> + */ + +/* This file is part of Colm. + * + * Colm is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Colm is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Colm; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "parsedata.h" +#include "fsmcodegen.h" +#include "redfsm.h" +#include "bstmap.h" +#include "fsmrun.h" +#include "debug.h" +#include <sstream> +#include <string> + +using std::ostream; +using std::ostringstream; +using std::string; +using std::cerr; +using std::endl; + +void Compiler::openNameSpace( ostream &out, Namespace *nspace ) +{ + if ( nspace == defaultNamespace || nspace == rootNamespace ) + return; + + openNameSpace( out, nspace->parentNamespace ); + out << "namespace " << nspace->name << " { "; +} + +void Compiler::closeNameSpace( ostream &out, Namespace *nspace ) +{ + if ( nspace == defaultNamespace || nspace == rootNamespace ) + return; + + openNameSpace( out, nspace->parentNamespace ); + out << " }"; +} + +void Compiler::generateExports() +{ + ostream &out = 
*outStream; + + out << + "#ifndef _EXPORTS_H\n" + "#define _EXPORTS_H\n" + "\n" + "#include <colm/colm.h>\n" + "#include <string>\n" + "\n"; + + out << + "inline void appendString( ColmPrintArgs *args, const char *data, int length )\n" + "{\n" + " std::string *str = (std::string*)args->arg;\n" + " *str += std::string( data, length );\n" + "}\n" + "\n"; + + out << + "inline std::string printTreeStr( ColmProgram *prg, ColmTree *tree, bool trim )\n" + "{\n" + " std::string str;\n" + " ColmPrintArgs printArgs = { &str, 1, 0, trim, &appendString, \n" + " &printNull, &printTermTree, &printNull };\n" + " printTreeArgs( prg, vm_root(prg), &printArgs, tree );\n" + " return str;\n" + "}\n" + "\n"; + + /* Declare. */ + for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) { + if ( lel->isEOF ) { + out << "// isEOF\n"; + continue; + } + if ( lel->tokenDef != 0 && lel->tokenDef->tokenRegion != 0 && lel->tokenDef->tokenRegion->isTokenOnly ) { + out << "// isTokenOnly\n"; + continue; + } + if ( lel->tokenDef != 0 && lel->tokenDef->tokenRegion != 0 && lel->tokenDef->tokenRegion->isIgnoreOnly ) { + out << "// isIgnoreOnly\n"; + continue; + } + if ( lel->tokenDef != 0 && lel->tokenDef->tokenRegion != 0 && lel->tokenDef->tokenRegion->isCiOnly ) { + out << "// isCiOnly\n"; + continue; + } + if ( lel->ciRegion != 0 ) { + out << "// ciRegion != 0\n"; + continue; + } + openNameSpace( out, lel->nspace ); + out << "struct " << lel->fullName << ";"; + closeNameSpace( out, lel->nspace ); + out << "\n"; + } + + /* Class definitions. 
*/ + for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) { + if ( lel->isEOF ) { + out << "// isTokenOnly\n"; + continue; + } + if ( lel->tokenDef != 0 && lel->tokenDef->tokenRegion != 0 && lel->tokenDef->tokenRegion->isTokenOnly ) { + out << "// isTokenOnly\n"; + continue; + } + if ( lel->tokenDef != 0 && lel->tokenDef->tokenRegion != 0 && lel->tokenDef->tokenRegion->isIgnoreOnly ) { + out << "// isIgnoreOnly\n"; + continue; + } + if ( lel->tokenDef != 0 && lel->tokenDef->tokenRegion != 0 && lel->tokenDef->tokenRegion->isCiOnly ) { + out << "// isCiOnly\n"; + continue; + } + if ( lel->ciRegion != 0 ) { + out << "// ciRegion != 0\n"; + continue; + } + + openNameSpace( out, lel->nspace ); + out << "struct " << lel->fullName << "\n"; + out << "{\n"; + out << " std::string text() { return printTreeStr( prg, tree, true ); }\n"; + out << " std::string text_notrim() { return printTreeStr( prg, tree, false ); }\n"; + out << " operator ColmTree *() { return tree; }\n"; + out << " ColmProgram *prg;\n"; + out << " ColmTree *tree;\n"; + + if ( mainReturnUT != 0 && mainReturnUT->langEl == lel ) { + out << " " << lel->fullName << "( ColmProgram *prg ) : prg(prg), tree(returnVal(prg)) {}\n"; + } + out << " " << lel->fullName << "( ColmProgram *prg, ColmTree *tree ) : prg(prg), tree(tree) {}\n"; + + if ( lel->objectDef != 0 && lel->objectDef->objFieldList != 0 ) { + ObjFieldList *objFieldList = lel->objectDef->objFieldList; + for ( ObjFieldList::Iter ofi = *objFieldList; ofi.lte(); ofi++ ) { + ObjField *field = ofi->value; + if ( field->useOffset && field->typeRef != 0 ) { + UniqueType *ut = field->typeRef->lookupType( this ); + + if ( ut != 0 && ut->typeId == TYPE_TREE ) { + out << " " << ut->langEl->refName << " " << field->name << "();\n"; + } + } + + if ( field->isRhsGet ) { + UniqueType *ut = field->typeRef->lookupType( this ); + + if ( ut != 0 && ut->typeId == TYPE_TREE ) { + out << " " << ut->langEl->refName << " " << field->name << "();\n"; + } + } + } + } + + if ( 
lel->isRepeat ) { + out << " " << "int end() { return repeatEnd( tree ); }\n"; + out << " " << lel->refName << " next();\n"; + out << " " << lel->repeatOf->refName << " value();\n"; + } + + if ( lel->isList ) { + out << " " << "int last() { return listLast( tree ); }\n"; + out << " " << lel->refName << " next();\n"; + out << " " << lel->repeatOf->refName << " value();\n"; + } + out << "};"; + closeNameSpace( out, lel->nspace ); + out << "\n"; + } + + for ( ObjFieldList::Iter of = *globalObjectDef->objFieldList; of.lte(); of++ ) { + ObjField *field = of->value; + if ( field->isExport ) { + UniqueType *ut = field->typeRef->lookupType(this); + if ( ut != 0 && ut->typeId == TYPE_TREE ) { + out << ut->langEl->refName << " " << field->name << "( ColmProgram *prg );\n"; + } + } + } + + out << "#endif\n"; +} + +void Compiler::generateExportsImpl() +{ + ostream &out = *outStream; + + if ( gblExportTo != 0 ) { + out << "#include \"" << gblExportTo << "\"\n"; + } + + /* Function implementations. */ + for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) { + if ( lel->objectDef != 0 && lel->objectDef->objFieldList != 0 ) { + ObjFieldList *objFieldList = lel->objectDef->objFieldList; + for ( ObjFieldList::Iter ofi = *objFieldList; ofi.lte(); ofi++ ) { + ObjField *field = ofi->value; + if ( field->useOffset && field->typeRef != 0 ) { + UniqueType *ut = field->typeRef->lookupType( this ); + + if ( ut != 0 && ut->typeId == TYPE_TREE ) { + out << ut->langEl->refName << " " << lel->declName << "::" << field->name << + "() { return " << ut->langEl->refName << + "( prg, getAttr( tree, " << field->offset << ") ); }\n"; + } + } + + if ( field->isRhsGet ) { + UniqueType *ut = field->typeRef->lookupType( this ); + + if ( ut != 0 && ut->typeId == TYPE_TREE ) { + out << ut->langEl->refName << " " << lel->declName << "::" << field->name << + "() { static int a[] = {"; + + /* Need to place the array computing the val. 
*/ + out << field->rhsVal.length(); + for ( Vector<RhsVal>::Iter rg = field->rhsVal; rg.lte(); rg++ ) { + out << ", " << rg->prodNum; + out << ", " << rg->childNum; + } + + out << "}; return " << ut->langEl->refName << + "( prg, getRhsVal( prg, tree, a ) ); }\n"; + } + } + } + } + + if ( lel->isRepeat ) { + out << lel->refName << " " << lel->declName << "::" << " next" + "() { return " << lel->refName << + "( prg, getRepeatNext( tree ) ); }\n"; + + out << lel->repeatOf->refName << " " << lel->declName << "::" << " value" + "() { return " << lel->repeatOf->refName << + "( prg, getRepeatVal( tree ) ); }\n"; + + } + + if ( lel->isList ) { + out << lel->refName << " " << lel->declName << "::" << " next" + "() { return " << lel->refName << + "( prg, getRepeatNext( tree ) ); }\n"; + + out << lel->repeatOf->refName << " " << lel->declName << "::" << " value" + "() { return " << lel->repeatOf->refName << + "( prg, getRepeatVal( tree ) ); }\n"; + } + } + + out << "\n"; + + for ( ObjFieldList::Iter of = *globalObjectDef->objFieldList; of.lte(); of++ ) { + ObjField *field = of->value; + if ( field->isExport ) { + UniqueType *ut = field->typeRef->lookupType(this); + if ( ut != 0 && ut->typeId == TYPE_TREE ) { + out << + ut->langEl->refName << " " << field->name << "(ColmProgram *prg)\n" + "{ return " << ut->langEl->refName << "( prg, getGlobal( prg, " << + field->offset << ") ); }\n"; + } + } + } +} + + diff --git a/src/fsmap.cc b/src/fsmap.cc new file mode 100644 index 00000000..a4c072b6 --- /dev/null +++ b/src/fsmap.cc @@ -0,0 +1,856 @@ +/* + * Copyright 2006-2012 Adrian Thurston <thurston@complang.org> + */ + +/* This file is part of Colm. + * + * Colm is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * Colm is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Colm; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "config.h" +#include "defs.h" +#include "fsmgraph.h" +#include <iostream> + +using std::cerr; +using std::endl; + +CondData *condData = 0; +KeyOps *keyOps = 0; + +/* Insert an action into an action table. */ +void ActionTable::setAction( int ordering, Action *action ) +{ + /* Multi-insert in case specific instances of an action appear in a + * transition more than once. */ + insertMulti( ordering, action ); +} + +/* Set all the action from another action table in this table. */ +void ActionTable::setActions( const ActionTable &other ) +{ + for ( ActionTable::Iter action = other; action.lte(); action++ ) + insertMulti( action->key, action->value ); +} + +void ActionTable::setActions( int *orderings, Action **actions, int nActs ) +{ + for ( int a = 0; a < nActs; a++ ) + insertMulti( orderings[a], actions[a] ); +} + +bool ActionTable::hasAction( Action *action ) +{ + for ( int a = 0; a < length(); a++ ) { + if ( data[a].value == action ) + return true; + } + return false; +} + +/* Insert an action into an action table. */ +void LmActionTable::setAction( int ordering, TokenDef *action ) +{ + /* Multi-insert in case specific instances of an action appear in a + * transition more than once. */ + insertMulti( ordering, action ); +} + +/* Set all the action from another action table in this table. 
*/ +void LmActionTable::setActions( const LmActionTable &other ) +{ + for ( LmActionTable::Iter action = other; action.lte(); action++ ) + insertMulti( action->key, action->value ); +} + +void ErrActionTable::setAction( int ordering, Action *action, int transferPoint ) +{ + insertMulti( ErrActionTableEl( action, ordering, transferPoint ) ); +} + +void ErrActionTable::setActions( const ErrActionTable &other ) +{ + for ( ErrActionTable::Iter act = other; act.lte(); act++ ) + insertMulti( ErrActionTableEl( act->action, act->ordering, act->transferPoint ) ); +} + +/* Insert a priority into this priority table. Looks out for priorities on + * duplicate keys. */ +void PriorTable::setPrior( int ordering, PriorDesc *desc ) +{ + PriorEl *lastHit = 0; + PriorEl *insed = insert( PriorEl(ordering, desc), &lastHit ); + if ( insed == 0 ) { + /* This already has a priority on the same key as desc. Overwrite the + * priority if the ordering is larger (later in time). */ + if ( ordering >= lastHit->ordering ) + *lastHit = PriorEl( ordering, desc ); + } +} + +/* Set all the priorities from a priorTable in this table. */ +void PriorTable::setPriors( const PriorTable &other ) +{ + /* Loop src priorities once to overwrite duplicates. */ + PriorTable::Iter priorIt = other; + for ( ; priorIt.lte(); priorIt++ ) + setPrior( priorIt->ordering, priorIt->desc ); +} + +/* Set the priority of starting transitions. Isolates the start state so it has + * no other entry points, then sets the priorities of all the transitions out + * of the start state. If the start state is final, then the outPrior of the + * start state is also set. The idea is that a machine that accepts the null + * string can still specify the starting trans prior for when it accepts the + * null word. */ +void FsmGraph::startFsmPrior( int ordering, PriorDesc *prior ) +{ + /* Make sure the start state has no other entry points. */ + isolateStartState(); + + /* Walk all transitions out of the start state. 
*/ + for ( TransList::Iter trans = startState->outList; trans.lte(); trans++ ) { + if ( trans->toState != 0 ) + trans->priorTable.setPrior( ordering, prior ); + } +} + +/* Set the priority of all transitions in a graph. Walks all transition lists + * and all def transitions. */ +void FsmGraph::allTransPrior( int ordering, PriorDesc *prior ) +{ + /* Walk the list of all states. */ + for ( StateList::Iter state = stateList; state.lte(); state++ ) { + /* Walk the out list of the state. */ + for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) { + if ( trans->toState != 0 ) + trans->priorTable.setPrior( ordering, prior ); + } + } +} + +/* Set the priority of all transitions that go into a final state. Note that if + * any entry states are final, we will not be setting the priority of any + * transitions that may go into those states in the future. The graph does not + * support pending in transitions in the same way pending out transitions are + * supported. */ +void FsmGraph::finishFsmPrior( int ordering, PriorDesc *prior ) +{ + /* Walk all final states. */ + for ( StateSet::Iter state = finStateSet; state.lte(); state++ ) { + /* Walk all in transitions of the final state. */ + for ( TransInList::Iter trans = (*state)->inList; trans.lte(); trans++ ) + trans->priorTable.setPrior( ordering, prior ); + } +} + +/* Set the priority of any future out transitions that may be made going out of + * this state machine. */ +void FsmGraph::leaveFsmPrior( int ordering, PriorDesc *prior ) +{ + /* Set priority in all final states. */ + for ( StateSet::Iter state = finStateSet; state.lte(); state++ ) + (*state)->outPriorTable.setPrior( ordering, prior ); +} + + +/* Set actions to execute on starting transitions. Isolates the start state + * so it has no other entry points, then adds to the transition functions + * of all the transitions out of the start state. If the start state is final, + * then the func is also added to the start state's out func list. 
The idea is
+ * that a machine that accepts the null string can execute a start func when it
+ * matches the null word, which can only be done when leaving the start/final
+ * state. Transitions whose toState is 0 are skipped. */
+void FsmGraph::startFsmAction( int ordering, Action *action )
+{
+	/* Make sure the start state has no other entry points. */
+	isolateStartState();
+
+	/* Walk the start state's transitions, setting functions. */
+	for ( TransList::Iter trans = startState->outList; trans.lte(); trans++ ) {
+		if ( trans->toState != 0 )
+			trans->actionTable.setAction( ordering, action );
+	}
+}
+
+/* Set functions to execute on all transitions. Walks the out lists of all
+ * states. Transitions whose toState is 0 are skipped. */
+void FsmGraph::allTransAction( int ordering, Action *action )
+{
+	/* Walk all states. */
+	for ( StateList::Iter state = stateList; state.lte(); state++ ) {
+		/* Walk the out list of the state. */
+		for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) {
+			if ( trans->toState != 0 )
+				trans->actionTable.setAction( ordering, action );
+		}
+	}
+}
+
+/* Specify functions to execute upon entering final states. If the start state
+ * is final we can't really specify a function to execute upon entering that
+ * final state the first time. So function really means whenever entering a
+ * final state from within the same fsm. */
+void FsmGraph::finishFsmAction( int ordering, Action *action )
+{
+	/* Walk all final states. */
+	for ( StateSet::Iter state = finStateSet; state.lte(); state++ ) {
+		/* Walk the final state's in list. */
+		for ( TransInList::Iter trans = (*state)->inList; trans.lte(); trans++ )
+			trans->actionTable.setAction( ordering, action );
+	}
+}
+
+/* Add functions to any future out transitions that may be made going out of
+ * this state machine. */
+void FsmGraph::leaveFsmAction( int ordering, Action *action )
+{
+	/* Insert the action in the outActionTable of all final states. */
+	for ( StateSet::Iter state = finStateSet; state.lte(); state++ )
+		(*state)->outActionTable.setAction( ordering, action );
+}
+
+/* Add functions to the longest match action table for constructing scanners.
+ * The entry is set on every transition in the in list of every final state. */
+void FsmGraph::longMatchAction( int ordering, TokenDef *lmPart )
+{
+	/* Walk all final states. */
+	for ( StateSet::Iter state = finStateSet; state.lte(); state++ ) {
+		/* Walk the final state's in list. */
+		for ( TransInList::Iter trans = (*state)->inList; trans.lte(); trans++ )
+			trans->lmActionTable.setAction( ordering, lmPart );
+	}
+}
+/* Make the out list of state cover every key from keyOps->minKey to
+ * keyOps->maxKey. Wherever no transition exists, a new transition with a
+ * null target (an error transition) is attached over the missing range.
+ * NOTE(review): the gap arithmetic presumes the out list is ordered by key
+ * with non-overlapping ranges -- confirm against TransList's invariants. */
+void FsmGraph::fillGaps( FsmState *state )
+{
+	if ( state->outList.length() == 0 ) {
+		/* No transitions at all: one error transition spans the whole range. */
+		attachNewTrans( state, 0, keyOps->minKey, keyOps->maxKey );
+	}
+	else {
+		TransList srcList;
+		srcList.transfer( state->outList );
+
+		/* The existing transitions were moved into srcList above and are
+		 * appended back to the out list one at a time, with fillers attached
+		 * in between. Check for a gap at the beginning. */
+		TransList::Iter trans = srcList, next;
+		if ( keyOps->minKey < trans->lowKey ) {
+			/* The filler ends one key below the first transition. */
+			Key highKey = trans->lowKey;
+			highKey.decrement();
+
+			attachNewTrans( state, 0, keyOps->minKey, highKey );
+		}
+
+		/* Remember the successor before appending, then put the first
+		 * transition back on the out list. */
+		next = trans.next();
+		state->outList.append( trans );
+
+		/* Keep the last high end. */
+		Key lastHigh = trans->highKey;
+
+		/* Loop each remaining source range. */
+		for ( trans = next; trans.lte(); trans = next ) {
+			/* Make the next key following the last range. */
+			Key nextKey = lastHigh;
+			nextKey.increment();
+
+			/* Check for a gap from last up to here. */
+			if ( nextKey < trans->lowKey ) {
+				/* Make the high end of the range that fills the gap. */
+				Key highKey = trans->lowKey;
+				highKey.decrement();
+
+				attachNewTrans( state, 0, nextKey, highKey );
+			}
+
+			/* Remember the successor before appending, then put this
+			 * transition back on the out list. */
+			next = trans.next();
+			state->outList.append( trans );
+
+			/* Keep the last high end. */
+			lastHigh = trans->highKey;
+		}
+
+		/* Now check for a gap on the end to fill. */
+		if ( lastHigh < keyOps->maxKey ) {
+			/* The final filler starts one key above the last covered key. */
+			lastHigh.increment();
+
+			attachNewTrans( state, 0, lastHigh, keyOps->maxKey );
+		}
+	}
+}
+/* Set a single action on every error transition (toState == 0) leaving
+ * state. Gaps are filled first so that uncovered keys get the action too. */
+void FsmGraph::setErrorAction( FsmState *state, int ordering, Action *action )
+{
+	/* Fill any gaps in the out list with an error transition. */
+	fillGaps( state );
+
+	/* Set error transitions in the transitions that go to error. */
+	for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) {
+		if ( trans->toState == 0 )
+			trans->actionTable.setAction( ordering, action );
+	}
+}
+/* Same as setErrorAction, but merges a whole action table (other) into each
+ * error transition leaving state. */
+void FsmGraph::setErrorActions( FsmState *state, const ActionTable &other )
+{
+	/* Fill any gaps in the out list with an error transition. */
+	fillGaps( state );
+
+	/* Set error transitions in the transitions that go to error. */
+	for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) {
+		if ( trans->toState == 0 )
+			trans->actionTable.setActions( other );
+	}
+}
+
+
+/* Give a target state for error transitions. Every transition leaving state
+ * with toState == 0 (after gap filling) is redirected to target and receives
+ * the nActs actions given by the parallel orderings and actions arrays. */
+void FsmGraph::setErrorTarget( FsmState *state, FsmState *target, int *orderings,
+		Action **actions, int nActs )
+{
+	/* Fill any gaps in the out list with an error transition. */
+	fillGaps( state );
+
+	/* Set error target in the transitions that go to error. */
+	for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) {
+		if ( trans->toState == 0 ) {
+			/* The trans goes to error, redirect it. */
+			redirectErrorTrans( trans->fromState, target, trans );
+			trans->actionTable.setActions( orderings, actions, nActs );
+		}
+	}
+}
+/* Move the pending error actions of state whose transferPoint equals the
+ * given one onto the state's error transitions (via setErrorAction) and drop
+ * them from errActionTable. */
+void FsmGraph::transferErrorActions( FsmState *state, int transferPoint )
+{
+	for ( int i = 0; i < state->errActionTable.length(); ) {
+		ErrActionTableEl *act = state->errActionTable.data + i;
+		if ( act->transferPoint == transferPoint ) {
+			/* Transfer the error action and remove it. The index is not
+			 * advanced here; presumably vremove shifts the following
+			 * entries down into slot i -- confirm. */
+			setErrorAction( state, act->ordering, act->action );
+			state->errActionTable.vremove( i );
+		}
+		else {
+			/* Not transferring and deleting, skip over the item. */
+			i += 1;
+		}
+	}
+}
+
+/* Set error actions in the start state. */
+void FsmGraph::startErrorAction( int ordering, Action *action, int transferPoint )
+{
+	/* Make sure the start state has no other entry points. */
+	isolateStartState();
+
+	/* Add the actions. */
+	startState->errActionTable.setAction( ordering, action, transferPoint );
+}
+
+/* Set error actions in all states. */
+void FsmGraph::allErrorAction( int ordering, Action *action, int transferPoint )
+{
+	/* Insert actions in the error action table of all states. */
+	for ( StateList::Iter state = stateList; state.lte(); state++ )
+		state->errActionTable.setAction( ordering, action, transferPoint );
+}
+
+/* Set error actions in final states. */
+void FsmGraph::finalErrorAction( int ordering, Action *action, int transferPoint )
+{
+	/* Add the action to the error table of final states. */
+	for ( StateSet::Iter state = finStateSet; state.lte(); state++ )
+		(*state)->errActionTable.setAction( ordering, action, transferPoint );
+}
+/* Set error actions in every state except the start state. */
+void FsmGraph::notStartErrorAction( int ordering, Action *action, int transferPoint )
+{
+	for ( StateList::Iter state = stateList; state.lte(); state++ ) {
+		if ( state != startState )
+			state->errActionTable.setAction( ordering, action, transferPoint );
+	}
+}
+/* Set error actions in every state that is not final. */
+void FsmGraph::notFinalErrorAction( int ordering, Action *action, int transferPoint )
+{
+	for ( StateList::Iter state = stateList; state.lte(); state++ ) {
+		if ( ! state->isFinState() )
+			state->errActionTable.setAction( ordering, action, transferPoint );
+	}
+}
+
+/* Set error actions in the states that are neither the start state nor final. 
*/
+void FsmGraph::middleErrorAction( int ordering, Action *action, int transferPoint )
+{
+	/* Visit every state; only those that are neither the start state nor
+	 * final receive the action. NOTE(review): an earlier comment here spoke
+	 * of isolating the start state, but no isolateStartState() call is made
+	 * in this function -- confirm that is intended. */
+	for ( StateList::Iter st = stateList; st.lte(); st++ ) {
+		if ( st != startState ) {
+			if ( ! st->isFinState() )
+				st->errActionTable.setAction( ordering, action, transferPoint );
+		}
+	}
+}
+
+/* Install an EOF action on the start state only. */
+void FsmGraph::startEOFAction( int ordering, Action *action )
+{
+	/* Isolate first so that the start state has no other entry points,
+	 * then record the action in its EOF table. */
+	isolateStartState();
+	startState->eofActionTable.setAction( ordering, action );
+}
+
+/* Install an EOF action on every state in the state list. */
+void FsmGraph::allEOFAction( int ordering, Action *action )
+{
+	StateList::Iter st = stateList;
+	while ( st.lte() ) {
+		st->eofActionTable.setAction( ordering, action );
+		st++;
+	}
+}
+
+/* Install an EOF action on every final state. */
+void FsmGraph::finalEOFAction( int ordering, Action *action )
+{
+	/* The final state set holds state pointers; record the action in the
+	 * EOF table of each one. */
+	for ( StateSet::Iter fin = finStateSet; fin.lte(); fin++ ) {
+		FsmState *finState = *fin;
+		finState->eofActionTable.setAction( ordering, action );
+	}
+}
+
+/* Install an EOF action on every state except the start state. */
+void FsmGraph::notStartEOFAction( int ordering, Action *action )
+{
+	StateList::Iter st = stateList;
+	while ( st.lte() ) {
+		if ( st != startState )
+			st->eofActionTable.setAction( ordering, action );
+		st++;
+	}
+}
+
+/* Install an EOF action on every state that is not final. */
+void FsmGraph::notFinalEOFAction( int ordering, Action *action )
+{
+	for ( StateList::Iter st = stateList; st.lte(); st++ ) {
+		/* Final states are left alone. */
+		if ( st->isFinState() )
+			continue;
+		st->eofActionTable.setAction( ordering, action );
+	}
+}
+
+/* Set EOF actions in the states that are neither the start state nor final. 
*/
+void FsmGraph::middleEOFAction( int ordering, Action *action )
+{
+	/* Only states that are neither the start state nor final are touched. */
+	for ( StateList::Iter st = stateList; st.lte(); st++ ) {
+		if ( st != startState ) {
+			if ( ! st->isFinState() )
+				st->eofActionTable.setAction( ordering, action );
+		}
+	}
+}
+
+/*
+ * Set To State Actions.
+ */
+
+/* Install a to-state action on the start state only. */
+void FsmGraph::startToStateAction( int ordering, Action *action )
+{
+	/* Isolate first so that the start state has no other entry points. */
+	isolateStartState();
+	startState->toStateActionTable.setAction( ordering, action );
+}
+
+/* Install a to-state action on every state in the state list. */
+void FsmGraph::allToStateAction( int ordering, Action *action )
+{
+	StateList::Iter st = stateList;
+	while ( st.lte() ) {
+		st->toStateActionTable.setAction( ordering, action );
+		st++;
+	}
+}
+
+/* Install a to-state action on every final state. */
+void FsmGraph::finalToStateAction( int ordering, Action *action )
+{
+	/* Record the action in the to-state table of each final state. */
+	for ( StateSet::Iter fin = finStateSet; fin.lte(); fin++ ) {
+		FsmState *finState = *fin;
+		finState->toStateActionTable.setAction( ordering, action );
+	}
+}
+
+/* Install a to-state action on every state except the start state. */
+void FsmGraph::notStartToStateAction( int ordering, Action *action )
+{
+	StateList::Iter st = stateList;
+	while ( st.lte() ) {
+		if ( st != startState )
+			st->toStateActionTable.setAction( ordering, action );
+		st++;
+	}
+}
+
+/* Install a to-state action on every state that is not final. */
+void FsmGraph::notFinalToStateAction( int ordering, Action *action )
+{
+	for ( StateList::Iter st = stateList; st.lte(); st++ ) {
+		/* Final states are left alone. */
+		if ( st->isFinState() )
+			continue;
+		st->toStateActionTable.setAction( ordering, action );
+	}
+}
+
+/* Install a to-state action on the states that are neither the start state
+ * nor final. */
+void FsmGraph::middleToStateAction( int ordering, Action *action )
+{
+	for ( StateList::Iter st = stateList; st.lte(); st++ ) {
+		if ( st != startState ) {
+			if ( ! st->isFinState() )
+				st->toStateActionTable.setAction( ordering, action );
+		}
+	}
+}
+
+/*
+ * Set From State Actions.
+ */
+
+/* Install a from-state action on the start state only. */
+void FsmGraph::startFromStateAction( int ordering, Action *action )
+{
+	/* Isolate first so that the start state has no other entry points. */
+	isolateStartState();
+	startState->fromStateActionTable.setAction( ordering, action );
+}
+
+/* Install a from-state action on every state in the state list. */
+void FsmGraph::allFromStateAction( int ordering, Action *action )
+{
+	StateList::Iter st = stateList;
+	while ( st.lte() ) {
+		st->fromStateActionTable.setAction( ordering, action );
+		st++;
+	}
+}
+
+/* Install a from-state action on every final state. */
+void FsmGraph::finalFromStateAction( int ordering, Action *action )
+{
+	/* Record the action in the from-state table of each final state. */
+	for ( StateSet::Iter fin = finStateSet; fin.lte(); fin++ ) {
+		FsmState *finState = *fin;
+		finState->fromStateActionTable.setAction( ordering, action );
+	}
+}
+
+/* Install a from-state action on every state except the start state. */
+void FsmGraph::notStartFromStateAction( int ordering, Action *action )
+{
+	StateList::Iter st = stateList;
+	while ( st.lte() ) {
+		if ( st != startState )
+			st->fromStateActionTable.setAction( ordering, action );
+		st++;
+	}
+}
+
+/* Install a from-state action on every state that is not final. */
+void FsmGraph::notFinalFromStateAction( int ordering, Action *action )
+{
+	for ( StateList::Iter st = stateList; st.lte(); st++ ) {
+		/* Final states are left alone. */
+		if ( st->isFinState() )
+			continue;
+		st->fromStateActionTable.setAction( ordering, action );
+	}
+}
+
+/* Install a from-state action on the states that are neither the start state
+ * nor final. */
+void FsmGraph::middleFromStateAction( int ordering, Action *action )
+{
+	for ( StateList::Iter st = stateList; st.lte(); st++ ) {
+		if ( st != startState ) {
+			if ( ! st->isFinState() )
+				st->fromStateActionTable.setAction( ordering, action );
+		}
+	}
+}
+
+/* Shift the function ordering of the start transitions to start
+ * at fromOrder and increase in units of 1. Useful before starring.
+ * Returns the maximum number of order numbers used. 
*/
+int FsmGraph::shiftStartActionOrder( int fromOrder )
+{
+	int maxUsed = 0;
+
+	/* Walk the start state's transitions, shifting function ordering. */
+	for ( TransList::Iter trans = startState->outList; trans.lte(); trans++ ) {
+		/* Walk the function data for the transition and set the keys to
+		 * increasing values starting at fromOrder. Numbering restarts at
+		 * fromOrder for every transition. */
+		int curFromOrder = fromOrder;
+		ActionTable::Iter action = trans->actionTable;
+		for ( ; action.lte(); action++ )
+			action->key = curFromOrder++;
+
+		/* Keep track of the max number of orders used by any one
+		 * transition. */
+		if ( curFromOrder - fromOrder > maxUsed )
+			maxUsed = curFromOrder - fromOrder;
+	}
+
+	return maxUsed;
+}
+
+/* Remove all priorities: empties the outPriorTable of every state and the
+ * priorTable of every out transition. */
+void FsmGraph::clearAllPriorities()
+{
+	for ( StateList::Iter state = stateList; state.lte(); state++ ) {
+		/* Clear out priority data. */
+		state->outPriorTable.empty();
+
+		/* Clear transition data from the out transitions. */
+		for ( TransList::Iter trans = state->outList; trans.lte(); trans++ )
+			trans->priorTable.empty();
+	}
+}
+
+/* Zeros out the function ordering keys. This may be called before minimization
+ * when it is known that no more fsm operations are going to be done. This
+ * will achieve greater reduction as states will not be separated on the basis
+ * of function ordering. */
+void FsmGraph::nullActionKeys( )
+{
+	/* For each state... */
+	for ( StateList::Iter state = stateList; state.lte(); state++ ) {
+		/* Walk the transitions for the state. */
+		for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) {
+			/* Walk the action table for the transition. */
+			for ( ActionTable::Iter action = trans->actionTable;
+					action.lte(); action++ )
+				action->key = 0;
+
+			/* Walk the longest match action table for the transition. */
+			for ( LmActionTable::Iter action = trans->lmActionTable;
+					action.lte(); action++ )
+				action->key = 0;
+		}
+
+		/* Null the action keys of the to state action table. */
+		for ( ActionTable::Iter action = state->toStateActionTable;
+				action.lte(); action++ )
+			action->key = 0;
+
+		/* Null the action keys of the from state action table. */
+		for ( ActionTable::Iter action = state->fromStateActionTable;
+				action.lte(); action++ )
+			action->key = 0;
+
+		/* Null the action keys of the out action table. */
+		for ( ActionTable::Iter action = state->outActionTable;
+				action.lte(); action++ )
+			action->key = 0;
+
+		/* Null the orderings of the error action table. Its elements carry
+		 * the ordering in an ordering field rather than in key. */
+		for ( ErrActionTable::Iter action = state->errActionTable;
+				action.lte(); action++ )
+			action->ordering = 0;
+
+		/* Null the action keys of the eof action table. */
+		for ( ActionTable::Iter action = state->eofActionTable;
+				action.lte(); action++ )
+			action->key = 0;
+	}
+}
+
+/* Walk the list of states and verify that non final states do not have out
+ * data, that all stateBits are cleared, and that there are no states with
+ * zero foreign in transitions. All checks are made with assert. */
+void FsmGraph::verifyStates()
+{
+	for ( StateList::Iter state = stateList; state.lte(); state++ ) {
+		/* Non final states should not have leaving data. */
+		if ( ! (state->stateBits & SB_ISFINAL) ) {
+			assert( state->outActionTable.length() == 0 );
+			assert( state->outCondSet.length() == 0 );
+			assert( state->outPriorTable.length() == 0 );
+		}
+
+		/* Data used in algorithms should be cleared. */
+		assert( (state->stateBits & SB_BOTH) == 0 );
+		assert( state->foreignInTrans > 0 );
+	}
+}
+
+/* Compare two transitions according to their relative priority. Returns -1
+ * if, on the first key present in both tables with differing priorities,
+ * table one has the lower priority, 1 if it has the higher, and 0 if no
+ * shared key differs. Keys present in only one table are ignored.
+ * NOTE(review): the concurrent scan presumes both tables iterate in
+ * ascending desc->key order -- confirm. */
+int FsmGraph::comparePrior( const PriorTable &priorTable1, const PriorTable &priorTable2 )
+{
+	/* Looking for differing priorities on same keys. Need to concurrently
+	 * scan the priority lists. */
+	PriorTable::Iter pd1 = priorTable1;
+	PriorTable::Iter pd2 = priorTable2;
+	while ( pd1.lte() && pd2.lte() ) {
+		/* Check keys. Advance whichever side has the smaller key. */
+		if ( pd1->desc->key < pd2->desc->key )
+			pd1.increment();
+		else if ( pd1->desc->key > pd2->desc->key )
+			pd2.increment();
+		/* Keys are the same, check priorities. */
+		else if ( pd1->desc->priority < pd2->desc->priority )
+			return -1;
+		else if ( pd1->desc->priority > pd2->desc->priority )
+			return 1;
+		else {
+			/* Keys and priorities are equal, advance both. */
+			pd1.increment();
+			pd2.increment();
+		}
+	}
+
+	/* No differing priorities on the same key. */
+	return 0;
+}
+
+/* Compares two transitions according to priority and functions. Pointers
+ * should not be null. Does not consider to state or from state. Compare two
+ * transitions according to the data contained in the transitions. Data means
+ * any properties added to user transitions that may differentiate them. The
+ * tables are compared in this order: priorities, longest match actions,
+ * actions. The first nonzero comparison result is returned. */
+int FsmGraph::compareTransData( FsmTrans *trans1, FsmTrans *trans2 )
+{
+	/* Compare the prior table. */
+	int cmpRes = CmpPriorTable::compare( trans1->priorTable,
+			trans2->priorTable );
+	if ( cmpRes != 0 )
+		return cmpRes;
+
+	/* Compare longest match action tables. */
+	cmpRes = CmpLmActionTable::compare(trans1->lmActionTable,
+			trans2->lmActionTable);
+	if ( cmpRes != 0 )
+		return cmpRes;
+
+	/* Compare action tables. */
+	return CmpActionTable::compare(trans1->actionTable,
+			trans2->actionTable);
+}
+
+/* Callback invoked when another trans (or possibly this) is added into this
+ * transition during the merging process. Draw in any properties of srcTrans
+ * into this transition. AddInTrans is called when a new transition is made
+ * that will be a duplicate of another transition or a combination of several
+ * other transitions. AddInTrans will be called for each transition that the
+ * new transition is to represent. */
+void FsmGraph::addInTrans( FsmTrans *destTrans, FsmTrans *srcTrans )
+{
+	/* Protect against adding in from ourselves. */
+	if ( srcTrans == destTrans ) {
+		/* Adding in ourselves, need to make a copy of the source tables so
+		 * that setActions does not read the table it is writing to.
+		 * The priorities are not copied in as that would have no effect. */
+		destTrans->lmActionTable.setActions( LmActionTable(srcTrans->lmActionTable) );
+		destTrans->actionTable.setActions( ActionTable(srcTrans->actionTable) );
+	}
+	else {
+		/* Not a copy of ourself, get the functions and priorities. */
+		destTrans->lmActionTable.setActions( srcTrans->lmActionTable );
+		destTrans->actionTable.setActions( srcTrans->actionTable );
+		destTrans->priorTable.setPriors( srcTrans->priorTable );
+	}
+}
+
+/* Compare the properties of states that are embedded by users. Compares, in
+ * this order, the out priorities, the to state, from state and out action
+ * tables, the out condition set, and the error and eof action tables. The
+ * first nonzero comparison result is returned. */
+int FsmGraph::compareStateData( const FsmState *state1, const FsmState *state2 )
+{
+	/* Compare the out priority table. */
+	int cmpRes = CmpPriorTable::
+			compare( state1->outPriorTable, state2->outPriorTable );
+	if ( cmpRes != 0 )
+		return cmpRes;
+
+	/* Test to state action tables. */
+	cmpRes = CmpActionTable::compare( state1->toStateActionTable,
+			state2->toStateActionTable );
+	if ( cmpRes != 0 )
+		return cmpRes;
+
+	/* Test from state action tables. */
+	cmpRes = CmpActionTable::compare( state1->fromStateActionTable,
+			state2->fromStateActionTable );
+	if ( cmpRes != 0 )
+		return cmpRes;
+
+	/* Test out action tables. */
+	cmpRes = CmpActionTable::compare( state1->outActionTable,
+			state2->outActionTable );
+	if ( cmpRes != 0 )
+		return cmpRes;
+
+	/* Test out condition sets. */
+	cmpRes = CmpActionSet::compare( state1->outCondSet,
+			state2->outCondSet );
+	if ( cmpRes != 0 )
+		return cmpRes;
+
+	/* Test out error action tables. */
+	cmpRes = CmpErrActionTable::compare( state1->errActionTable,
+			state2->errActionTable );
+	if ( cmpRes != 0 )
+		return cmpRes;
+
+	/* Test eof action tables. */
+	return CmpActionTable::compare( state1->eofActionTable,
+			state2->eofActionTable );
+}
+
+
+/* Invoked when a state loses its final state status and the leaving
+ * transition embedding data should be deleted. */
+void FsmGraph::clearOutData( FsmState *state )
+{
+	/* Kill the out actions, out conditions and out priorities. */
+	state->outActionTable.empty();
+	state->outCondSet.empty();
+	state->outPriorTable.empty();
+}
+/* True if state carries any leaving data: a non-empty out action table, out
+ * condition set or out priority table. */
+bool FsmGraph::hasOutData( FsmState *state )
+{
+	return ( state->outActionTable.length() > 0 ||
+			state->outCondSet.length() > 0 ||
+			state->outPriorTable.length() > 0 );
+}
+
+/*
+ * Setting Conditions.
+ */
+
+void logNewExpansion( Expansion *exp );
+void logCondSpace( CondSpace *condSpace );
+/* Return the condition space for condSet, creating and registering it in
+ * condData->condSpaceMap if it is not there yet. A new space takes the
+ * current condData->nextCondKey as its baseKey, and nextCondKey is advanced
+ * by (1 << condSet.length()) * keyOps->alphSize().
+ * NOTE(review): the shift is done on an int literal, so a very large
+ * condition set would overflow it -- confirm sets stay small. */
+CondSpace *FsmGraph::addCondSpace( const CondSet &condSet )
+{
+	CondSpace *condSpace = condData->condSpaceMap.find( condSet );
+	if ( condSpace == 0 ) {
+		Key baseKey = condData->nextCondKey;
+		condData->nextCondKey += (1 << condSet.length() ) * keyOps->alphSize();
+
+		condSpace = new CondSpace( condSet );
+		condSpace->baseKey = baseKey;
+		condData->condSpaceMap.insert( condSpace );
+
+		#ifdef COLM_LOG_CONDS
+		if ( colm_log_conds ) {
+			cerr << "adding new condition space" << endl;
+			cerr << " condition set: ";
+			logCondSpace( condSpace );
+			cerr << endl;
+			cerr << " baseKey: " << baseKey.getVal() << endl;
+		}
+		#endif
+	}
+	return condSpace;
+}
+/* Embed condAction as a condition on the start state, after isolating it. */
+void FsmGraph::startFsmCondition( Action *condAction )
+{
+	/* Make sure the start state has no other entry points. */
+	isolateStartState();
+	embedCondition( startState, condAction );
+}
+/* Embed condAction as a condition on every state in the state list. */
+void FsmGraph::allTransCondition( Action *condAction )
+{
+	for ( StateList::Iter state = stateList; state.lte(); state++ )
+		embedCondition( state, condAction );
+}
+/* Record condAction in the out condition set of every final state. */
+void FsmGraph::leaveFsmCondition( Action *condAction )
+{
+	for ( StateSet::Iter state = finStateSet; state.lte(); state++ )
+		(*state)->outCondSet.insert( condAction );
+}
diff --git a/src/fsmattach.cc b/src/fsmattach.cc
new file mode 100644
index 00000000..a58ed9a4
--- /dev/null
+++ b/src/fsmattach.cc
@@ -0,0 +1,425 @@
+/*
+ * Copyright 2006-2012 Adrian Thurston <thurston@complang.org>
+ */
+
+/* This file is part of Colm.
+ *
+ * Colm is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Colm is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Colm; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <string.h>
+#include <assert.h>
+#include "fsmgraph.h"
+
+#include <iostream>
+using namespace std;
+
+/* Insert a transition into an inlist. The head must be supplied. The
+ * transition is pushed on the front of the list. When from and to differ,
+ * to's foreignInTrans count goes up by one. */
+void FsmGraph::attachToInList( FsmState *from, FsmState *to,
+		FsmTrans *&head, FsmTrans *trans )
+{
+	trans->ilnext = head;
+	trans->ilprev = 0;
+
+	/* If in trans list is not empty, set the head->ilprev to trans. */
+	if ( head != 0 )
+		head->ilprev = trans;
+
+	/* Now insert ourselves at the front of the list. */
+	head = trans;
+
+	/* Keep track of foreign transitions for from and to. */
+	if ( from != to ) {
+		if ( misfitAccounting ) {
+			/* If the number of foreign in transitions is about to go up to 1 then
+			 * move it from the misfit list to the main list. */
+			if ( to->foreignInTrans == 0 )
+				stateList.append( misfitList.detach( to ) );
+		}
+
+		to->foreignInTrans += 1;
+	}
+};
+
+/* Detach a transition from an inlist. The head of the inlist must be supplied.
+ * When from and to differ, to's foreignInTrans count goes down by one. */
+void FsmGraph::detachFromInList( FsmState *from, FsmState *to,
+		FsmTrans *&head, FsmTrans *trans )
+{
+	/* Detach in the inTransList. */
+	if ( trans->ilprev == 0 )
+		head = trans->ilnext;
+	else
+		trans->ilprev->ilnext = trans->ilnext;
+
+	if ( trans->ilnext != 0 )
+		trans->ilnext->ilprev = trans->ilprev;
+
+	/* Keep track of foreign transitions for from and to. */
+	if ( from != to ) {
+		to->foreignInTrans -= 1;
+
+		if ( misfitAccounting ) {
+			/* If the number of foreign in transitions goes down to 0 then move it
+			 * from the main list to the misfit list. */
+			if ( to->foreignInTrans == 0 )
+				misfitList.append( stateList.detach( to ) );
+		}
+	}
+}
+
+/* Attach states on the default transition, range list or on out/in list key.
+ * First makes a new transition, appends it to the out list of from and gives
+ * it the key range lowKey..highKey. If to is not null the transition is also
+ * put on to's in list. Returns the new transition. The code visible here
+ * makes no check for an existing transition over the same keys. */
+FsmTrans *FsmGraph::attachNewTrans( FsmState *from, FsmState *to, Key lowKey, Key highKey )
+{
+	/* Make the new transition. */
+	FsmTrans *retVal = new FsmTrans();
+
+	/* The transition is now attached. Remember the parties involved. */
+	retVal->fromState = from;
+	retVal->toState = to;
+
+	/* Make the entry in the out list for the transitions. */
+	from->outList.append( retVal );
+
+	/* Set the keys of the new trans. */
+	retVal->lowKey = lowKey;
+	retVal->highKey = highKey;
+
+	/* Attach using inList as the head pointer. */
+	if ( to != 0 )
+		attachToInList( from, to, to->inList.head, retVal );
+
+	return retVal;
+}
+
+/* Attach for range lists or for the default transition. 
This attach should
+ * be used when a transition already is allocated and must be attached to a
+ * target state. Does not handle adding the transition into the out list. */
+void FsmGraph::attachTrans( FsmState *from, FsmState *to, FsmTrans *trans )
+{
+	assert( trans->fromState == 0 && trans->toState == 0 );
+	trans->fromState = from;
+	trans->toState = to;
+
+	if ( to != 0 ) {
+		/* Attach using the inList pointer as the head pointer. */
+		attachToInList( from, to, to->inList.head, trans );
+	}
+}
+
+/* Redirect a transition away from error and towards some state. This is just
+ * like attachTrans except it requires fromState to be set and does not touch
+ * it. */
+void FsmGraph::redirectErrorTrans( FsmState *from, FsmState *to, FsmTrans *trans )
+{
+	assert( trans->fromState != 0 && trans->toState == 0 );
+	trans->toState = to;
+
+	if ( to != 0 ) {
+		/* Attach using the inList pointer as the head pointer. */
+		attachToInList( from, to, to->inList.head, trans );
+	}
+}
+
+/* Detach for out/in lists or for default transition. Clears the fromState
+ * and toState of trans and, if to is not null, takes trans off to's in list.
+ * The transition is neither removed from an out list nor deleted. */
+void FsmGraph::detachTrans( FsmState *from, FsmState *to, FsmTrans *trans )
+{
+	assert( trans->fromState == from && trans->toState == to );
+	trans->fromState = 0;
+	trans->toState = 0;
+
+	if ( to != 0 ) {
+		/* Detach using to's inList pointer as the head. */
+		detachFromInList( from, to, to->inList.head, trans );
+	}
+}
+
+
+/* Detach a state from the graph. Detaches and deletes transitions in and out
+ * of the state. Empties inList and outList. Removes the state from the final
+ * state set. A detached state becomes useless and should be deleted. */
+void FsmGraph::detachState( FsmState *state )
+{
+	/* Detach the in transitions from the inList list of transitions. */
+	while ( state->inList.head != 0 ) {
+		/* Get pointers to the trans and the state. */
+		FsmTrans *trans = state->inList.head;
+		FsmState *fromState = trans->fromState;
+
+		/* Detach the transitions from the source state. */
+		detachTrans( fromState, state, trans );
+
+		/* Ok to delete the transition. */
+		fromState->outList.detach( trans );
+		delete trans;
+	}
+
+	/* Remove the entry points in on the machine. */
+	while ( state->entryIds.length() > 0 )
+		unsetEntry( state->entryIds[0], state );
+
+	/* Detach out range transitions. The successor is fetched before the
+	 * current transition is deleted. */
+	for ( TransList::Iter trans = state->outList; trans.lte(); ) {
+		TransList::Iter next = trans.next();
+		detachTrans( state, trans->toState, trans );
+		delete trans;
+		trans = next;
+	}
+
+	/* The transitions were deleted above, so make the out list forget its
+	 * pointers. Presumably abandon resets the list without touching the
+	 * elements -- confirm. */
+	state->outList.abandon();
+
+	/* Unset final stateness before detaching from graph. */
+	if ( state->stateBits & SB_ISFINAL )
+		finStateSet.remove( state );
+}
+
+
+/* Duplicate a transition. Makes a new transition that is attached to the same
+ * dest as srcTrans. The new transition has functions and priority taken from
+ * srcTrans. Used for merging a transition in to a free spot. The trans can
+ * just be dropped in. It does not conflict with an existing trans and need
+ * not be crossed. Returns the new transition. The new transition is not put
+ * on any out list and its keys are left for the caller to set. */
+FsmTrans *FsmGraph::dupTrans( FsmState *from, FsmTrans *srcTrans )
+{
+	/* Make a new transition. */
+	FsmTrans *newTrans = new FsmTrans();
+
+	/* We can attach the transition, one does not exist. */
+	attachTrans( from, srcTrans->toState, newTrans );
+
+	/* Call the user callback to add in the original source transition. */
+	addInTrans( newTrans, srcTrans );
+
+	return newTrans;
+}
+
+/* In crossing, src trans and dest trans both go to existing states. Make one
+ * state from the sets of states that src and dest trans go to. Returns
+ * destTrans, possibly redirected to the combined state. */
+FsmTrans *FsmGraph::fsmAttachStates( MergeData &md, FsmState *from,
+			FsmTrans *destTrans, FsmTrans *srcTrans )
+{
+	/* The priorities are equal. We must merge the transitions. Does the
+	 * existing trans go to the state we are to attach to? ie, are we to
+	 * simply double up the transition? */
+	FsmState *toState = srcTrans->toState;
+	FsmState *existingState = destTrans->toState;
+
+	if ( existingState == toState ) {
+		/* The transition is a double up to the same state. Copy the src
+		 * trans into itself. We don't need to merge in the from out trans
+		 * data, that was done already. */
+		addInTrans( destTrans, srcTrans );
+	}
+	else {
+		/* The trans is not a double up. Dest trans cannot be the same as src
+		 * trans. Set up the state set. */
+		StateSet stateSet;
+
+		/* We go to all the states the existing trans goes to, plus... */
+		if ( existingState->stateDictEl == 0 )
+			stateSet.insert( existingState );
+		else
+			stateSet.insert( existingState->stateDictEl->stateSet );
+
+		/* ... all the states that we have been told to go to. */
+		if ( toState->stateDictEl == 0 )
+			stateSet.insert( toState );
+		else
+			stateSet.insert( toState->stateDictEl->stateSet );
+
+		/* Look for the state. If it is not there already, make it. */
+		StateDictEl *lastFound;
+		if ( md.stateDict.insert( stateSet, &lastFound ) ) {
+			/* Make a new state representing the combination of states in
+			 * stateSet. It gets added to the fill list. This means that we
+			 * need to fill in its transitions sometime in the future. We
+			 * don't do that now (ie, do not recurse). */
+			FsmState *combinState = addState();
+
+			/* Link up the dict element and the state. */
+			lastFound->targState = combinState;
+			combinState->stateDictEl = lastFound;
+
+			/* Add to the fill list. */
+			md.fillListAppend( combinState );
+		}
+
+		/* Get the target state of the dict element, whether it was just
+		 * inserted or was already there. */
+		FsmState *targ = lastFound->targState;
+
+		/* Detach the state from existing state. */
+		detachTrans( from, existingState, destTrans );
+
+		/* Re-attach to the new target. */
+		attachTrans( from, targ, destTrans );
+
+		/* Add in src trans to the existing transition that we redirected to
+		 * the new state. We don't need to merge in the from out trans data,
+		 * that was done already. */
+		addInTrans( destTrans, srcTrans );
+	}
+
+	return destTrans;
+}
+
+/* Two transitions are to be crossed, handle the possibility of either going
+ * to the error state (toState == 0). Returns the merged transition, which in
+ * every branch visible here is destTrans or the result of fsmAttachStates. */
+FsmTrans *FsmGraph::mergeTrans( MergeData &md, FsmState *from,
+			FsmTrans *destTrans, FsmTrans *srcTrans )
+{
+	FsmTrans *retTrans = 0;
+	if ( destTrans->toState == 0 && srcTrans->toState == 0 ) {
+		/* Error added into error. */
+		addInTrans( destTrans, srcTrans );
+		retTrans = destTrans;
+	}
+	else if ( destTrans->toState == 0 && srcTrans->toState != 0 ) {
+		/* Non error added into error, we need to detach and reattach. */
+		detachTrans( from, destTrans->toState, destTrans );
+		attachTrans( from, srcTrans->toState, destTrans );
+		addInTrans( destTrans, srcTrans );
+		retTrans = destTrans;
+	}
+	else if ( srcTrans->toState == 0 ) {
+		/* Dest goes somewhere but src doesn't, just add it in. */
+		addInTrans( destTrans, srcTrans );
+		retTrans = destTrans;
+	}
+	else {
+		/* Both go somewhere, run the actual cross. */
+		retTrans = fsmAttachStates( md, from, destTrans, srcTrans );
+	}
+
+	return retTrans;
+}
+
+/* Find the trans with the higher priority. If src is lower priority than dest then
+ * src is ignored. If src is higher priority than dest, then src overwrites dest. If
+ * the priorities are equal, then they are merged. */
+FsmTrans *FsmGraph::crossTransitions( MergeData &md, FsmState *from,
+			FsmTrans *destTrans, FsmTrans *srcTrans )
+{
+	FsmTrans *retTrans;
+
+	/* Compare the priority of the dest and src transitions. */
+	int compareRes = comparePrior( destTrans->priorTable, srcTrans->priorTable );
+	if ( compareRes < 0 ) {
+		/* Src trans has a higher priority than dest, src overwrites dest.
+		 * Detach dest and return a copy of src.
+		 * NOTE(review): destTrans is detached here but not deleted, and
+		 * the caller visible in this file only keeps the returned
+		 * transition -- confirm whether this leaks destTrans. */
+		detachTrans( from, destTrans->toState, destTrans );
+		retTrans = dupTrans( from, srcTrans );
+	}
+	else if ( compareRes > 0 ) {
+		/* The dest trans has a higher priority, use dest. */
+		retTrans = destTrans;
+	}
+	else {
+		/* Src trans and dest trans have the same priority, they must be merged. */
+		retTrans = mergeTrans( md, from, destTrans, srcTrans );
+	}
+
+	/* Return the transition that resulted from the cross. */
+	return retTrans;
+}
+
+/* Copy the transitions in srcList to the outlist of dest. The srcList should
+ * not be the outList of dest, otherwise you would be copying the contents of
+ * srcList into itself as it's iterated: bad news. */
+void FsmGraph::outTransCopy( MergeData &md, FsmState *dest, FsmTrans *srcList )
+{
+	/* The destination list. It is built up here and swapped into dest at
+	 * the end. */
+	TransList destList;
+
+	/* Set up an iterator to stop at breaks. */
+	PairIter<FsmTrans> outPair( dest->outList.head, srcList );
+	for ( ; !outPair.end(); outPair++ ) {
+		switch ( outPair.userState ) {
+		case RangeInS1: {
+			/* The pair iter is the authority on the keys. It may have needed
+			 * to break the dest range. */
+			FsmTrans *destTrans = outPair.s1Tel.trans;
+			destTrans->lowKey = outPair.s1Tel.lowKey;
+			destTrans->highKey = outPair.s1Tel.highKey;
+			destList.append( destTrans );
+			break;
+		}
+		case RangeInS2: {
+			/* The range is only in the source: duplicate the src trans
+			 * onto dest. */
+			FsmTrans *newTrans = dupTrans( dest, outPair.s2Tel.trans );
+
+			/* Set up the transition's keys and append to the dest list. */
+			newTrans->lowKey = outPair.s2Tel.lowKey;
+			newTrans->highKey = outPair.s2Tel.highKey;
+			destList.append( newTrans );
+			break;
+		}
+		case RangeOverlap: {
+			/* Exact overlap, cross them. */
+			FsmTrans *newTrans = crossTransitions( md, dest,
+				outPair.s1Tel.trans, outPair.s2Tel.trans );
+
+			/* Set up the transition's keys and append to the dest list. */
+			newTrans->lowKey = outPair.s1Tel.lowKey;
+			newTrans->highKey = outPair.s1Tel.highKey;
+			destList.append( newTrans );
+			break;
+		}
+		case BreakS1: {
+			/* Since we are always writing to the dest trans, the dest needs
+			 * to be copied when it is broken. The copy goes into the first
+			 * half of the break to "break it off". */
+			outPair.s1Tel.trans = dupTrans( dest, outPair.s1Tel.trans );
+			break;
+		}
+		case BreakS2:
+			break;
+		}
+	}
+
+	/* Abandon the old outList and transfer destList into it. */
+	dest->outList.transfer( destList );
+}
+
+
+/* Move all the transitions that go into src so that they go into dest. The
+ * start state status and the entry points of src are moved to dest as well. */
+void FsmGraph::inTransMove( FsmState *dest, FsmState *src )
+{
+	/* Do not try to move in trans to and from the same state. */
+	assert( dest != src );
+
+	/* If src is the start state, dest becomes the start state. */
+	if ( src == startState ) {
+		unsetStartState();
+		setStartState( dest );
+	}
+
+	/* For each entry point into src, create an entry point into dest, when the
+	 * state is detached, the entry points to src will be removed. */
+	for ( EntryIdSet::Iter enId = src->entryIds; enId.lte(); enId++ )
+		changeEntry( *enId, dest, src );
+
+	/* Move the transitions in inList. Each detach takes the head off the
+	 * in list of src, so the loop ends when that list is empty. */
+	while ( src->inList.head != 0 ) {
+		/* Get trans and from state. */
+		FsmTrans *trans = src->inList.head;
+		FsmState *fromState = trans->fromState;
+
+		/* Detach from src, reattach to dest. */
+		detachTrans( fromState, src, trans );
+		attachTrans( fromState, dest, trans );
+	}
+}
diff --git a/src/fsmbase.cc b/src/fsmbase.cc
new file mode 100644
index 00000000..90341039
--- /dev/null
+++ b/src/fsmbase.cc
@@ -0,0 +1,602 @@
+/*
+ * Copyright 2006-2012 Adrian Thurston <thurston@complang.org>
+ */
+
+/* This file is part of Colm.
+ *
+ * Colm is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Colm is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with Colm; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <string.h> +#include <assert.h> +#include "fsmgraph.h" + +/* Simple singly linked list append routine for the fill list. The new state + * goes to the end of the list. */ +void MergeData::fillListAppend( FsmState *state ) +{ + state->alg.next = 0; + + if ( stfillHead == 0 ) { + /* List is empty, state becomes head and tail. */ + stfillHead = state; + stfillTail = state; + } + else { + /* List is not empty, state goes after last element. */ + stfillTail->alg.next = state; + stfillTail = state; + } +} + +/* Graph constructor. */ +FsmGraph::FsmGraph() +: + /* No start state. */ + startState(0), + errState(0), + + /* Misfit accounting is a switch, turned on only at specific times. It + * controls what happens when states have no way in from the outside + * world.. */ + misfitAccounting(false), + + lmRequiresErrorState(false) +{ +} + +/* Copy all graph data including transitions. */ +FsmGraph::FsmGraph( const FsmGraph &graph ) +: + /* Lists start empty. Will be filled by copy. */ + stateList(), + misfitList(), + + /* Copy in the entry points, + * pointers will be resolved later. */ + entryPoints(graph.entryPoints), + startState(graph.startState), + errState(0), + + /* Will be filled by copy. */ + finStateSet(), + + /* Misfit accounting is only on during merging. */ + misfitAccounting(false), + + lmRequiresErrorState(graph.lmRequiresErrorState) +{ + /* Create the states and record their map in the original state. */ + StateList::Iter origState = graph.stateList; + for ( ; origState.lte(); origState++ ) { + /* Make the new state. */ + FsmState *newState = new FsmState( *origState ); + + /* Add the state to the list. */ + stateList.append( newState ); + + /* Set the mapsTo item of the old state. 
*/ + origState->alg.stateMap = newState; + } + + /* Dereference all the state maps. */ + for ( StateList::Iter state = stateList; state.lte(); state++ ) { + for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) { + /* The trans still points to the original in the src machine. The target's duplicate + * is in the original's stateMap. */ + FsmState *toState = trans->toState != 0 ? trans->toState->alg.stateMap : 0; + + /* Attach the transition to the duplicate. toState is cleared first; presumably attachTrans expects an unattached transition (TODO confirm). */ + trans->toState = 0; + attachTrans( state, toState, trans ); + } + } + + /* Fix the state pointers in the entry points array. */ + EntryMapEl *eel = entryPoints.data; + for ( int e = 0; e < entryPoints.length(); e++, eel++ ) { + /* Get the duplicate of the state. */ + eel->value = eel->value->alg.stateMap; + + /* Foreign in transitions must be built up when duping machines so + * increment it here. */ + eel->value->foreignInTrans += 1; + } + + /* Fix the start state pointer and the new start state's count of in + * transitions. NOTE(review): startState is dereferenced unconditionally -- confirm the source graph always has one. */ + startState = startState->alg.stateMap; + startState->foreignInTrans += 1; + + /* Build the final state set from the duplicates of the source's final states. */ + StateSet::Iter st = graph.finStateSet; + for ( ; st.lte(); st++ ) + finStateSet.insert((*st)->alg.stateMap); +} + +/* Deletes all transition data then deletes each state. */ +FsmGraph::~FsmGraph() +{ + /* Delete all the transitions. */ + for ( StateList::Iter state = stateList; state.lte(); state++ ) { + /* Empty the state's out list, deleting its transitions. */ + state->outList.empty(); + } + + /* Delete all the states. */ + stateList.empty(); +} + +/* Set a state final. The state gets its SB_ISFINAL bit set and is added to + * the finStateSet. Has no effect if the state is already final. */ +void FsmGraph::setFinState( FsmState *state ) +{ + /* Is it already a fin state? */ + if ( state->stateBits & SB_ISFINAL ) + return; + + state->stateBits |= SB_ISFINAL; + finStateSet.insert( state ); +} + +/* Set a state non-final. The has its isFinState flag set false and the state + * is removed from the final state set. 
*/ +void FsmGraph::unsetFinState( FsmState *state ) +{ + /* Is it already a non-final state? */ + if ( ! (state->stateBits & SB_ISFINAL) ) + return; + + /* When a state loses its final state status it must relinquish all the + * properties that are allowed only for final states. */ + clearOutData( state ); + + state->stateBits &= ~ SB_ISFINAL; + finStateSet.remove( state ); +} + +/* Set a state as the start state. Asserts that no start state is currently set. */ +void FsmGraph::setStartState( FsmState *state ) +{ + /* Should change from unset to set. */ + assert( startState == 0 ); + startState = state; + + if ( misfitAccounting ) { + /* If the number of foreign in transitions is about to go up to 1 then + * take it off the misfit list and put it on the main state list. */ + if ( state->foreignInTrans == 0 ) + stateList.append( misfitList.detach( state ) ); + } + + /* Up the foreign in transitions to the state. */ + state->foreignInTrans += 1; +} +/* Unset the start state. Asserts that one is set, and gives back the foreign in transition counted by setStartState. */ +void FsmGraph::unsetStartState() +{ + /* Should change from set to unset. */ + assert( startState != 0 ); + + /* Decrement the start state's count of foreign in transitions. */ + startState->foreignInTrans -= 1; + + if ( misfitAccounting ) { + /* If the number of foreign in transitions just went down to 0 then take + * it off the main list and put it on the misfit list. */ + if ( startState->foreignInTrans == 0 ) + misfitList.append( stateList.detach( startState ) ); + } + + startState = 0; +} + +/* Associate an id with a state. Makes the state a named entry point. Has no + * effect if the entry point is already mapped to the state. */ +void FsmGraph::setEntry( int id, FsmState *state ) +{ + /* Insert the id into the state. If the state is already labelled with id, + * nothing to do. */ + if ( state->entryIds.insert( id ) ) { + /* Insert the entry and assert that it succeeds. 
*/ + entryPoints.insertMulti( id, state ); + + if ( misfitAccounting ) { + /* If the number of foreign in transitions is about to go up to 1 then + * take it off the misfit list and put it on the main state list. */ + if ( state->foreignInTrans == 0 ) + stateList.append( misfitList.detach( state ) ); + } + + /* Up the foreign in transitions to the state. */ + state->foreignInTrans += 1; + } +} + +/* Remove the association of an id with a state. The state loses its entry + * point status. Assumes that the id is indeed mapped to state. */ +void FsmGraph::unsetEntry( int id, FsmState *state ) +{ + /* Find the range of entry points mapped to id, then scan forward to the record for state. The scan has no upper bound: it relies on the assumption that id is mapped to state. */ + EntryMapEl *enLow = 0, *enHigh = 0; + entryPoints.findMulti( id, enLow, enHigh ); + while ( enLow->value != state ) + enLow += 1; + + /* Remove the record from the map. */ + entryPoints.remove( enLow ); + + /* Remove the state's sense of the link. */ + state->entryIds.remove( id ); + state->foreignInTrans -= 1; + if ( misfitAccounting ) { + /* If the number of foreign in transitions just went down to 0 then take + * it off the main list and put it on the misfit list. */ + if ( state->foreignInTrans == 0 ) + misfitList.append( stateList.detach( state ) ); + } +} + +/* Remove all association of an id with states. Assumes that the id is indeed + * mapped to a state. */ +void FsmGraph::unsetEntry( int id ) +{ + /* Find the range of entry points mapped to id. The loop below treats enHigh as inclusive. */ + EntryMapEl *enLow = 0, *enHigh = 0; + entryPoints.findMulti( id, enLow, enHigh ); + for ( EntryMapEl *mel = enLow; mel <= enHigh; mel++ ) { + /* Remove the state's sense of the link. */ + mel->value->entryIds.remove( id ); + mel->value->foreignInTrans -= 1; + if ( misfitAccounting ) { + /* If the number of foreign in transitions just went down to 0 + * then take it off the main list and put it on the misfit list. */ + if ( mel->value->foreignInTrans == 0 ) + misfitList.append( stateList.detach( mel->value ) ); + } + } + + /* Remove the records from the entry points map. 
*/ + entryPoints.removeMulti( enLow, enHigh ); +} + + +void FsmGraph::changeEntry( int id, FsmState *to, FsmState *from ) +{ + /* Find the entry in the entry map. */ + EntryMapEl *enLow = 0, *enHigh = 0; + entryPoints.findMulti( id, enLow, enHigh ); + while ( enLow->value != from ) + enLow += 1; + + /* Change it to the new target. */ + enLow->value = to; + + /* Remove from's sense of the link. */ + from->entryIds.remove( id ); + from->foreignInTrans -= 1; + if ( misfitAccounting ) { + /* If the number of foreign in transitions just went down to 0 then take + * it off the main list and put it on the misfit list. */ + if ( from->foreignInTrans == 0 ) + misfitList.append( stateList.detach( from ) ); + } + + /* Add to's sense of the link. */ + if ( to->entryIds.insert( id ) != 0 ) { + if ( misfitAccounting ) { + /* If the number of foreign in transitions is about to go up to 1 then + * take it off the misfit list and put it on the head list. */ + if ( to->foreignInTrans == 0 ) + stateList.append( misfitList.detach( to ) ); + } + + /* Up the foreign in transitions to the state. */ + to->foreignInTrans += 1; + } +} + + +/* Clear all entry points from a machine. */ +void FsmGraph::unsetAllEntryPoints() +{ + for ( EntryMap::Iter en = entryPoints; en.lte(); en++ ) { + /* Kill all the state's entry points at once. */ + if ( en->value->entryIds.length() > 0 ) { + en->value->foreignInTrans -= en->value->entryIds.length(); + + if ( misfitAccounting ) { + /* If the number of foreign in transitions just went down to 0 + * then take it off the main list and put it on the misfit + * list. */ + if ( en->value->foreignInTrans == 0 ) + misfitList.append( stateList.detach( en->value ) ); + } + + /* Clear the set of ids out all at once. */ + en->value->entryIds.empty(); + } + } + + /* Now clear out the entry map all at once. */ + entryPoints.empty(); +} + +/* Assigning an epsilon transition into final states. 
*/ +void FsmGraph::epsilonTrans( int id ) +{ + for ( StateSet::Iter fs = finStateSet; fs.lte(); fs++ ) + (*fs)->epsilonTrans.append( id ); +} + +/* Mark all states reachable from state. Traverses transitions forward. Used + * for removing states that have no path into them. */ +void FsmGraph::markReachableFromHere( FsmState *state ) +{ + /* Base case: return; */ + if ( state->stateBits & SB_ISMARKED ) + return; + + /* Set this state as processed. We are going to visit all states that this + * state has a transition to. */ + state->stateBits |= SB_ISMARKED; + + /* Recurse on all out transitions. */ + for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) { + if ( trans->toState != 0 ) + markReachableFromHere( trans->toState ); + } +} + +void FsmGraph::markReachableFromHereStopFinal( FsmState *state ) +{ + /* Base case: return; */ + if ( state->stateBits & SB_ISMARKED ) + return; + + /* Set this state as processed. We are going to visit all states that this + * state has a transition to. */ + state->stateBits |= SB_ISMARKED; + + /* Recurse on all out transitions. */ + for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) { + FsmState *toState = trans->toState; + if ( toState != 0 && !toState->isFinState() ) + markReachableFromHereStopFinal( toState ); + } +} + +/* Mark all states reachable from state. Traverse transitions backwards. Used + * for removing dead end paths in graphs. */ +void FsmGraph::markReachableFromHereReverse( FsmState *state ) +{ + /* Base case: return; */ + if ( state->stateBits & SB_ISMARKED ) + return; + + /* Set this state as processed. We are going to visit all states with + * transitions into this state. */ + state->stateBits |= SB_ISMARKED; + + /* Recurse on all items in transitions. */ + for ( TransInList::Iter trans = state->inList; trans.lte(); trans++ ) + markReachableFromHereReverse( trans->fromState ); +} + +/* Determine if there are any entry points into a start state other than the + * start state. 
Setting starting transitions requires that the start state be + * isolated. In most cases a start state will already be isolated. */ +bool FsmGraph::isStartStateIsolated() +{ + /* If there are any in transitions then the state is not isolated. */ + if ( startState->inList.head != 0 ) + return false; + + /* If there are any entry points then the state is not isolated either. */ + if ( startState->entryIds.length() > 0 ) + return false; + + return true; +} + +/* Bring in other's entry points. Assumes other's states are going to be + * copied into this machine. */ +void FsmGraph::copyInEntryPoints( FsmGraph *other ) +{ + /* Use insert multi because names are not unique. */ + for ( EntryMap::Iter en = other->entryPoints; en.lte(); en++ ) + entryPoints.insertMulti( en->key, en->value ); +} + +/* Clear the SB_ISFINAL bit on every state in finStateSet, then empty the set. */ +void FsmGraph::unsetAllFinStates() +{ + for ( StateSet::Iter st = finStateSet; st.lte(); st++ ) + (*st)->stateBits &= ~ SB_ISFINAL; + finStateSet.empty(); +} +/* OR finStateBits into the stateBits of every state in finStateSet. */ +void FsmGraph::setFinBits( int finStateBits ) +{ + for ( int s = 0; s < finStateSet.length(); s++ ) + finStateSet.data[s]->stateBits |= finStateBits; +} + + +/* Tests the integrity of the transition lists and the fromStates. */ +void FsmGraph::verifyIntegrity() +{ + for ( StateList::Iter state = stateList; state.lte(); state++ ) { + /* Walk the out transitions and assert fromState is correct. */ + for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) + assert( trans->fromState == state ); + + /* Walk the inlist and assert toState is correct. */ + for ( TransInList::Iter trans = state->inList; trans.lte(); trans++ ) + assert( trans->toState == state ); + } +} +/* Marks everything reachable from the start state and the entry points, then checks the marks on the state list. */ +void FsmGraph::verifyReachability() +{ + /* Mark all the states that can be reached + * through the start state and the set of entry points. */ + markReachableFromHere( startState ); + for ( EntryMap::Iter en = entryPoints; en.lte(); en++ ) + markReachableFromHere( en->value ); + + /* Check that everything got marked. 
*/ + for ( StateList::Iter st = stateList; st.lte(); st++ ) { + /* Assert it got marked and then clear the mark. */ + assert( st->stateBits & SB_ISMARKED ); + st->stateBits &= ~ SB_ISMARKED; + } +} + +void FsmGraph::verifyNoDeadEndStates() +{ + /* Mark all states that have paths to the final states. */ + for ( StateSet::Iter pst = finStateSet; pst.lte(); pst++ ) + markReachableFromHereReverse( *pst ); + + /* Start state gets honorary marking. Must be done AFTER recursive call. */ + startState->stateBits |= SB_ISMARKED; + + /* Make sure everything got marked. */ + for ( StateList::Iter st = stateList; st.lte(); st++ ) { + /* Assert the state got marked and unmark it. */ + assert( st->stateBits & SB_ISMARKED ); + st->stateBits &= ~ SB_ISMARKED; + } +} + +void FsmGraph::depthFirstOrdering( FsmState *state ) +{ + /* Nothing to do if the state is already on the list. */ + if ( state->stateBits & SB_ONLIST ) + return; + + /* Doing depth first, put state on the list. */ + state->stateBits |= SB_ONLIST; + stateList.append( state ); + + /* Recurse on everything ranges. */ + for ( TransList::Iter tel = state->outList; tel.lte(); tel++ ) { + if ( tel->toState != 0 ) + depthFirstOrdering( tel->toState ); + } +} + +/* Ordering states by transition connections. */ +void FsmGraph::depthFirstOrdering() +{ + /* Init on state list flags. */ + for ( StateList::Iter st = stateList; st.lte(); st++ ) + st->stateBits &= ~SB_ONLIST; + + /* Clear out the state list, we will rebuild it. */ + int stateListLen = stateList.length(); + stateList.abandon(); + + /* Add back to the state list from the start state and all other entry + * points. */ + if ( errState != 0 ) + depthFirstOrdering( errState ); + depthFirstOrdering( startState ); + for ( EntryMap::Iter en = entryPoints; en.lte(); en++ ) + depthFirstOrdering( en->value ); + + /* Make sure we put everything back on. */ + assert( stateListLen == stateList.length() ); +} + +/* Stable sort the states by final state status. 
*/ +void FsmGraph::sortStatesByFinal() +{ + /* Move forward through the list and throw final states onto the end. */ + FsmState *state = 0; + FsmState *next = stateList.head; + FsmState *last = stateList.tail; + while ( state != last ) { + /* Move forward and load up the next. */ + state = next; + next = state->next; + + /* Throw to the end? */ + if ( state->isFinState() ) { + stateList.detach( state ); + stateList.append( state ); + } + } +} + +void FsmGraph::setStateNumbers( int base ) +{ + for ( StateList::Iter state = stateList; state.lte(); state++ ) + state->alg.stateNum = base++; +} + + +bool FsmGraph::checkErrTrans( FsmState *state, FsmTrans *trans ) +{ + /* Might go directly to error state. */ + if ( trans->toState == 0 ) + return true; + + if ( trans->prev == 0 ) { + /* If this is the first transition. */ + if ( keyOps->minKey < trans->lowKey ) + return true; + } + else { + /* Not the first transition. Compare against the prev. */ + FsmTrans *prev = trans->prev; + Key nextKey = prev->highKey; + nextKey.increment(); + if ( nextKey < trans->lowKey ) + return true; + } + return false; +} + +bool FsmGraph::checkErrTransFinish( FsmState *state ) +{ + /* Check if there are any ranges already. */ + if ( state->outList.length() == 0 ) + return true; + else { + /* Get the last and check for a gap on the end. 
*/ + FsmTrans *last = state->outList.tail; + if ( last->highKey < keyOps->maxKey ) + return true; + } + return false; +} +/* Returns true as soon as checkErrTrans flags any out transition of any state, or checkErrTransFinish flags any state; false otherwise. */ +bool FsmGraph::hasErrorTrans() +{ + bool result; + for ( StateList::Iter st = stateList; st.lte(); st++ ) { + for ( TransList::Iter tr = st->outList; tr.lte(); tr++ ) { + result = checkErrTrans( st, tr ); + if ( result ) + return true; + } + result = checkErrTransFinish( st ); + if ( result ) + return true; + } + return false; +} diff --git a/src/fsmcodegen.cc b/src/fsmcodegen.cc new file mode 100644 index 00000000..8f3ab597 --- /dev/null +++ b/src/fsmcodegen.cc @@ -0,0 +1,1098 @@ +/* + * Copyright 2006-2012 Adrian Thurston <thurston@complang.org> + */ + +/* This file is part of Colm. + * + * Colm is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Colm is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Colm; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "parsedata.h" +#include "fsmcodegen.h" +#include "redfsm.h" +#include "bstmap.h" +#include "fsmrun.h" +#include <sstream> +#include <string> +#include <assert.h> + + +using std::ostream; +using std::ostringstream; +using std::string; +using std::cerr; +using std::endl; + + +/* Init code gen with in parameters. 
*/ +FsmCodeGen::FsmCodeGen( const char *sourceFileName, const char *fsmName, ostream &out, + RedFsm *redFsm, FsmTables *fsmTables ) +: + sourceFileName(sourceFileName), + fsmName(fsmName), + out(out), + redFsm(redFsm), + fsmTables(fsmTables), + codeGenErrCount(0), + dataPrefix(true), + writeFirstFinal(true), + writeErr(true) +{ +} +/* Size of the host type that keyOps->typeSubsumes picks for maxVal. Asserts that such a type exists. */ +unsigned int FsmCodeGen::arrayTypeSize( unsigned long maxVal ) +{ + long long maxValLL = (long long) maxVal; + HostType *arrayType = keyOps->typeSubsumes( maxValLL ); + assert( arrayType != 0 ); + return arrayType->size; +} +/* Name of the host type that keyOps->typeSubsumes picks for maxVal: data1, followed by a space and data2 when data2 is set. */ +string FsmCodeGen::ARRAY_TYPE( unsigned long maxVal ) +{ + long long maxValLL = (long long) maxVal; + HostType *arrayType = keyOps->typeSubsumes( maxValLL ); + assert( arrayType != 0 ); + + string ret = arrayType->data1; + if ( arrayType->data2 != 0 ) { + ret += " "; + ret += arrayType->data2; + } + return ret; +} + + +/* Write out the fsm name. */ +string FsmCodeGen::FSM_NAME() +{ + return fsmName; +} + +/* Emit the id of the start state as a decimal integer. */ +string FsmCodeGen::START_STATE_ID() +{ + ostringstream ret; + ret << redFsm->startState->id; + return ret.str(); +} + +/* Write out the array of actions: a leading 0, then for each action table its length followed by its action ids. */ +std::ostream &FsmCodeGen::ACTIONS_ARRAY() +{ + out << "\t0, "; + /* Counts the values written so far, the leading 0 included. */ + int totalActions = 1; + for ( GenActionTableMap::Iter act = redFsm->actionMap; act.lte(); act++ ) { + /* Write out the length, which will never be the last character. */ + out << act->key.length() << ", "; + /* Put in a line break every 8 */ + if ( totalActions++ % 8 == 7 ) + out << "\n\t"; + + for ( GenActionTable::Iter item = act->key; item.lte(); item++ ) { + out << item->value->actionId; + /* No separator after the very last value. */ if ( ! (act.last() && item.last()) ) + out << ", "; + + /* Put in a line break every 8 */ + if ( totalActions++ % 8 == 7 ) + out << "\n\t"; + } + } + out << "\n"; + return out; +} + + +string FsmCodeGen::CS() +{ + ostringstream ret; + /* Expression for retrieving the key, use simple dereference. 
*/ + ret << ACCESS() << "cs"; + return ret.str(); +} +/* Key expression for the generated code: "_widec" when redFsm reports any conditions, otherwise GET_KEY(). */ +string FsmCodeGen::GET_WIDE_KEY() +{ + if ( redFsm->anyConditions() ) + return "_widec"; + else + return GET_KEY(); +} +/* Per-state variant: "_widec" when the state's stateCondList is non-empty, otherwise GET_KEY(). */ +string FsmCodeGen::GET_WIDE_KEY( RedState *state ) +{ + if ( state->stateCondList.length() > 0 ) + return "_widec"; + else + return GET_KEY(); +} +/* Expression that dereferences P() in the generated code. */ +string FsmCodeGen::GET_KEY() +{ + ostringstream ret; + /* Expression for retrieving the key, use simple dereference. */ + ret << "(*" << P() << ")"; + return ret.str(); +} + +/* Write out level number of tabs. Makes the nested binary search nice + * looking. */ +string FsmCodeGen::TABS( int level ) +{ + string result; + while ( level-- > 0 ) + result += "\t"; + return result; +} + +/* Write out a key from the fsm code gen. Depends on whether or not the key is + * signed: the value gets a 'u' suffix only when keyOps->isSigned is false and hostLang->explicitUnsigned is set. */ +string FsmCodeGen::KEY( Key key ) +{ + ostringstream ret; + if ( keyOps->isSigned || !hostLang->explicitUnsigned ) + ret << key.getVal(); + else + ret << (unsigned long) key.getVal() << 'u'; + return ret.str(); +} +/* Emit an assignment of the item's longestMatchId to ACT(). */ +void FsmCodeGen::SET_ACT( ostream &ret, InlineItem *item ) +{ + ret << ACT() << " = " << item->longestMatchPart->longestMatchId << ";"; +} + +void FsmCodeGen::SET_TOKEND( ostream &ret, InlineItem *item ) +{ + /* The tokend action sets tokend. 
*/ + ret << TOKEND() << " = " << P() << "+1;"; +} +void FsmCodeGen::INIT_TOKSTART( ostream &ret, InlineItem *item ) +{ + ret << TOKSTART() << " = 0;"; +} + +void FsmCodeGen::INIT_ACT( ostream &ret, InlineItem *item ) +{ + ret << ACT() << " = 0;"; +} + +void FsmCodeGen::SET_TOKSTART( ostream &ret, InlineItem *item ) +{ + ret << TOKSTART() << " = " << P() << ";"; +} + +void FsmCodeGen::EMIT_TOKEN( ostream &ret, LangEl *token ) +{ + ret << " " << MATCHED_TOKEN() << " = " << token->id << ";\n"; +} + +void FsmCodeGen::LM_SWITCH( ostream &ret, InlineItem *item, + int targState, int inFinish ) +{ + ret << + " " << P() << " = " << TOKEND() << ";\n" + " switch( " << ACT() << " ) {\n"; + + /* If the switch handles error then we also forced the error state. It + * will exist. */ + if ( item->tokenRegion->lmSwitchHandlesError ) { + ret << " case 0: " << P() << " = " << TOKSTART() << + "; goto st" << redFsm->errState->id << ";\n"; + } + + for ( TokenDefListReg::Iter lmi = item->tokenRegion->tokenDefList; lmi.lte(); lmi++ ) { + if ( lmi->inLmSelect ) { + assert( lmi->tdLangEl != 0 ); + ret << " case " << lmi->longestMatchId << ":\n"; + EMIT_TOKEN( ret, lmi->tdLangEl ); + ret << " break;\n"; + } + } + + ret << + " }\n" + "\t" + " return;\n"; +} + +void FsmCodeGen::LM_ON_LAST( ostream &ret, InlineItem *item ) +{ + assert( item->longestMatchPart->tdLangEl != 0 ); + + ret << " " << P() << " += 1;\n"; + EMIT_TOKEN( ret, item->longestMatchPart->tdLangEl ); + ret << " return;\n"; +} + +void FsmCodeGen::LM_ON_NEXT( ostream &ret, InlineItem *item ) +{ + assert( item->longestMatchPart->tdLangEl != 0 ); + + EMIT_TOKEN( ret, item->longestMatchPart->tdLangEl ); + ret << " return;\n"; +} + +void FsmCodeGen::LM_ON_LAG_BEHIND( ostream &ret, InlineItem *item ) +{ + assert( item->longestMatchPart->tdLangEl != 0 ); + + ret << " " << P() << " = " << TOKEND() << ";\n"; + EMIT_TOKEN( ret, item->longestMatchPart->tdLangEl ); + ret << " return;\n"; +} + + +/* Write out an inline tree structure. 
Walks the list and possibly calls out + * to virtual functions than handle language specific items in the tree. */ +void FsmCodeGen::INLINE_LIST( ostream &ret, InlineList *inlineList, + int targState, bool inFinish ) +{ + for ( InlineList::Iter item = *inlineList; item.lte(); item++ ) { + switch ( item->type ) { + case InlineItem::Text: + assert( false ); /* Text items are not handled here. */ + break; + case InlineItem::LmSetActId: + SET_ACT( ret, item ); + break; + case InlineItem::LmSetTokEnd: + SET_TOKEND( ret, item ); + break; + case InlineItem::LmInitTokStart: + assert( false ); /* NOTE(review): INIT_TOKSTART exists but is not dispatched here -- confirm this is intentional. */ + break; + case InlineItem::LmInitAct: + INIT_ACT( ret, item ); + break; + case InlineItem::LmSetTokStart: + SET_TOKSTART( ret, item ); + break; + case InlineItem::LmSwitch: + LM_SWITCH( ret, item, targState, inFinish ); + break; + case InlineItem::LmOnLast: + LM_ON_LAST( ret, item ); + break; + case InlineItem::LmOnNext: + LM_ON_NEXT( ret, item ); + break; + case InlineItem::LmOnLagBehind: + LM_ON_LAG_BEHIND( ret, item ); + break; + } + } +} + +/* Write out paths in line directives. Each backslash is doubled; every other character is copied through unchanged. */ +string FsmCodeGen::LDIR_PATH( char *path ) +{ + ostringstream ret; + for ( char *pc = path; *pc != 0; pc++ ) { + if ( *pc == '\\' ) + ret << "\\\\"; + else + ret << *pc; + } + return ret.str(); +} + +void FsmCodeGen::ACTION( ostream &ret, GenAction *action, int targState, bool inFinish ) +{ + /* Write the block and close it off. 
*/ + ret << "\t{"; + INLINE_LIST( ret, action->inlineList, targState, inFinish ); + + if ( action->markId > 0 ) + ret << "mark[" << action->markId-1 << "] = " << P() << ";\n"; + + ret << "}\n"; + +} + +void FsmCodeGen::CONDITION( ostream &ret, GenAction *condition ) +{ + ret << "\n"; + INLINE_LIST( ret, condition->inlineList, 0, false ); +} + +string FsmCodeGen::ERROR_STATE() +{ + ostringstream ret; + if ( redFsm->errState != 0 ) + ret << redFsm->errState->id; + else + ret << "-1"; + return ret.str(); +} + +string FsmCodeGen::FIRST_FINAL_STATE() +{ + ostringstream ret; + if ( redFsm->firstFinState != 0 ) + ret << redFsm->firstFinState->id; + else + ret << redFsm->nextStateId; + return ret.str(); +} + +string FsmCodeGen::DATA_PREFIX() +{ + if ( dataPrefix ) + return FSM_NAME() + "_"; + return ""; +} + +/* Emit the alphabet data type. */ +string FsmCodeGen::ALPH_TYPE() +{ + string ret = keyOps->alphType->data1; + if ( keyOps->alphType->data2 != 0 ) { + ret += " "; + ret += + keyOps->alphType->data2; + } + return ret; +} + +/* Emit the alphabet data type. 
*/ +string FsmCodeGen::WIDE_ALPH_TYPE() +{ + string ret; + if ( redFsm->maxKey <= keyOps->maxKey ) + ret = ALPH_TYPE(); + else { + long long maxKeyVal = redFsm->maxKey.getLongLong(); + HostType *wideType = keyOps->typeSubsumes( keyOps->isSigned, maxKeyVal ); + assert( wideType != 0 ); + + ret = wideType->data1; + if ( wideType->data2 != 0 ) { + ret += " "; + ret += wideType->data2; + } + } + return ret; +} + + +string FsmCodeGen::PTR_CONST() +{ + return "const "; +} + +std::ostream &FsmCodeGen::OPEN_ARRAY( string type, string name ) +{ + out << "static const " << type << " " << name << "[] = {\n"; + return out; +} + +std::ostream &FsmCodeGen::CLOSE_ARRAY() +{ + return out << "};\n"; +} + +std::ostream &FsmCodeGen::STATIC_VAR( string type, string name ) +{ + out << "static const " << type << " " << name; + return out; +} + +string FsmCodeGen::UINT( ) +{ + return "unsigned int"; +} + +string FsmCodeGen::ARR_OFF( string ptr, string offset ) +{ + return ptr + " + " + offset; +} + +string FsmCodeGen::CAST( string type ) +{ + return "(" + type + ")"; +} + +std::ostream &FsmCodeGen::TO_STATE_ACTION_SWITCH() +{ + /* Walk the list of functions, printing the cases. */ + for ( GenActionList::Iter act = redFsm->genActionList; act.lte(); act++ ) { + /* Write out referenced actions. */ + if ( act->numToStateRefs > 0 ) { + /* Write the case label, the action and the case break. */ + out << "\tcase " << act->actionId << ":\n"; + ACTION( out, act, 0, false ); + out << "\tbreak;\n"; + } + } + + return out; +} + +std::ostream &FsmCodeGen::FROM_STATE_ACTION_SWITCH() +{ + /* Walk the list of functions, printing the cases. */ + for ( GenActionList::Iter act = redFsm->genActionList; act.lte(); act++ ) { + /* Write out referenced actions. */ + if ( act->numFromStateRefs > 0 ) { + /* Write the case label, the action and the case break. 
*/ + out << "\tcase " << act->actionId << ":\n"; + ACTION( out, act, 0, false ); + out << "\tbreak;\n"; + } + } + + return out; +} + +std::ostream &FsmCodeGen::ACTION_SWITCH() +{ + /* Walk the list of functions, printing the cases. */ + for ( GenActionList::Iter act = redFsm->genActionList; act.lte(); act++ ) { + /* Write out referenced actions. */ + if ( act->numTransRefs > 0 ) { + /* Write the case label, the action and the case break. */ + out << "\tcase " << act->actionId << ":\n"; + ACTION( out, act, 0, false ); + out << "\tbreak;\n"; + } + } + + return out; +} + +void FsmCodeGen::emitSingleSwitch( RedState *state ) +{ + /* Load up the singles. */ + int numSingles = state->outSingle.length(); + RedTransEl *data = state->outSingle.data; + + if ( numSingles == 1 ) { + /* If there is a single single key then write it out as an if. */ + out << "\tif ( " << GET_WIDE_KEY(state) << " == " << + KEY(data[0].lowKey) << " )\n\t\t"; + + /* Virtual function for writing the target of the transition. */ + TRANS_GOTO(data[0].value, 0) << "\n"; + } + else if ( numSingles > 1 ) { + /* Write out single keys in a switch if there is more than one. */ + out << "\tswitch( " << GET_WIDE_KEY(state) << " ) {\n"; + + /* Write out the single indicies. */ + for ( int j = 0; j < numSingles; j++ ) { + out << "\t\tcase " << KEY(data[j].lowKey) << ": "; + TRANS_GOTO(data[j].value, 0) << "\n"; + } + + /* Close off the transition switch. */ + out << "\t}\n"; + } +} + +void FsmCodeGen::emitRangeBSearch( RedState *state, int level, int low, int high ) +{ + /* Get the mid position, staying on the lower end of the range. */ + int mid = (low + high) >> 1; + RedTransEl *data = state->outRange.data; + + /* Determine if we need to look higher or lower. */ + bool anyLower = mid > low; + bool anyHigher = mid < high; + + /* Determine if the keys at mid are the limits of the alphabet. 
*/ + bool limitLow = data[mid].lowKey == keyOps->minKey; + bool limitHigh = data[mid].highKey == keyOps->maxKey; + + if ( anyLower && anyHigher ) { + /* Can go lower and higher than mid. */ + out << TABS(level) << "if ( " << GET_WIDE_KEY(state) << " < " << + KEY(data[mid].lowKey) << " ) {\n"; + emitRangeBSearch( state, level+1, low, mid-1 ); + out << TABS(level) << "} else if ( " << GET_WIDE_KEY(state) << " > " << + KEY(data[mid].highKey) << " ) {\n"; + emitRangeBSearch( state, level+1, mid+1, high ); + out << TABS(level) << "} else\n"; + TRANS_GOTO(data[mid].value, level+1) << "\n"; + } + else if ( anyLower && !anyHigher ) { + /* Can go lower than mid but not higher. */ + out << TABS(level) << "if ( " << GET_WIDE_KEY(state) << " < " << + KEY(data[mid].lowKey) << " ) {\n"; + emitRangeBSearch( state, level+1, low, mid-1 ); + + /* if the higher is the highest in the alphabet then there is no + * sense testing it. */ + if ( limitHigh ) { + out << TABS(level) << "} else\n"; + TRANS_GOTO(data[mid].value, level+1) << "\n"; + } + else { + out << TABS(level) << "} else if ( " << GET_WIDE_KEY(state) << " <= " << + KEY(data[mid].highKey) << " )\n"; + TRANS_GOTO(data[mid].value, level+1) << "\n"; + } + } + else if ( !anyLower && anyHigher ) { + /* Can go higher than mid but not lower. */ + out << TABS(level) << "if ( " << GET_WIDE_KEY(state) << " > " << + KEY(data[mid].highKey) << " ) {\n"; + emitRangeBSearch( state, level+1, mid+1, high ); + + /* If the lower end is the lowest in the alphabet then there is no + * sense testing it. */ + if ( limitLow ) { + out << TABS(level) << "} else\n"; + TRANS_GOTO(data[mid].value, level+1) << "\n"; + } + else { + out << TABS(level) << "} else if ( " << GET_WIDE_KEY(state) << " >= " << + KEY(data[mid].lowKey) << " )\n"; + TRANS_GOTO(data[mid].value, level+1) << "\n"; + } + } + else { + /* Cannot go higher or lower than mid. It's mid or bust. What + * tests to do depends on limits of alphabet. 
*/ + if ( !limitLow && !limitHigh ) { + out << TABS(level) << "if ( " << KEY(data[mid].lowKey) << " <= " << + GET_WIDE_KEY(state) << " && " << GET_WIDE_KEY(state) << " <= " << + KEY(data[mid].highKey) << " )\n"; + TRANS_GOTO(data[mid].value, level+1) << "\n"; + } + else if ( limitLow && !limitHigh ) { + out << TABS(level) << "if ( " << GET_WIDE_KEY(state) << " <= " << + KEY(data[mid].highKey) << " )\n"; + TRANS_GOTO(data[mid].value, level+1) << "\n"; + } + else if ( !limitLow && limitHigh ) { + out << TABS(level) << "if ( " << KEY(data[mid].lowKey) << " <= " << + GET_WIDE_KEY(state) << " )\n"; + TRANS_GOTO(data[mid].value, level+1) << "\n"; + } + else { + /* Both high and low are at the limit. No tests to do. */ + TRANS_GOTO(data[mid].value, level+1) << "\n"; + } + } +} + +void FsmCodeGen::COND_TRANSLATE( GenStateCond *stateCond, int level ) +{ + GenCondSpace *condSpace = stateCond->condSpace; + out << TABS(level) << "_widec = " << CAST(WIDE_ALPH_TYPE()) << "(" << + KEY(condSpace->baseKey) << " + (" << GET_KEY() << + " - " << KEY(keyOps->minKey) << "));\n"; + + for ( GenCondSet::Iter csi = condSpace->condSet; csi.lte(); csi++ ) { + out << TABS(level) << "if ( "; + CONDITION( out, *csi ); + Size condValOffset = ((1 << csi.pos()) * keyOps->alphSize()); + out << " ) _widec += " << condValOffset << ";\n"; + } +} + +void FsmCodeGen::emitCondBSearch( RedState *state, int level, int low, int high ) +{ + /* Get the mid position, staying on the lower end of the range. */ + int mid = (low + high) >> 1; + GenStateCond **data = state->stateCondVect.data; + + /* Determine if we need to look higher or lower. */ + bool anyLower = mid > low; + bool anyHigher = mid < high; + + /* Determine if the keys at mid are the limits of the alphabet. */ + bool limitLow = data[mid]->lowKey == keyOps->minKey; + bool limitHigh = data[mid]->highKey == keyOps->maxKey; + + if ( anyLower && anyHigher ) { + /* Can go lower and higher than mid. 
*/ + out << TABS(level) << "if ( " << GET_KEY() << " < " << + KEY(data[mid]->lowKey) << " ) {\n"; + emitCondBSearch( state, level+1, low, mid-1 ); + out << TABS(level) << "} else if ( " << GET_KEY() << " > " << + KEY(data[mid]->highKey) << " ) {\n"; + emitCondBSearch( state, level+1, mid+1, high ); + out << TABS(level) << "} else {\n"; + COND_TRANSLATE(data[mid], level+1); + out << TABS(level) << "}\n"; + } + else if ( anyLower && !anyHigher ) { + /* Can go lower than mid but not higher. */ + out << TABS(level) << "if ( " << GET_KEY() << " < " << + KEY(data[mid]->lowKey) << " ) {\n"; + emitCondBSearch( state, level+1, low, mid-1 ); + + /* if the higher is the highest in the alphabet then there is no + * sense testing it. */ + if ( limitHigh ) { + out << TABS(level) << "} else {\n"; + COND_TRANSLATE(data[mid], level+1); + out << TABS(level) << "}\n"; + } + else { + out << TABS(level) << "} else if ( " << GET_KEY() << " <= " << + KEY(data[mid]->highKey) << " ) {\n"; + COND_TRANSLATE(data[mid], level+1); + out << TABS(level) << "}\n"; + } + } + else if ( !anyLower && anyHigher ) { + /* Can go higher than mid but not lower. */ + out << TABS(level) << "if ( " << GET_KEY() << " > " << + KEY(data[mid]->highKey) << " ) {\n"; + emitCondBSearch( state, level+1, mid+1, high ); + + /* If the lower end is the lowest in the alphabet then there is no + * sense testing it. */ + if ( limitLow ) { + out << TABS(level) << "} else {\n"; + COND_TRANSLATE(data[mid], level+1); + out << TABS(level) << "}\n"; + } + else { + out << TABS(level) << "} else if ( " << GET_KEY() << " >= " << + KEY(data[mid]->lowKey) << " ) {\n"; + COND_TRANSLATE(data[mid], level+1); + out << TABS(level) << "}\n"; + } + } + else { + /* Cannot go higher or lower than mid. It's mid or bust. What + * tests to do depends on limits of alphabet. 
*/ + if ( !limitLow && !limitHigh ) { + out << TABS(level) << "if ( " << KEY(data[mid]->lowKey) << " <= " << + GET_KEY() << " && " << GET_KEY() << " <= " << + KEY(data[mid]->highKey) << " ) {\n"; + COND_TRANSLATE(data[mid], level+1); + out << TABS(level) << "}\n"; + } + else if ( limitLow && !limitHigh ) { + out << TABS(level) << "if ( " << GET_KEY() << " <= " << + KEY(data[mid]->highKey) << " ) {\n"; + COND_TRANSLATE(data[mid], level+1); + out << TABS(level) << "}\n"; + } + else if ( !limitLow && limitHigh ) { + out << TABS(level) << "if ( " << KEY(data[mid]->lowKey) << " <= " << + GET_KEY() << " )\n {"; + COND_TRANSLATE(data[mid], level+1); + out << TABS(level) << "}\n"; + } + else { + /* Both high and low are at the limit. No tests to do. */ + COND_TRANSLATE(data[mid], level); + } + } +} + +std::ostream &FsmCodeGen::STATE_GOTOS() +{ + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + if ( st == redFsm->errState ) + STATE_GOTO_ERROR(); + else { + /* Writing code above state gotos. */ + GOTO_HEADER( st ); + + if ( st->stateCondVect.length() > 0 ) { + out << " _widec = " << GET_KEY() << ";\n"; + emitCondBSearch( st, 1, 0, st->stateCondVect.length() - 1 ); + } + + /* Try singles. */ + if ( st->outSingle.length() > 0 ) + emitSingleSwitch( st ); + + /* Default case is to binary search for the ranges, if that fails then */ + if ( st->outRange.length() > 0 ) + emitRangeBSearch( st, 1, 0, st->outRange.length() - 1 ); + + /* Write the default transition. 
*/ + TRANS_GOTO( st->defTrans, 1 ) << "\n"; + } + } + return out; +} + +unsigned int FsmCodeGen::TO_STATE_ACTION( RedState *state ) +{ + int act = 0; + if ( state->toStateAction != 0 ) + act = state->toStateAction->location+1; + return act; +} + +unsigned int FsmCodeGen::FROM_STATE_ACTION( RedState *state ) +{ + int act = 0; + if ( state->fromStateAction != 0 ) + act = state->fromStateAction->location+1; + return act; +} + +std::ostream &FsmCodeGen::TO_STATE_ACTIONS() +{ + /* NOTE(review): nothing is taken off for a pseudo start state here; the + * table gets one zero-initialised slot per state, indexed by state id. */ + int numStates = redFsm->stateList.length(); + unsigned int *vals = new unsigned int[numStates]; + memset( vals, 0, sizeof(unsigned int)*numStates ); + + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) + vals[st->id] = TO_STATE_ACTION(st); + + out << "\t"; + for ( int st = 0; st < redFsm->nextStateId; st++ ) { + /* Write the to-state action value for this state id. */ + out << vals[st]; + if ( st < numStates-1 ) { + out << ", "; + if ( (st+1) % IALL == 0 ) + out << "\n\t"; + } + } + out << "\n"; + delete[] vals; + return out; +} + +std::ostream &FsmCodeGen::FROM_STATE_ACTIONS() +{ + /* NOTE(review): nothing is taken off for a pseudo start state here; the + * table gets one zero-initialised slot per state, indexed by state id. */ + int numStates = redFsm->stateList.length(); + unsigned int *vals = new unsigned int[numStates]; + memset( vals, 0, sizeof(unsigned int)*numStates ); + + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) + vals[st->id] = FROM_STATE_ACTION(st); + + out << "\t"; + for ( int st = 0; st < redFsm->nextStateId; st++ ) { + /* Write the from-state action value for this state id. */ + out << vals[st]; + if ( st < numStates-1 ) { + out << ", "; + if ( (st+1) % IALL == 0 ) + out << "\n\t"; + } + } + out << "\n"; + delete[] vals; + return out; +} + +bool FsmCodeGen::IN_TRANS_ACTIONS( RedState *state ) +{ + /* Emit any transitions that have actions and that go to this state. 
*/ + for ( int it = 0; it < state->numInTrans; it++ ) { + RedTrans *trans = state->inTrans[it]; + if ( trans->action != 0 && trans->labelNeeded ) { + /* Write the label for the transition so it can be jumped to. */ + out << "tr" << trans->id << ":\n"; + + /* If the action contains a next, then we must preload the current + * state since the action may or may not set it. */ + if ( trans->action->anyNextStmt() ) + out << " " << CS() << " = " << trans->targ->id << ";\n"; + + /* Write each action in the list. */ + for ( GenActionTable::Iter item = trans->action->key; item.lte(); item++ ) + ACTION( out, item->value, trans->targ->id, false ); + + out << "\tgoto st" << trans->targ->id << ";\n"; + } + } + + return 0; +} + +/* Called from FsmCodeGen::STATE_GOTOS just before writing the gotos for each + * state. */ +void FsmCodeGen::GOTO_HEADER( RedState *state ) +{ + IN_TRANS_ACTIONS( state ); + + if ( state->labelNeeded ) + out << "st" << state->id << ":\n"; + + if ( state->toStateAction != 0 ) { + /* Remember that we wrote an action. Write every action in the list. */ + for ( GenActionTable::Iter item = state->toStateAction->key; item.lte(); item++ ) + ACTION( out, item->value, state->id, false ); + } + + /* Give the state a switch case. */ + out << "case " << state->id << ":\n"; + + /* Advance and test buffer pos. */ + out << + " if ( ++" << P() << " == " << PE() << " )\n" + " goto out" << state->id << ";\n"; + + if ( state->fromStateAction != 0 ) { + /* Remember that we wrote an action. Write every action in the list. */ + for ( GenActionTable::Iter item = state->fromStateAction->key; item.lte(); item++ ) + ACTION( out, item->value, state->id, false ); + } + + /* Record the prev state if necessary. */ + if ( state->anyRegCurStateRef() ) + out << " _ps = " << state->id << ";\n"; +} + +void FsmCodeGen::STATE_GOTO_ERROR() +{ + /* In the error state we need to emit some stuff that usually goes into + * the header. 
*/ + RedState *state = redFsm->errState; + IN_TRANS_ACTIONS( state ); + + if ( state->labelNeeded ) + out << "st" << state->id << ":\n"; + + /* We do not need a case label here because the error state is checked + * at the head of the loop. */ + + /* Break out here. */ + out << " goto out" << state->id << ";\n"; +} + + +/* Emit the goto to take for a given transition: to the trN label when the + * transition has an action, otherwise straight to the target's stN label. + * No newline is written; the stream is returned so callers can append one. */ +std::ostream &FsmCodeGen::TRANS_GOTO( RedTrans *trans, int level ) +{ + if ( trans->action != 0 ) { + /* Go to the transition which will go to the state. */ + out << TABS(level) << "goto tr" << trans->id << ";"; + } + else { + /* Go directly to the target state. */ + out << TABS(level) << "goto st" << trans->targ->id << ";"; + } + return out; +} + +std::ostream &FsmCodeGen::EXIT_STATES() +{ + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + out << " case " << st->id << ": out" << st->id << ": "; + if ( st->eofTrans != 0 ) { + out << "if ( " << PE() << " == " << PEOF() << " ) {"; + TRANS_GOTO( st->eofTrans, 0 ); + out << "\n"; + out << "}"; + } + + /* Exit: store this state's id as the current state and leave. */ + out << CS() << " = " << st->id << "; goto out; \n"; + } + return out; +} + +/* Set up labelNeeded flag for each state. */ +void FsmCodeGen::setLabelsNeeded() +{ + /* Do not use all labels by default, init all labelNeeded vars to false. */ + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) + st->labelNeeded = false; + + if ( redFsm->errState != 0 && redFsm->anyLmSwitchError() ) + redFsm->errState->labelNeeded = true; + + /* Walk all transitions and set only those that have targs. */ + for ( RedTransSet::Iter trans = redFsm->transSet; trans.lte(); trans++ ) { + /* If there is no action with a next statement, then the label will be + * needed. 
*/ + if ( trans->action == 0 || !trans->action->anyNextStmt() ) + trans->targ->labelNeeded = true; + } + + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) + st->outNeeded = st->labelNeeded; +} + +void FsmCodeGen::writeData() +{ + out << "#define " << START() << " " << START_STATE_ID() << "\n"; + out << "#define " << FIRST_FINAL() << " " << FIRST_FINAL_STATE() << "\n"; + out << "#define " << ERROR() << " " << ERROR_STATE() << "\n"; + out << "#define false 0\n"; + out << "#define true 1\n"; + out << "\n"; + + out << "long " << ENTRY_BY_REGION() << "[] = {\n\t"; + for ( int i = 0; i < fsmTables->numRegions; i++ ) { + out << fsmTables->entryByRegion[i]; + + if ( i < fsmTables->numRegions-1 ) { + out << ", "; + if ( (i+1) % 8 == 0 ) + out << "\n\t"; + } + } + out << "\n};\n\n"; + + out << + "FsmTables fsmTables_start =\n" + "{\n" + " 0, " /* actions */ + " 0, " /* keyOffsets */ + " 0, " /* transKeys */ + " 0, " /* singleLengths */ + " 0, " /* rangeLengths */ + " 0, " /* indexOffsets */ + " 0, " /* transTargsWI */ + " 0, " /* transActionsWI */ + " 0, " /* toStateActions */ + " 0, " /* fromStateActions */ + " 0, " /* eofActions */ + " 0,\n" /* eofTargs */ + " " << ENTRY_BY_REGION() << ",\n" + + "\n" + " 0, " /* numStates */ + " 0, " /* numActions */ + " 0, " /* numTransKeys */ + " 0, " /* numSingleLengths */ + " 0, " /* numRangeLengths */ + " 0, " /* numIndexOffsets */ + " 0, " /* numTransTargsWI */ + " 0,\n" /* numTransActionsWI */ + " " << redFsm->regionToEntry.length() << ",\n" + "\n" + " " << START() << ",\n" + " " << FIRST_FINAL() << ",\n" + " " << ERROR() << ",\n" + "\n" + " 0,\n" /* actionSwitch */ + " 0\n" /* numActionSwitch */ + "};\n" + "\n"; +} + +void FsmCodeGen::writeInit() +{ + out << + " " << CS() << " = " << START() << ";\n"; + + /* If there are any calls, then the stack top needs initialization. 
*/ + if ( redFsm->anyActionCalls() || redFsm->anyActionRets() ) + out << "\t" << TOP() << " = 0;\n"; + + out << + " " << TOKSTART() << " = 0;\n" + " " << TOKEND() << " = 0;\n" + " " << ACT() << " = 0;\n"; + + out << "\n"; +} + +void FsmCodeGen::writeExec() +{ + setLabelsNeeded(); + + out << + "void fsmExecute( FsmRun *fsmRun, InputStream *inputStream )\n" + "{\n" + "/*_resume:*/\n"; + + if ( redFsm->errState != 0 ) { + out << + " if ( " << CS() << " == " << redFsm->errState->id << " )\n" + " goto out;\n"; + } + + out << + " if ( " << P() << " == " << PE() << " )\n" + " goto out_switch;\n" + " --" << P() << ";\n" + "\n" + " switch ( " << CS() << " )\n {\n"; + STATE_GOTOS() << + " }\n"; + + out << + "out_switch:\n" + " switch ( " << CS() << " )\n {\n"; + EXIT_STATES() << + " }\n"; + + out << + " out: {}\n" + "}\n" + "\n"; +} + +void FsmCodeGen::writeIncludes() +{ + out << + "#include <pdarun.h>\n" + "#include <fsmrun.h>\n" + "#include <debug.h>\n" + "#include <bytecode.h>\n" + "#include <config.h>\n" + "#include <defs.h>\n" + "#include <input.h>\n" + "#include <tree.h>\n" + "#include <program.h>\n" + "#include <colm.h>\n" + "#include <stdio.h>\n" + "#include <stdlib.h>\n" + "#include <string.h>\n" + "#include <assert.h>\n" + "\n" + "\n"; +} + +void FsmCodeGen::writeCode() +{ + redFsm->depthFirstOrdering(); + + + writeData(); + writeExec(); + + /* Referenced in the runtime lib, but used only in the compiler. Probably + * should use the preprocessor to make these go away. 
*/ + out << + "void sendNamedLangEl( Program *prg, Tree **tree, PdaRun *pdaRun,\n" + " FsmRun *fsmRun, InputStream *inputStream ) { }\n" + "void initBindings( PdaRun *pdaRun ) {}\n" + "void pushBinding( PdaRun *pdaRun, ParseTree *parseTree ) {}\n" + "void popBinding( PdaRun *pdaRun, ParseTree *tree ) {}\n" + "void initStaticFuncs() {}\n" + "void initPatternFuncs() {}\n" + "void initReplFuncs() {}\n" + "void initInputFuncs();\n" + "\n" + "\n"; +} + +ostream &FsmCodeGen::source_warning( const InputLoc &loc ) +{ + cerr << sourceFileName << ":" << loc.line << ":" << loc.col << ": warning: "; + return cerr; +} + +ostream &FsmCodeGen::source_error( const InputLoc &loc ) +{ + codeGenErrCount += 1; + assert( sourceFileName != 0 ); + cerr << sourceFileName << ":" << loc.line << ":" << loc.col << ": "; + return cerr; +} + + diff --git a/src/fsmcodegen.h b/src/fsmcodegen.h new file mode 100644 index 00000000..41cd88ec --- /dev/null +++ b/src/fsmcodegen.h @@ -0,0 +1,212 @@ +/* + * Copyright 2001-2012 Adrian Thurston <thurston@complang.org> + */ + +/* This file is part of Colm. + * + * Colm is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Colm is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with Colm; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _FSMCODEGEN_H +#define _FSMCODEGEN_H + +#include <iostream> +#include <string> +#include <stdio.h> +#include "keyops.h" +#include "parsedata.h" +#include "redfsm.h" +#include "fsmrun.h" + +using std::string; +using std::ostream; + +/* Integer array line length. */ +#define IALL 8 + +/* Forwards. */ +struct RedFsm; +struct RedState; +struct GenAction; +struct NameInst; +struct RedAction; +struct LongestMatch; +struct TokenDef; +struct InlineList; +struct InlineItem; +struct NameInst; +struct FsmCodeGen; + +typedef unsigned long ulong; +typedef unsigned char uchar; + + +/* + * The interface to the parser + */ + +std::ostream *openOutput( char *inputFile ); + +inline string itoa( int i ) +{ + char buf[16]; + sprintf( buf, "%i", i ); + return buf; +} + +/* + * class FsmCodeGen + */ +class FsmCodeGen +{ +public: + FsmCodeGen( const char *sourceFileName, const char *fsmName, ostream &out, + RedFsm *redFsm, FsmTables *fsmTables ); + +protected: + string FSM_NAME(); + string START_STATE_ID(); + ostream &ACTIONS_ARRAY(); + string GET_WIDE_KEY(); + string GET_WIDE_KEY( RedState *state ); + string TABS( int level ); + string KEY( Key key ); + string LDIR_PATH( char *path ); + void ACTION( ostream &ret, GenAction *action, int targState, bool inFinish ); + void CONDITION( ostream &ret, GenAction *condition ); + string ALPH_TYPE(); + string WIDE_ALPH_TYPE(); + string ARRAY_TYPE( unsigned long maxVal ); + + string ARR_OFF( string ptr, string offset ); + string CAST( string type ); + string UINT(); + string GET_KEY(); + + string ACCESS() { return "fsmRun->"; } + + string P() { return ACCESS() + "p"; } + string PE() { return ACCESS() + "pe"; } + string PEOF() { return ACCESS() + "peof"; } + + string CS(); + string TOP() { return ACCESS() + "top"; } + string 
TOKSTART() { return ACCESS() + "tokstart"; } + string TOKEND() { return ACCESS() + "tokend"; } + string ACT() { return ACCESS() + "act"; } + string MATCHED_TOKEN() { return ACCESS() + "matchedToken"; } + + string DATA_PREFIX(); + + string START() { return DATA_PREFIX() + "start"; } + string ERROR() { return DATA_PREFIX() + "error"; } + string FIRST_FINAL() { return DATA_PREFIX() + "first_final"; } + + string ENTRY_BY_REGION() { return DATA_PREFIX() + "entryByRegion"; } + + + void INLINE_LIST( ostream &ret, InlineList *inlineList, + int targState, bool inFinish ); + void EXEC_TOKEND( ostream &ret, InlineItem *item, int targState, int inFinish ); + void EXECTE( ostream &ret, InlineItem *item, int targState, int inFinish ); + void LM_SWITCH( ostream &ret, InlineItem *item, int targState, int inFinish ); + void SET_ACT( ostream &ret, InlineItem *item ); + void INIT_TOKSTART( ostream &ret, InlineItem *item ); + void INIT_ACT( ostream &ret, InlineItem *item ); + void SET_TOKSTART( ostream &ret, InlineItem *item ); + void SET_TOKEND( ostream &ret, InlineItem *item ); + void GET_TOKEND( ostream &ret, InlineItem *item ); + void SUB_ACTION( ostream &ret, InlineItem *item, int targState, bool inFinish ); + void LM_ON_LAST( ostream &ret, InlineItem *item ); + void LM_ON_NEXT( ostream &ret, InlineItem *item ); + void LM_ON_LAG_BEHIND( ostream &ret, InlineItem *item ); + void EXEC_TOKEND( ostream &ret ); + void EMIT_TOKEN( ostream &ret, LangEl *token ); + + string ERROR_STATE(); + string FIRST_FINAL_STATE(); + + string PTR_CONST(); + ostream &OPEN_ARRAY( string type, string name ); + ostream &CLOSE_ARRAY(); + ostream &STATIC_VAR( string type, string name ); + + string CTRL_FLOW(); + + ostream &source_warning(const InputLoc &loc); + ostream &source_error(const InputLoc &loc); + + unsigned int arrayTypeSize( unsigned long maxVal ); + +/* subclass */ + +public: + const char *sourceFileName; + const char *fsmName; + ostream &out; + RedFsm *redFsm; + FsmTables *fsmTables; + int 
codeGenErrCount; + + /* Write options. */ + bool dataPrefix; + bool writeFirstFinal; + bool writeErr; + + std::ostream &TO_STATE_ACTION_SWITCH(); + std::ostream &FROM_STATE_ACTION_SWITCH(); + std::ostream &ACTION_SWITCH(); + std::ostream &STATE_GOTOS(); + std::ostream &TRANSITIONS(); + std::ostream &EXEC_FUNCS(); + + unsigned int TO_STATE_ACTION( RedState *state ); + unsigned int FROM_STATE_ACTION( RedState *state ); + + std::ostream &TO_STATE_ACTIONS(); + std::ostream &FROM_STATE_ACTIONS(); + + void COND_TRANSLATE( GenStateCond *stateCond, int level ); + void emitCondBSearch( RedState *state, int level, int low, int high ); + void STATE_CONDS( RedState *state, bool genDefault ); + + void emitSingleSwitch( RedState *state ); + void emitRangeBSearch( RedState *state, int level, int low, int high ); + + std::ostream &EXIT_STATES(); + std::ostream &TRANS_GOTO( RedTrans *trans, int level ); + std::ostream &FINISH_CASES(); + + void writeIncludes(); + void writeData(); + void writeInit(); + void writeExec(); + void writeCode(); + void writeMain(); + +protected: + bool useAgainLabel(); + + /* Called from GotoCodeGen::STATE_GOTOS just before writing the gotos for + * each state. */ + bool IN_TRANS_ACTIONS( RedState *state ); + void GOTO_HEADER( RedState *state ); + void STATE_GOTO_ERROR(); + + /* Set up labelNeeded flag for each state. */ + void setLabelsNeeded(); +}; + +#endif /* _FSMCODEGEN_H */ diff --git a/src/fsmexec.cc b/src/fsmexec.cc new file mode 100644 index 00000000..f922c7a4 --- /dev/null +++ b/src/fsmexec.cc @@ -0,0 +1,208 @@ +/* + * Copyright 2007-2012 Adrian Thurston <thurston@complang.org> + */ + +/* This file is part of Colm. + * + * Colm is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * Colm is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Colm; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <string.h> +#include <iostream> + +#include "config.h" +#include "defs.h" +#include "fsmrun.h" +#include "redfsm.h" +#include "parsedata.h" +#include "parsetree.h" +#include "pdarun.h" +#include "global.h" + +void execAction( FsmRun *fsmRun, GenAction *genAction ) +{ + for ( InlineList::Iter item = *genAction->inlineList; item.lte(); item++ ) { + switch ( item->type ) { + case InlineItem::Text: + assert(false); + break; + case InlineItem::LmSetActId: + fsmRun->act = item->longestMatchPart->longestMatchId; + break; + case InlineItem::LmSetTokEnd: + fsmRun->tokend = fsmRun->p + 1; + break; + case InlineItem::LmInitTokStart: + assert(false); + break; + case InlineItem::LmInitAct: + fsmRun->act = 0; + break; + case InlineItem::LmSetTokStart: + fsmRun->tokstart = fsmRun->p; + break; + case InlineItem::LmSwitch: + /* If the switch handles error then we also forced the error state. It + * will exist. 
*/ + fsmRun->p = fsmRun->tokend; + if ( item->tokenRegion->lmSwitchHandlesError && fsmRun->act == 0 ) { + fsmRun->p = fsmRun->tokstart; + fsmRun->cs = fsmRun->tables->errorState; + } + else { + for ( TokenDefListReg::Iter lmi = item->tokenRegion->tokenDefList; + lmi.lte(); lmi++ ) + { + if ( lmi->inLmSelect && fsmRun->act == lmi->longestMatchId ) + fsmRun->matchedToken = lmi->tdLangEl->id; + } + } + fsmRun->returnResult = true; + break; + case InlineItem::LmOnLast: + fsmRun->p += 1; + fsmRun->matchedToken = item->longestMatchPart->tdLangEl->id; + fsmRun->returnResult = true; + break; + case InlineItem::LmOnNext: + fsmRun->matchedToken = item->longestMatchPart->tdLangEl->id; + fsmRun->returnResult = true; + break; + case InlineItem::LmOnLagBehind: + fsmRun->p = fsmRun->tokend; + fsmRun->matchedToken = item->longestMatchPart->tdLangEl->id; + fsmRun->returnResult = true; + break; + } + } + + if ( genAction->markType == MarkMark ) + fsmRun->mark[genAction->markId-1] = fsmRun->p; +} + +void fsmExecute( FsmRun *fsmRun, InputStream *inputStream ) +{ + int _klen; + unsigned int _trans; + const long *_acts; + unsigned int _nacts; + const char *_keys; + + /* Init the token match to nothing (the sentinal). 
*/ + fsmRun->matchedToken = 0; + +/*_resume:*/ + if ( fsmRun->cs == fsmRun->tables->errorState ) + goto out; + + if ( fsmRun->p == fsmRun->pe ) + goto out; + +_loop_head: + _acts = fsmRun->tables->actions + fsmRun->tables->fromStateActions[fsmRun->cs]; + _nacts = (unsigned int) *_acts++; + while ( _nacts-- > 0 ) + execAction( fsmRun, fsmRun->tables->actionSwitch[*_acts++] ); + + _keys = fsmRun->tables->transKeys + fsmRun->tables->keyOffsets[fsmRun->cs]; + _trans = fsmRun->tables->indexOffsets[fsmRun->cs]; + + _klen = fsmRun->tables->singleLengths[fsmRun->cs]; + if ( _klen > 0 ) { + const char *_lower = _keys; + const char *_mid; + const char *_upper = _keys + _klen - 1; + while (1) { + if ( _upper < _lower ) + break; + + _mid = _lower + ((_upper-_lower) >> 1); + if ( (*fsmRun->p) < *_mid ) + _upper = _mid - 1; + else if ( (*fsmRun->p) > *_mid ) + _lower = _mid + 1; + else { + _trans += (_mid - _keys); + goto _match; + } + } + _keys += _klen; + _trans += _klen; + } + + _klen = fsmRun->tables->rangeLengths[fsmRun->cs]; + if ( _klen > 0 ) { + const char *_lower = _keys; + const char *_mid; + const char *_upper = _keys + (_klen<<1) - 2; + while (1) { + if ( _upper < _lower ) + break; + + _mid = _lower + (((_upper-_lower) >> 1) & ~1); + if ( (*fsmRun->p) < _mid[0] ) + _upper = _mid - 2; + else if ( (*fsmRun->p) > _mid[1] ) + _lower = _mid + 2; + else { + _trans += ((_mid - _keys)>>1); + goto _match; + } + } + _trans += _klen; + } + +_match: + fsmRun->cs = fsmRun->tables->transTargsWI[_trans]; + + if ( fsmRun->tables->transActionsWI[_trans] == 0 ) + goto _again; + + fsmRun->returnResult = false; + _acts = fsmRun->tables->actions + fsmRun->tables->transActionsWI[_trans]; + _nacts = (unsigned int) *_acts++; + while ( _nacts-- > 0 ) + execAction( fsmRun, fsmRun->tables->actionSwitch[*_acts++] ); + if ( fsmRun->returnResult ) + return; + +_again: + _acts = fsmRun->tables->actions + fsmRun->tables->toStateActions[fsmRun->cs]; + _nacts = (unsigned int) *_acts++; + while ( 
_nacts-- > 0 ) + execAction( fsmRun, fsmRun->tables->actionSwitch[*_acts++] ); + + if ( fsmRun->cs == fsmRun->tables->errorState ) + goto out; + + if ( ++fsmRun->p != fsmRun->pe ) + goto _loop_head; +out: + if ( fsmRun->p == fsmRun->peof ) { + fsmRun->returnResult = false; + _acts = fsmRun->tables->actions + fsmRun->tables->eofActions[fsmRun->cs]; + _nacts = (unsigned int) *_acts++; + + if ( fsmRun->tables->eofTargs[fsmRun->cs] >= 0 ) + fsmRun->cs = fsmRun->tables->eofTargs[fsmRun->cs]; + + while ( _nacts-- > 0 ) + execAction( fsmRun, fsmRun->tables->actionSwitch[*_acts++] ); + if ( fsmRun->returnResult ) + return; + } +} + + diff --git a/src/fsmgraph.cc b/src/fsmgraph.cc new file mode 100644 index 00000000..590d7902 --- /dev/null +++ b/src/fsmgraph.cc @@ -0,0 +1,1408 @@ +/* + * Copyright 2006-2012 Adrian Thurston <thurston@complang.org> + */ + +/* This file is part of Colm. + * + * Colm is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Colm is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Colm; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <assert.h> +#include <iostream> + +#include "config.h" +#include "defs.h" +#include "fsmgraph.h" +#include "mergesort.h" + +using std::cerr; +using std::endl; + +/* Make a new state. The new state will be put on the graph's + * list of state. The new state can be created final or non final. */ +FsmState *FsmGraph::addState() +{ + /* Make the new state to return. 
*/ + FsmState *state = new FsmState(); + + if ( misfitAccounting ) { + /* Create the new state on the misfit list. All states are created + * with no foreign in transitions. */ + misfitList.append( state ); + } + else { + /* Create the new state. */ + stateList.append( state ); + } + + return state; +} + +/* Construct an FSM that is the concatenation of an array of characters. A new + * machine will be made that has len+1 states with one transition between each + * state for each integer in str. IsSigned determines if the integers are to + * be considered as signed or unsigned ints. */ +void FsmGraph::concatFsm( Key *str, int len ) +{ + /* Make the first state and set it as the start state. */ + FsmState *last = addState(); + setStartState( last ); + + /* Attach subsequent states. */ + for ( int i = 0; i < len; i++ ) { + FsmState *newState = addState(); + attachNewTrans( last, newState, str[i], str[i] ); + last = newState; + } + + /* Make the last state the final state. */ + setFinState( last ); +} + +/* Case insensitive version of concatFsm. */ +void FsmGraph::concatFsmCI( Key *str, int len ) +{ + /* Make the first state and set it as the start state. */ + FsmState *last = addState(); + setStartState( last ); + + /* Attach subsequent states. */ + for ( int i = 0; i < len; i++ ) { + FsmState *newState = addState(); + + KeySet keySet; + if ( str[i].isLower() ) + keySet.insert( str[i].toUpper() ); + if ( str[i].isUpper() ) + keySet.insert( str[i].toLower() ); + keySet.insert( str[i] ); + + for ( int i = 0; i < keySet.length(); i++ ) + attachNewTrans( last, newState, keySet[i], keySet[i] ); + + last = newState; + } + + /* Make the last state the final state. */ + setFinState( last ); +} + +/* Construct a machine that matches one character. A new machine will be made + * that has two states with a single transition between the states. IsSigned + * determines if the integers are to be considered as signed or unsigned ints. 
*/ +void FsmGraph::concatFsm( Key chr ) +{ + /* Two states first start, second final. */ + setStartState( addState() ); + + FsmState *end = addState(); + setFinState( end ); + + /* Attach on the character. */ + attachNewTrans( startState, end, chr, chr ); +} + +/* Construct a machine that matches any character in set. A new machine will + * be made that has two states and len transitions between them. The set + * should be ordered correctly according to KeyOps and should not contain + * any duplicates; strictly increasing order is asserted below. */ +void FsmGraph::orFsm( Key *set, int len ) +{ + /* Two states first start, second final. */ + setStartState( addState() ); + + FsmState *end = addState(); + setFinState( end ); + + for ( int i = 1; i < len; i++ ) + assert( set[i-1] < set[i] ); + + /* Attach on all the integers in the given string of ints. */ + for ( int i = 0; i < len; i++ ) + attachNewTrans( startState, end, set[i], set[i] ); +} + +/* Construct a machine that matches a range of characters. A new machine will + * be made with two states and a range transition between them. The range will + * match any characters from low to high inclusive. Low should be less than or + * equal to high otherwise undefined behaviour results. IsSigned determines + * if the integers are to be considered as signed or unsigned ints. */ +void FsmGraph::rangeFsm( Key low, Key high ) +{ + /* Two states first start, second final. */ + setStartState( addState() ); + + FsmState *end = addState(); + setFinState( end ); + + /* Attach using the range of characters. */ + attachNewTrans( startState, end, low, high ); +} + +/* Construct a machine that matches a repeated range of characters. */ +void FsmGraph::rangeStarFsm( Key low, Key high) +{ + /* One state which is final and is the start state. */ + setStartState( addState() ); + setFinState( startState ); + + /* Attach start to start using range of characters. */ + attachNewTrans( startState, startState, low, high ); +} + +/* Construct a machine that matches the empty string. 
A new machine will be + * made with only one state. The new state will be both a start and final + * state. IsSigned determines if the machine has a signed or unsigned + * alphabet. Fsm operations must be done on machines with the same alphabet + * signedness. */ +void FsmGraph::lambdaFsm( ) +{ + /* Give it one state with no transitions making it + * the start state and final state. */ + setStartState( addState() ); + setFinState( startState ); +} + +/* Construct a machine that matches nothing at all. A new machine will be + * made with only one state. It will not be final. */ +void FsmGraph::emptyFsm( ) +{ + /* Give it one state with no transitions making it + * the start state and final state. */ + setStartState( addState() ); +} + +void FsmGraph::transferOutData( FsmState *destState, FsmState *srcState ) +{ + for ( TransList::Iter trans = destState->outList; trans.lte(); trans++ ) { + if ( trans->toState != 0 ) { + /* Get the actions data from the outActionTable. */ + trans->actionTable.setActions( srcState->outActionTable ); + + /* Get the priorities from the outPriorTable. */ + trans->priorTable.setPriors( srcState->outPriorTable ); + } + } +} + +/* Kleene star operator. Makes this machine the kleene star of itself. Any + * transitions made going out of the machine and back into itself will be + * notified that they are leaving transitions by having the leavingFromState + * callback invoked. */ +void FsmGraph::starOp( ) +{ + /* For the merging process. */ + MergeData md; + + /* Turn on misfit accounting to possibly catch the old start state. */ + setMisfitAccounting( true ); + + /* Create the new new start state. It will be set final after the merging + * of the final states with the start state is complete. */ + FsmState *prevStartState = startState; + unsetStartState(); + setStartState( addState() ); + + /* Merge the new start state with the old one to isolate it. 
*/ + mergeStates( md, startState, prevStartState ); + + /* Merge the start state into all final states. Except the start state on + * the first pass. If the start state is set final we will be doubling up + * its transitions, which will get transfered to any final states that + * follow it in the final state set. This will be determined by the order + * of items in the final state set. To prevent this we just merge with the + * start on a second pass. */ + for ( StateSet::Iter st = finStateSet; st.lte(); st++ ) { + if ( *st != startState ) + mergeStatesLeaving( md, *st, startState ); + } + + /* Now it is safe to merge the start state with itself (provided it + * is set final). */ + if ( startState->isFinState() ) + mergeStatesLeaving( md, startState, startState ); + + /* Now ensure the new start state is a final state. */ + setFinState( startState ); + + /* Fill in any states that were newed up as combinations of others. */ + fillInStates( md ); + + /* Remove the misfits and turn off misfit accounting. */ + removeMisfits(); + setMisfitAccounting( false ); +} + +void FsmGraph::repeatOp( int times ) +{ + /* Must be 1 and up. 0 produces null machine and requires deleting this. */ + assert( times > 0 ); + + /* A repeat of one does absolutely nothing. */ + if ( times == 1 ) + return; + + /* Make a machine to make copies from. */ + FsmGraph *copyFrom = new FsmGraph( *this ); + + /* Concatentate duplicates onto the end up until before the last. */ + for ( int i = 1; i < times-1; i++ ) { + FsmGraph *dup = new FsmGraph( *copyFrom ); + doConcat( dup, 0, false ); + } + + /* Now use the copyFrom on the end. */ + doConcat( copyFrom, 0, false ); +} + +void FsmGraph::optionalRepeatOp( int times ) +{ + /* Must be 1 and up. 0 produces null machine and requires deleting this. */ + assert( times > 0 ); + + /* A repeat of one optional merely allows zero string. */ + if ( times == 1 ) { + setFinState( startState ); + return; + } + + /* Make a machine to make copies from. 
*/ + FsmGraph *copyFrom = new FsmGraph( *this ); + + /* The state set used in the from end of the concatentation. Starts with + * the initial final state set, then after each concatenation, gets set to + * the the final states that come from the the duplicate. */ + StateSet lastFinSet( finStateSet ); + + /* Set the initial state to zero to allow zero copies. */ + setFinState( startState ); + + /* Concatentate duplicates onto the end up until before the last. */ + for ( int i = 1; i < times-1; i++ ) { + /* Make a duplicate for concating and set the fin bits to graph 2 so we + * can pick out it's final states after the optional style concat. */ + FsmGraph *dup = new FsmGraph( *copyFrom ); + dup->setFinBits( SB_GRAPH2 ); + doConcat( dup, &lastFinSet, true ); + + /* Clear the last final state set and make the new one by taking only + * the final states that come from graph 2.*/ + lastFinSet.empty(); + for ( int i = 0; i < finStateSet.length(); i++ ) { + /* If the state came from graph 2, add it to the last set and clear + * the bits. */ + FsmState *fs = finStateSet[i]; + if ( fs->stateBits & SB_GRAPH2 ) { + lastFinSet.insert( fs ); + fs->stateBits &= ~SB_GRAPH2; + } + } + } + + /* Now use the copyFrom on the end, no bits set, no bits to clear. */ + doConcat( copyFrom, &lastFinSet, true ); +} + + +/* Fsm concatentation worker. Supports treating the concatentation as optional, + * which essentially leaves the final states of machine one as final. */ +void FsmGraph::doConcat( FsmGraph *other, StateSet *fromStates, bool optional ) +{ + /* For the merging process. */ + StateSet finStateSetCopy, startStateSet; + MergeData md; + + /* Turn on misfit accounting for both graphs. */ + setMisfitAccounting( true ); + other->setMisfitAccounting( true ); + + /* Get the other's start state. */ + FsmState *otherStartState = other->startState; + + /* Unset other's start state before bringing in the entry points. 
*/ + other->unsetStartState(); + + /* Bring in the rest of other's entry points. */ + copyInEntryPoints( other ); + other->entryPoints.empty(); + + /* Bring in other's states into our state lists. */ + stateList.append( other->stateList ); + misfitList.append( other->misfitList ); + + /* If from states is not set, then get a copy of our final state set before + * we clobber it and use it instead. */ + if ( fromStates == 0 ) { + finStateSetCopy = finStateSet; + fromStates = &finStateSetCopy; + } + + /* Unset all of our final states and get the final states from other. */ + if ( !optional ) + unsetAllFinStates(); + finStateSet.insert( other->finStateSet ); + + /* Since other's lists are empty, we can delete the fsm without + * affecting any states. */ + delete other; + + /* Merge our former final states with the start state of other. */ + for ( int i = 0; i < fromStates->length(); i++ ) { + FsmState *state = fromStates->data[i]; + + /* Merge the former final state with other's start state. */ + mergeStatesLeaving( md, state, otherStartState ); + + /* If the former final state was not reset final then we must clear + * the state's out trans data. If it got reset final then it gets to + * keep its out trans data. This must be done before fillInStates gets + * called to prevent the data from being sourced. */ + if ( ! state->isFinState() ) + clearOutData( state ); + } + + /* Fill in any new states made from merging. */ + fillInStates( md ); + + /* Remove the misfits and turn off misfit accounting. */ + removeMisfits(); + setMisfitAccounting( false ); +} + +/* Concatenates other to the end of this machine. Other is deleted. Any + * transitions made leaving this machine and entering into other are notified + * that they are leaving transitions by having the leavingFromState callback + * invoked. */ +void FsmGraph::concatOp( FsmGraph *other ) +{ + /* Assert same signedness and return graph concatenation op. 
*/ + doConcat( other, 0, false ); +} + + +void FsmGraph::doOr( FsmGraph *other ) +{ + /* For the merging process. */ + MergeData md; + + /* Build a state set consisting of both start states */ + StateSet startStateSet; + startStateSet.insert( startState ); + startStateSet.insert( other->startState ); + + /* Both of the original start states loose their start state status. */ + unsetStartState(); + other->unsetStartState(); + + /* Bring in the rest of other's entry points. */ + copyInEntryPoints( other ); + other->entryPoints.empty(); + + /* Merge the lists. This will move all the states from other + * into this. No states will be deleted. */ + stateList.append( other->stateList ); + misfitList.append( other->misfitList ); + + /* Move the final set data from other into this. */ + finStateSet.insert(other->finStateSet); + other->finStateSet.empty(); + + /* Since other's list is empty, we can delete the fsm without + * affecting any states. */ + delete other; + + /* Create a new start state. */ + setStartState( addState() ); + + /* Merge the start states. */ + mergeStates( md, startState, startStateSet.data, startStateSet.length() ); + + /* Fill in any new states made from merging. */ + fillInStates( md ); +} + +/* Unions other with this machine. Other is deleted. */ +void FsmGraph::unionOp( FsmGraph *other ) +{ + /* Turn on misfit accounting for both graphs. */ + setMisfitAccounting( true ); + other->setMisfitAccounting( true ); + + /* Call Worker routine. */ + doOr( other ); + + /* Remove the misfits and turn off misfit accounting. */ + removeMisfits(); + setMisfitAccounting( false ); +} + +/* Intersects other with this machine. Other is deleted. */ +void FsmGraph::intersectOp( FsmGraph *other ) +{ + /* Turn on misfit accounting for both graphs. */ + setMisfitAccounting( true ); + other->setMisfitAccounting( true ); + + /* Set the fin bits on this and other to want each other. 
*/ + setFinBits( SB_GRAPH1 ); + other->setFinBits( SB_GRAPH2 ); + + /* Call worker Or routine. */ + doOr( other ); + + /* Unset any final states that are no longer to + * be final due to final bits. */ + unsetIncompleteFinals(); + + /* Remove the misfits and turn off misfit accounting. */ + removeMisfits(); + setMisfitAccounting( false ); + + /* Remove states that have no path to a final state. */ + removeDeadEndStates(); +} + +/* Set subtracts other machine from this machine. Other is deleted. */ +void FsmGraph::subtractOp( FsmGraph *other ) +{ + /* Turn on misfit accounting for both graphs. */ + setMisfitAccounting( true ); + other->setMisfitAccounting( true ); + + /* Set the fin bits of other to be killers. */ + other->setFinBits( SB_GRAPH1 ); + + /* Call worker Or routine. */ + doOr( other ); + + /* Unset any final states that are no longer to + * be final due to final bits. */ + unsetKilledFinals(); + + /* Remove the misfits and turn off misfit accounting. */ + removeMisfits(); + setMisfitAccounting( false ); + + /* Remove states that have no path to a final state. */ + removeDeadEndStates(); +} + +bool FsmGraph::inEptVect( EptVect *eptVect, FsmState *state ) +{ + if ( eptVect != 0 ) { + /* Vect is there, walk it looking for state. */ + for ( int i = 0; i < eptVect->length(); i++ ) { + if ( eptVect->data[i].targ == state ) + return true; + } + } + return false; +} + +/* Fill epsilon vectors in a root state from a given starting point. Epmploys + * a depth first search through the graph of epsilon transitions. */ +void FsmGraph::epsilonFillEptVectFrom( FsmState *root, FsmState *from, bool parentLeaving ) +{ + /* Walk the epsilon transitions out of the state. */ + for ( EpsilonTrans::Iter ep = from->epsilonTrans; ep.lte(); ep++ ) { + /* Find the entry point, if the it does not resove, ignore it. */ + EntryMapEl *enLow, *enHigh; + if ( entryPoints.findMulti( *ep, enLow, enHigh ) ) { + /* Loop the targets. 
*/ + for ( EntryMapEl *en = enLow; en <= enHigh; en++ ) { + /* Do not add the root or states already in eptVect. */ + FsmState *targ = en->value; + if ( targ != from && !inEptVect(root->eptVect, targ) ) { + /* Maybe need to create the eptVect. */ + if ( root->eptVect == 0 ) + root->eptVect = new EptVect(); + + /* If moving to a different graph or if any parent is + * leaving then we are leaving. */ + bool leaving = parentLeaving || + root->owningGraph != targ->owningGraph; + + /* All ok, add the target epsilon and recurse. */ + root->eptVect->append( EptVectEl(targ, leaving) ); + epsilonFillEptVectFrom( root, targ, leaving ); + } + } + } + } +} + +void FsmGraph::shadowReadWriteStates( MergeData &md ) +{ + /* Init isolatedShadow algorithm data. */ + for ( StateList::Iter st = stateList; st.lte(); st++ ) + st->isolatedShadow = 0; + + /* Any states that may be both read from and written to must + * be shadowed. */ + for ( StateList::Iter st = stateList; st.lte(); st++ ) { + /* Find such states by looping through stateVect lists, which give us + * the states that will be read from. May cause us to visit the states + * that we are interested in more than once. */ + if ( st->eptVect != 0 ) { + /* For all states that will be read from. */ + for ( EptVect::Iter ept = *st->eptVect; ept.lte(); ept++ ) { + /* Check for read and write to the same state. */ + FsmState *targ = ept->targ; + if ( targ->eptVect != 0 ) { + /* State is to be written to, if the shadow is not already + * there, create it. */ + if ( targ->isolatedShadow == 0 ) { + FsmState *shadow = addState(); + mergeStates( md, shadow, targ ); + targ->isolatedShadow = shadow; + } + + /* Write shadow into the state vector so that it is the + * state that the epsilon transition will read from. */ + ept->targ = targ->isolatedShadow; + } + } + } + } +} + +void FsmGraph::resolveEpsilonTrans( MergeData &md ) +{ + /* Walk the state list and invoke recursive worker on each state. 
*/ + for ( StateList::Iter st = stateList; st.lte(); st++ ) + epsilonFillEptVectFrom( st, st, false ); + + /* Prevent reading from and writing to of the same state. */ + shadowReadWriteStates( md ); + + /* For all states that have epsilon transitions out, draw the transitions, + * clear the epsilon transitions. */ + for ( StateList::Iter st = stateList; st.lte(); st++ ) { + /* If there is a state vector, then create the pre-merge state. */ + if ( st->eptVect != 0 ) { + /* Merge all the epsilon targets into the state. */ + for ( EptVect::Iter ept = *st->eptVect; ept.lte(); ept++ ) { + if ( ept->leaving ) + mergeStatesLeaving( md, st, ept->targ ); + else + mergeStates( md, st, ept->targ ); + } + + /* Clean up the target list. */ + delete st->eptVect; + st->eptVect = 0; + } + + /* Clear the epsilon transitions vector. */ + st->epsilonTrans.empty(); + } +} + +void FsmGraph::epsilonOp() +{ + /* For merging process. */ + MergeData md; + + setMisfitAccounting( true ); + + for ( StateList::Iter st = stateList; st.lte(); st++ ) + st->owningGraph = 0; + + /* Perform merges. */ + resolveEpsilonTrans( md ); + + /* Epsilons can caused merges which leave behind unreachable states. */ + fillInStates( md ); + + /* Remove the misfits and turn off misfit accounting. */ + removeMisfits(); + setMisfitAccounting( false ); +} + +/* Make a new maching by joining together a bunch of machines without making + * any transitions between them. A negative finalId results in there being no + * final id. */ +void FsmGraph::joinOp( int startId, int finalId, FsmGraph **others, int numOthers ) +{ + /* For the merging process. */ + MergeData md; + + /* Set the owning machines. Start at one. Zero is reserved for the start + * and final states. 
*/ + for ( StateList::Iter st = stateList; st.lte(); st++ ) + st->owningGraph = 1; + for ( int m = 0; m < numOthers; m++ ) { + for ( StateList::Iter st = others[m]->stateList; st.lte(); st++ ) + st->owningGraph = 2+m; + } + + /* All machines loose start state status. */ + unsetStartState(); + for ( int m = 0; m < numOthers; m++ ) + others[m]->unsetStartState(); + + /* Bring the other machines into this. */ + for ( int m = 0; m < numOthers; m++ ) { + /* Bring in the rest of other's entry points. */ + copyInEntryPoints( others[m] ); + others[m]->entryPoints.empty(); + + /* Merge the lists. This will move all the states from other into + * this. No states will be deleted. */ + stateList.append( others[m]->stateList ); + assert( others[m]->misfitList.length() == 0 ); + + /* Move the final set data from other into this. */ + finStateSet.insert( others[m]->finStateSet ); + others[m]->finStateSet.empty(); + + /* Since other's list is empty, we can delete the fsm without + * affecting any states. */ + delete others[m]; + } + + /* Look up the start entry point. */ + EntryMapEl *enLow = 0, *enHigh = 0; + bool findRes = entryPoints.findMulti( startId, enLow, enHigh ); + if ( ! findRes ) { + /* No start state. Set a default one and proceed with the join. Note + * that the result of the join will be a very uninteresting machine. */ + setStartState( addState() ); + } + else { + /* There is at least one start state, create a state that will become + * the new start state. */ + FsmState *newStart = addState(); + setStartState( newStart ); + + /* The start state is in an owning machine class all it's own. */ + newStart->owningGraph = 0; + + /* Create the set of states to merge from. */ + StateSet stateSet; + for ( EntryMapEl *en = enLow; en <= enHigh; en++ ) + stateSet.insert( en->value ); + + /* Merge in the set of start states into the new start state. 
*/ + mergeStates( md, newStart, stateSet.data, stateSet.length() ); + } + + /* Take a copy of the final state set, before unsetting them all. This + * will allow us to call clearOutData on the states that don't get + * final state status back back. */ + StateSet finStateSetCopy = finStateSet; + + /* Now all final states are unset. */ + unsetAllFinStates(); + + if ( finalId >= 0 ) { + /* Create the implicit final state. */ + FsmState *finState = addState(); + setFinState( finState ); + + /* Assign an entry into the final state on the final state entry id. Note + * that there may already be an entry on this id. That's ok. Also set the + * final state owning machine id. It's in a class all it's own. */ + setEntry( finalId, finState ); + finState->owningGraph = 0; + } + + /* Hand over to workers for resolving epsilon trans. This will merge states + * with the targets of their epsilon transitions. */ + resolveEpsilonTrans( md ); + + /* Invoke the relinquish final callback on any states that did not get + * final state status back. */ + for ( StateSet::Iter st = finStateSetCopy; st.lte(); st++ ) { + if ( !((*st)->stateBits & SB_ISFINAL) ) + clearOutData( *st ); + } + + /* Fill in any new states made from merging. */ + fillInStates( md ); + + /* Joining can be messy. Instead of having misfit accounting on (which is + * tricky here) do a full cleaning. */ + removeUnreachableStates(); +} + +void FsmGraph::globOp( FsmGraph **others, int numOthers ) +{ + /* All other machines loose start states status. */ + for ( int m = 0; m < numOthers; m++ ) + others[m]->unsetStartState(); + + /* Bring the other machines into this. */ + for ( int m = 0; m < numOthers; m++ ) { + /* Bring in the rest of other's entry points. */ + copyInEntryPoints( others[m] ); + others[m]->entryPoints.empty(); + + /* Merge the lists. This will move all the states from other into + * this. No states will be deleted. 
*/ + stateList.append( others[m]->stateList ); + assert( others[m]->misfitList.length() == 0 ); + + /* Move the final set data from other into this. */ + finStateSet.insert( others[m]->finStateSet ); + others[m]->finStateSet.empty(); + + /* Since other's list is empty, we can delete the fsm without + * affecting any states. */ + delete others[m]; + } +} + +void FsmGraph::deterministicEntry() +{ + /* For the merging process. */ + MergeData md; + + /* States may loose their entry points, turn on misfit accounting. */ + setMisfitAccounting( true ); + + /* Get a copy of the entry map then clear all the entry points. As we + * iterate the old entry map finding duplicates we will add the entry + * points for the new states that we create. */ + EntryMap prevEntry = entryPoints; + unsetAllEntryPoints(); + + for ( int enId = 0; enId < prevEntry.length(); ) { + /* Count the number of states on this entry key. */ + int highId = enId; + while ( highId < prevEntry.length() && prevEntry[enId].key == prevEntry[highId].key ) + highId += 1; + + int numIds = highId - enId; + if ( numIds == 1 ) { + /* Only a single entry point, just set the entry. */ + setEntry( prevEntry[enId].key, prevEntry[enId].value ); + } + else { + /* Multiple entry points, need to create a new state and merge in + * all the targets of entry points. */ + FsmState *newEntry = addState(); + for ( int en = enId; en < highId; en++ ) + mergeStates( md, newEntry, prevEntry[en].value ); + + /* Add the new state as the single entry point. */ + setEntry( prevEntry[enId].key, newEntry ); + } + + enId += numIds; + } + + /* The old start state may be unreachable. Remove the misfits and turn off + * misfit accounting. */ + removeMisfits(); + setMisfitAccounting( false ); +} + +/* Unset any final states that are no longer to be final due to final bits. */ +void FsmGraph::unsetKilledFinals() +{ + /* Duplicate the final state set before we begin modifying it. 
*/ + StateSet fin( finStateSet ); + + for ( int s = 0; s < fin.length(); s++ ) { + /* Check for killing bit. */ + FsmState *state = fin.data[s]; + if ( state->stateBits & SB_GRAPH1 ) { + /* One final state is a killer, set to non-final. */ + unsetFinState( state ); + } + + /* Clear all killing bits. Non final states should never have had those + * state bits set in the first place. */ + state->stateBits &= ~SB_GRAPH1; + } +} + +/* Unset any final states that are no longer to be final due to final bits. */ +void FsmGraph::unsetIncompleteFinals() +{ + /* Duplicate the final state set before we begin modifying it. */ + StateSet fin( finStateSet ); + + for ( int s = 0; s < fin.length(); s++ ) { + /* Check for one set but not the other. */ + FsmState *state = fin.data[s]; + if ( state->stateBits & SB_BOTH && + (state->stateBits & SB_BOTH) != SB_BOTH ) + { + /* One state wants the other but it is not there. */ + unsetFinState( state ); + } + + /* Clear wanting bits. Non final states should never have had those + * state bits set in the first place. */ + state->stateBits &= ~SB_BOTH; + } +} + +/* Ensure that the start state is free of entry points (aside from the fact + * that it is the start state). If the start state has entry points then Make a + * new start state by merging with the old one. Useful before modifying start + * transitions. If the existing start state has any entry points other than the + * start state entry then modifying its transitions changes more than the start + * transitions. So isolate the start state by separating it out such that it + * only has start stateness as it's entry point. */ +void FsmGraph::isolateStartState( ) +{ + /* For the merging process. */ + MergeData md; + + /* Bail out if the start state is already isolated. */ + if ( isStartStateIsolated() ) + return; + + /* Turn on misfit accounting to possibly catch the old start state. */ + setMisfitAccounting( true ); + + /* This will be the new start state. 
The existing start + * state is merged with it. */ + FsmState *prevStartState = startState; + unsetStartState(); + setStartState( addState() ); + + /* Merge the new start state with the old one to isolate it. */ + mergeStates( md, startState, prevStartState ); + + /* Stfil and stateDict will be empty because the merging of the old start + * state into the new one will not have any conflicting transitions. */ + assert( md.stateDict.treeSize == 0 ); + assert( md.stfillHead == 0 ); + + /* The old start state may be unreachable. Remove the misfits and turn off + * misfit accounting. */ + removeMisfits(); + setMisfitAccounting( false ); +} + +#if COLM_LOG_CONDS +void logCondSpace( CondSpace *condSpace ) +{ + if ( condSpace == 0 ) + cerr << "<empty>"; + else { + for ( CondSet::Iter csi = condSpace->condSet.last(); csi.gtb(); csi-- ) { + if ( ! csi.last() ) + cerr << ','; + (*csi)->actionName( cerr ); + } + } +} + +void logNewExpansion( Expansion *exp ) +{ + cerr << "created expansion:" << endl; + cerr << " range: " << exp->lowKey.getVal() << " .. 
" << + exp->highKey.getVal() << endl; + + cerr << " fromCondSpace: "; + logCondSpace( exp->fromCondSpace ); + cerr << endl; + cerr << " fromVals: " << exp->fromVals << endl; + + cerr << " toCondSpace: "; + logCondSpace( exp->toCondSpace ); + cerr << endl; + cerr << " toValsList: "; + for ( LongVect::Iter to = exp->toValsList; to.lte(); to++ ) + cerr << " " << *to; + cerr << endl; +} +#endif + + +void FsmGraph::findTransExpansions( ExpansionList &expansionList, + FsmState *destState, FsmState *srcState ) +{ + PairIter<FsmTrans, StateCond> transCond( destState->outList.head, + srcState->stateCondList.head ); + for ( ; !transCond.end(); transCond++ ) { + if ( transCond.userState == RangeOverlap ) { + Expansion *expansion = new Expansion( transCond.s1Tel.lowKey, + transCond.s1Tel.highKey ); + expansion->fromTrans = new FsmTrans(*transCond.s1Tel.trans); + expansion->fromTrans->fromState = 0; + expansion->fromTrans->toState = transCond.s1Tel.trans->toState; + expansion->fromCondSpace = 0; + expansion->fromVals = 0; + CondSpace *srcCS = transCond.s2Tel.trans->condSpace; + expansion->toCondSpace = srcCS; + + long numTargVals = (1 << srcCS->condSet.length()); + for ( long targVals = 0; targVals < numTargVals; targVals++ ) + expansion->toValsList.append( targVals ); + + #ifdef COLM_LOG_CONDS + if ( colm_log_conds ) { + logNewExpansion( expansion ); + } + #endif + expansionList.append( expansion ); + } + } +} + +void FsmGraph::findCondExpInTrans( ExpansionList &expansionList, FsmState *state, + Key lowKey, Key highKey, CondSpace *fromCondSpace, CondSpace *toCondSpace, + long fromVals, LongVect &toValsList ) +{ + FsmTrans searchTrans; + searchTrans.lowKey = fromCondSpace->baseKey + fromVals * keyOps->alphSize() + + (lowKey - keyOps->minKey); + searchTrans.highKey = fromCondSpace->baseKey + fromVals * keyOps->alphSize() + + (highKey - keyOps->minKey); + searchTrans.prev = searchTrans.next = 0; + + PairIter<FsmTrans> pairIter( state->outList.head, &searchTrans ); + for ( ; 
!pairIter.end(); pairIter++ ) { + if ( pairIter.userState == RangeOverlap ) { + Expansion *expansion = new Expansion( lowKey, highKey ); + expansion->fromTrans = new FsmTrans(*pairIter.s1Tel.trans); + expansion->fromTrans->fromState = 0; + expansion->fromTrans->toState = pairIter.s1Tel.trans->toState; + expansion->fromCondSpace = fromCondSpace; + expansion->fromVals = fromVals; + expansion->toCondSpace = toCondSpace; + expansion->toValsList = toValsList; + + expansionList.append( expansion ); + #ifdef COLM_LOG_CONDS + if ( colm_log_conds ) { + logNewExpansion( expansion ); + } + #endif + } + } +} + +void FsmGraph::findCondExpansions( ExpansionList &expansionList, + FsmState *destState, FsmState *srcState ) +{ + PairIter<StateCond, StateCond> condCond( destState->stateCondList.head, + srcState->stateCondList.head ); + for ( ; !condCond.end(); condCond++ ) { + if ( condCond.userState == RangeOverlap ) { + /* Loop over all existing condVals . */ + CondSet &destCS = condCond.s1Tel.trans->condSpace->condSet; + long destLen = destCS.length(); + + /* Find the items in src cond set that are not in dest + * cond set. These are the items that we must expand. */ + CondSet srcOnlyCS = condCond.s2Tel.trans->condSpace->condSet; + for ( CondSet::Iter dcsi = destCS; dcsi.lte(); dcsi++ ) + srcOnlyCS.remove( *dcsi ); + long srcOnlyLen = srcOnlyCS.length(); + + if ( srcOnlyCS.length() > 0 ) { + #ifdef COLM_LOG_CONDS + if ( colm_log_conds ) { + cerr << "there are " << srcOnlyCS.length() << " item(s) that are " + "only in the srcCS" << endl; + } + #endif + + CondSet mergedCS = destCS; + mergedCS.insert( condCond.s2Tel.trans->condSpace->condSet ); + + CondSpace *fromCondSpace = addCondSpace( destCS ); + CondSpace *toCondSpace = addCondSpace( mergedCS ); + + /* Loop all values in the dest space. 
*/ + for ( long destVals = 0; destVals < (1 << destLen); destVals++ ) { + long basicVals = 0; + for ( CondSet::Iter csi = destCS; csi.lte(); csi++ ) { + if ( destVals & (1 << csi.pos()) ) { + Action **cim = mergedCS.find( *csi ); + long bitPos = (cim - mergedCS.data); + basicVals |= 1 << bitPos; + } + } + + /* Loop all new values. */ + LongVect expandToVals; + for ( long soVals = 0; soVals < (1 << srcOnlyLen); soVals++ ) { + long targVals = basicVals; + for ( CondSet::Iter csi = srcOnlyCS; csi.lte(); csi++ ) { + if ( soVals & (1 << csi.pos()) ) { + Action **cim = mergedCS.find( *csi ); + long bitPos = (cim - mergedCS.data); + targVals |= 1 << bitPos; + } + } + expandToVals.append( targVals ); + } + + findCondExpInTrans( expansionList, destState, + condCond.s1Tel.lowKey, condCond.s1Tel.highKey, + fromCondSpace, toCondSpace, destVals, expandToVals ); + } + } + } + } +} + +void FsmGraph::doExpand( MergeData &md, FsmState *destState, ExpansionList &expList1 ) +{ + for ( ExpansionList::Iter exp = expList1; exp.lte(); exp++ ) { + for ( LongVect::Iter to = exp->toValsList; to.lte(); to++ ) { + long targVals = *to; + + /* We will use the copy of the transition that was made when the + * expansion was created. It will get used multiple times. Each + * time we must set up the keys, everything else is constant and + * and already prepared. 
*/ + FsmTrans *srcTrans = exp->fromTrans; + + srcTrans->lowKey = exp->toCondSpace->baseKey + + targVals * keyOps->alphSize() + (exp->lowKey - keyOps->minKey); + srcTrans->highKey = exp->toCondSpace->baseKey + + targVals * keyOps->alphSize() + (exp->highKey - keyOps->minKey); + + TransList srcList; + srcList.append( srcTrans ); + outTransCopy( md, destState, srcList.head ); + srcList.abandon(); + } + } +} + + +void FsmGraph::doRemove( MergeData &md, FsmState *destState, ExpansionList &expList1 ) +{ + for ( ExpansionList::Iter exp = expList1; exp.lte(); exp++ ) { + Removal removal; + if ( exp->fromCondSpace == 0 ) { + removal.lowKey = exp->lowKey; + removal.highKey = exp->highKey; + } + else { + removal.lowKey = exp->fromCondSpace->baseKey + + exp->fromVals * keyOps->alphSize() + (exp->lowKey - keyOps->minKey); + removal.highKey = exp->fromCondSpace->baseKey + + exp->fromVals * keyOps->alphSize() + (exp->highKey - keyOps->minKey); + } + removal.next = 0; + + TransList destList; + PairIter<FsmTrans, Removal> pairIter( destState->outList.head, &removal ); + for ( ; !pairIter.end(); pairIter++ ) { + switch ( pairIter.userState ) { + case RangeInS1: { + FsmTrans *destTrans = pairIter.s1Tel.trans; + destTrans->lowKey = pairIter.s1Tel.lowKey; + destTrans->highKey = pairIter.s1Tel.highKey; + destList.append( destTrans ); + break; + } + case RangeInS2: + break; + case RangeOverlap: { + FsmTrans *trans = pairIter.s1Tel.trans; + detachTrans( trans->fromState, trans->toState, trans ); + delete trans; + break; + } + case BreakS1: { + pairIter.s1Tel.trans = dupTrans( destState, + pairIter.s1Tel.trans ); + break; + } + case BreakS2: + break; + } + } + destState->outList.transfer( destList ); + } +} + +void FsmGraph::mergeStateConds( FsmState *destState, FsmState *srcState ) +{ + StateCondList destList; + PairIter<StateCond> pairIter( destState->stateCondList.head, + srcState->stateCondList.head ); + for ( ; !pairIter.end(); pairIter++ ) { + switch ( pairIter.userState ) { + case 
RangeInS1: { + StateCond *destCond = pairIter.s1Tel.trans; + destCond->lowKey = pairIter.s1Tel.lowKey; + destCond->highKey = pairIter.s1Tel.highKey; + destList.append( destCond ); + break; + } + case RangeInS2: { + StateCond *newCond = new StateCond( *pairIter.s2Tel.trans ); + newCond->lowKey = pairIter.s2Tel.lowKey; + newCond->highKey = pairIter.s2Tel.highKey; + destList.append( newCond ); + break; + } + case RangeOverlap: { + StateCond *destCond = pairIter.s1Tel.trans; + StateCond *srcCond = pairIter.s2Tel.trans; + CondSet mergedCondSet; + mergedCondSet.insert( destCond->condSpace->condSet ); + mergedCondSet.insert( srcCond->condSpace->condSet ); + destCond->condSpace = addCondSpace( mergedCondSet ); + + destCond->lowKey = pairIter.s1Tel.lowKey; + destCond->highKey = pairIter.s1Tel.highKey; + destList.append( destCond ); + break; + } + case BreakS1: + pairIter.s1Tel.trans = new StateCond( *pairIter.s1Tel.trans ); + break; + + case BreakS2: + break; + } + } + destState->stateCondList.transfer( destList ); +} + +/* A state merge which represents the drawing in of leaving transitions. If + * there is any out data then we duplicate the souce state, transfer the out + * data, then merge in the state. The new state will be reaped because it will + * not be given any in transitions. 
*/ +void FsmGraph::mergeStatesLeaving( MergeData &md, FsmState *destState, FsmState *srcState ) +{ + if ( !hasOutData( destState ) ) + mergeStates( md, destState, srcState ); + else { + FsmState *ssMutable = addState(); + mergeStates( md, ssMutable, srcState ); + transferOutData( ssMutable, destState ); + + for ( ActionSet::Iter cond = destState->outCondSet; cond.lte(); cond++ ) + embedCondition( md, ssMutable, *cond ); + + mergeStates( md, destState, ssMutable ); + } +} + +void FsmGraph::mergeStates( MergeData &md, FsmState *destState, + FsmState **srcStates, int numSrc ) +{ + for ( int s = 0; s < numSrc; s++ ) + mergeStates( md, destState, srcStates[s] ); +} + +void FsmGraph::mergeStates( MergeData &md, FsmState *destState, FsmState *srcState ) +{ + ExpansionList expList1; + ExpansionList expList2; + + findTransExpansions( expList1, destState, srcState ); + findCondExpansions( expList1, destState, srcState ); + findTransExpansions( expList2, srcState, destState ); + findCondExpansions( expList2, srcState, destState ); + + mergeStateConds( destState, srcState ); + + outTransCopy( md, destState, srcState->outList.head ); + + doExpand( md, destState, expList1 ); + doExpand( md, destState, expList2 ); + + doRemove( md, destState, expList1 ); + doRemove( md, destState, expList2 ); + + expList1.empty(); + expList2.empty(); + + /* Get its bits and final state status. */ + destState->stateBits |= ( srcState->stateBits & ~SB_ISFINAL ); + if ( srcState->isFinState() ) + setFinState( destState ); + + /* Draw in any properties of srcState into destState. */ + if ( srcState == destState ) { + /* Duplicate the list to protect against write to source. The + * priorities sets are not copied in because that would have no + * effect. */ + destState->epsilonTrans.append( EpsilonTrans( srcState->epsilonTrans ) ); + + /* Get all actions, duplicating to protect against write to source. 
*/ + destState->toStateActionTable.setActions( + ActionTable( srcState->toStateActionTable ) ); + destState->fromStateActionTable.setActions( + ActionTable( srcState->fromStateActionTable ) ); + destState->outActionTable.setActions( ActionTable( srcState->outActionTable ) ); + destState->outCondSet.insert( ActionSet( srcState->outCondSet ) ); + destState->errActionTable.setActions( ErrActionTable( srcState->errActionTable ) ); + destState->eofActionTable.setActions( ActionTable( srcState->eofActionTable ) ); + } + else { + /* Get the epsilons, out priorities. */ + destState->epsilonTrans.append( srcState->epsilonTrans ); + destState->outPriorTable.setPriors( srcState->outPriorTable ); + + /* Get all actions. */ + destState->toStateActionTable.setActions( srcState->toStateActionTable ); + destState->fromStateActionTable.setActions( srcState->fromStateActionTable ); + destState->outActionTable.setActions( srcState->outActionTable ); + destState->outCondSet.insert( srcState->outCondSet ); + destState->errActionTable.setActions( srcState->errActionTable ); + destState->eofActionTable.setActions( srcState->eofActionTable ); + } +} + +void FsmGraph::fillInStates( MergeData &md ) +{ + /* Merge any states that are awaiting merging. This will likely cause + * other states to be added to the stfill list. */ + FsmState *state = md.stfillHead; + while ( state != 0 ) { + StateSet *stateSet = &state->stateDictEl->stateSet; + mergeStates( md, state, stateSet->data, stateSet->length() ); + state = state->alg.next; + } + + /* Delete the state sets of all states that are on the fill list. */ + state = md.stfillHead; + while ( state != 0 ) { + /* Delete and reset the state set. */ + delete state->stateDictEl; + state->stateDictEl = 0; + + /* Next state in the stfill list. */ + state = state->alg.next; + } + + /* StateDict will still have its ptrs/size set but all of its elements + * will be deleted so we don't need to clean it up. 
*/ +} + +void FsmGraph::findEmbedExpansions( ExpansionList &expansionList, + FsmState *destState, Action *condAction ) +{ + StateCondList destList; + PairIter<FsmTrans, StateCond> transCond( destState->outList.head, + destState->stateCondList.head ); + for ( ; !transCond.end(); transCond++ ) { + switch ( transCond.userState ) { + case RangeInS1: { + if ( transCond.s1Tel.lowKey <= keyOps->maxKey ) { + assert( transCond.s1Tel.highKey <= keyOps->maxKey ); + + /* Make a new state cond. */ + StateCond *newStateCond = new StateCond( transCond.s1Tel.lowKey, + transCond.s1Tel.highKey ); + newStateCond->condSpace = addCondSpace( CondSet( condAction ) ); + destList.append( newStateCond ); + + /* Create the expansion. */ + Expansion *expansion = new Expansion( transCond.s1Tel.lowKey, + transCond.s1Tel.highKey ); + expansion->fromTrans = new FsmTrans(*transCond.s1Tel.trans); + expansion->fromTrans->fromState = 0; + expansion->fromTrans->toState = transCond.s1Tel.trans->toState; + expansion->fromCondSpace = 0; + expansion->fromVals = 0; + expansion->toCondSpace = newStateCond->condSpace; + expansion->toValsList.append( 1 ); + #ifdef COLM_LOG_CONDS + if ( colm_log_conds ) { + logNewExpansion( expansion ); + } + #endif + expansionList.append( expansion ); + } + break; + } + case RangeInS2: { + /* Enhance state cond and find the expansion. */ + StateCond *stateCond = transCond.s2Tel.trans; + stateCond->lowKey = transCond.s2Tel.lowKey; + stateCond->highKey = transCond.s2Tel.highKey; + + CondSet &destCS = stateCond->condSpace->condSet; + long destLen = destCS.length(); + CondSpace *fromCondSpace = stateCond->condSpace; + + CondSet mergedCS = destCS; + mergedCS.insert( condAction ); + CondSpace *toCondSpace = addCondSpace( mergedCS ); + stateCond->condSpace = toCondSpace; + destList.append( stateCond ); + + /* Loop all values in the dest space. 
*/ + for ( long destVals = 0; destVals < (1 << destLen); destVals++ ) { + long basicVals = 0; + for ( CondSet::Iter csi = destCS; csi.lte(); csi++ ) { + if ( destVals & (1 << csi.pos()) ) { + Action **cim = mergedCS.find( *csi ); + long bitPos = (cim - mergedCS.data); + basicVals |= 1 << bitPos; + } + } + + long targVals = basicVals; + Action **cim = mergedCS.find( condAction ); + long bitPos = (cim - mergedCS.data); + targVals |= 1 << bitPos; + + LongVect expandToVals( targVals ); + findCondExpInTrans( expansionList, destState, + transCond.s2Tel.lowKey, transCond.s2Tel.highKey, + fromCondSpace, toCondSpace, destVals, expandToVals ); + } + break; + } + + + case RangeOverlap: + case BreakS1: + case BreakS2: + assert( false ); + break; + } + } + + destState->stateCondList.transfer( destList ); +} + +void FsmGraph::embedCondition( FsmState *state, Action *condAction ) +{ + MergeData md; + ExpansionList expList; + + /* Turn on misfit accounting to possibly catch the old start state. */ + setMisfitAccounting( true ); + + /* Worker. */ + embedCondition( md, state, condAction ); + + /* Fill in any states that were newed up as combinations of others. */ + fillInStates( md ); + + /* Remove the misfits and turn off misfit accounting. */ + removeMisfits(); + setMisfitAccounting( false ); +} + +void FsmGraph::embedCondition( MergeData &md, FsmState *state, Action *condAction ) +{ + ExpansionList expList; + + findEmbedExpansions( expList, state, condAction ); + doExpand( md, state, expList ); + doRemove( md, state, expList ); + expList.empty(); +} diff --git a/src/fsmgraph.h b/src/fsmgraph.h new file mode 100644 index 00000000..fca23cc1 --- /dev/null +++ b/src/fsmgraph.h @@ -0,0 +1,1388 @@ +/* + * Copyright 2001-2012 Adrian Thurston <thurston@complang.org> + */ + +/* This file is part of Colm. 
+ * + * Colm is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Colm is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Colm; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _FSMGRAPH_H +#define _FSMGRAPH_H + +#include <assert.h> +#include "keyops.h" +#include "vector.h" +#include "bstset.h" +#include "compare.h" +#include "avltree.h" +#include "dlist.h" +#include "bstmap.h" +#include "sbstmap.h" +#include "sbstset.h" +#include "sbsttable.h" +#include "avlset.h" +#include "avlmap.h" + +/* Flags that control merging. */ +#define SB_GRAPH1 0x01 +#define SB_GRAPH2 0x02 +#define SB_BOTH 0x03 +#define SB_ISFINAL 0x04 +#define SB_ISMARKED 0x08 +#define SB_ONLIST 0x10 + +struct FsmTrans; +struct FsmState; +struct FsmGraph; +struct Action; +struct TokenDef; +struct NameInst; + +/* State list element for unambiguous access to list element. */ +struct FsmListEl +{ + FsmState *prev, *next; +}; + +/* This is the marked index for a state pair. Used in minimization. It keeps + * track of whether or not the state pair is marked. */ +struct MarkIndex +{ + MarkIndex(int states); + ~MarkIndex(); + + void markPair(int state1, int state2); + bool isPairMarked(int state1, int state2); + +private: + int numStates; + bool *array; +}; + +extern KeyOps *keyOps; + +/* Transition Action Element. */ +typedef SBstMapEl< int, Action* > ActionTableEl; + +/* Transition Action Table. 
*/ +struct ActionTable + : public SBstMap< int, Action*, CmpOrd<int> > +{ + void setAction( int ordering, Action *action ); + void setActions( int *orderings, Action **actions, int nActs ); + void setActions( const ActionTable &other ); + + bool hasAction( Action *action ); +}; + +typedef SBstSet< Action*, CmpOrd<Action*> > ActionSet; +typedef CmpSTable< Action*, CmpOrd<Action*> > CmpActionSet; + +/* Transition Action Element. */ +typedef SBstMapEl< int, TokenDef* > LmActionTableEl; + +/* Transition Action Table. */ +struct LmActionTable + : public SBstMap< int, TokenDef*, CmpOrd<int> > +{ + void setAction( int ordering, TokenDef *action ); + void setActions( const LmActionTable &other ); +}; + +/* Compare of a whole action table element (key & value). */ +struct CmpActionTableEl +{ + static int compare( const ActionTableEl &action1, + const ActionTableEl &action2 ) + { + if ( action1.key < action2.key ) + return -1; + else if ( action1.key > action2.key ) + return 1; + else if ( action1.value < action2.value ) + return -1; + else if ( action1.value > action2.value ) + return 1; + return 0; + } +}; + +/* Compare for ActionTable. */ +typedef CmpSTable< ActionTableEl, CmpActionTableEl > CmpActionTable; + +/* Compare of a whole lm action table element (key & value). */ +struct CmpLmActionTableEl +{ + static int compare( const LmActionTableEl &lmAction1, + const LmActionTableEl &lmAction2 ) + { + if ( lmAction1.key < lmAction2.key ) + return -1; + else if ( lmAction1.key > lmAction2.key ) + return 1; + else if ( lmAction1.value < lmAction2.value ) + return -1; + else if ( lmAction1.value > lmAction2.value ) + return 1; + return 0; + } +}; + +/* Compare for LmActionTable. */ +typedef CmpSTable< LmActionTableEl, CmpLmActionTableEl > CmpLmActionTable; + +/* Action table element for error action tables. Adds the encoding of transfer + * point. 
*/ +struct ErrActionTableEl +{ + ErrActionTableEl( Action *action, int ordering, int transferPoint ) + : ordering(ordering), action(action), transferPoint(transferPoint) { } + + /* Ordering and id of the action embedding. */ + int ordering; + Action *action; + + /* Id of the point of transfer from the error action table to transitions and + * eofActionTable. */ + int transferPoint; + + int getKey() const { return ordering; } +}; + +struct ErrActionTable + : public SBstTable< ErrActionTableEl, int, CmpOrd<int> > +{ + void setAction( int ordering, Action *action, int transferPoint ); + void setActions( const ErrActionTable &other ); +}; + +/* Compare of an error action table element (key & value). */ +struct CmpErrActionTableEl +{ + static int compare( const ErrActionTableEl &action1, + const ErrActionTableEl &action2 ) + { + if ( action1.ordering < action2.ordering ) + return -1; + else if ( action1.ordering > action2.ordering ) + return 1; + else if ( action1.action < action2.action ) + return -1; + else if ( action1.action > action2.action ) + return 1; + else if ( action1.transferPoint < action2.transferPoint ) + return -1; + else if ( action1.transferPoint > action2.transferPoint ) + return 1; + return 0; + } +}; + +/* Compare for ErrActionTable. */ +typedef CmpSTable< ErrActionTableEl, CmpErrActionTableEl > CmpErrActionTable; + + +/* Describe a priority, shared among PriorEls. + * Has a key and a priority value. */ +struct PriorDesc +{ + int key; + int priority; +}; + +/* Element in the arrays of priorities for transitions and states. Ordering is + * unique among instantiations of machines, desc is shared. */ +struct PriorEl +{ + PriorEl( int ordering, PriorDesc *desc ) + : ordering(ordering), desc(desc) { } + + int ordering; + PriorDesc *desc; +}; + +/* Compare priority elements, which are ordered by the priority descriptor + * key. 
*/ +struct PriorElCmp +{ + static inline int compare( const PriorEl &pel1, const PriorEl &pel2 ) + { + if ( pel1.desc->key < pel2.desc->key ) + return -1; + else if ( pel1.desc->key > pel2.desc->key ) + return 1; + else + return 0; + } +}; + + +/* Priority Table. */ +struct PriorTable + : public SBstSet< PriorEl, PriorElCmp > +{ + void setPrior( int ordering, PriorDesc *desc ); + void setPriors( const PriorTable &other ); +}; + +/* Compare of prior table elements for distinguishing state data. */ +struct CmpPriorEl +{ + static inline int compare( const PriorEl &pel1, const PriorEl &pel2 ) + { + if ( pel1.desc < pel2.desc ) + return -1; + else if ( pel1.desc > pel2.desc ) + return 1; + else if ( pel1.ordering < pel2.ordering ) + return -1; + else if ( pel1.ordering > pel2.ordering ) + return 1; + return 0; + } +}; + +/* Compare of PriorTable distinguishing state data. Using a compare of the + * pointers is a little more strict than it needs be. It requires that + * priority tables have the exact same set of priority assignment operators + * (from the input lang) to be considered equal. + * + * Really only key-value pairs need be tested and ordering be merged. However + * this would require that in the fusing of states, priority descriptors be + * chosen for the new fused state based on priority. Since the out transition + * lists and ranges aren't necessarily going to line up, this is more work for + * little gain. Final compression resets all priorities first, so this would + * only be useful for compression at every operator, which is only an + * undocumented test feature. + */ +typedef CmpSTable<PriorEl, CmpPriorEl> CmpPriorTable; + +/* Plain action list that imposes no ordering. */ +typedef Vector<int> TransFuncList; + +/* Comparison for TransFuncList. */ +typedef CmpTable< int, CmpOrd<int> > TransFuncListCompare; + +/* Transition class that implements actions and priorities. 
*/ +struct FsmTrans +{ + FsmTrans() : fromState(0), toState(0) {} + FsmTrans( const FsmTrans &other ) : + lowKey(other.lowKey), + highKey(other.highKey), + fromState(0), toState(0), + actionTable(other.actionTable), + priorTable(other.priorTable) + { + assert( lmActionTable.length() == 0 && other.lmActionTable.length() == 0 ); + } + + Key lowKey, highKey; + FsmState *fromState; + FsmState *toState; + + /* Pointers for outlist. */ + FsmTrans *prev, *next; + + /* Pointers for in-list. */ + FsmTrans *ilprev, *ilnext; + + /* The function table and priority for the transition. */ + ActionTable actionTable; + PriorTable priorTable; + + LmActionTable lmActionTable; +}; + +/* In transition list. Like DList except only has head pointers, which is all + * that is required. Insertion and deletion is handled by the graph. This + * class provides the iterator of a single list. */ +struct TransInList +{ + TransInList() : head(0) { } + + FsmTrans *head; + + struct Iter + { + /* Default construct. */ + Iter() : ptr(0) { } + + /* Construct, assign from a list. */ + Iter( const TransInList &il ) : ptr(il.head) { } + Iter &operator=( const TransInList &dl ) { ptr = dl.head; return *this; } + + /* At the end */ + bool lte() const { return ptr != 0; } + bool end() const { return ptr == 0; } + + /* At the first, last element. */ + bool first() const { return ptr && ptr->ilprev == 0; } + bool last() const { return ptr && ptr->ilnext == 0; } + + /* Cast, dereference, arrow ops. */ + operator FsmTrans*() const { return ptr; } + FsmTrans &operator *() const { return *ptr; } + FsmTrans *operator->() const { return ptr; } + + /* Increment, decrement. */ + inline void operator++(int) { ptr = ptr->ilnext; } + inline void operator--(int) { ptr = ptr->ilprev; } + + /* The iterator is simply a pointer. */ + FsmTrans *ptr; + }; +}; + +typedef DList<FsmTrans> TransList; + +/* Set of states, list of states. 
*/ +typedef BstSet<FsmState*> StateSet; +typedef DList<FsmState> StateList; + +/* An element in a state dict. */ +struct StateDictEl +: + public AvlTreeEl<StateDictEl> +{ + StateDictEl(const StateSet &stateSet) + : stateSet(stateSet) { } + + const StateSet &getKey() { return stateSet; } + StateSet stateSet; + FsmState *targState; +}; + +/* Dictionary mapping a set of states to a target state. */ +typedef AvlTree< StateDictEl, StateSet, CmpTable<FsmState*> > StateDict; + +/* Data needed for a merge operation. */ +struct MergeData +{ + MergeData() + : stfillHead(0), stfillTail(0) { } + + StateDict stateDict; + + FsmState *stfillHead; + FsmState *stfillTail; + + void fillListAppend( FsmState *state ); +}; + +struct TransEl +{ + /* Constructors. */ + TransEl() { } + TransEl( Key lowKey, Key highKey ) + : lowKey(lowKey), highKey(highKey) { } + TransEl( Key lowKey, Key highKey, FsmTrans *value ) + : lowKey(lowKey), highKey(highKey), value(value) { } + + Key lowKey, highKey; + FsmTrans *value; +}; + +struct CmpKey +{ + static int compare( const Key key1, const Key key2 ) + { + if ( key1 < key2 ) + return -1; + else if ( key1 > key2 ) + return 1; + else + return 0; + } +}; + +/* Vector based set of key items. */ +typedef BstSet<Key, CmpKey> KeySet; + +struct MinPartition +{ + MinPartition() : active(false) { } + + StateList list; + bool active; + + MinPartition *prev, *next; +}; + +/* Epsilon transition stored in a state. Specifies the target */ +typedef Vector<int> EpsilonTrans; + +/* List of states that are to be drawn into this. */ +struct EptVectEl +{ + EptVectEl( FsmState *targ, bool leaving ) + : targ(targ), leaving(leaving) { } + + FsmState *targ; + bool leaving; +}; +typedef Vector<EptVectEl> EptVect; + +/* Set of entry ids that go into this state. */ +typedef BstSet<int> EntryIdSet; + +/* Set of longest match items that may be active in a given state. */ +typedef BstSet<TokenDef*> LmItemSet; + +/* Conditions. 
*/ +typedef BstSet< Action*, CmpOrd<Action*> > CondSet; +typedef CmpTable< Action*, CmpOrd<Action*> > CmpCondSet; + +struct CondSpace + : public AvlTreeEl<CondSpace> +{ + CondSpace( const CondSet &condSet ) + : condSet(condSet) {} + + const CondSet &getKey() { return condSet; } + + CondSet condSet; + Key baseKey; + long condSpaceId; +}; + +typedef Vector<CondSpace*> CondSpaceVect; + +typedef AvlTree<CondSpace, CondSet, CmpCondSet> CondSpaceMap; + +struct StateCond +{ + StateCond( Key lowKey, Key highKey ) : + lowKey(lowKey), highKey(highKey) {} + + Key lowKey; + Key highKey; + CondSpace *condSpace; + + StateCond *prev, *next; +}; + +typedef DList<StateCond> StateCondList; +typedef Vector<long> LongVect; + +struct Expansion +{ + Expansion( Key lowKey, Key highKey ) : + lowKey(lowKey), highKey(highKey), + fromTrans(0), fromCondSpace(0), + toCondSpace(0) {} + + ~Expansion() + { + if ( fromTrans != 0 ) + delete fromTrans; + } + + Key lowKey; + Key highKey; + + FsmTrans *fromTrans; + CondSpace *fromCondSpace; + long fromVals; + + CondSpace *toCondSpace; + LongVect toValsList; + + Expansion *prev, *next; +}; + +typedef DList<Expansion> ExpansionList; + +struct Removal +{ + Key lowKey; + Key highKey; + + Removal *next; +}; + +struct CondData +{ + CondData() : nextCondKey(0) {} + + /* Condition info. */ + Key nextCondKey; + + CondSpaceMap condSpaceMap; +}; + +extern CondData *condData; + +/* State class that implements actions and priorities. */ +struct FsmState +{ + FsmState(); + FsmState(const FsmState &other); + ~FsmState(); + + /* Is the state final? */ + bool isFinState() { return stateBits & SB_ISFINAL; } + + /* Out transition list and the pointer for the default out trans. */ + TransList outList; + + /* In transition Lists. */ + TransInList inList; + + /* Entry points into the state. */ + EntryIdSet entryIds; + + /* Epsilon transitions. */ + EpsilonTrans epsilonTrans; + + /* Condition info. 
*/ + StateCondList stateCondList; + + /* Number of in transitions from states other than ourselves. */ + int foreignInTrans; + + /* Temporary data for various algorithms. */ + union { + /* When duplicating the fsm we need to map each + * state to the new state representing it. */ + FsmState *stateMap; + + /* When minimizing machines by partitioning, this maps to the group + * the state is in. */ + MinPartition *partition; + + /* When merging states (state machine operations) this next pointer is + * used for the list of states that need to be filled in. */ + FsmState *next; + + /* Identification for printing and stable minimization. */ + int stateNum; + + } alg; + + /* Data used in epsilon operation, maybe fit into alg? */ + FsmState *isolatedShadow; + int owningGraph; + + /* A pointer to a dict element that contains the set of states this state + * represents. This cannot go into alg, because alg.next is used during + * the merging process. */ + StateDictEl *stateDictEl; + + /* When drawing epsilon transitions, holds the list of states to merge + * with. */ + EptVect *eptVect; + + /* Bits controlling the behaviour of the state during collapsing to dfa. */ + int stateBits; + + /* State list elements. */ + FsmState *next, *prev; + + /* + * Priority and Action data. + */ + + /* Out priorities transferred to out transitions. */ + PriorTable outPriorTable; + + /* The following two action tables are distinguished by the fact that + * toState actions are executed immediately after transition actions of + * incoming transitions and the current character will be the same as the + * one available then. The fromState actions are executed immediately + * before the transition actions of outgoing transitions and the current + * character is the same as the one available then. */ + + /* Actions to execute upon entering into a state. */ + ActionTable toStateActionTable; + + /* Actions to execute when going from the state to the transition. 
*/ + ActionTable fromStateActionTable; + + /* Actions to add to any future transitions that leave via this state. */ + ActionTable outActionTable; + + /* Conditions to add to any future transitions that leave via this state. */ + ActionSet outCondSet; + + /* Error action tables. */ + ErrActionTable errActionTable; + + /* Actions to execute on eof. */ + ActionTable eofActionTable; + + /* Set of longest match items that may be active in this state. */ + LmItemSet lmItemSet; + + FsmState *eofTarget; +}; + +template <class ListItem> struct NextTrans +{ + Key lowKey, highKey; + ListItem *trans; + ListItem *next; + + void load() { + if ( trans == 0 ) + next = 0; + else { + next = trans->next; + lowKey = trans->lowKey; + highKey = trans->highKey; + } + } + + void set( ListItem *t ) { + trans = t; + load(); + } + + void increment() { + trans = next; + load(); + } +}; + + +/* Encodes the different states that are meaningful to the user of the iterator. */ +enum PairIterUserState +{ + RangeInS1, RangeInS2, + RangeOverlap, + BreakS1, BreakS2 +}; + +template <class ListItem1, class ListItem2 = ListItem1> struct PairIter +{ + /* Encodes the different states that an fsm iterator can be in. */ + enum IterState { + Begin, + ConsumeS1Range, ConsumeS2Range, + OnlyInS1Range, OnlyInS2Range, + S1SticksOut, S1SticksOutBreak, + S2SticksOut, S2SticksOutBreak, + S1DragsBehind, S1DragsBehindBreak, + S2DragsBehind, S2DragsBehindBreak, + ExactOverlap, End + }; + + PairIter( ListItem1 *list1, ListItem2 *list2 ); + + /* Query iterator. */ + bool lte() { return itState != End; } + bool end() { return itState == End; } + void operator++(int) { findNext(); } + void operator++() { findNext(); } + + /* Iterator state. 
*/ + ListItem1 *list1; + ListItem2 *list2; + IterState itState; + PairIterUserState userState; + + NextTrans<ListItem1> s1Tel; + NextTrans<ListItem2> s2Tel; + Key bottomLow, bottomHigh; + ListItem1 *bottomTrans1; + ListItem2 *bottomTrans2; + +private: + void findNext(); +}; + +/* Init the iterator by advancing to the first item. */ +template <class ListItem1, class ListItem2> PairIter<ListItem1, ListItem2>::PairIter( + ListItem1 *list1, ListItem2 *list2 ) +: + list1(list1), + list2(list2), + itState(Begin) +{ + findNext(); +} + +/* Return and re-entry for the co-routine iterators. This should ALWAYS be + * used inside of a block. */ +#define CO_RETURN(label) \ + itState = label; \ + return; \ + entry##label: {} + +/* Return and re-entry for the co-routine iterators. This should ALWAYS be + * used inside of a block. */ +#define CO_RETURN2(label, uState) \ + itState = label; \ + userState = uState; \ + return; \ + entry##label: {} + +/* Advance to the next transition. When it returns, trans points to the next + * transition, unless there are no more, in which case end() returns true. */ +template <class ListItem1, class ListItem2> void PairIter<ListItem1, ListItem2>::findNext() +{ + /* Jump into the iterator routine based on the iterator state. 
*/ + switch ( itState ) { + case Begin: goto entryBegin; + case ConsumeS1Range: goto entryConsumeS1Range; + case ConsumeS2Range: goto entryConsumeS2Range; + case OnlyInS1Range: goto entryOnlyInS1Range; + case OnlyInS2Range: goto entryOnlyInS2Range; + case S1SticksOut: goto entryS1SticksOut; + case S1SticksOutBreak: goto entryS1SticksOutBreak; + case S2SticksOut: goto entryS2SticksOut; + case S2SticksOutBreak: goto entryS2SticksOutBreak; + case S1DragsBehind: goto entryS1DragsBehind; + case S1DragsBehindBreak: goto entryS1DragsBehindBreak; + case S2DragsBehind: goto entryS2DragsBehind; + case S2DragsBehindBreak: goto entryS2DragsBehindBreak; + case ExactOverlap: goto entryExactOverlap; + case End: goto entryEnd; + } + +entryBegin: + /* Set up the next structs at the head of the transition lists. */ + s1Tel.set( list1 ); + s2Tel.set( list2 ); + + /* Concurrently scan both out ranges. */ + while ( true ) { + if ( s1Tel.trans == 0 ) { + /* We are at the end of state1's ranges. Process the rest of + * state2's ranges. */ + while ( s2Tel.trans != 0 ) { + /* Range is only in s2. */ + CO_RETURN2( ConsumeS2Range, RangeInS2 ); + s2Tel.increment(); + } + break; + } + else if ( s2Tel.trans == 0 ) { + /* We are at the end of state2's ranges. Process the rest of + * state1's ranges. */ + while ( s1Tel.trans != 0 ) { + /* Range is only in s1. */ + CO_RETURN2( ConsumeS1Range, RangeInS1 ); + s1Tel.increment(); + } + break; + } + /* Both state1's and state2's transition elements are good. + * The signature of no overlap is a back key being in front of a + * front key. */ + else if ( s1Tel.highKey < s2Tel.lowKey ) { + /* A range exists in state1 that does not overlap with state2. */ + CO_RETURN2( OnlyInS1Range, RangeInS1 ); + s1Tel.increment(); + } + else if ( s2Tel.highKey < s1Tel.lowKey ) { + /* A range exists in state2 that does not overlap with state1. */ + CO_RETURN2( OnlyInS2Range, RangeInS2 ); + s2Tel.increment(); + } + /* There is overlap, must mix the ranges in some way. 
*/ + else if ( s1Tel.lowKey < s2Tel.lowKey ) { + /* Range from state1 sticks out front. Must break it into + * non-overlapping and overlapping segments. */ + bottomLow = s2Tel.lowKey; + bottomHigh = s1Tel.highKey; + s1Tel.highKey = s2Tel.lowKey; + s1Tel.highKey.decrement(); + bottomTrans1 = s1Tel.trans; + + /* Notify the caller that we are breaking s1. This gives them a + * chance to duplicate s1Tel[0,1].value. */ + CO_RETURN2( S1SticksOutBreak, BreakS1 ); + + /* Broken off range is only in s1. */ + CO_RETURN2( S1SticksOut, RangeInS1 ); + + /* Advance over the part sticking out front. */ + s1Tel.lowKey = bottomLow; + s1Tel.highKey = bottomHigh; + s1Tel.trans = bottomTrans1; + } + else if ( s2Tel.lowKey < s1Tel.lowKey ) { + /* Range from state2 sticks out front. Must break it into + * non-overlapping and overlapping segments. */ + bottomLow = s1Tel.lowKey; + bottomHigh = s2Tel.highKey; + s2Tel.highKey = s1Tel.lowKey; + s2Tel.highKey.decrement(); + bottomTrans2 = s2Tel.trans; + + /* Notify the caller that we are breaking s2. This gives them a + * chance to duplicate s2Tel[0,1].value. */ + CO_RETURN2( S2SticksOutBreak, BreakS2 ); + + /* Broken off range is only in s2. */ + CO_RETURN2( S2SticksOut, RangeInS2 ); + + /* Advance over the part sticking out front. */ + s2Tel.lowKey = bottomLow; + s2Tel.highKey = bottomHigh; + s2Tel.trans = bottomTrans2; + } + /* Low ends are even. Are the high ends even? */ + else if ( s1Tel.highKey < s2Tel.highKey ) { + /* Range from state2 goes longer than the range from state1. We + * must break the range from state2 into an evenly overlapping + * segment. */ + bottomLow = s1Tel.highKey; + bottomLow.increment(); + bottomHigh = s2Tel.highKey; + s2Tel.highKey = s1Tel.highKey; + bottomTrans2 = s2Tel.trans; + + /* Notify the caller that we are breaking s2. This gives them a + * chance to duplicate s2Tel[0,1].value. */ + CO_RETURN2( S2DragsBehindBreak, BreakS2 ); + + /* Breaking s2 produces exact overlap. 
*/ + CO_RETURN2( S2DragsBehind, RangeOverlap ); + + /* Advance over the front we just broke off of range 2. */ + s2Tel.lowKey = bottomLow; + s2Tel.highKey = bottomHigh; + s2Tel.trans = bottomTrans2; + + /* Advance over the entire s1Tel. We have consumed it. */ + s1Tel.increment(); + } + else if ( s2Tel.highKey < s1Tel.highKey ) { + /* Range from state1 goes longer than the range from state2. We + * must break the range from state1 into an evenly overlapping + * segment. */ + bottomLow = s2Tel.highKey; + bottomLow.increment(); + bottomHigh = s1Tel.highKey; + s1Tel.highKey = s2Tel.highKey; + bottomTrans1 = s1Tel.trans; + + /* Notify the caller that we are breaking s1. This gives them a + * chance to duplicate s1Tel[0,1].value. */ + CO_RETURN2( S1DragsBehindBreak, BreakS1 ); + + /* Breaking s1 produces exact overlap. */ + CO_RETURN2( S1DragsBehind, RangeOverlap ); + + /* Advance over the front we just broke off of range 1. */ + s1Tel.lowKey = bottomLow; + s1Tel.highKey = bottomHigh; + s1Tel.trans = bottomTrans1; + + /* Advance over the entire s2Tel. We have consumed it. */ + s2Tel.increment(); + } + else { + /* There is an exact overlap. */ + CO_RETURN2( ExactOverlap, RangeOverlap ); + + s1Tel.increment(); + s2Tel.increment(); + } + } + + /* Done, go into end state. */ + CO_RETURN( End ); +} + + +/* Compare lists of epsilon transitions. Entries are name ids of targets. */ +typedef CmpTable< int, CmpOrd<int> > CmpEpsilonTrans; + +/* Compare class for the Approximate minimization. */ +class ApproxCompare +{ +public: + ApproxCompare() { } + int compare( const FsmState *pState1, const FsmState *pState2 ); +}; + +/* Compare class for the initial partitioning of a partition minimization. */ +class InitPartitionCompare +{ +public: + InitPartitionCompare() { } + int compare( const FsmState *pState1, const FsmState *pState2 ); +}; + +/* Compare class for the regular partitioning of a partition minimization. 
*/ +class PartitionCompare +{ +public: + PartitionCompare() { } + int compare( const FsmState *pState1, const FsmState *pState2 ); +}; + +/* Compare class for a minimization that marks pairs. Provides the shouldMark + * routine. */ +class MarkCompare +{ +public: + MarkCompare() { } + bool shouldMark( MarkIndex &markIndex, const FsmState *pState1, + const FsmState *pState2 ); +}; + +/* List of partitions. */ +typedef DList< MinPartition > PartitionList; + +/* List of transitions out of a state. */ +typedef Vector<TransEl> TransListVect; + +/* Entry point map used for keeping track of entry points in a machine. */ +typedef BstSet< int > EntryIdSet; +typedef BstMapEl< int, FsmState* > EntryMapEl; +typedef BstMap< int, FsmState* > EntryMap; +typedef Vector<EntryMapEl> EntryMapBase; + +/* Graph class that implements actions and priorities. */ +struct FsmGraph +{ + /* Constructors/Destructors. */ + FsmGraph( ); + FsmGraph( const FsmGraph &graph ); + ~FsmGraph(); + + /* The list of states. */ + StateList stateList; + StateList misfitList; + + /* The map of entry points. */ + EntryMap entryPoints; + + /* The start state. */ + FsmState *startState; + + /* Error state, possibly created only when the final machine has been + * created and the XML machine is about to be written. No transitions + * point to this state. */ + FsmState *errState; + + /* The set of final states. */ + StateSet finStateSet; + + /* Misfit Accounting. Are misfits put on a separate list. */ + bool misfitAccounting; + + bool lmRequiresErrorState; + NameInst *rootName; + NameInst **nameIndex; + + /* + * Transition actions and priorities. + */ + + /* Set priorities on transitions. */ + void startFsmPrior( int ordering, PriorDesc *prior ); + void allTransPrior( int ordering, PriorDesc *prior ); + void finishFsmPrior( int ordering, PriorDesc *prior ); + void leaveFsmPrior( int ordering, PriorDesc *prior ); + + /* Action setting support. 
*/ + void transferErrorActions( FsmState *state, int transferPoint ); + void setErrorAction( FsmState *state, int ordering, Action *action ); + void setErrorActions( FsmState *state, const ActionTable &other ); + + /* Fill all spaces in a transition list with an error transition. */ + void fillGaps( FsmState *state ); + + /* Similar to setErrorAction, instead gives a state to go to on error. */ + void setErrorTarget( FsmState *state, FsmState *target, int *orderings, + Action **actions, int nActs ); + + /* Set actions to execute. */ + void startFsmAction( int ordering, Action *action ); + void allTransAction( int ordering, Action *action ); + void finishFsmAction( int ordering, Action *action ); + void leaveFsmAction( int ordering, Action *action ); + void longMatchAction( int ordering, TokenDef *lmPart ); + + /* Set conditions. */ + CondSpace *addCondSpace( const CondSet &condSet ); + + void findEmbedExpansions( ExpansionList &expansionList, + FsmState *destState, Action *condAction ); + void embedCondition( MergeData &md, FsmState *state, Action *condAction ); + void embedCondition( FsmState *state, Action *condAction ); + + void startFsmCondition( Action *condAction ); + void allTransCondition( Action *condAction ); + void leaveFsmCondition( Action *condAction ); + + /* Set error actions to execute. */ + void startErrorAction( int ordering, Action *action, int transferPoint ); + void allErrorAction( int ordering, Action *action, int transferPoint ); + void finalErrorAction( int ordering, Action *action, int transferPoint ); + void notStartErrorAction( int ordering, Action *action, int transferPoint ); + void notFinalErrorAction( int ordering, Action *action, int transferPoint ); + void middleErrorAction( int ordering, Action *action, int transferPoint ); + + /* Set EOF actions. 
*/ + void startEOFAction( int ordering, Action *action ); + void allEOFAction( int ordering, Action *action ); + void finalEOFAction( int ordering, Action *action ); + void notStartEOFAction( int ordering, Action *action ); + void notFinalEOFAction( int ordering, Action *action ); + void middleEOFAction( int ordering, Action *action ); + + /* Set To State actions. */ + void startToStateAction( int ordering, Action *action ); + void allToStateAction( int ordering, Action *action ); + void finalToStateAction( int ordering, Action *action ); + void notStartToStateAction( int ordering, Action *action ); + void notFinalToStateAction( int ordering, Action *action ); + void middleToStateAction( int ordering, Action *action ); + + /* Set From State actions. */ + void startFromStateAction( int ordering, Action *action ); + void allFromStateAction( int ordering, Action *action ); + void finalFromStateAction( int ordering, Action *action ); + void notStartFromStateAction( int ordering, Action *action ); + void notFinalFromStateAction( int ordering, Action *action ); + void middleFromStateAction( int ordering, Action *action ); + + /* Shift the action ordering of the start transitions to start at + * fromOrder and increase in units of 1. Useful before kleene star + * operation. */ + int shiftStartActionOrder( int fromOrder ); + + /* Clear all priorities from the fsm to so they won't affcet minimization + * of the final fsm. */ + void clearAllPriorities(); + + /* Zero out all the function keys. */ + void nullActionKeys(); + + /* Walk the list of states and verify state properties. */ + void verifyStates(); + + /* Misfit Accounting. Are misfits put on a separate list. */ + void setMisfitAccounting( bool val ) + { misfitAccounting = val; } + + /* Set and Unset a state as final. */ + void setFinState( FsmState *state ); + void unsetFinState( FsmState *state ); + + void setStartState( FsmState *state ); + void unsetStartState( ); + + /* Set and unset a state as an entry point. 
*/ + void setEntry( int id, FsmState *state ); + void changeEntry( int id, FsmState *to, FsmState *from ); + void unsetEntry( int id, FsmState *state ); + void unsetEntry( int id ); + void unsetAllEntryPoints(); + + /* Epsilon transitions. */ + void epsilonTrans( int id ); + void shadowReadWriteStates( MergeData &md ); + + /* + * Basic attaching and detaching. + */ + + /* Common to attaching/detaching list and default. */ + void attachToInList( FsmState *from, FsmState *to, FsmTrans *&head, FsmTrans *trans ); + void detachFromInList( FsmState *from, FsmState *to, FsmTrans *&head, FsmTrans *trans ); + + /* Attach with a new transition. */ + FsmTrans *attachNewTrans( FsmState *from, FsmState *to, + Key onChar1, Key onChar2 ); + + /* Attach with an existing transition that is already in an out list. */ + void attachTrans( FsmState *from, FsmState *to, FsmTrans *trans ); + + /* Redirect a transition away from error and towards some state. */ + void redirectErrorTrans( FsmState *from, FsmState *to, FsmTrans *trans ); + + /* Detach a transition from a target state. */ + void detachTrans( FsmState *from, FsmState *to, FsmTrans *trans ); + + /* Detach a state from the graph. */ + void detachState( FsmState *state ); + + /* + * NFA to DFA conversion routines. + */ + + /* Duplicate a transition that will drop in to a free spot. */ + FsmTrans *dupTrans( FsmState *from, FsmTrans *srcTrans ); + + /* In crossing, two transitions both go to real states. */ + FsmTrans *fsmAttachStates( MergeData &md, FsmState *from, + FsmTrans *destTrans, FsmTrans *srcTrans ); + + /* Two transitions are to be crossed, handle the possibility of either + * going to the error state. */ + FsmTrans *mergeTrans( MergeData &md, FsmState *from, + FsmTrans *destTrans, FsmTrans *srcTrans ); + + /* Compare to determine the relative priorities of two transition tables. 
*/ + int comparePrior( const PriorTable &priorTable1, const PriorTable &priorTable2 ); + + /* Cross a src transition with one that is already occupying a spot. */ + FsmTrans *crossTransitions( MergeData &md, FsmState *from, + FsmTrans *destTrans, FsmTrans *srcTrans ); + + void outTransCopy( MergeData &md, FsmState *dest, FsmTrans *srcList ); + + void doRemove( MergeData &md, FsmState *destState, ExpansionList &expList1 ); + void doExpand( MergeData &md, FsmState *destState, ExpansionList &expList1 ); + void findCondExpInTrans( ExpansionList &expansionList, FsmState *state, + Key lowKey, Key highKey, CondSpace *fromCondSpace, CondSpace *toCondSpace, + long destVals, LongVect &toValsList ); + void findTransExpansions( ExpansionList &expansionList, + FsmState *destState, FsmState *srcState ); + void findCondExpansions( ExpansionList &expansionList, + FsmState *destState, FsmState *srcState ); + void mergeStateConds( FsmState *destState, FsmState *srcState ); + + /* Merge a set of states into newState. */ + void mergeStates( MergeData &md, FsmState *destState, + FsmState **srcStates, int numSrc ); + void mergeStatesLeaving( MergeData &md, FsmState *destState, FsmState *srcState ); + void mergeStates( MergeData &md, FsmState *destState, FsmState *srcState ); + + /* Make all states that are combinations of other states and that + * have not yet had their out transitions filled in. This will + * empty out stateDict and stFil. */ + void fillInStates( MergeData &md ); + + /* + * Transition Comparison. + */ + + /* Compare transition data. Either of the pointers may be null. */ + static inline int compareDataPtr( FsmTrans *trans1, FsmTrans *trans2 ); + + /* Compare target state and transition data. Either pointer may be null. */ + static inline int compareFullPtr( FsmTrans *trans1, FsmTrans *trans2 ); + + /* Compare target partitions. Either pointer may be null. 
*/ + static inline int comparePartPtr( FsmTrans *trans1, FsmTrans *trans2 ); + + /* Check marked status of target states. Either pointer may be null. */ + static inline bool shouldMarkPtr( MarkIndex &markIndex, + FsmTrans *trans1, FsmTrans *trans2 ); + + /* + * Callbacks. + */ + + /* Compare priority and function table of transitions. */ + static int compareTransData( FsmTrans *trans1, FsmTrans *trans2 ); + + /* Add in the properties of srcTrans into this. */ + void addInTrans( FsmTrans *destTrans, FsmTrans *srcTrans ); + + /* Compare states on data stored in the states. */ + static int compareStateData( const FsmState *state1, const FsmState *state2 ); + + /* Out transition data. */ + void clearOutData( FsmState *state ); + bool hasOutData( FsmState *state ); + void transferOutData( FsmState *destState, FsmState *srcState ); + + /* + * Allocation. + */ + + /* New up a state and add it to the graph. */ + FsmState *addState(); + + /* + * Building basic machines + */ + + void concatFsm( Key c ); + void concatFsm( Key *str, int len ); + void concatFsmCI( Key *str, int len ); + void orFsm( Key *set, int len ); + void rangeFsm( Key low, Key high ); + void rangeStarFsm( Key low, Key high ); + void emptyFsm( ); + void lambdaFsm( ); + + /* + * Fsm operators. + */ + + void starOp( ); + void repeatOp( int times ); + void optionalRepeatOp( int times ); + void concatOp( FsmGraph *other ); + void unionOp( FsmGraph *other ); + void intersectOp( FsmGraph *other ); + void subtractOp( FsmGraph *other ); + void epsilonOp(); + void joinOp( int startId, int finalId, FsmGraph **others, int numOthers ); + void globOp( FsmGraph **others, int numOthers ); + void deterministicEntry(); + + /* + * Operator workers + */ + + /* Determine if there are any entry points into a start state other than + * the start state. */ + bool isStartStateIsolated(); + + /* Make a new start state that has no entry points. Will not change the + * identity of the fsm. 
*/ + void isolateStartState(); + + /* Workers for resolving epsilon transitions. */ + bool inEptVect( EptVect *eptVect, FsmState *targ ); + void epsilonFillEptVectFrom( FsmState *root, FsmState *from, bool parentLeaving ); + void resolveEpsilonTrans( MergeData &md ); + + /* Workers for concatenation and union. */ + void doConcat( FsmGraph *other, StateSet *fromStates, bool optional ); + void doOr( FsmGraph *other ); + + /* + * Final states + */ + + /* Unset any final states that are no longer to be final + * due to final bits. */ + void unsetIncompleteFinals(); + void unsetKilledFinals(); + + /* Bring in other's entry points. Assumes other's states are going to be + * copied into this machine. */ + void copyInEntryPoints( FsmGraph *other ); + + /* Ordering states. */ + void depthFirstOrdering( FsmState *state ); + void depthFirstOrdering(); + void sortStatesByFinal(); + + /* Set sequential state numbers starting at 0. */ + void setStateNumbers( int base ); + + /* Unset all final states. */ + void unsetAllFinStates(); + + /* Set the bits of final states and clear the bits of non final states. */ + void setFinBits( int finStateBits ); + + /* + * Self-consistency checks. + */ + + /* Run a sanity check on the machine. */ + void verifyIntegrity(); + + /* Verify that there are no unreachable states, or dead end states. */ + void verifyReachability(); + void verifyNoDeadEndStates(); + + /* + * Path pruning + */ + + /* Mark all states that have a path to state (traverses the graph in reverse). */ + void markReachableFromHereReverse( FsmState *state ); + + /* Mark all states reachable from state. */ + void markReachableFromHere( FsmState *state ); + void markReachableFromHereStopFinal( FsmState *state ); + + /* Removes states that do not lead to a final state and are + * thus wasted silicon. */ + void removeDeadEndStates(); + + /* Removes states that cannot be reached by any path in the fsm and are + * thus wasted silicon. 
*/ + void removeUnreachableStates(); + + /* Remove error actions from states on which the error transition will + * never be taken. */ + bool outListCovers( FsmState *state ); + bool anyErrorRange( FsmState *state ); + + /* Remove states that are on the misfit list. */ + void removeMisfits(); + + /* + * FSM Minimization + */ + + /* Minimization by partitioning. */ + void minimizePartition1(); + void minimizePartition2(); + + /* Minimize the final state Machine. The result is the minimal fsm. Slow + * but stable, correct minimization. Uses n^2 space (lookout) and average + * n^2 time. Worst case n^3 time, but that is a very rare case. */ + void minimizeStable(); + + /* Minimize the final state machine. Does not find the minimal fsm, but a + * pretty good approximation. Does not use any extra space. Average n^2 + * time. Worst case n^3 time, but that is a very rare case. */ + void minimizeApproximate(); + + /* This is the worker for the minimize approximate solution. It merges + * states that have identical out transitions. */ + bool minimizeRound( ); + + /* Given an initial partitioning of states, split partitions that have out trans + * to differing partitions. */ + int partitionRound( FsmState **statePtrs, MinPartition *parts, int numParts ); + + /* Split partitions that have a transition to a previously split partition, until + * there are no more partitions to split. */ + int splitCandidates( FsmState **statePtrs, MinPartition *parts, int numParts ); + + /* Fuse together states in the same partition. */ + void fusePartitions( MinPartition *parts, int numParts ); + + /* Mark pairs where out final stateness differs, out trans data differs, + * trans pairs go to a marked pair or trans data differs. Should get + * a lot of pairs. */ + void initialMarkRound( MarkIndex &markIndex ); + + /* One marking round on all state pairs. Considers if trans pairs go + * to a marked state only. Returns whether or not a pair was marked. 
*/ + bool markRound( MarkIndex &markIndex ); + + /* Move the in trans into src into dest. */ + void inTransMove(FsmState *dest, FsmState *src); + + /* Make state src and dest the same state. */ + void fuseEquivStates(FsmState *dest, FsmState *src); + + /* Find any states that didn't get marked by the marking algorithm and + * merge them into the primary states of their equivalence class. */ + void fuseUnmarkedPairs( MarkIndex &markIndex ); + + /* Merge neighboring transitions go to the same state and have the same + * transitions data. */ + void compressTransitions(); + + /* Returns true if there is a transtion (either explicit or by a gap) to + * the error state. */ + bool checkErrTrans( FsmState *state, FsmTrans *trans ); + bool checkErrTransFinish( FsmState *state ); + bool hasErrorTrans(); +}; + + +#endif /* _FSMGRAPH_H */ diff --git a/src/fsmmin.cc b/src/fsmmin.cc new file mode 100644 index 00000000..cbb2b99f --- /dev/null +++ b/src/fsmmin.cc @@ -0,0 +1,732 @@ +/* + * Copyright 2006-2012 Adrian Thurston <thurston@complang.org> + */ + +/* This file is part of Colm. + * + * Colm is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Colm is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Colm; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "fsmgraph.h" +#include "mergesort.h" + +int FsmGraph::partitionRound( FsmState **statePtrs, MinPartition *parts, int numParts ) +{ + /* Need a mergesort object and a single partition compare. 
*/ + MergeSort<FsmState*, PartitionCompare> mergeSort; + PartitionCompare partCompare; + + /* For each partition. */ + for ( int p = 0; p < numParts; p++ ) { + /* Fill the pointer array with the states in the partition. */ + StateList::Iter state = parts[p].list; + for ( int s = 0; state.lte(); state++, s++ ) + statePtrs[s] = state; + + /* Sort the states using the partitioning compare. */ + int numStates = parts[p].list.length(); + mergeSort.sort( statePtrs, numStates ); + + /* Assign the states into partitions based on the results of the sort. */ + int destPart = p, firstNewPart = numParts; + for ( int s = 1; s < numStates; s++ ) { + /* If this state differs from the last then move to the next partition. */ + if ( partCompare.compare( statePtrs[s-1], statePtrs[s] ) < 0 ) { + /* The new partition is the next avail spot. */ + destPart = numParts; + numParts += 1; + } + + /* If the state is not staying in the first partition, then + * transfer it to its destination partition. */ + if ( destPart != p ) { + FsmState *state = parts[p].list.detach( statePtrs[s] ); + parts[destPart].list.append( state ); + } + } + + /* Fix the partition pointer for all the states that got moved to a new + * partition. This must be done after the states are transfered so the + * result of the sort is not altered. */ + for ( int newPart = firstNewPart; newPart < numParts; newPart++ ) { + StateList::Iter state = parts[newPart].list; + for ( ; state.lte(); state++ ) + state->alg.partition = &parts[newPart]; + } + } + + return numParts; +} + +/** + * \brief Minimize by partitioning version 1. + * + * Repeatedly tries to split partitions until all partitions are unsplittable. + * Produces the most minimal FSM possible. + */ +void FsmGraph::minimizePartition1() +{ + /* Need one mergesort object and partition compares. */ + MergeSort<FsmState*, InitPartitionCompare> mergeSort; + InitPartitionCompare initPartCompare; + + /* Nothing to do if there are no states. 
*/ + if ( stateList.length() == 0 ) + return; + + /* + * First thing is to partition the states by final state status and + * transition functions. This gives us an initial partitioning to work + * with. + */ + + /* Make a array of pointers to states. */ + int numStates = stateList.length(); + FsmState** statePtrs = new FsmState*[numStates]; + + /* Fill up an array of pointers to the states for easy sorting. */ + StateList::Iter state = stateList; + for ( int s = 0; state.lte(); state++, s++ ) + statePtrs[s] = state; + + /* Sort the states using the array of states. */ + mergeSort.sort( statePtrs, numStates ); + + /* An array of lists of states is used to partition the states. */ + MinPartition *parts = new MinPartition[numStates]; + + /* Assign the states into partitions. */ + int destPart = 0; + for ( int s = 0; s < numStates; s++ ) { + /* If this state differs from the last then move to the next partition. */ + if ( s > 0 && initPartCompare.compare( statePtrs[s-1], statePtrs[s] ) < 0 ) { + /* Move to the next partition. */ + destPart += 1; + } + + /* Put the state into its partition. */ + statePtrs[s]->alg.partition = &parts[destPart]; + parts[destPart].list.append( statePtrs[s] ); + } + + /* We just moved all the states from the main list into partitions without + * taking them off the main list. So clean up the main list now. */ + stateList.abandon(); + + /* Split partitions. */ + int numParts = destPart + 1; + while ( true ) { + /* Test all partitions for splitting. */ + int newNum = partitionRound( statePtrs, parts, numParts ); + + /* When no partitions can be split, stop. */ + if ( newNum == numParts ) + break; + + numParts = newNum; + } + + /* Fuse states in the same partition. The states will end up back on the + * main list. */ + fusePartitions( parts, numParts ); + + /* Cleanup. 
*/ + delete[] statePtrs; + delete[] parts; +} + +/* Split partitions that need splitting, decide which partitions might need + * to be split as a result, continue until there are no more that might need + * to be split. */ +int FsmGraph::splitCandidates( FsmState **statePtrs, MinPartition *parts, int numParts ) +{ + /* Need a mergesort and a partition compare. */ + MergeSort<FsmState*, PartitionCompare> mergeSort; + PartitionCompare partCompare; + + /* The lists of unsplittable (partList) and splittable partitions. + * Only partitions in the splittable list are checked for needing splitting. */ + PartitionList partList, splittable; + + /* Initially, all partitions are born from a split (the initial + * partitioning) and can cause other partitions to be split. So any + * partition with a state with a transition out to another partition is a + * candidate for splitting. This will make every partition except possibly + * partitions of final states split candidates. */ + for ( int p = 0; p < numParts; p++ ) { + /* Assume not active. */ + parts[p].active = false; + + /* Look for a trans out of any state in the partition. */ + for ( StateList::Iter state = parts[p].list; state.lte(); state++ ) { + /* If there is at least one transition out to another state then + * the partition becomes splittable. */ + if ( state->outList.length() > 0 ) { + parts[p].active = true; + break; + } + } + + /* If it was found active then it goes on the splittable list. */ + if ( parts[p].active ) + splittable.append( &parts[p] ); + else + partList.append( &parts[p] ); + } + + /* While there are partitions that are splittable, pull one off and try + * to split it. If it splits, determine which partitions may now be split + * as a result of the newly split partition. */ + while ( splittable.length() > 0 ) { + MinPartition *partition = splittable.detachFirst(); + + /* Fill the pointer array with the states in the partition. 
*/ + StateList::Iter state = partition->list; + for ( int s = 0; state.lte(); state++, s++ ) + statePtrs[s] = state; + + /* Sort the states using the partitioning compare. */ + int numStates = partition->list.length(); + mergeSort.sort( statePtrs, numStates ); + + /* Assign the states into partitions based on the results of the sort. */ + MinPartition *destPart = partition; + int firstNewPart = numParts; + for ( int s = 1; s < numStates; s++ ) { + /* If this state differs from the last then move to the next partition. */ + if ( partCompare.compare( statePtrs[s-1], statePtrs[s] ) < 0 ) { + /* The new partition is the next avail spot. */ + destPart = &parts[numParts]; + numParts += 1; + } + + /* If the state is not staying in the first partition, then + * transfer it to its destination partition. */ + if ( destPart != partition ) { + FsmState *state = partition->list.detach( statePtrs[s] ); + destPart->list.append( state ); + } + } + + /* Fix the partition pointer for all the states that got moved to a new + * partition. This must be done after the states are transfered so the + * result of the sort is not altered. */ + int newPart; + for ( newPart = firstNewPart; newPart < numParts; newPart++ ) { + StateList::Iter state = parts[newPart].list; + for ( ; state.lte(); state++ ) + state->alg.partition = &parts[newPart]; + } + + /* Put the partition we just split and any new partitions that came out + * of the split onto the inactive list. */ + partition->active = false; + partList.append( partition ); + for ( newPart = firstNewPart; newPart < numParts; newPart++ ) { + parts[newPart].active = false; + partList.append( &parts[newPart] ); + } + + if ( destPart == partition ) + continue; + + /* Now determine which partitions are splittable as a result of + * splitting partition by walking the in lists of the states in + * partitions that got split. Partition is the faked first item in the + * loop. 
*/ + MinPartition *causalPart = partition; + newPart = firstNewPart - 1; + while ( newPart < numParts ) { + /* Loop all states in the causal partition. */ + StateList::Iter state = causalPart->list; + for ( ; state.lte(); state++ ) { + /* Walk all transition into the state and put the partition + * that the from state is in onto the splittable list. */ + for ( TransInList::Iter trans = state->inList; trans.lte(); trans++ ) { + MinPartition *fromPart = trans->fromState->alg.partition; + if ( ! fromPart->active ) { + fromPart->active = true; + partList.detach( fromPart ); + splittable.append( fromPart ); + } + } + } + + newPart += 1; + causalPart = &parts[newPart]; + } + } + return numParts; +} + + +/** + * \brief Minimize by partitioning version 2 (best alg). + * + * Repeatedly tries to split partitions that may splittable until there are no + * more partitions that might possibly need splitting. Runs faster than + * version 1. Produces the most minimal fsm possible. + */ +void FsmGraph::minimizePartition2() +{ + /* Need a mergesort and an initial partition compare. */ + MergeSort<FsmState*, InitPartitionCompare> mergeSort; + InitPartitionCompare initPartCompare; + + /* Nothing to do if there are no states. */ + if ( stateList.length() == 0 ) + return; + + /* + * First thing is to partition the states by final state status and + * transition functions. This gives us an initial partitioning to work + * with. + */ + + /* Make a array of pointers to states. */ + int numStates = stateList.length(); + FsmState** statePtrs = new FsmState*[numStates]; + + /* Fill up an array of pointers to the states for easy sorting. */ + StateList::Iter state = stateList; + for ( int s = 0; state.lte(); state++, s++ ) + statePtrs[s] = state; + + /* Sort the states using the array of states. */ + mergeSort.sort( statePtrs, numStates ); + + /* An array of lists of states is used to partition the states. 
*/ + MinPartition *parts = new MinPartition[numStates]; + + /* Assign the states into partitions. */ + int destPart = 0; + for ( int s = 0; s < numStates; s++ ) { + /* If this state differs from the last then move to the next partition. */ + if ( s > 0 && initPartCompare.compare( statePtrs[s-1], statePtrs[s] ) < 0 ) { + /* Move to the next partition. */ + destPart += 1; + } + + /* Put the state into its partition. */ + statePtrs[s]->alg.partition = &parts[destPart]; + parts[destPart].list.append( statePtrs[s] ); + } + + /* We just moved all the states from the main list into partitions without + * taking them off the main list. So clean up the main list now. */ + stateList.abandon(); + + /* Split partitions. */ + int numParts = splitCandidates( statePtrs, parts, destPart+1 ); + + /* Fuse states in the same partition. The states will end up back on the + * main list. */ + fusePartitions( parts, numParts ); + + /* Cleanup. */ + delete[] statePtrs; + delete[] parts; +} + +void FsmGraph::initialMarkRound( MarkIndex &markIndex ) +{ + /* P and q for walking pairs. */ + FsmState *p = stateList.head, *q; + + /* Need an initial partition compare. */ + InitPartitionCompare initPartCompare; + + /* Walk all unordered pairs of (p, q) where p != q. + * The second depth of the walk stops before reaching p. This + * gives us all unordered pairs of states (p, q) where p != q. */ + while ( p != 0 ) { + q = stateList.head; + while ( q != p ) { + /* If the states differ on final state status, out transitions or + * any transition data then they should be separated on the initial + * round. */ + if ( initPartCompare.compare( p, q ) != 0 ) + markIndex.markPair( p->alg.stateNum, q->alg.stateNum ); + + q = q->next; + } + p = p->next; + } +} + +bool FsmGraph::markRound( MarkIndex &markIndex ) +{ + /* P an q for walking pairs. Take note if any pair gets marked. */ + FsmState *p = stateList.head, *q; + bool pairWasMarked = false; + + /* Need a mark comparison. 
*/ + MarkCompare markCompare; + + /* Walk all unordered pairs of (p, q) where p != q. + * The second depth of the walk stops before reaching p. This + * gives us all unordered pairs of states (p, q) where p != q. */ + while ( p != 0 ) { + q = stateList.head; + while ( q != p ) { + /* Should we mark the pair? */ + if ( !markIndex.isPairMarked( p->alg.stateNum, q->alg.stateNum ) ) { + if ( markCompare.shouldMark( markIndex, p, q ) ) { + markIndex.markPair( p->alg.stateNum, q->alg.stateNum ); + pairWasMarked = true; + } + } + q = q->next; + } + p = p->next; + } + + return pairWasMarked; +} + + +/** + * \brief Minimize by pair marking. + * + * Decides if each pair of states is distinct or not. Uses O(n^2) memory and + * should only be used on small graphs. Produces the most minmimal FSM + * possible. + */ +void FsmGraph::minimizeStable() +{ + /* Set the state numbers. */ + setStateNumbers( 0 ); + + /* This keeps track of which pairs have been marked. */ + MarkIndex markIndex( stateList.length() ); + + /* Mark pairs where final stateness, out trans, or trans data differ. */ + initialMarkRound( markIndex ); + + /* While the last round of marking succeeded in marking a state + * continue to do another round. */ + int modified = markRound( markIndex ); + while (modified) + modified = markRound( markIndex ); + + /* Merge pairs that are unmarked. */ + fuseUnmarkedPairs( markIndex ); +} + +bool FsmGraph::minimizeRound() +{ + /* Nothing to do if there are no states. */ + if ( stateList.length() == 0 ) + return false; + + /* Need a mergesort on approx compare and an approx compare. */ + MergeSort<FsmState*, ApproxCompare> mergeSort; + ApproxCompare approxCompare; + + /* Fill up an array of pointers to the states. */ + FsmState **statePtrs = new FsmState*[stateList.length()]; + StateList::Iter state = stateList; + for ( int s = 0; state.lte(); state++, s++ ) + statePtrs[s] = state; + + bool modified = false; + + /* Sort The list. 
*/ + mergeSort.sort( statePtrs, stateList.length() ); + + /* Walk the list looking for duplicates next to each other, + * merge in any duplicates. NOTE(review): the loop bound is re-read on + * every pass and fusing detaches states from stateList, so the walk + * presumably stops short after a fuse; minimizeApproximate() then + * reruns the round -- confirm. */ + FsmState **pLast = statePtrs; + FsmState **pState = statePtrs + 1; + for ( int i = 1; i < stateList.length(); i++, pState++ ) { + if ( approxCompare.compare( *pLast, *pState ) == 0 ) { + /* Last and pState are the same, so fuse together. Move forward + * with pState but not with pLast. If any more are identical, they + * must also be fused into pLast. */ + fuseEquivStates( *pLast, *pState ); + modified = true; + } + else { + /* Last and this are different, do not set to merge them. Move + * pLast to the current (it may be way behind from merging many + * states) and pState forward one to consider the next pair. */ + pLast = pState; + } + } + delete[] statePtrs; + return modified; +} + +/** + * \brief Minimize by an approximation. + * + * Repeatedly tries to find states with transitions out to the same set of + * states on the same set of keys until no more identical states can be found. + * Does not produce the most minimal FSM possible. + */ +void FsmGraph::minimizeApproximate() +{ + /* While the last minimization round succeeded in compacting states, + * continue to try to compact states. */ + while ( true ) { + bool modified = minimizeRound(); + if ( ! modified ) + break; + } +} + + +/* Remove states that have no path to them from the start state. Recursively + * traverses the graph marking states that have paths into them. Then removes + * all states that did not get marked. */ +void FsmGraph::removeUnreachableStates() +{ + /* Misfit accounting should be off and there should be no states on the + * misfit list. */ + assert( !misfitAccounting && misfitList.length() == 0 ); + + /* Mark all the states that can be reached + * through the existing set of entry points. 
*/ + markReachableFromHere( startState ); + for ( EntryMap::Iter en = entryPoints; en.lte(); en++ ) + markReachableFromHere( en->value ); + + /* Delete all states that are not marked + * and unmark the ones that are marked. */ + FsmState *state = stateList.head; + while ( state ) { + FsmState *next = state->next; + + if ( state->stateBits & SB_ISMARKED ) + state->stateBits &= ~ SB_ISMARKED; + else { + detachState( state ); + stateList.detach( state ); + delete state; + } + + state = next; + } +} + +bool FsmGraph::outListCovers( FsmState *state ) +{ + /* Must be at least one range to cover. */ + if ( state->outList.length() == 0 ) + return false; + + /* The first must start at the lower bound. */ + TransList::Iter trans = state->outList.first(); + if ( keyOps->minKey < trans->lowKey ) + return false; + + /* Loop starts at second el. */ + trans.increment(); + + /* Loop checks lower against prev upper. */ + for ( ; trans.lte(); trans++ ) { + /* Lower end of the trans must be one greater than the + * previous' high end. */ + Key lowKey = trans->lowKey; + lowKey.decrement(); + if ( trans->prev->highKey < lowKey ) + return false; + } + + /* Require that the last range extends to the upper bound. */ + trans = state->outList.last(); + if ( trans->highKey < keyOps->maxKey ) + return false; + + return true; +} + +/* Remove states that do not lead to a final state. Works by recursively traversing + * the graph in reverse (starting from all final states) and marking seen states. Then + * removes states that did not get marked. */ +void FsmGraph::removeDeadEndStates() +{ + /* Misfit accounting should be off and there should be no states on the + * misfit list. */ + assert( !misfitAccounting && misfitList.length() == 0 ); + + /* Mark all states that have paths to the final states. */ + FsmState **st = finStateSet.data; + int nst = finStateSet.length(); + for ( int i = 0; i < nst; i++, st++ ) + markReachableFromHereReverse( *st ); + + /* Start state gets honorary marking. 
If the machine accepts nothing we + * still want the start state to hang around. This must be done after the + * recursive call on all the final states so that it does not cause the + * start state in transitions to be skipped when the start state is + * visited by the traversal. */ + startState->stateBits |= SB_ISMARKED; + + /* Delete all states that are not marked + * and unmark the ones that are marked. */ + FsmState *state = stateList.head; + while ( state != 0 ) { + FsmState *next = state->next; + + if ( state->stateBits & SB_ISMARKED ) + state->stateBits &= ~ SB_ISMARKED; + else { + detachState( state ); + stateList.detach( state ); + delete state; + } + + state = next; + } +} + +/* Remove states on the misfit list. To work properly misfit accounting should + * be on when this is called. The detaching of a state will likely cause + * another misfit to be collected and it can then be removed. */ +void FsmGraph::removeMisfits() +{ + while ( misfitList.length() > 0 ) { + /* Get the first state. */ + FsmState *state = misfitList.head; + + /* Detach and delete. */ + detachState( state ); + + /* The state was previously on the misfit list and detaching can only + * remove in transitions so the state must still be on the misfit + * list. */ + misfitList.detach( state ); + delete state; + } +} + +/* Fuse src into dest because they have been deemed equivalent states. + * Involves moving transitions into src to go into dest and invoking + * callbacks. Src is deleted detached from the graph and deleted. */ +void FsmGraph::fuseEquivStates( FsmState *dest, FsmState *src ) +{ + /* This would get ugly. */ + assert( dest != src ); + + /* Cur is a duplicate. We can merge it with trail. 
*/ + inTransMove( dest, src ); + + detachState( src ); + stateList.detach( src ); + delete src; +} + +void FsmGraph::fuseUnmarkedPairs( MarkIndex &markIndex ) +{ + FsmState *p = stateList.head, *nextP, *q; + + /* Definition: The primary state of an equivalence class is the first state + * encounterd that belongs to the equivalence class. All equivalence + * classes have primary state including equivalence classes with one state + * in it. */ + + /* For each unmarked pair merge p into q and delete p. q is always the + * primary state of it's equivalence class. We wouldn't have landed on it + * here if it were not, because it would have been deleted. + * + * Proof that q is the primaray state of it's equivalence class: Assume q + * is not the primary state of it's equivalence class, then it would be + * merged into some state that came before it and thus p would be + * equivalent to that state. But q is the first state that p is equivalent + * to so we have a contradiction. */ + + /* Walk all unordered pairs of (p, q) where p != q. + * The second depth of the walk stops before reaching p. This + * gives us all unordered pairs of states (p, q) where p != q. */ + while ( p != 0 ) { + nextP = p->next; + + q = stateList.head; + while ( q != p ) { + /* If one of p or q is a final state then mark. */ + if ( ! markIndex.isPairMarked( p->alg.stateNum, q->alg.stateNum ) ) { + fuseEquivStates( q, p ); + break; + } + q = q->next; + } + p = nextP; + } +} + +void FsmGraph::fusePartitions( MinPartition *parts, int numParts ) +{ + /* For each partition, fuse state 2, 3, ... into state 1. */ + for ( int p = 0; p < numParts; p++ ) { + /* Assume that there will always be at least one state. */ + FsmState *first = parts[p].list.head, *toFuse = first->next; + + /* Put the first state back onto the main state list. Don't bother + * removing it from the partition list first. */ + stateList.append( first ); + + /* Fuse the rest of the state into the first. 
*/ + while ( toFuse != 0 ) { + /* Save the next. We will trash it before it is needed. */ + FsmState *next = toFuse->next; + + /* Put the state to be fused in to the first back onto the main + * list before it is fuse. the graph. The state needs to be on + * the main list for the detach from the graph to work. Don't + * bother removing the state from the partition list first. We + * need not maintain it. */ + stateList.append( toFuse ); + + /* Now fuse to the first. */ + fuseEquivStates( first, toFuse ); + + /* Go to the next that we saved before trashing the next pointer. */ + toFuse = next; + } + + /* We transfered the states from the partition list into the main list without + * removing the states from the partition list first. Clean it up. */ + parts[p].list.abandon(); + } +} + + +/* Merge neighboring transitions go to the same state and have the same + * transitions data. */ +void FsmGraph::compressTransitions() +{ + for ( StateList::Iter st = stateList; st.lte(); st++ ) { + if ( st->outList.length() > 1 ) { + for ( TransList::Iter trans = st->outList, next = trans.next(); next.lte(); ) { + Key nextLow = next->lowKey; + nextLow.decrement(); + if ( trans->highKey == nextLow && trans->toState == next->toState && + CmpActionTable::compare( trans->actionTable, next->actionTable ) == 0 ) + { + trans->highKey = next->highKey; + st->outList.detach( next ); + detachTrans( next->fromState, next->toState, next ); + delete next; + next = trans.next(); + } + else { + trans.increment(); + next.increment(); + } + } + } + } +} diff --git a/src/fsmrun.h b/src/fsmrun.h new file mode 100644 index 00000000..f92b5e5e --- /dev/null +++ b/src/fsmrun.h @@ -0,0 +1,36 @@ +/* + * Copyright 2007-2012 Adrian Thurston <thurston@complang.org> + */ + +/* This file is part of Colm. 
+ * + * Colm is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Colm is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Colm; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _FSMRUN2_H +#define _FSMRUN2_H + +#include <input.h> + +#ifdef __cplusplus +extern "C" { +#endif + + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/fsmstate.cc b/src/fsmstate.cc new file mode 100644 index 00000000..dae1479b --- /dev/null +++ b/src/fsmstate.cc @@ -0,0 +1,467 @@ +/* + * Copyright 2006-2012 Adrian Thurston <thurston@complang.org> + */ + +/* This file is part of Colm. + * + * Colm is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Colm is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Colm; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <string.h> +#include <assert.h> +#include "fsmgraph.h" + +#include <iostream> +using namespace std; + +/* Construct a mark index for a specified number of states. 
Must new up + * an array that is states^2 in size. */ +MarkIndex::MarkIndex( int states ) : numStates(states) +{ + /* Total pairs is states^2. Actually only use half of these, but we allocate + * them all to make indexing into the array easier. */ + int total = states * states; + + /* New up chars so that individual DListEl constructors are + * not called. Zero out the mem manually. */ + array = new bool[total]; + memset( array, 0, sizeof(bool) * total ); +} + +/* Free the array used to store state pairs. */ +MarkIndex::~MarkIndex() +{ + delete[] array; +} + +/* Mark a pair of states. States are specified by their number. The + * marked states are moved from the unmarked list to the marked list. */ +void MarkIndex::markPair(int state1, int state2) +{ + int pos = ( state1 >= state2 ) ? + ( state1 * numStates ) + state2 : + ( state2 * numStates ) + state1; + + array[pos] = true; +} + +/* Returns true if the pair of states are marked. Returns false otherwise. + * Ordering of states given does not matter. */ +bool MarkIndex::isPairMarked(int state1, int state2) +{ + int pos = ( state1 >= state2 ) ? + ( state1 * numStates ) + state2 : + ( state2 * numStates ) + state1; + + return array[pos]; +} + +/* Create a new fsm state. State has not out transitions or in transitions, not + * out out transition data and not number. */ +FsmState::FsmState() +: + /* No out or in transitions. */ + outList(), + inList(), + + /* No entry points, or epsilon trans. */ + entryIds(), + epsilonTrans(), + + /* Conditions. */ + stateCondList(), + + /* No transitions in from other states. */ + foreignInTrans(0), + + /* Only used during merging. Normally null. */ + stateDictEl(0), + eptVect(0), + + /* No state identification bits. */ + stateBits(0), + + /* No Priority data. */ + outPriorTable(), + + /* No Action data. 
*/ + toStateActionTable(), + fromStateActionTable(), + outActionTable(), + outCondSet(), + errActionTable(), + eofActionTable(), + + eofTarget(0) +{ +} + +/* Copy everything except actual the transitions. That is left up to the + * FsmGraph copy constructor. */ +FsmState::FsmState(const FsmState &other) +: + /* All lists are cleared. They will be filled in when the + * individual transitions are duplicated and attached. */ + outList(), + inList(), + + /* Duplicate the entry id set and epsilon transitions. These + * are sets of integers and as such need no fixing. */ + entryIds(other.entryIds), + epsilonTrans(other.epsilonTrans), + + /* Copy in the elements of the conditions. */ + stateCondList( other.stateCondList ), + + /* No transitions in from other states. */ + foreignInTrans(0), + + /* This is only used during merging. Normally null. */ + stateDictEl(0), + eptVect(0), + + /* Fsm state data. */ + stateBits(other.stateBits), + + /* Copy in priority data. */ + outPriorTable(other.outPriorTable), + + /* Copy in action data. */ + toStateActionTable(other.toStateActionTable), + fromStateActionTable(other.fromStateActionTable), + outActionTable(other.outActionTable), + outCondSet(other.outCondSet), + errActionTable(other.errActionTable), + eofActionTable(other.eofActionTable), + + eofTarget(0) +{ + /* Duplicate all the transitions. */ + for ( TransList::Iter trans = other.outList; trans.lte(); trans++ ) { + /* Dupicate and store the orginal target in the transition. This will + * be corrected once all the states have been created. */ + FsmTrans *newTrans = new FsmTrans(*trans); + newTrans->toState = trans->toState; + outList.append( newTrans ); + } +} + +/* If there is a state dict element, then delete it. Everything else is left + * up to the FsmGraph destructor. */ +FsmState::~FsmState() +{ + if ( stateDictEl != 0 ) + delete stateDictEl; +} + +/* Compare two states using pointers to the states. 
With the approximate + * compare the idea is that if the compare finds them the same, they can + * immediately be merged. */ +int ApproxCompare::compare( const FsmState *state1 , const FsmState *state2 ) +{ + int compareRes; + + /* Test final state status. */ + if ( (state1->stateBits & SB_ISFINAL) && !(state2->stateBits & SB_ISFINAL) ) + return -1; + else if ( !(state1->stateBits & SB_ISFINAL) && (state2->stateBits & SB_ISFINAL) ) + return 1; + + /* Test epsilon transition sets. */ + compareRes = CmpEpsilonTrans::compare( state1->epsilonTrans, + state2->epsilonTrans ); + if ( compareRes != 0 ) + return compareRes; + + /* Compare the out transitions. */ + compareRes = FsmGraph::compareStateData( state1, state2 ); + if ( compareRes != 0 ) + return compareRes; + + /* Use a pair iterator to get the transition pairs. */ + PairIter<FsmTrans> outPair( state1->outList.head, state2->outList.head ); + for ( ; !outPair.end(); outPair++ ) { + switch ( outPair.userState ) { + + case RangeInS1: + compareRes = FsmGraph::compareFullPtr( outPair.s1Tel.trans, 0 ); + if ( compareRes != 0 ) + return compareRes; + break; + + case RangeInS2: + compareRes = FsmGraph::compareFullPtr( 0, outPair.s2Tel.trans ); + if ( compareRes != 0 ) + return compareRes; + break; + + case RangeOverlap: + compareRes = FsmGraph::compareFullPtr( + outPair.s1Tel.trans, outPair.s2Tel.trans ); + if ( compareRes != 0 ) + return compareRes; + break; + + case BreakS1: + case BreakS2: + break; + } + } + + /* Got through the entire state comparison, deem them equal. */ + return 0; +} + +/* Compare class for the sort that does the intial partition of compaction. */ +int InitPartitionCompare::compare( const FsmState *state1 , const FsmState *state2 ) +{ + int compareRes; + + /* Test final state status. 
*/ + if ( (state1->stateBits & SB_ISFINAL) && !(state2->stateBits & SB_ISFINAL) ) + return -1; + else if ( !(state1->stateBits & SB_ISFINAL) && (state2->stateBits & SB_ISFINAL) ) + return 1; + + /* Test epsilon transition sets. */ + compareRes = CmpEpsilonTrans::compare( state1->epsilonTrans, + state2->epsilonTrans ); + if ( compareRes != 0 ) + return compareRes; + + /* Compare the out transitions. */ + compareRes = FsmGraph::compareStateData( state1, state2 ); + if ( compareRes != 0 ) + return compareRes; + + /* Use a pair iterator to test the condition pairs. */ + PairIter<StateCond> condPair( state1->stateCondList.head, state2->stateCondList.head ); + for ( ; !condPair.end(); condPair++ ) { + switch ( condPair.userState ) { + case RangeInS1: + return 1; + case RangeInS2: + return -1; + + case RangeOverlap: { + CondSpace *condSpace1 = condPair.s1Tel.trans->condSpace; + CondSpace *condSpace2 = condPair.s2Tel.trans->condSpace; + if ( condSpace1 < condSpace2 ) + return -1; + else if ( condSpace1 > condSpace2 ) + return 1; + break; + } + case BreakS1: + case BreakS2: + break; + } + } + + /* Use a pair iterator to test the transition pairs. */ + PairIter<FsmTrans> outPair( state1->outList.head, state2->outList.head ); + for ( ; !outPair.end(); outPair++ ) { + switch ( outPair.userState ) { + + case RangeInS1: + compareRes = FsmGraph::compareDataPtr( outPair.s1Tel.trans, 0 ); + if ( compareRes != 0 ) + return compareRes; + break; + + case RangeInS2: + compareRes = FsmGraph::compareDataPtr( 0, outPair.s2Tel.trans ); + if ( compareRes != 0 ) + return compareRes; + break; + + case RangeOverlap: + compareRes = FsmGraph::compareDataPtr( + outPair.s1Tel.trans, outPair.s2Tel.trans ); + if ( compareRes != 0 ) + return compareRes; + break; + + case BreakS1: + case BreakS2: + break; + } + } + + return 0; +} + +/* Compare class for the sort that does the partitioning. 
*/ +int PartitionCompare::compare( const FsmState *state1, const FsmState *state2 ) +{ + int compareRes; + + /* Use a pair iterator to get the transition pairs. */ + PairIter<FsmTrans> outPair( state1->outList.head, state2->outList.head ); + for ( ; !outPair.end(); outPair++ ) { + switch ( outPair.userState ) { + + case RangeInS1: + compareRes = FsmGraph::comparePartPtr( outPair.s1Tel.trans, 0 ); + if ( compareRes != 0 ) + return compareRes; + break; + + case RangeInS2: + compareRes = FsmGraph::comparePartPtr( 0, outPair.s2Tel.trans ); + if ( compareRes != 0 ) + return compareRes; + break; + + case RangeOverlap: + compareRes = FsmGraph::comparePartPtr( + outPair.s1Tel.trans, outPair.s2Tel.trans ); + if ( compareRes != 0 ) + return compareRes; + break; + + case BreakS1: + case BreakS2: + break; + } + } + + return 0; +} + +/* Compare class for the sort that does the partitioning. */ +bool MarkCompare::shouldMark( MarkIndex &markIndex, const FsmState *state1, + const FsmState *state2 ) +{ + /* Use a pair iterator to get the transition pairs. */ + PairIter<FsmTrans> outPair( state1->outList.head, state2->outList.head ); + for ( ; !outPair.end(); outPair++ ) { + switch ( outPair.userState ) { + + case RangeInS1: + if ( FsmGraph::shouldMarkPtr( markIndex, outPair.s1Tel.trans, 0 ) ) + return true; + break; + + case RangeInS2: + if ( FsmGraph::shouldMarkPtr( markIndex, 0, outPair.s2Tel.trans ) ) + return true; + break; + + case RangeOverlap: + if ( FsmGraph::shouldMarkPtr( markIndex, + outPair.s1Tel.trans, outPair.s2Tel.trans ) ) + return true; + break; + + case BreakS1: + case BreakS2: + break; + } + } + + return false; +} + +/* + * Transition Comparison. + */ + +/* Compare target partitions. Either pointer may be null. */ +int FsmGraph::comparePartPtr( FsmTrans *trans1, FsmTrans *trans2 ) +{ + if ( trans1 != 0 ) { + /* If trans1 is set then so should trans2. The initial partitioning + * guarantees this for us. 
*/ + if ( trans1->toState == 0 && trans2->toState != 0 ) + return -1; + else if ( trans1->toState != 0 && trans2->toState == 0 ) + return 1; + else if ( trans1->toState != 0 ) { + /* Both of targets are set. */ + return CmpOrd< MinPartition* >::compare( + trans1->toState->alg.partition, trans2->toState->alg.partition ); + } + } + return 0; +} + + +/* Compares two transition pointers according to priority and functions. + * Either pointer may be null. Does not consider to state or from state. */ +int FsmGraph::compareDataPtr( FsmTrans *trans1, FsmTrans *trans2 ) +{ + if ( trans1 == 0 && trans2 != 0 ) + return -1; + else if ( trans1 != 0 && trans2 == 0 ) + return 1; + else if ( trans1 != 0 ) { + /* Both of the transition pointers are set. */ + int compareRes = compareTransData( trans1, trans2 ); + if ( compareRes != 0 ) + return compareRes; + } + return 0; +} + +/* Compares two transitions according to target state, priority and functions. + * Does not consider from state. Either of the pointers may be null. */ +int FsmGraph::compareFullPtr( FsmTrans *trans1, FsmTrans *trans2 ) +{ + if ( (trans1 != 0) ^ (trans2 != 0) ) { + /* Exactly one of the transitions is set. */ + if ( trans1 != 0 ) + return -1; + else + return 1; + } + else if ( trans1 != 0 ) { + /* Both of the transition pointers are set. Test target state, + * priority and funcs. */ + if ( trans1->toState < trans2->toState ) + return -1; + else if ( trans1->toState > trans2->toState ) + return 1; + else if ( trans1->toState != 0 ) { + /* Test transition data. */ + int compareRes = compareTransData( trans1, trans2 ); + if ( compareRes != 0 ) + return compareRes; + } + } + return 0; +} + + +bool FsmGraph::shouldMarkPtr( MarkIndex &markIndex, FsmTrans *trans1, + FsmTrans *trans2 ) +{ + if ( (trans1 != 0) ^ (trans2 != 0) ) { + /* Exactly one of the transitions is set. The initial mark round + * should rule out this case. */ + assert( false ); + } + else if ( trans1 != 0 ) { + /* Both of the transitions are set. 
If the target pair is marked, then + * the pair we are considering gets marked. */ + return markIndex.isPairMarked( trans1->toState->alg.stateNum, + trans2->toState->alg.stateNum ); + } + + /* Neither of the transitiosn are set. */ + return false; +} + + diff --git a/src/global.h b/src/global.h new file mode 100644 index 00000000..d67c55e4 --- /dev/null +++ b/src/global.h @@ -0,0 +1,90 @@ +/* + * Copyright 2006-2012 Adrian Thurston <thurston@complang.org> + */ + +/* This file is part of Colm. + * + * Colm is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Colm is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Colm; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef __COLM_GLOBAL_H +#define __COLM_GLOBAL_H + +#include <stdio.h> +#include <iostream> +#include <fstream> +#include <fstream> +#include <string> + +#include "config.h" +#include "defs.h" +#include "avltree.h" +#include "keyops.h" + +#define PROGNAME "colm" + +/* IO filenames and stream. */ +extern bool genGraphviz; +extern int gblErrorCount; + +std::ostream &error(); + +/* IO filenames and stream. */ +extern const char *outputFileName; +extern std::ostream *outStream; +extern bool generateGraphviz; +extern bool branchPointInfo; +extern bool verbose, logging; +extern bool addUniqueEmptyProductions; + +extern int gblErrorCount; +extern char startDefName[]; + +/* Error reporting. 
*/ +std::ostream &error(); +std::ostream &error( int first_line, int first_column ); +std::ostream &warning( ); +std::ostream &warning( int first_line, int first_column ); + +extern std::ostream *outStream; +extern bool printStatistics; + +extern int gblErrorCount; +extern char machineMain[]; +extern bool gblLibrary; +extern const char *gblExportTo; + +/* Location in an input file. */ +struct InputLoc +{ + const char *fileName; + int line; + int col; +}; + +/* Error reporting. */ +std::ostream &error(); +std::ostream &error( const InputLoc &loc ); +std::ostream &warning( const InputLoc &loc ); + +void scan( char *fileName, std::istream &input, std::ostream &output ); +void terminateAllParsers( ); +void checkMachines( ); + +void xmlEscapeHost( std::ostream &out, char *data, int len ); +void openOutput(); +void escapeLiteralString( std::ostream &out, const char *data ); + +#endif diff --git a/src/input.c b/src/input.c new file mode 100644 index 00000000..d181b556 --- /dev/null +++ b/src/input.c @@ -0,0 +1,847 @@ +/* + * Copyright 2007-2012 Adrian Thurston <thurston@complang.org> + */ + +/* This file is part of Colm. + * + * Colm is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Colm is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with Colm; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <input.h> +#include <fsmrun.h> +#include <pdarun.h> +#include <debug.h> + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <assert.h> +#include <unistd.h> + +#define true 1 +#define false 0 + +RunBuf *newRunBuf() +{ + RunBuf *rb = (RunBuf*)malloc(sizeof(RunBuf)); + memset( rb, 0, sizeof(RunBuf) ); + return rb; +} + +void initFdFuncs(); +void initFileFuncs(); +void initPatternFuncs(); +void initReplFuncs(); + +struct SourceFuncs dynamicFuncs; +struct SourceFuncs fileFuncs; +struct SourceFuncs fdFuncs; + +void initSourceStream( SourceStream *inputStream ) +{ + /* FIXME: correct values here. */ + inputStream->line = 1; + inputStream->column = 1; + inputStream->byte = 0; +} + +void clearSourceStream( struct ColmProgram *prg, Tree **sp, SourceStream *sourceStream ) +{ + RunBuf *buf = sourceStream->queue; + while ( buf != 0 ) { + switch ( buf->type ) { + case RunBufDataType: + break; + + case RunBufTokenType: + case RunBufIgnoreType: + case RunBufSourceType: + treeDownref( prg, sp, buf->tree ); + break; + } + + RunBuf *next = buf->next; + free( buf ); + buf = next; + } + + sourceStream->queue = 0; +} + +SourceStream *newSourceStreamFile( FILE *file ) +{ + SourceStream *is = (SourceStream*)malloc(sizeof(SourceStream)); + memset( is, 0, sizeof(SourceStream) ); + is->line = 1; + is->column = 1; + is->file = file; + is->funcs = &fileFuncs; + return is; +} + +SourceStream *newSourceStreamFd( long fd ) +{ + SourceStream *is = (SourceStream*)malloc(sizeof(SourceStream)); + memset( is, 0, sizeof(SourceStream) ); + is->line = 1; + is->column = 1; + is->fd = fd; + is->funcs = &fdFuncs; + return is; +} + +static RunBuf *sourceStreamPopHead( SourceStream *is ) +{ + RunBuf *ret = is->queue; + is->queue = is->queue->next; + if ( is->queue == 0 
) + is->queueTail = 0; + else + is->queue->prev = 0; + return ret; +} + +static void sourceStreamAppend( SourceStream *is, RunBuf *runBuf ) +{ + if ( is->queue == 0 ) { + runBuf->prev = runBuf->next = 0; + is->queue = is->queueTail = runBuf; + } + else { + is->queueTail->next = runBuf; + runBuf->prev = is->queueTail; + runBuf->next = 0; + is->queueTail = runBuf; + } +} + +static void sourceStreamPrepend( SourceStream *is, RunBuf *runBuf ) +{ + if ( is->queue == 0 ) { + runBuf->prev = runBuf->next = 0; + is->queue = is->queueTail = runBuf; + } + else { + is->queue->prev = runBuf; + runBuf->prev = 0; + runBuf->next = is->queue; + is->queue = runBuf; + } +} + +void initInputFuncs() +{ + initFdFuncs(); + initFileFuncs(); + initPatternFuncs(); + initReplFuncs(); +} + +/* + * Base run-time input streams. + */ + +int fdGetData( SourceStream *is, int skip, char *dest, int length, int *copied ) +{ + int ret = 0; + *copied = 0; + + if ( skip == 9 && length == 6 ) { + debug( REALM_INPUT, "foo\n" ); + } + + /* Move over skip bytes. */ + RunBuf *buf = is->queue; + while ( true ) { + if ( buf == 0 ) { + /* Got through the in-mem buffers without copying anything. */ + RunBuf *runBuf = newRunBuf(); + sourceStreamAppend( is, runBuf ); + int received = is->funcs->getDataImpl( is, runBuf->data, FSM_BUFSIZE ); + if ( received == 0 ) { + ret = INPUT_EOD; + break; + } + runBuf->length = received; + + int slen = received < length ? received : length; + memcpy( dest, runBuf->data, slen ); + *copied = slen; + ret = INPUT_DATA; + break; + } + + int avail = buf->length - buf->offset; + + /* Anything available in the current buffer. */ + if ( avail > 0 ) { + /* The source data from the current buffer. */ + char *src = &buf->data[buf->offset]; + + /* Need to skip? */ + if ( skip > 0 && skip >= avail ) { + /* Skipping the the whole source. */ + skip -= avail; + } + else { + /* Either skip is zero, or less than slen. Skip goes to zero. + * Some data left over, copy it. 
*/ + src += skip; + avail -= skip; + skip = 0; + + int slen = avail < length ? avail : length; + memcpy( dest, src, slen ) ; + *copied += slen; + ret = INPUT_DATA; + break; + } + } + + buf = buf->next; + } + + return ret; +} + +int fdConsumeData( SourceStream *is, int length ) +{ + debug( REALM_INPUT, "source consuming %ld bytes\n", length ); + + int consumed = 0; + + /* Move over skip bytes. */ + while ( true ) { + RunBuf *buf = is->queue; + + if ( buf == 0 ) + break; + + if ( buf->type == RunBufTokenType ) + break; + else if ( buf->type == RunBufIgnoreType ) + break; + else { + /* Anything available in the current buffer. */ + int avail = buf->length - buf->offset; + if ( avail > 0 ) { + /* The source data from the current buffer. */ + int slen = avail <= length ? avail : length; + debug( REALM_INPUT, "consumed: %.*s\n", slen, buf->data + buf->offset ); + consumed += slen; + length -= slen; + buf->offset += slen; + } + } + + if ( length == 0 ) + break; + + RunBuf *runBuf = sourceStreamPopHead( is ); + free( runBuf ); + } + + return consumed; +} + +int fdUndoConsumeData( SourceStream *is, const char *data, int length ) +{ + debug( REALM_INPUT, "undoing consume of %ld bytes\n", length ); + + RunBuf *newBuf = newRunBuf(); + newBuf->length = length; + memcpy( newBuf->data, data, length ); + sourceStreamPrepend( is, newBuf ); + + return length; +} + +/* + * File + */ + +int fileGetDataImpl( SourceStream *is, char *dest, int length ) +{ + debug( REALM_INPUT, "inputStreamFileGetDataImpl length = %ld\n", length ); + size_t res = fread( dest, 1, length, is->file ); + return res; +} + +void initFileFuncs() +{ + memset( &fileFuncs, 0, sizeof(struct SourceFuncs) ); + fileFuncs.getData = &fdGetData; + fileFuncs.consumeData = &fdConsumeData; + fileFuncs.undoConsumeData = &fdUndoConsumeData; + fileFuncs.getDataImpl = &fileGetDataImpl; +} + +/* + * FD + */ + +int fdGetDataImpl( SourceStream *is, char *dest, int length ) +{ + long got = read( is->fd, dest, length ); + return got; 
+} + +void initFdFuncs() +{ + memset( &fdFuncs, 0, sizeof(struct SourceFuncs) ); + fdFuncs.getData = &fdGetData; + fdFuncs.consumeData = &fdConsumeData; + fdFuncs.undoConsumeData = &fdUndoConsumeData; + fdFuncs.getDataImpl = &fdGetDataImpl; +} + +/* + * InputStream struct, this wraps the list of input streams. + */ + +void initInputStream( InputStream *inputStream ) +{ + memset( inputStream, 0, sizeof(InputStream) ); + + /* FIXME: correct values here. */ + inputStream->line = 1; + inputStream->column = 1; + inputStream->byte = 0; +} + +void clearInputStream( struct ColmProgram *prg, Tree **sp, InputStream *inputStream ) +{ + RunBuf *buf = inputStream->queue; + while ( buf != 0 ) { + switch ( buf->type ) { + case RunBufDataType: + break; + + case RunBufTokenType: + case RunBufIgnoreType: + case RunBufSourceType: + treeDownref( prg, sp, buf->tree ); + break; + } + + RunBuf *next = buf->next; + free( buf ); + buf = next; + } + + inputStream->queue = 0; +} + +static void inputStreamPrepend( InputStream *is, RunBuf *runBuf ) +{ + if ( is->queue == 0 ) { + runBuf->prev = runBuf->next = 0; + is->queue = is->queueTail = runBuf; + } + else { + is->queue->prev = runBuf; + runBuf->prev = 0; + runBuf->next = is->queue; + is->queue = runBuf; + } +} + +static RunBuf *inputStreamPopHead( InputStream *is ) +{ + RunBuf *ret = is->queue; + is->queue = is->queue->next; + if ( is->queue == 0 ) + is->queueTail = 0; + else + is->queue->prev = 0; + return ret; +} + +static void inputStreamAppend( InputStream *is, RunBuf *runBuf ) +{ + if ( is->queue == 0 ) { + runBuf->prev = runBuf->next = 0; + is->queue = is->queueTail = runBuf; + } + else { + is->queueTail->next = runBuf; + runBuf->prev = is->queueTail; + runBuf->next = 0; + is->queueTail = runBuf; + } +} + +static RunBuf *inputStreamPopTail( InputStream *is ) +{ + RunBuf *ret = is->queueTail; + is->queueTail = is->queueTail->prev; + if ( is->queueTail == 0 ) + is->queue = 0; + else + is->queueTail->next = 0; + return ret; +} + +static 
int isSourceStream( InputStream *is ) +{ + if ( is->queue != 0 && is->queue->type == RunBufSourceType ) + return true; + return false; +} + +void setEof( InputStream *is ) +{ + debug( REALM_INPUT, "setting EOF in input stream\n" ); + is->eof = true; +} + +void unsetEof( InputStream *is ) +{ + if ( isSourceStream( is ) ) { + Stream *stream = (Stream*)is->queue->tree; + stream->in->eof = false; + } + else { + is->eof = false; + } +} + +int getData( FsmRun *fsmRun, InputStream *is, int skip, char *dest, int length, int *copied ) +{ + int ret = 0; + *copied = 0; + + /* Walk the queue, passing over the first skip bytes of buffered data. */ + RunBuf *buf = is->queue; + while ( true ) { + if ( buf == 0 ) { + /* Got through the in-mem buffers without copying anything. */ + ret = is->eof ? INPUT_EOF : INPUT_EOD; + break; + } + + if ( buf->type == RunBufSourceType ) { + Stream *stream = (Stream*)buf->tree; + int type = stream->in->funcs->getData( stream->in, skip, dest, length, copied ); + + attachSource( fsmRun, stream->in ); + + if ( type == INPUT_EOD && is->eof ) { + ret = INPUT_EOF; + break; + } + + ret = type; + break; + } + + if ( buf->type == RunBufTokenType ) { + ret = INPUT_TREE; + break; + } + + if ( buf->type == RunBufIgnoreType ) { + ret = INPUT_IGNORE; + break; + } + + int avail = buf->length - buf->offset; + + /* Anything available in the current buffer. */ + if ( avail > 0 ) { + /* The source data from the current buffer. */ + char *src = &buf->data[buf->offset]; + + /* Need to skip? */ + if ( skip > 0 && skip >= avail ) { + /* Skipping the whole of this buffer. */ + skip -= avail; + } + else { + /* Either skip is zero, or less than avail. Skip goes to zero. + * Some data left over, copy it. */ + src += skip; + avail -= skip; + skip = 0; + + int slen = avail <= length ? 
avail : length; + memcpy( dest, src, slen ) ; + *copied += slen; + ret = INPUT_DATA; + break; + } + } + + buf = buf->next; + } + + attachInput( fsmRun, is ); + +#if DEBUG + switch ( ret ) { + case INPUT_DATA: + debug( REALM_INPUT, "get data: DATA copied: %d: %.*s\n", *copied, (int)*copied, dest ); + break; + case INPUT_EOD: + debug( REALM_INPUT, "get data: EOD\n" ); + break; + case INPUT_EOF: + debug( REALM_INPUT, "get data: EOF\n" ); + break; + case INPUT_TREE: + debug( REALM_INPUT, "get data: TREE\n" ); + break; + case INPUT_IGNORE: + debug( REALM_INPUT, "get data: IGNORE\n" ); + break; + case INPUT_LANG_EL: + debug( REALM_INPUT, "get data: LANG_EL\n" ); + break; + } +#endif + + return ret; +} + +int consumeData( InputStream *is, int length ) +{ + debug( REALM_INPUT, "consuming %d bytes\n", length ); + + int consumed = 0; + + /* Move over skip bytes. */ + while ( true ) { + RunBuf *buf = is->queue; + + if ( buf == 0 ) + break; + + if ( buf->type == RunBufSourceType ) { + Stream *stream = (Stream*)buf->tree; + int slen = stream->in->funcs->consumeData( stream->in, length ); + + consumed += slen; + length -= slen; + } + else if ( buf->type == RunBufTokenType ) + break; + else if ( buf->type == RunBufIgnoreType ) + break; + else { + /* Anything available in the current buffer. */ + int avail = buf->length - buf->offset; + if ( avail > 0 ) { + /* The source data from the current buffer. */ + int slen = avail <= length ? 
avail : length; + consumed += slen; + length -= slen; + buf->offset += slen; + } + } + + if ( length == 0 ) + break; + + RunBuf *runBuf = inputStreamPopHead( is ); + free( runBuf ); + } + + return consumed; +} + +int undoConsumeData( FsmRun *fsmRun, InputStream *is, const char *data, int length ) +{ + debug( REALM_INPUT, "undoing consume of %ld bytes\n", length ); + + if ( isSourceStream( is ) ) { + Stream *stream = (Stream*)is->queue->tree; + int len = stream->in->funcs->undoConsumeData( stream->in, data, length ); + + if ( stream->in->attached != 0 ) + detachSource( stream->in->attached, stream->in ); + + return len; + } + else { + RunBuf *newBuf = newRunBuf(); + newBuf->length = length; + memcpy( newBuf->data, data, length ); + inputStreamPrepend( is, newBuf ); + + if ( is->attached != 0 ) + detachInput( is->attached, is ); + + return length; + } +} + +Tree *consumeTree( InputStream *is ) +{ + while ( is->queue != 0 && is->queue->type == RunBufDataType && is->queue->offset == is->queue->length ) { + RunBuf *runBuf = inputStreamPopHead( is ); + free( runBuf ); + } + + if ( is->queue != 0 && (is->queue->type == RunBufTokenType || is->queue->type == RunBufIgnoreType) ) { + RunBuf *runBuf = inputStreamPopHead( is ); + + /* FIXME: using runbufs here for this is a poor use of memory. */ + Tree *tree = runBuf->tree; + free(runBuf); + return tree; + } + + return 0; +} + +void undoConsumeTree( InputStream *is, Tree *tree, int ignore ) +{ + if ( is->attached != 0 ) + detachInput( is->attached, is ); + + /* Create a new buffer for the data. This is the easy implementation. + * Something better is needed here. It puts a max on the amount of + * data that can be pushed back to the inputStream. */ + RunBuf *newBuf = newRunBuf(); + newBuf->type = ignore ? 
RunBufIgnoreType : RunBufTokenType; + newBuf->tree = tree; + inputStreamPrepend( is, newBuf ); +} + +struct LangEl *consumeLangEl( InputStream *is, long *bindId, char **data, long *length ) +{ + if ( isSourceStream( is ) ) { + Stream *stream = (Stream*)is->queue->tree; + return stream->in->funcs->consumeLangEl( stream->in, bindId, data, length ); + } + else { + assert( false ); + } +} + +void undoConsumeLangEl( InputStream *is ) +{ + if ( isSourceStream( is ) ) { + Stream *stream = (Stream*)is->queue->tree; + return stream->in->funcs->undoConsumeLangEl( stream->in ); + } + else { + assert( false ); + } +} + +void prependData( InputStream *is, const char *data, long length ) +{ + if ( is->attached != 0 ) + detachInput( is->attached, is ); + + /* Create a new buffer for the data. This is the easy implementation. + * Something better is needed here. It puts a max on the amount of + * data that can be pushed back to the inputStream. */ + assert( length < FSM_BUFSIZE ); + + RunBuf *newBuf = newRunBuf(); + newBuf->length = length; + memcpy( newBuf->data, data, length ); + + inputStreamPrepend( is, newBuf ); +} + +int undoPrependData( InputStream *is, int length ) +{ + if ( is->attached != 0 ) + detachInput( is->attached, is ); + + debug( REALM_INPUT, "consuming %d bytes\n", length ); + + int consumed = 0; + + /* Move over skip bytes. */ + while ( true ) { + RunBuf *buf = is->queue; + + if ( buf == 0 ) + break; + + if ( buf->type == RunBufSourceType ) { + Stream *stream = (Stream*)buf->tree; + int slen = stream->in->funcs->consumeData( stream->in, length ); + + consumed += slen; + length -= slen; + } + else if ( buf->type == RunBufTokenType ) + break; + else if ( buf->type == RunBufIgnoreType ) + break; + else { + /* Anything available in the current buffer. */ + int avail = buf->length - buf->offset; + if ( avail > 0 ) { + /* The source data from the current buffer. */ + int slen = avail <= length ? 
avail : length; + consumed += slen; + length -= slen; + buf->offset += slen; + } + } + + if ( length == 0 ) + break; + + RunBuf *runBuf = inputStreamPopHead( is ); + free( runBuf ); + } + + return consumed; +} + +void prependTree( InputStream *is, Tree *tree, int ignore ) +{ + if ( is->attached != 0 ) + detachInput( is->attached, is ); + + /* Create a new buffer for the data. This is the easy implementation. + * Something better is needed here. It puts a max on the amount of + * data that can be pushed back to the inputStream. */ + RunBuf *newBuf = newRunBuf(); + newBuf->type = ignore ? RunBufIgnoreType : RunBufTokenType; + newBuf->tree = tree; + inputStreamPrepend( is, newBuf ); +} + +Tree *undoPrependTree( InputStream *is ) +{ + if ( is->attached != 0 ) + detachInput( is->attached, is ); + + while ( is->queue != 0 && is->queue->type == RunBufDataType && is->queue->offset == is->queue->length ) { + RunBuf *runBuf = inputStreamPopHead( is ); + free( runBuf ); + } + + if ( is->queue != 0 && (is->queue->type == RunBufTokenType || is->queue->type == RunBufIgnoreType) ) { + RunBuf *runBuf = inputStreamPopHead( is ); + + /* FIXME: using runbufs here for this is a poor use of memory. */ + Tree *tree = runBuf->tree; + free(runBuf); + return tree; + } + + return 0; +} + +void appendData( InputStream *is, const char *data, long len ) +{ + while ( len > 0 ) { + RunBuf *ad = newRunBuf(); + inputStreamAppend( is, ad ); + + long consume = + len <= (long)sizeof(ad->data) ? + len : (long)sizeof(ad->data); + + memcpy( ad->data, data, consume ); + ad->length = consume; + + len -= consume; + data += consume; + } +} + +Tree *undoAppendData( InputStream *is, int length ) +{ + if ( is->attached != 0 ) + detachInput( is->attached, is ); + + int consumed = 0; + + /* Move over skip bytes. 
*/ + while ( true ) { + RunBuf *buf = is->queueTail; + + if ( buf == 0 ) + break; + + if ( buf->type == RunBufTokenType ) + break; + else if ( buf->type == RunBufIgnoreType ) + break; + else { + /* Anything available in the current buffer. */ + int avail = buf->length - buf->offset; + if ( avail > 0 ) { + /* The source data from the current buffer. */ + int slen = avail <= length ? avail : length; + consumed += slen; + length -= slen; + buf->length -= slen; + } + } + + if ( length == 0 ) + break; + + RunBuf *runBuf = inputStreamPopTail( is ); + free( runBuf ); + } + + return 0; +} + +void appendTree( InputStream *is, Tree *tree ) +{ + RunBuf *ad = newRunBuf(); + + inputStreamAppend( is, ad ); + + ad->type = RunBufTokenType; + ad->tree = tree; + ad->length = 0; +} + +void appendStream( InputStream *in, struct ColmTree *tree ) +{ + RunBuf *ad = newRunBuf(); + + inputStreamAppend( in, ad ); + + ad->type = RunBufSourceType; + ad->tree = tree; + ad->length = 0; +} + +Tree *undoAppendStream( InputStream *is ) +{ + if ( is->attached != 0 ) + detachInput( is->attached, is ); + + RunBuf *runBuf = inputStreamPopTail( is ); + Tree *tree = runBuf->tree; + free( runBuf ); + return tree; +} + +Tree *undoAppendTree( InputStream *is ) +{ + if ( is->attached != 0 ) + detachInput( is->attached, is ); + + RunBuf *runBuf = inputStreamPopTail( is ); + Tree *tree = runBuf->tree; + free( runBuf ); + return tree; +} diff --git a/src/input.h b/src/input.h new file mode 100644 index 00000000..882c6b31 --- /dev/null +++ b/src/input.h @@ -0,0 +1,214 @@ +/* + * Copyright 2007-2012 Adrian Thurston <thurston@complang.org> + */ + +/* This file is part of Colm. + * + * Colm is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * Colm is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Colm; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _INPUT_H +#define _INPUT_H + +#include <stdio.h> + +#ifdef __cplusplus +extern "C" { +#endif + +#define FSM_BUFSIZE 8192 +//#define FSM_BUFSIZE 8 + +#define INPUT_DATA 1 +/* This is for data sources to return, not for the wrapper. */ +#define INPUT_EOD 2 +#define INPUT_EOF 3 +#define INPUT_LANG_EL 4 +#define INPUT_TREE 5 +#define INPUT_IGNORE 6 + +/* + * pdaRun <- fsmRun <- stream + * + * Activities we need to support: + * + * 1. Stuff data into an input stream each time we << + * 2. Detach an input stream, and attach another when we include + * 3. Send data back to an input stream when the parser backtracks + * 4. Temporarily stop parsing due to a lack of input. + * + * At any given time, the fsmRun struct may have a prefix of the stream's + * input. If getting data we first get what we can out of the fsmRun, then + * consult the stream. If sending data back, we first shift pointers in the + * fsmRun, then ship to the stream. If changing streams the old stream needs to + * take back unprocessed data from the fsmRun. 
+ */ + +struct LangEl; +struct Pattern; +struct PatternItem; +struct Replacement; +struct ReplItem; +struct _FsmRun; +struct ColmTree; + +enum RunBufType { + RunBufDataType = 0, + RunBufTokenType, + RunBufIgnoreType, + RunBufSourceType +}; + +typedef struct _RunBuf +{ + enum RunBufType type; + char data[FSM_BUFSIZE]; + long length; + struct ColmTree *tree; + long offset; + struct _RunBuf *next, *prev; +} RunBuf; + +RunBuf *newRunBuf(); + +typedef struct _SourceStream SourceStream; + +struct SourceFuncs +{ + /* Data. */ + int (*getData)( SourceStream *is, int offset, char *dest, int length, int *copied ); + int (*consumeData)( SourceStream *is, int length ); + int (*undoConsumeData)( SourceStream *is, const char *data, int length ); + + /* Language elements (compile-time). */ + struct LangEl *(*consumeLangEl)( SourceStream *is, long *bindId, char **data, long *length ); + void (*undoConsumeLangEl)( SourceStream *is ); + + /* Private implementation for some shared get data functions. */ + int (*getDataImpl)( SourceStream *is, char *dest, int length ); +}; + +struct _SourceStream +{ + struct SourceFuncs *funcs; + + struct _FsmRun *hasData; + + char eofSent; + char eof; + + long line; + long column; + long byte; + + /* This is set true for input streams that do their own line counting. + * Causes FsmRun to ignore NLs. */ + int handlesLine; + + RunBuf *queue; + RunBuf *queueTail; + + const char *data; + long dlen; + int offset; + + FILE *file; + long fd; + + struct Pattern *pattern; + struct PatternItem *patItem; + struct Replacement *replacement; + struct ReplItem *replItem; + + struct _FsmRun *attached; +}; + +SourceStream *newSourceStreamPattern( struct Pattern *pattern ); +SourceStream *newSourceStreamRepl( struct Replacement *replacement ); +SourceStream *newSourceStreamFile( FILE *file ); +SourceStream *newSourceStreamFd( long fd ); + +void initInputFuncs(); +void initStaticFuncs(); +void initPatternFuncs(); +void initReplFuncs(); + +/* List of input streams. 
Enables streams to be pushed/popped. */ +struct _InputStream +{ + char eofSent; + char eof; + + long line; + long column; + long byte; + + /* This is set true for input streams that do their own line counting. + * Causes FsmRun to ignore NLs. */ + int handlesLine; + + RunBuf *queue; + RunBuf *queueTail; + + const char *data; + long dlen; + int offset; + + FILE *file; + long fd; + + struct Pattern *pattern; + struct PatternItem *patItem; + struct Replacement *replacement; + struct ReplItem *replItem; + + struct _FsmRun *attached; +}; + +typedef struct _InputStream InputStream; + +/* The input stream interface. */ + +int getData( struct _FsmRun *fsmRun, InputStream *in, int offset, char *dest, int length, int *copied ); +int consumeData( InputStream *in, int length ); +int undoConsumeData( struct _FsmRun *fsmRun, InputStream *is, const char *data, int length ); + +struct ColmTree *consumeTree( InputStream *in ); +void undoConsumeTree( InputStream *in, struct ColmTree *tree, int ignore ); + +struct LangEl *consumeLangEl( InputStream *in, long *bindId, char **data, long *length ); +void undoConsumeLangEl( InputStream *in ); + +void setEof( InputStream *is ); +void unsetEof( InputStream *is ); + +void prependData( InputStream *in, const char *data, long len ); +int undoPrependData( InputStream *is, int length ); + +void prependTree( InputStream *is, struct ColmTree *tree, int ignore ); +struct ColmTree *undoPrependTree( InputStream *is ); + +void appendData( InputStream *in, const char *data, long len ); +void appendTree( InputStream *in, struct ColmTree *tree ); +void appendStream( InputStream *in, struct ColmTree *tree ); +struct ColmTree *undoAppendData( InputStream *in, int length ); +struct ColmTree *undoAppendStream( InputStream *in ); +struct ColmTree *undoAppendTree( InputStream *in ); + +#ifdef __cplusplus +} +#endif + +#endif /* _INPUT_H */ diff --git a/src/keyops.h b/src/keyops.h new file mode 100644 index 00000000..1808c6a6 --- /dev/null +++ b/src/keyops.h 
@@ -0,0 +1,283 @@ +/* + * Copyright 2001-2012 Adrian Thurston <thurston@complang.org> + */ + +/* This file is part of Colm. + * + * Colm is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Colm is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Colm; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _KEYOPS_H +#define _KEYOPS_H + +#include <fstream> +#include <climits> + +enum MarkType +{ + MarkNone = 0, + MarkMark +}; + +typedef unsigned long long Size; + +struct Key +{ +private: + long key; + +public: + friend inline Key operator+(const Key key1, const Key key2); + friend inline Key operator-(const Key key1, const Key key2); + friend inline Key operator/(const Key key1, const Key key2); + friend inline long operator&(const Key key1, const Key key2); + + friend inline bool operator<( const Key key1, const Key key2 ); + friend inline bool operator<=( const Key key1, const Key key2 ); + friend inline bool operator>( const Key key1, const Key key2 ); + friend inline bool operator>=( const Key key1, const Key key2 ); + friend inline bool operator==( const Key key1, const Key key2 ); + friend inline bool operator!=( const Key key1, const Key key2 ); + + friend struct KeyOps; + + Key( ) {} + Key( const Key &key ) : key(key.key) {} + Key( long key ) : key(key) {} + + /* Returns the value used to represent the key. This value must be + * interpreted based on signedness. 
*/ + long getVal() const { return key; }; + + /* Returns the key cast to a long long. This form of the key does not + * require any signedness interpretation. */ + long long getLongLong() const; + + bool isUpper() const { return ( 'A' <= key && key <= 'Z' ); } + bool isLower() const { return ( 'a' <= key && key <= 'z' ); } + bool isPrintable() const { return ( 32 <= key && key < 127 ); } + + Key toUpper() const + { return Key( 'A' + ( key - 'a' ) ); } + Key toLower() const + { return Key( 'a' + ( key - 'A' ) ); } + + void operator+=( const Key other ) + { + /* FIXME: must be made aware of isSigned. */ + key += other.key; + } + + void operator-=( const Key other ) + { + /* FIXME: must be made aware of isSigned. */ + key -= other.key; + } + + void operator|=( const Key other ) + { + /* FIXME: must be made aware of isSigned. */ + key |= other.key; + } + + /* Decrement and increment. Needed only for ranges. */ + inline void decrement(); + inline void increment(); +}; + +struct HostType +{ + const char *data1; + const char *data2; + bool isSigned; + long long minVal; + long long maxVal; + unsigned int size; +}; + +struct HostLang +{ + HostType *hostTypes; + int numHostTypes; + HostType *defaultAlphType; + bool explicitUnsigned; +}; + + +/* Target language. */ +enum HostLangType +{ + CCode, + DCode, + JavaCode, + RubyCode +}; + +extern HostLang *hostLang; +extern HostLangType hostLangType; + +extern HostLang hostLangC; +extern HostLang hostLangD; +extern HostLang hostLangJava; +extern HostLang hostLangRuby; + +/* An abstraction of the key operators that manages key operations such as + * comparison and increment according to the signedness of the key. */ +struct KeyOps +{ + /* Default to signed alphabet. */ + KeyOps() : + isSigned(true), + alphType(0) + {} + + /* Construct with an explicit signedness. 
*/ + KeyOps( bool isSigned ) + :isSigned(isSigned), alphType(0) {} + + bool isSigned; + Key minKey, maxKey; + HostType *alphType; + + /* Adopt the given host type as the alphabet type: take its signedness + * and derive minKey/maxKey from its value range. */ + void setAlphType( HostType *alphType ) + { + this->alphType = alphType; + isSigned = alphType->isSigned; + if ( isSigned ) { + minKey = (long) alphType->minVal; + maxKey = (long) alphType->maxVal; + } + else { + minKey = (long) (unsigned long) alphType->minVal; + maxKey = (long) (unsigned long) alphType->maxVal; + } + } + + /* Compute the distance between two keys. */ + Size span( Key key1, Key key2 ) + { + return isSigned ? + (unsigned long long)( + (long long)key2.key - + (long long)key1.key + 1) : + (unsigned long long)( + (unsigned long)key2.key) - + (unsigned long long)((unsigned long)key1.key) + 1; + } + + Size alphSize() + { return span( minKey, maxKey ); } + + /* First host type whose maxVal is at least maxVal, or 0 if none. */ + HostType *typeSubsumes( long long maxVal ) + { + for ( int i = 0; i < hostLang->numHostTypes; i++ ) { + if ( maxVal <= hostLang->hostTypes[i].maxVal ) + return hostLang->hostTypes + i; + } + return 0; + } + + /* As above, but when isSigned is set only signed host types qualify. */ + HostType *typeSubsumes( bool isSigned, long long maxVal ) + { + for ( int i = 0; i < hostLang->numHostTypes; i++ ) { + if ( ( (isSigned && hostLang->hostTypes[i].isSigned) || !isSigned ) && + maxVal <= hostLang->hostTypes[i].maxVal ) + return hostLang->hostTypes + i; + } + return 0; + } +}; + +extern KeyOps *keyOps; + +inline bool operator<( const Key key1, const Key key2 ) +{ + return keyOps->isSigned ? key1.key < key2.key : + (unsigned long)key1.key < (unsigned long)key2.key; +} + +inline bool operator<=( const Key key1, const Key key2 ) +{ + return keyOps->isSigned ? key1.key <= key2.key : + (unsigned long)key1.key <= (unsigned long)key2.key; +} + +inline bool operator>( const Key key1, const Key key2 ) +{ + return keyOps->isSigned ? key1.key > key2.key : + (unsigned long)key1.key > (unsigned long)key2.key; +} + +inline bool operator>=( const Key key1, const Key key2 ) +{ + return keyOps->isSigned ? 
key1.key >= key2.key : + (unsigned long)key1.key >= (unsigned long)key2.key; +} + +inline bool operator==( const Key key1, const Key key2 ) +{ + return key1.key == key2.key; +} + +inline bool operator!=( const Key key1, const Key key2 ) +{ + return key1.key != key2.key; +} + +/* Decrement. Needed only for ranges. */ +inline void Key::decrement() +{ + key = keyOps->isSigned ? key - 1 : ((unsigned long)key)-1; +} + +/* Increment. Needed only for ranges. */ +inline void Key::increment() +{ + key = keyOps->isSigned ? key+1 : ((unsigned long)key)+1; +} + +inline long long Key::getLongLong() const +{ + return keyOps->isSigned ? (long long)key : (long long)(unsigned long)key; +} + +inline Key operator+(const Key key1, const Key key2) +{ + /* FIXME: must be made aware of isSigned. */ + return Key( key1.key + key2.key ); +} + +inline Key operator-(const Key key1, const Key key2) +{ + /* FIXME: must be made aware of isSigned. */ + return Key( key1.key - key2.key ); +} + +inline long operator&(const Key key1, const Key key2) +{ + /* FIXME: must be made aware of isSigned. */ + return key1.key & key2.key; +} + +inline Key operator/(const Key key1, const Key key2) +{ + /* FIXME: must be made aware of isSigned. */ + return key1.key / key2.key; +} + +const char *findFileExtension( const char *stemFile ); +char *fileNameFromStem( const char *stemFile, const char *suffix ); + +#endif /* _KEYOPS_H */ diff --git a/src/list.c b/src/list.c new file mode 100644 index 00000000..f57109e1 --- /dev/null +++ b/src/list.c @@ -0,0 +1,105 @@ +/* + * Copyright 2007-2012 Adrian Thurston <thurston@complang.org> + */ + +/* This file is part of Colm. + * + * Colm is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * Colm is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Colm; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <pdarun.h> + +void listAddAfter( List *list, ListEl *prev_el, ListEl *new_el ) +{ + /* Set the previous pointer of new_el to prev_el. We do + * this regardless of the state of the list. */ + new_el->prev = prev_el; + + /* Set forward pointers. */ + if (prev_el == 0) { + /* There was no prev_el, we are inserting at the head. */ + new_el->next = list->head; + list->head = new_el; + } + else { + /* There was a prev_el, we can access previous next. */ + new_el->next = prev_el->next; + prev_el->next = new_el; + } + + /* Set reverse pointers. */ + if (new_el->next == 0) { + /* There is no next element. Set the tail pointer. */ + list->tail = new_el; + } + else { + /* There is a next element. Set its prev pointer. */ + new_el->next->prev = new_el; + } + + /* Update list length. */ + list->listLen++; +} + +void listAddBefore( List *list, ListEl *next_el, ListEl *new_el) +{ + /* Set the next pointer of the new element to next_el. We do + * this regardless of the state of the list. */ + new_el->next = next_el; + + /* Set reverse pointers. */ + if (next_el == 0) { + /* There is no next element. We are inserting at the tail. */ + new_el->prev = list->tail; + list->tail = new_el; + } + else { + /* There is a next element and we can access next's previous. */ + new_el->prev = next_el->prev; + next_el->prev = new_el; + } + + /* Set forward pointers. */ + if (new_el->prev == 0) { + /* There is no previous element. 
Set the head pointer.*/ + list->head = new_el; + } + else { + /* There is a previous element, set its next pointer to new_el. */ + new_el->prev->next = new_el; + } + + list->listLen++; +} + +ListEl *listDetach( List *list, ListEl *el ) +{ + /* Set forward pointers to skip over el. */ + if (el->prev == 0) + list->head = el->next; + else + el->prev->next = el->next; + + /* Set reverse pointers to skip over el. */ + if (el->next == 0) + list->tail = el->prev; + else + el->next->prev = el->prev; + + /* Update List length and return element we detached. */ + list->listLen--; + return el; +} + diff --git a/src/lmparse.kh b/src/lmparse.kh new file mode 100644 index 00000000..86b70b6f --- /dev/null +++ b/src/lmparse.kh @@ -0,0 +1,120 @@ +/* + * Copyright 2001-2007 Adrian Thurston <thurston@complang.org> + */ + +/* This file is part of Colm. + * + * Colm is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Colm is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Colm; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef RLPARSE_H +#define RLPARSE_H + +#include <iostream> +#include "avltree.h" +#include "parsedata.h" + +#define PROPERTY_REDUCE_FIRST 0x1 + +struct ColmParser +{ + ColmParser( Compiler *pd, const char *fileName, const char *sectionName, const InputLoc &sectionLoc ) + : pd(pd), sectionName(sectionName), enterRl(false) + {} + + %%{ + parser ColmParser; + + # Use a class for tokens. + token uses class Token; + + # Atoms. 
+ token TK_Word, TK_Literal, TK_SingleLit, TK_DoubleLit, TK_Number, TK_UInt, + TK_Hex, KW_Nil, KW_True, KW_False; + + # General tokens. + token TK_DotDot, TK_ColonGt, TK_ColonGtGt, TK_LtColon, + TK_DoubleArrow, TK_StarStar, TK_NameSep, TK_DashDash, TK_DoubleEql, + TK_NotEql, TK_DoubleColon, TK_LessEql, TK_GrtrEql, TK_RightArrow, + TK_LitPat, TK_AmpAmp, TK_BarBar, TK_SqOpen, TK_SqOpenNeg, TK_SqClose, + TK_Dash, TK_ReChar, TK_LtLt; + + # Defining things. + token KW_Rl, KW_Def, KW_Lex, KW_Context, KW_Ignore, KW_Token, KW_Commit, KW_Namespace, + KW_Literal, KW_ReduceFirst, KW_Map, KW_List, KW_Vector, KW_Accum, KW_Global, KW_Export, + KW_Iter, KW_Reject, KW_Ptr, KW_Ref, KW_Deref; + + # Language. + token KW_If, KW_While, KW_Else, KW_Elsif, KW_For, KW_Return, KW_Yield, KW_In, + KW_Break, KW_PrintXMLAC, KW_PrintXML, KW_Print, KW_PrintStream, KW_Require; + + # Patterns. + token KW_Match, KW_Construct, KW_Parse, KW_ParseStop, KW_New, KW_MakeToken, + KW_MakeTree, KW_TypeId, KW_Alias, KW_Send, KW_Ni, KW_Ci; + + token KW_Include, KW_Preeof; + + token KW_Left, KW_Right, KW_Nonassoc, KW_Prec; + + }%% + + %% write instance_data; + + + void init(); + int parseLangEl( int type, const Token *token ); + + int token( InputLoc &loc, int tokId, char *tokstart, int toklen ); + void addRegularDef( const InputLoc &loc, Namespace *nspace, + const String &name, Join *join ); + TokenRegion *createRegion( String &name ); + void addRegionDef( const InputLoc &loc, Namespace *nspace, + const String &name, TokenRegion *join ); + void addProduction( const InputLoc &loc, const String &name, + ProdElList *prodElList, bool commit, CodeBlock *redBlock, LangEl *predOf ); + void addArgvList(); + + /* Report an error encountered by the parser. */ + ostream &parse_error( int tokId, Token &token ); + + Compiler *pd; + + /* The name of the root section, this does not change during an include. 
*/ + const char *sectionName; + + NameRef nameRef; + NameRefList nameRefList; + + LangElVect langElVect; + + PatternItemList *patternItemList; + ReplItemList *replItemList; + RegionVect regionStack; + NamespaceVect namespaceStack; + ContextVect contextStack; + + String curDefineId; + LelDefList *curDefList; + ProdElList *curProdElList; + + PredType predType; + ReCaptureVect reCaptureVect; + + bool enterRl; +}; + +%% write token_defs; + +#endif diff --git a/src/lmparse.kl b/src/lmparse.kl new file mode 100644 index 00000000..3ead7c98 --- /dev/null +++ b/src/lmparse.kl @@ -0,0 +1,2677 @@ +/* + * Copyright 2006-2012 Adrian Thurston <thurston@complang.org> + */ + +/* This file is part of Colm. + * + * Colm is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Colm is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with Colm; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <iostream> +#include <errno.h> + +#include "config.h" +#include "lmparse.h" +#include "global.h" +#include "input.h" +#include "fsmrun.h" + +using std::cout; +using std::cerr; +using std::endl; + +ParserDict parserDict; + +%%{ + +parser ColmParser; + +include "lmparse.kh"; + +start: root_item_list + final { + if ( colm_log_compile ) { + cerr << "parsing complete" << endl; + } + + pd->rootCodeBlock = new CodeBlock( $1->stmtList ); + }; + +nonterm root_item_list uses lang_stmt_list; + +root_item_list: root_item_list root_item + final { + $$->stmtList = $1->stmtList; + + /* Maybe a statement. */ + if ( $2->stmt != 0 ) + $$->stmtList->append( $2->stmt ); + }; + +root_item_list: + final { + $$->stmtList = new StmtList; + }; + +nonterm root_item uses statement; + +root_item: literal_def commit final { $$->stmt = 0; }; +root_item: rl_def commit final { $$->stmt = 0; }; +root_item: token_def commit final { $$->stmt = 0; }; +root_item: cfl_def commit final { $$->stmt = 0; }; +root_item: region_def commit final { $$->stmt = 0; }; +root_item: context_def commit final { $$->stmt = 0; }; +root_item: namespace_def commit final { $$->stmt = 0; }; +root_item: function_def commit final { $$->stmt = 0; }; +root_item: iter_def commit final { $$->stmt = 0; }; +root_item: global_def commit final { $$->stmt = $1->stmt; }; +root_item: statement commit final { $$->stmt = $1->stmt; }; +root_item: pre_eof commit final { $$->stmt = 0; }; +root_item: precedence commit final { $$->stmt = 0; }; +root_item: typedef commit final { $$->stmt = 0; }; + +nonterm block_open +{ + ObjectDef *localFrame; +}; + +block_open: '{' + final { + /* Init the object representing the local frame. 
*/ + $$->localFrame = new ObjectDef( ObjectDef::FrameType, + "local", pd->nextObjectId++ ); + + pd->curLocalFrame = $$->localFrame; + + /* Add captures to the local frame. We Depend on these becoming the + * first local variables so we can compute their location. */ + + /* Make local variables corresponding to the local capture vector. */ + for ( ReCaptureVect::Iter c = reCaptureVect; c.lte(); c++ ) + { + ObjField *objField = new ObjField( c->objField->loc, + c->objField->typeRef, c->objField->name ); + + /* Insert it into the field map. */ + pd->curLocalFrame->insertField( objField->name, objField ); + } + }; + +block_close: '}' + final { + /* Pop the cur local frame, back to the root. */ + pd->curLocalFrame = pd->rootLocalFrame; + }; + + +iter_def: + KW_Iter TK_Word '(' opt_param_list ')' block_open lang_stmt_list block_close + final { + CodeBlock *codeBlock = new CodeBlock( $7->stmtList ); + codeBlock->localFrame = $6->localFrame; + Function *newFunction = new Function( 0, $2->data, + $4->paramList, codeBlock, pd->nextFuncId++, true ); + pd->functionList.append( newFunction ); + }; + +function_def: + type_ref TK_Word '(' opt_param_list ')' block_open lang_stmt_list block_close + final { + CodeBlock *codeBlock = new CodeBlock( $7->stmtList ); + codeBlock->localFrame = $6->localFrame; + Function *newFunction = new Function( $1->typeRef, $2->data, + $4->paramList, codeBlock, pd->nextFuncId++, false ); + pd->functionList.append( newFunction ); + + if ( contextStack.length() > 0 ) + newFunction->inContext = contextStack.top(); + }; + +nonterm opt_param_list uses param_list; + +opt_param_list: param_list + final { + $$->paramList = $1->paramList; + }; + +opt_param_list: + final { + $$->paramList = new ParameterList; + }; + +nonterm param_list +{ + ParameterList *paramList; +}; + +param_list: param_list param_var_def + final { + $$->paramList = $1->paramList; + $$->paramList->append( $2->objField ); + }; + +param_list: param_var_def + final { + /* Create the map and 
insert the first item. */ + $$->paramList = new ParameterList; + $$->paramList->append( $1->objField ); + }; + +nonterm param_var_def uses var_def; + +param_var_def: TK_Word ':' type_ref + final { + $$->objField = new ObjField( $1->loc, $3->typeRef, $1->data ); + $$->objField->isParam = true; + }; +param_var_def: TK_Word ':' reference_type_ref + final { + $$->objField = new ObjField( $1->loc, $3->typeRef, $1->data ); + $$->objField->isParam = true; + }; + +nonterm reference_type_ref uses type_ref; + +reference_type_ref: KW_Ref type_ref + final { + $$->typeRef = new TypeRef( TypeRef::Ref, $1->loc, $2->typeRef ); + }; + +nonterm global_def uses statement; + +global_def: KW_Export var_def opt_def_init + final { + $$->stmt = 0; + + if ( contextStack.length() != 0 ) + error($2->objField->loc) << "cannot export parser context variables" << endp; + + ObjectDef *object = pd->globalObjectDef; + + if ( object->checkRedecl( $2->objField->name ) != 0 ) + error($2->objField->loc) << "object field renamed" << endp; + + object->insertField( $2->objField->name, $2->objField ); + $2->objField->isExport = true; + + if ( $3->expr != 0 ) { + LangVarRef *varRef = new LangVarRef( $2->objField->loc, + new QualItemVect, $2->objField->name ); + + $$->stmt = new LangStmt( $2->objField->loc, + $3->assignType, varRef, $3->expr ); + } + }; + +global_def: KW_Global var_def opt_def_init + final { + $$->stmt = 0; + + ObjectDef *object; + if ( contextStack.length() == 0 ) + object = pd->globalObjectDef; + else { + Context *context = contextStack.top(); + $2->objField->context = context; + object = context->contextObjDef; + } + + if ( object->checkRedecl( $2->objField->name ) != 0 ) + error($2->objField->loc) << "object field renamed" << endp; + + object->insertField( $2->objField->name, $2->objField ); + + if ( $3->expr != 0 ) { + LangVarRef *varRef = new LangVarRef( $2->objField->loc, + new QualItemVect, $2->objField->name ); + + $$->stmt = new LangStmt( $2->objField->loc, + $3->assignType, 
varRef, $3->expr ); + } + }; + +precedence: pred_type pred_token_list final { pd->predValue++; }; + +pred_type: KW_Left final { predType = PredLeft; }; +pred_type: KW_Right final { predType = PredRight; }; +pred_type: KW_Nonassoc final { predType = PredNonassoc; }; + +pred_token_list: pred_token_list ',' pred_token + final { + }; + +pred_token_list: pred_token; + +nonterm pred_token +{ + ProdEl *factor; + TypeRef *typeRef; +}; + +pred_token: + region_qual TK_Word + final { + TypeRef *typeRef = new TypeRef( $2->loc, $1->nspaceQual, $2->data ); + + PredDecl *predDecl = new PredDecl( typeRef, predType, pd->predValue ); + pd->predDeclList.append( predDecl ); + }; + +pred_token: + region_qual TK_Literal + final { + PdaLiteral *literal = new PdaLiteral( $2->loc, *$2 ); + TypeRef *typeRef = new TypeRef( $2->loc, $1->nspaceQual, literal ); + + PredDecl *predDecl = new PredDecl( typeRef, predType, pd->predValue ); + pd->predDeclList.append( predDecl ); + }; + +typedef: + KW_Alias TK_Word type_ref + final { + Namespace *nspace = namespaceStack.top(); + TypeAlias *typeAlias = new TypeAlias( + $1->loc, nspace, $2->data, $3->typeRef ); + nspace->typeAliasList.append( typeAlias ); + }; + +cfl_def: cfl_def_head obj_var_list properties_list cfl_prod_list + final { + Namespace *nspace = namespaceStack.top(); + NtDef *ntDef = new NtDef( + curDefineId, + nspace, + $4->defList, + pd->objectDef, + contextStack.length() > 0 ? 
contextStack.top() : 0, + $3->property & PROPERTY_REDUCE_FIRST ); + + nspace->ntDefList.append( ntDef ); + }; + +cfl_def_head: KW_Def TK_Word + final { + curDefineId = $2->data; + curDefList = new LelDefList; + }; + +nonterm cfl_prod_list +{ + LelDefList *defList; +}; + +cfl_prod_list: cfl_prod_list '|' define_prod + final { + $$->defList = $1->defList; + $3->definition->prodNum = $$->defList->length(); + $$->defList->append( $3->definition ); + }; +cfl_prod_list: define_prod + final { + $$->defList = curDefList; + $1->definition->prodNum = $$->defList->length(); + $$->defList->append( $1->definition ); + }; + +nonterm property +{ + long property; +}; + +nonterm properties_list uses property; + +properties_list: properties_list property + final { + $$->property = $1->property | $2->property; + }; +properties_list: + final { + $$->property = 0; + }; + +property: + KW_ReduceFirst + final { + $$->property = PROPERTY_REDUCE_FIRST; + }; + +nonterm opt_prec +{ + LangEl *predOf; +}; + +opt_prec: + final { + $$->predOf = 0; + }; + +opt_prec: + KW_Prec pred_token + final { + $$->predOf = $2->factor->langEl; + }; + +nonterm define_prod +{ + Definition *definition; +}; + +define_prod: '[' prod_el_list ']' opt_commit opt_reduce_code opt_prec + final { + const InputLoc &loc = $1->loc; + //const String &name = curDefineId; + ProdElList *prodElList = curProdElList; + bool commit = $4->commit; + CodeBlock *redBlock = $5->codeBlock; + LangEl *predOf = $6->predOf; + + //Namespace *nspace = namespaceStack.top(); + + Definition *newDef = new Definition( loc, 0/*prodName*/, + prodElList, commit, redBlock, + pd->prodList.length(), 0, Definition::Production ); + newDef->predOf = predOf; + + pd->prodList.append( newDef ); + + $$->definition = newDef; + }; + +obj_var_list: obj_var_list var_def + final { + if ( pd->objectDef->checkRedecl( $2->objField->name ) != 0 ) + error() << "object field renamed" << endp; + + pd->objectDef->insertField( $2->objField->name, $2->objField ); + }; + 
+obj_var_list: + final { + pd->objectDef = new ObjectDef( ObjectDef::UserType, + curDefineId, pd->nextObjectId++ ); + }; + + +nonterm type_ref +{ + TypeRef *typeRef; +}; + +type_ref: basic_type_ref + final { + $$->typeRef = $1->typeRef; + }; + +type_ref: KW_Map '<' type_ref type_ref '>' + final { + NamespaceQual *nspaceQual = new NamespaceQual( + namespaceStack.top(), regionStack.top() ); + $$->typeRef = new TypeRef( TypeRef::Map, InputLoc(), nspaceQual, + $3->typeRef, $4->typeRef ); + }; + +type_ref: KW_List '<' type_ref '>' + final { + NamespaceQual *nspaceQual = new NamespaceQual( + namespaceStack.top(), regionStack.top() ); + $$->typeRef = new TypeRef( TypeRef::List, InputLoc(), nspaceQual, $3->typeRef, 0 ); + }; +type_ref: KW_Vector '<' type_ref '>' + final { + NamespaceQual *nspaceQual = new NamespaceQual( + namespaceStack.top(), regionStack.top() ); + $$->typeRef = new TypeRef( TypeRef::Vector, InputLoc(), nspaceQual, $3->typeRef, 0 ); + }; +type_ref: KW_Accum '<' type_ref '>' + final { + NamespaceQual *nspaceQual = new NamespaceQual( + namespaceStack.top(), regionStack.top() ); + $$->typeRef = new TypeRef( TypeRef::Parser, InputLoc(), nspaceQual, $3->typeRef, 0 ); + }; + +nonterm basic_type_ref uses type_ref; + +basic_type_ref: region_qual TK_Word opt_repeat + final { + $$->typeRef = new TypeRef( $2->loc, $1->nspaceQual, $2->data ); + $$->typeRef->repeatType = $3->repeatType; + }; + +basic_type_ref: KW_Ptr region_qual TK_Word opt_repeat + final { + $$->typeRef = new TypeRef( $1->loc, $2->nspaceQual, $3->data ); + $$->typeRef->repeatType = $4->repeatType; + $$->typeRef = new TypeRef( TypeRef::Ptr, $1->loc, $$->typeRef ); + }; + + +nonterm var_def +{ + InputLoc loc; + ObjField *objField; +}; + +var_def: TK_Word ':' type_ref + final { + /* Return an object field object. The user of this nonterminal must + * load it into the appropriate map and do error checking. 
*/ + $$->objField = new ObjField( $1->loc, $3->typeRef, $1->data ); + }; + +region_def: + region_head '{' root_item_list '}' + final { + /* Pop the top of the stack. */ + regionStack.pop(); + }; + +region_head: + KW_Lex TK_Word + final { + /* Just for ignores. */ + String scannerNameIgn( $2->data.length() + 2, "<%s>-ign", $2->data.data ); + TokenRegion *tokenRegionIgn = createRegion( scannerNameIgn ); + + /* Just for collect ignores. Will use the ignore-only start state. */ + String scannerNameCi( $2->data.length() + 2, "<%s>-ci", $2->data.data ); + TokenRegion *tokenRegionCi = createRegion( scannerNameCi ); + + /* Just for tokens. */ + String scannerNameTok( $2->data.length() + 2, "<%s>-tok", $2->data.data ); + TokenRegion *tokenRegionTok = createRegion( scannerNameTok ); + + /* Make the new token region. */ + String scannerName( $2->data.length() + 2, "<%s>", $2->data.data ); + TokenRegion *tokenRegion = createRegion( scannerName ); + + regionStack.push( tokenRegion ); + + tokenRegion->ignoreOnlyRegion = tokenRegionIgn; + tokenRegion->tokenOnlyRegion = tokenRegionTok; + tokenRegion->ciRegion = tokenRegionCi; + + tokenRegion->isFullRegion = true; + tokenRegionIgn->isIgnoreOnly = true; + tokenRegionCi->isCiOnly = true; + tokenRegionTok->isTokenOnly = true; + + tokenRegionIgn->derivedFrom = tokenRegion; + tokenRegionCi->derivedFrom = tokenRegion; + tokenRegionTok->derivedFrom = tokenRegion; + }; + +namespace_def: + namespace_head '{' root_item_list '}' + final { + namespaceStack.pop(); + }; + +namespace_head: + KW_Namespace TK_Word + final { + /* Make the new namespace. 
*/ + Namespace *nspace = new Namespace( InputLoc(), $2->data, + pd->namespaceList.length(), namespaceStack.top() ); + namespaceStack.top()->childNamespaces.append( nspace ); + pd->namespaceList.append( nspace ); + namespaceStack.push( nspace ); + }; + +context_var_def: + var_def + final { + ObjectDef *object; + if ( contextStack.length() == 0 ) + error($1->loc) << "internal error: no context stack items found" << endp; + + Context *context = contextStack.top(); + $1->objField->context = context; + object = context->contextObjDef; + + if ( object->checkRedecl( $1->objField->name ) != 0 ) + error($1->objField->loc) << "object field renamed" << endp; + + object->insertField( $1->objField->name, $1->objField ); + }; + + +context_item: context_var_def commit; +context_item: literal_def commit; +context_item: rl_def commit; +context_item: token_def commit; +context_item: cfl_def commit; +context_item: region_def commit; +context_item: context_def commit; +context_item: function_def commit; +context_item: iter_def commit; +context_item: pre_eof commit; +context_item: precedence commit; + +context_item_list: + context_item_list context_item; +context_item_list: + ; + +context_def: + context_head '{' context_item_list '}' + final { + contextStack.pop(); + namespaceStack.pop(); + }; + +context_head: + KW_Context TK_Word + final { + /* Make the new namespace. 
*/ + Namespace *nspace = new Namespace( InputLoc(), $2->data, + pd->namespaceList.length(), namespaceStack.top() ); + namespaceStack.top()->childNamespaces.append( nspace ); + pd->namespaceList.append( nspace ); + namespaceStack.push( nspace ); + + Context *context = new Context( $1->loc, 0 ); + contextStack.push( context ); + + ContextDef *contextDef = new ContextDef( $2->data, context, nspace ); + nspace->contextDefList.append( contextDef ); + + context->contextObjDef = new ObjectDef( ObjectDef::UserType, + $2->data, pd->nextObjectId++ ); + }; + +pattern_list: pattern_list pattern; +pattern_list: init_pattern_list pattern; + +init_pattern_list: + final { + patternItemList = new PatternItemList; + }; + +pattern: '"' litpat_el_list '"'; +pattern: '[' pattern_el_list ']'; + +litpat_el_list: litpat_el_list litpat_el; +litpat_el_list: ; + +litpat_el: TK_LitPat + final { + PatternItem *patternItem = new PatternItem( $1->loc, $1->data, + PatternItem::InputText ); + patternItemList->append( patternItem ); + }; + +litpat_el: '[' pattern_el_list ']'; + +pattern_el_list: pattern_el_list pattern_el; +pattern_el_list: ; + +pattern_el: opt_label pattern_el_type_or_lit + final { + /* Store the variable reference in the pattern item. */ + $2->patternItem->varRef = $1->varRef; + + if ( $1->varRef != 0 ) { + if ( pd->curLocalFrame->checkRedecl( $1->varRef->name ) != 0 ) { + error( $1->varRef->loc ) << "variable " << $1->varRef->name << + " redeclared" << endp; + } + + TypeRef *typeRef = $2->patternItem->factor->typeRef; + ObjField *objField = new ObjField( InputLoc(), typeRef, $1->varRef->name ); + + /* Insert it into the field map. */ + pd->curLocalFrame->insertField( $1->varRef->name, objField ); + } + }; + +pattern_el: '"' litpat_el_list '"'; +pattern_el: '?' 
TK_Word + final { + /* FIXME: Implement */ + assert(false); + }; + +nonterm pattern_el_type_or_lit +{ + PatternItem *patternItem; +}; + +pattern_el_type_or_lit: region_qual TK_Word opt_repeat + final { + TypeRef *typeRef = new TypeRef( $2->loc, $1->nspaceQual, $2->data ); + typeRef->repeatType = $3->repeatType; + ProdEl *factor = new ProdEl( ProdEl::ReferenceType, $2->loc, 0, false, typeRef, 0 ); + $$->patternItem = new PatternItem( $2->loc, factor, PatternItem::FactorType ); + patternItemList->append( $$->patternItem ); + }; + +pattern_el_type_or_lit: region_qual TK_Literal opt_repeat + final { + PdaLiteral *literal = new PdaLiteral( $2->loc, *$2 ); + TypeRef *typeRef = new TypeRef( $2->loc, $1->nspaceQual, literal ); + typeRef->repeatType = $3->repeatType; + + ProdEl *factor = new ProdEl( ProdEl::ReferenceType, $2->loc, 0, false, typeRef, 0 ); + $$->patternItem = new PatternItem( $2->loc, factor, PatternItem::FactorType ); + patternItemList->append( $$->patternItem ); + }; + +nonterm opt_label +{ + /* Variable reference. 
*/ + LangVarRef *varRef; +}; + +opt_label: TK_Word ':' + final { + $$->varRef = new LangVarRef( $1->loc, new QualItemVect, $1->data ); + }; +opt_label: + final { + $$->varRef = 0; + }; + +# +# Replacement +# + +repl_list: repl_list replacement; +repl_list: init_repl_list replacement; + +init_repl_list: + final { + replItemList = new ReplItemList; + }; + +replacement: '"' lit_repl_el_list '"'; +replacement: '[' repl_el_list ']'; + +lit_repl_el_list: lit_repl_el_list lit_repl_el; +lit_repl_el_list: ; + +lit_repl_el: TK_LitPat + final { + ReplItem *replItem = new ReplItem( $1->loc, ReplItem::InputText, $1->data ); + replItemList->append( replItem ); + }; + +lit_repl_el: '[' repl_el_list ']'; + +repl_el_list: repl_el_list repl_el; +repl_el_list: ; + +repl_el: region_qual TK_Literal + final { + PdaLiteral *literal = new PdaLiteral( $2->loc, *$2 ); + TypeRef *typeRef = new TypeRef( $2->loc, $1->nspaceQual, literal ); + typeRef->repeatType = RepeatNone; + ProdEl *factor = new ProdEl( ProdEl::LiteralType, $2->loc, 0, false, typeRef, 0 ); + ReplItem *replItem = new ReplItem( $2->loc, ReplItem::FactorType, factor ); + replItemList->append( replItem ); + }; +repl_el: '"' lit_repl_el_list '"'; + +repl_el: code_expr + final { + ReplItem *replItem = new ReplItem( $1->expr->loc, ReplItem::ExprType, $1->expr ); + replItemList->append( replItem ); + }; + +# +# Accum +# +accumulate: init_repl_list accum_list; +accumulate: init_repl_list code_expr + final { + ReplItem *replItem = new ReplItem( $2->expr->loc, ReplItem::ExprType, $2->expr ); + replItemList->append( replItem ); + }; + +accum_list: accum_list accum; +accum_list: accum; + +init_accum_list: + final { + replItemList = new ReplItemList; + }; + +accum: '"' lit_accum_el_list '"'; +accum: '[' accum_el_list ']'; + +lit_accum_el_list: lit_accum_el_list lit_accum_el; +lit_accum_el_list: ; + +lit_accum_el: TK_LitPat + final { + ReplItem *replItem = new ReplItem( $1->loc, ReplItem::InputText, $1->data ); + replItemList->append( 
replItem ); + }; + +lit_accum_el: '[' accum_el_list ']'; + +accum_el_list: accum_el_list accum_el; +accum_el_list: ; + +#accum_el: region_qual TK_Literal +# final { +# PdaLiteral *literal = new PdaLiteral( $2->loc, *$2 ); +# ProdEl *factor = new ProdEl( $2->loc, false, $1->nspaceQual, +# literal, 0 ); +# ReplItem *replItem = new ReplItem( $2->loc, ReplItem::FactorType, factor ); +# replItemList->append( replItem ); +# }; +accum_el: code_expr + final { + ReplItem *replItem = new ReplItem( $1->expr->loc, ReplItem::ExprType, $1->expr ); + replItemList->append( replItem ); + }; + +accum_el: '"' lit_accum_el_list '"'; + + +# +# String +# + +string_list: string_list string; +string_list: init_string_list string; + +init_string_list: + final { + replItemList = new ReplItemList; + }; + +string: '"' lit_string_el_list '"'; +string: '[' string_el_list ']'; + +lit_string_el_list: lit_string_el_list lit_string_el; +lit_string_el_list: ; + +lit_string_el: TK_LitPat + final { + ReplItem *replItem = new ReplItem( $1->loc, ReplItem::InputText, $1->data ); + replItemList->append( replItem ); + }; + +lit_string_el: '[' string_el_list ']'; + +string_el_list: string_el_list string_el; +string_el_list: ; + +#accum_el: region_qual TK_Literal +# final { +# PdaLiteral *literal = new PdaLiteral( $2->loc, *$2 ); +# ProdEl *factor = new ProdEl( $2->loc, false, $1->nspaceQual, +# literal, 0 ); +# ReplItem *replItem = new ReplItem( $2->loc, ReplItem::FactorType, factor ); +# replItemList->append( replItem ); +# }; +string_el: code_expr + final { + ReplItem *replItem = new ReplItem( $1->expr->loc, ReplItem::ExprType, $1->expr ); + replItemList->append( replItem ); + }; + +string_el: '"' lit_string_el_list '"'; + +prod_el_list: + prod_el_list prod_el + final { + curProdElList->append( $2->factor ); + }; + +prod_el_list: + final { curProdElList = new ProdElList; }; + +nonterm opt_no_ignore { bool value; }; + +opt_no_ignore: KW_Ni final { $$->value = true; }; +opt_no_ignore: final { $$->value = 
false; }; + +nonterm prod_el +{ + ProdEl *factor; +}; + +prod_el: + opt_capture opt_commit region_qual TK_Word opt_repeat + final { + TypeRef *typeRef = new TypeRef( $4->loc, $3->nspaceQual, $4->data ); + typeRef->repeatType = $5->repeatType; + $$->factor = new ProdEl( ProdEl::ReferenceType, $4->loc, $1->objField, $2->commit, typeRef, 0 ); + + /* If there is a capture, create the field. */ + if ( $1->objField != 0 ) { + /* Might already exist. */ + ObjField *objField = pd->objectDef->checkRedecl( $1->objField->name ); + if ( objField == 0 ) { + objField = $1->objField; + objField->typeRef = typeRef; + pd->objectDef->insertField( objField->name, objField ); + } + else { + /* FIXME: check the types are the same. */ + //error() << "object field renamed" << endp; + } + + objField->isRhsGet = true; + RhsVal rhsVal( curDefList->length(), curProdElList->length() ); + objField->rhsVal.append( RhsVal( curDefList->length(), curProdElList->length() ) ); + } + }; + +prod_el: + opt_capture opt_commit region_qual TK_Literal opt_repeat + final { + /* Create a new factor node going to a concat literal. */ + PdaLiteral *literal = new PdaLiteral( $4->loc, *$4 ); + TypeRef *typeRef = new TypeRef( $4->loc, $3->nspaceQual, literal ); + typeRef->repeatType = $5->repeatType; + $$->factor = new ProdEl( ProdEl::LiteralType, $4->loc, $1->objField, $2->commit, typeRef, 0 ); + + /* If there is a capture, create the field. */ + if ( $1->objField != 0 ) { + $1->objField->typeRef = typeRef; + if ( pd->objectDef->checkRedecl( $1->objField->name ) != 0 ) + error() << "object field renamed" << endp; + + pd->objectDef->insertField( $1->objField->name, $1->objField ); + } + }; + +nonterm opt_repeat +{ + bool opt; + bool repeat; + RepeatType repeatType; +}; + +opt_repeat: '*' final { $$->opt = false; $$->repeat = true; $$->repeatType = RepeatRepeat; }; +opt_repeat: '+' final { $$->opt = false; $$->repeat = false; $$->repeatType = RepeatList; }; +opt_repeat: '?' 
final { $$->opt = true; $$->repeat = false; $$->repeatType = RepeatOpt; }; +opt_repeat: final { $$->opt = false; $$->repeat = false; $$->repeatType = RepeatNone; }; + +nonterm region_qual +{ + NamespaceQual *nspaceQual; +}; + +region_qual: region_qual TK_Word TK_DoubleColon + final { + $$->nspaceQual = $1->nspaceQual; + $$->nspaceQual->qualNames.append( $2->data ); + }; + +region_qual: + final { + $$->nspaceQual = new NamespaceQual( namespaceStack.top(), regionStack.top() ); + }; + +literal_def: KW_Literal literal_list; + +literal_list: literal_list ',' literal_item; +literal_list: literal_item; + +literal_item: opt_no_ignore TK_Literal opt_no_ignore + final { + /* Create a name for the literal. */ + String name( 32, "_literal_%.4x", pd->nextTokenId ); + + bool insideRegion = regionStack.top() != pd->rootRegion; + if ( !insideRegion ) { + /* Just for ignores. */ + String scannerNameIgn( name.length() + 2, "<%s>-ign", name.data ); + TokenRegion *tokenRegionIgn = createRegion( scannerNameIgn ); + + /* Just for collect ignores. Will use the ignore-only start state. */ + String scannerNameCi( name.length() + 2, "<%s>-ci", name.data ); + TokenRegion *tokenRegionCi = createRegion( scannerNameCi ); + + /* Just for tokens. */ + String scannerNameTok( name.length() + 2, "<%s>-tok", name.data ); + TokenRegion *tokenRegionTok = createRegion( scannerNameTok ); + + /* Make a new token region just for the token. 
*/ + String scannerName( name.length() + 2, "<%s>", name.data ); + TokenRegion *tokenRegion = createRegion( scannerName ); + + regionStack.push( tokenRegion ); + + tokenRegion->ignoreOnlyRegion = tokenRegionIgn; + tokenRegion->tokenOnlyRegion = tokenRegionTok; + tokenRegion->ciRegion = tokenRegionCi; + + tokenRegion->isFullRegion = true; + tokenRegionIgn->isIgnoreOnly = true; + tokenRegionCi->isCiOnly = true; + tokenRegionTok->isTokenOnly = true; + + tokenRegionIgn->derivedFrom = tokenRegion; + tokenRegionCi->derivedFrom = tokenRegion; + tokenRegionTok->derivedFrom = tokenRegion; + } + + bool unusedCI; + String interp; + prepareLitString( interp, unusedCI, $2->data, $2->loc ); + + /* Look for the production's associated region. */ + Namespace *nspace = namespaceStack.top(); + TokenRegion *region = regionStack.top(); + + + LiteralDictEl *ldel = nspace->literalDict.find( interp ); + if ( ldel != 0 ) + error( $2->loc ) << "literal already defined in this namespace" << endp; + else { + Join *join = new Join( new Expression( new Term( new FactorWithAug( + new FactorWithRep( $2->loc, new FactorWithNeg( $2->loc, new Factor( + new Literal( $2->loc, $2->data, + Literal::LitString ) ) ) ) ) ) ) ); + + if ( strcmp( interp.data, "" ) == 0 ) { + TokenDef *tokenDef = new TokenDef( name, $2->data, true, false, join, + 0, $2->loc, pd->nextTokenId++, nspace, region, 0, 0, 0 ); + + //region->tokenDefList.append( tokenDef ); + + ldel = nspace->literalDict.insert( interp, tokenDef ); + nspace->tokenDefList.append( tokenDef ); + + tokenDef->isZero = true; + } + else { + TokenDef *tokenDef = new TokenDef( name, $2->data, true, false, join, + 0, $2->loc, pd->nextTokenId++, nspace, region, 0, 0, 0 ); + region->tokenDefList.append( tokenDef ); + ldel = nspace->literalDict.insert( interp, tokenDef ); + nspace->tokenDefList.append( tokenDef ); + + if ( $1->value ) + tokenDef->noPreIgnore = true; + if ( $3->value ) + tokenDef->noPostIgnore = true; + + TokenDef *tokenDefTok = new TokenDef( 
name + "_tok", $2->data, true, false, join, + 0, $2->loc, pd->nextTokenId++, nspace, region->tokenOnlyRegion, 0, 0, 0 ); + tokenDefTok->dupOf = tokenDef; + region->tokenOnlyRegion->tokenDefList.append( tokenDefTok ); + ldel = nspace->literalDict.insert( "|" + interp + "_tok", tokenDefTok ); + nspace->tokenDefList.append( tokenDefTok ); + } + } + + if ( !insideRegion ) { + /* Leave the region just for this token. */ + regionStack.pop(); + } + }; + + +# These two productions are responsible for setting and unsetting the Regular +# language scanning context. +enter_rl: + try { + enterRl = true; + } + undo { + enterRl = false; + }; +leave_rl: + try { + enterRl = false; + } + undo { + enterRl = true; + }; + +token_def: + token_or_ignore token_def_name obj_var_list + enter_rl opt_no_ignore '/' opt_rl_join leave_rl '/' opt_no_ignore + opt_translate + final { + bool ignore = $1->ignore; + String name = $2->name; + Join *join = $7->join; + CodeBlock *transBlock = $11->transBlock; + + /* Check the region if this is for an ignore. */ + if ( ignore && !pd->insideRegion ) + error($1->loc) << "ignore tokens can only appear inside scanners" << endp; + + /* Check the name if this is a token. */ + if ( !ignore && name == 0 ) + error($1->loc) << "tokens must have a name" << endp; + + /* Give a default name to ignores. */ + if ( name == 0 ) + name.setAs( 32, "_ignore_%.4x", pd->nextTokenId ); + + Namespace *nspace = namespaceStack.top(); + TokenRegion *region = regionStack.top(); + + TokenDef *tokenDef = new TokenDef( name, String(), false, ignore, join, + transBlock, $1->loc, pd->nextTokenId++, nspace, region, + &reCaptureVect, pd->objectDef, + contextStack.length() > 0 ? contextStack.top() : 0 ); + + region->tokenDefList.append( tokenDef ); + nspace->tokenDefList.append( tokenDef ); + + if ( $5->value ) + tokenDef->noPreIgnore = true; + if ( $10->value ) + tokenDef->noPostIgnore = true; + + /* All again for the ignore. 
*/ + if ( ignore ) { + TokenDef *tokenDefIgn = new TokenDef( name + "_ign", String(), false, ignore, join, + 0, $1->loc, pd->nextTokenId++, nspace, region->ignoreOnlyRegion, + &reCaptureVect, pd->objectDef, + contextStack.length() > 0 ? contextStack.top() : 0 ); + + tokenDefIgn->dupOf = tokenDef; + + region->ignoreOnlyRegion->tokenDefList.append( tokenDefIgn ); + nspace->tokenDefList.append( tokenDefIgn ); + } + else { + TokenDef *tokenDefTok = new TokenDef( name + "_tok", String(), false, ignore, join, + 0, $1->loc, pd->nextTokenId++, nspace, region->tokenOnlyRegion, + &reCaptureVect, pd->objectDef, + contextStack.length() > 0 ? contextStack.top() : 0 ); + + tokenDefTok->dupOf = tokenDef; + + region->tokenOnlyRegion->tokenDefList.append( tokenDefTok ); + nspace->tokenDefList.append( tokenDefTok ); + } + + /* This is created and pushed in the name. */ + if ( !pd->insideRegion ) { + /* Leave the region that we made just for this token. */ + regionStack.pop(); + } + + if ( join != 0 ) { + /* Create a regular language definition so the token can be used to + * make other tokens */ + addRegularDef( $1->loc, namespaceStack.top(), name, join ); + } + + + reCaptureVect.empty(); + }; + +nonterm token_or_ignore +{ + InputLoc loc; + bool ignore; +}; + +token_or_ignore: KW_Token + final { $$->loc = $1->loc; $$->ignore = false; }; + +token_or_ignore: KW_Ignore + final { $$->loc = $1->loc; $$->ignore = true; }; + +nonterm class token_def_name +{ + String name; +}; + +token_def_name: + opt_name + final { + String name = $1->name; + + $$->name = name; + pd->insideRegion = regionStack.top() != pd->rootRegion; + curDefineId = name; + + if ( !pd->insideRegion ) { + /* For just ignores. */ + String scannerNameIgn( name.length() + 2, "<%s>-ign", name.data ); + TokenRegion *tokenRegionIgn = createRegion( scannerNameIgn ); + + /* Just for explicitly collecting ignores. 
*/ + String scannerNameCi( name.length() + 2, "<%s>-ci", name.data ); + TokenRegion *tokenRegionCi = createRegion( scannerNameCi ); + + /* Just for tokens. */ + String scannerNameTok( name.length() + 2, "<%s>-tok", name.data ); + TokenRegion *tokenRegionTok = createRegion( scannerNameTok ); + + /* If not inside a region, make one for the token. */ + String scannerName( name.length() + 2, "<%s>", name.data ); + TokenRegion *tokenRegion = createRegion( scannerName ); + + regionStack.push( tokenRegion ); + + tokenRegion->ignoreOnlyRegion = tokenRegionIgn; + tokenRegion->tokenOnlyRegion = tokenRegionTok; + tokenRegion->ciRegion = tokenRegionCi; + + tokenRegion->isFullRegion = true; + tokenRegionIgn->isIgnoreOnly = true; + tokenRegionCi->isCiOnly = true; + tokenRegionTok->isTokenOnly = true; + + tokenRegionIgn->derivedFrom = tokenRegion; + tokenRegionCi->derivedFrom = tokenRegion; + tokenRegionTok->derivedFrom = tokenRegion; + } + + /* Reset the label id counter. */ + pd->nextLabelId = 0; + }; + +nonterm class opt_name +{ + String name; +}; + +opt_name: TK_Word final { $$->name = $1->data; }; +opt_name: ; + +nonterm opt_translate +{ + CodeBlock *transBlock; +}; + +opt_translate: + block_open lang_stmt_list block_close + final { + $$->transBlock = new CodeBlock( $2->stmtList ); + $$->transBlock->localFrame = $1->localFrame; + $$->transBlock->context = contextStack.length() == 0 ? 0 : contextStack.top(); + }; + +opt_translate: + final { + $$->transBlock = 0; + }; + +pre_eof: + KW_Preeof block_open lang_stmt_list block_close + final { + bool insideRegion = regionStack.top() != pd->rootRegion; + if ( !insideRegion ) + error($1->loc) << "preeof must be used inside an existing region" << endl; + + CodeBlock *codeBlock = new CodeBlock( $3->stmtList ); + codeBlock->localFrame = $2->localFrame; + codeBlock->context = contextStack.length() == 0 ? 
0 : contextStack.top(); + + TokenRegion *region = regionStack.top(); + region->preEofBlock = codeBlock; + }; + +rl_def: + KW_Rl machine_name enter_rl '/' rl_join leave_rl '/' + final { + /* Generic creation of machine for instantiation and assignment. */ + addRegularDef( $2->loc, namespaceStack.top(), $2->data, $5->join ); + + if ( reCaptureVect.length() > 0 ) + error($1->loc) << "rl definitions cannot capture vars" << endl; + }; + +type class token_data +{ + InputLoc loc; + String data; +}; + +nonterm machine_name uses token_data; + +machine_name: + TK_Word + final { + /* Make/get the priority key. The name may have already been referenced + * and therefore exist. */ + PriorDictEl *priorDictEl; + if ( pd->priorDict.insert( $1->data, pd->nextPriorKey, &priorDictEl ) ) + pd->nextPriorKey += 1; + pd->curDefPriorKey = priorDictEl->value; + + /* Make/get the local error key. */ + LocalErrDictEl *localErrDictEl; + if ( pd->localErrDict.insert( $1->data, pd->nextLocalErrKey, &localErrDictEl ) ) + pd->nextLocalErrKey += 1; + pd->curDefLocalErrKey = localErrDictEl->value; + + $$->loc = $1->loc; + $$->data = $1->data; + }; + +# +# Reduce statements +# + +nonterm opt_reduce_code +{ + CodeBlock *codeBlock; +}; + +opt_reduce_code: + final { $$->codeBlock = 0; }; + +opt_reduce_code: + start_reduce lang_stmt_list block_close + final { + $$->codeBlock = new CodeBlock( $2->stmtList ); + $$->codeBlock->localFrame = $1->localFrame; + $$->codeBlock->context = contextStack.length() == 0 ? 
0 : contextStack.top(); + }; + +nonterm start_reduce uses block_open; + +start_reduce: + block_open + final { + $$->localFrame = $1->localFrame; + }; + +nonterm lang_stmt_list +{ + StmtList *stmtList; +}; + +lang_stmt_list: rec_stmt_list opt_require_stmt + final { + $$->stmtList = $1->stmtList; + if ( $2->stmt != 0 ) + $$->stmtList->append( $2->stmt ); + }; + +nonterm rec_stmt_list uses lang_stmt_list; + +rec_stmt_list: rec_stmt_list statement + final { + $$->stmtList = $1->stmtList; + + /* Maybe a statement was generated. */ + if ( $2->stmt != 0 ) + $$->stmtList->append( $2->stmt ); + }; + +rec_stmt_list: + final { + $$->stmtList = new StmtList; + }; + +nonterm opt_def_init +{ + LangExpr *expr; + LangStmt::Type assignType; +}; + +opt_def_init: '=' code_expr + final { + $$->expr = $2->expr; + $$->assignType = LangStmt::AssignType; + }; +opt_def_init: + final { + $$->expr = 0; + }; + +scope_push: + final { + pd->curLocalFrame->pushScope(); + //cout << "push scope" << endl; + }; + +scope_pop: + final { + pd->curLocalFrame->popScope(); + //cout << "pop scope" << endl; + }; + +nonterm statement +{ + LangStmt *stmt; +}; +nonterm for_scope uses statement; + +statement: var_def opt_def_init + final { + /* By default no statement here. Maybe will add an initialization. */ + $$->stmt = 0; + + /* Check for redeclaration. */ + if ( pd->curLocalFrame->checkRedecl( $1->objField->name ) != 0 ) { + error( $1->objField->loc ) << "variable " << $1->objField->name << + " redeclared" << endp; + } + + /* Insert it into the field map. 
*/ + pd->curLocalFrame->insertField( $1->objField->name, $1->objField ); + + //cout << "var def " << $1->objField->name << endl; + + if ( $2->expr != 0 ) { + LangVarRef *varRef = new LangVarRef( $1->objField->loc, + new QualItemVect, $1->objField->name ); + + $$->stmt = new LangStmt( $1->objField->loc, + $2->assignType, varRef, $2->expr ); + } + }; +statement: var_ref '=' code_expr + final { + $$->stmt = new LangStmt( $2->loc, LangStmt::AssignType, $1->varRef, $3->expr ); + }; +statement: KW_Print '(' code_expr_list ')' + final { + $$->stmt = new LangStmt( $1->loc, LangStmt::PrintType, $3->exprVect ); + }; +statement: KW_PrintXMLAC '(' code_expr_list ')' + final { + $$->stmt = new LangStmt( $1->loc, LangStmt::PrintXMLACType, $3->exprVect ); + }; +statement: KW_PrintXML '(' code_expr_list ')' + final { + $$->stmt = new LangStmt( $1->loc, LangStmt::PrintXMLType, $3->exprVect ); + }; +statement: KW_PrintStream '(' code_expr_list ')' + final { + $$->stmt = new LangStmt( $1->loc, LangStmt::PrintStreamType, $3->exprVect ); + }; +statement: code_expr + final { + $$->stmt = new LangStmt( InputLoc(), LangStmt::ExprType, $1->expr ); + }; +statement: if_stmt + final { + $$->stmt = $1->stmt; + }; +statement: KW_Reject + final { + $$->stmt = new LangStmt( $1->loc, LangStmt::RejectType ); + }; +statement: KW_While scope_push code_expr block_or_single scope_pop + final { + $$->stmt = new LangStmt( LangStmt::WhileType, $3->expr, $4->stmtList ); + }; + +for_scope: TK_Word ':' type_ref KW_In iter_call block_or_single + final { + /* Check for redeclaration. */ + if ( pd->curLocalFrame->checkRedecl( $1->data ) != 0 ) + error( $1->loc ) << "variable " << $1->data << " redeclared" << endp; + + /* Note that we pass in a null type reference. This type is dependent + * on the result of the iter_call lookup since it must contain a reference + * to the iterator that is called. This lookup is done at compile time. 
*/ + ObjField *iterField = new ObjField( $1->loc, (TypeRef*)0, $1->data ); + pd->curLocalFrame->insertField( $1->data, iterField ); + + $$->stmt = new LangStmt( $1->loc, LangStmt::ForIterType, + iterField, $3->typeRef, $5->langTerm, $6->stmtList ); + }; + +statement: KW_For scope_push for_scope scope_pop + final { + $$->stmt = $3->stmt; + }; + +statement: KW_Return code_expr + final { + $$->stmt = new LangStmt( $1->loc, LangStmt::ReturnType, $2->expr ); + }; +statement: KW_Break + final { + $$->stmt = new LangStmt( LangStmt::BreakType ); + }; +statement: KW_Yield var_ref + final { + $$->stmt = new LangStmt( LangStmt::YieldType, $2->varRef ); + }; +statement: var_ref TK_LtLt accumulate + final { + Namespace *nspace = namespaceStack.top(); + TokenRegion *region = regionStack.top(); + ParserText *parserText = new ParserText( $2->loc, nspace, region, replItemList ); + pd->parserTextList.append( parserText ); + + $$->stmt = new LangStmt( LangStmt::ParserType, $1->varRef, parserText ); + }; +statement: KW_Send var_ref accumulate + final { + Namespace *nspace = namespaceStack.top(); + TokenRegion *region = regionStack.top(); + ParserText *parserText = new ParserText( $1->loc, nspace, region, replItemList ); + pd->parserTextList.append( parserText ); + + $$->stmt = new LangStmt( LangStmt::ParserType, $2->varRef, parserText ); + }; + +nonterm opt_require_stmt uses statement; + +opt_require_stmt: + scope_push require_pattern lang_stmt_list scope_pop + final { + $$->stmt = new LangStmt( LangStmt::IfType, $2->expr, $3->stmtList, 0 ); + }; +opt_require_stmt: + final { + $$->stmt = 0; + }; + +nonterm require_pattern uses code_expr; + +require_pattern: + KW_Require var_ref pattern_list + final { + Namespace *nspace = namespaceStack.top(); + TokenRegion *region = regionStack.top(); + Pattern *pattern = new Pattern( $1->loc, nspace, region, + patternItemList, pd->nextPatReplId++ ); + pd->patternList.append( pattern ); + + $$->expr = new LangExpr( + new LangTerm( 
LangTerm::MatchType, $2->varRef, pattern ) ); + }; + +nonterm block_or_single uses lang_stmt_list; + +block_or_single: '{' lang_stmt_list '}' + final { + $$->stmtList = $2->stmtList; + }; +block_or_single: statement + final { + $$->stmtList = new StmtList; + $$->stmtList->append( $1->stmt ); + }; + +nonterm iter_call +{ + LangTerm *langTerm; +}; + +iter_call: var_ref '(' opt_code_expr_list ')' + final { + $$->langTerm = new LangTerm( $1->varRef, $3->exprVect ); + }; +iter_call: TK_Word + final { + $$->langTerm = new LangTerm( LangTerm::VarRefType, + new LangVarRef( $1->loc, new QualItemVect, $1->data ) ); + }; + +# +# If Statements +# + +nonterm if_stmt uses statement; + +if_stmt: KW_If scope_push code_expr block_or_single scope_pop elsif_list + final { + $$->stmt = new LangStmt( LangStmt::IfType, $3->expr, $4->stmtList, $6->stmt ); + }; + +nonterm elsif_list +{ + LangStmt *stmt; +}; + +elsif_list: + elsif_clause elsif_list + final { + /* Chain whatever follows this clause (another elsif, an else, or + * null) into its elsePart. */ + $$->stmt = $1->stmt; + $$->stmt->elsePart = $2->stmt; + }; +elsif_list: + optional_else + final { + $$->stmt = $1->stmt; + }; + +nonterm elsif_clause +{ + LangStmt *stmt; +}; + +elsif_clause: + KW_Elsif scope_push code_expr block_or_single scope_pop + final { + $$->stmt = new LangStmt( LangStmt::IfType, $3->expr, $4->stmtList, 0 ); + }; + +nonterm optional_else +{ + LangStmt *stmt; +}; + +optional_else: + KW_Else scope_push block_or_single scope_pop + final { + $$->stmt = new LangStmt( LangStmt::ElseType, $3->stmtList ); + }; + +optional_else: + final { + $$->stmt = 0; + }; + +# +# Code Expression Lists. 
+# +nonterm code_expr_list +{ + ExprVect *exprVect; +}; + +code_expr_list: code_expr_list code_expr + final { + $$->exprVect = $1->exprVect; + $$->exprVect->append( $2->expr ); + }; +code_expr_list: code_expr + final { + $$->exprVect = new ExprVect; + $$->exprVect->append( $1->expr ); + }; + +nonterm opt_code_expr_list uses code_expr_list; + +opt_code_expr_list: code_expr_list + final { + $$->exprVect = $1->exprVect; + }; + +opt_code_expr_list: + final { + $$->exprVect = 0; + }; + +# +# Type list +# + +nonterm type_list +{ + TypeRefVect *typeRefVect; +}; + +type_list: type_list ',' type_ref + final { + $$->typeRefVect = $1->typeRefVect; + $$->typeRefVect->append( $3->typeRef ); + }; +type_list: type_ref + final { + $$->typeRefVect = new TypeRefVect; + $$->typeRefVect->append( $1->typeRef ); + }; + +nonterm opt_type_list uses type_list; + +opt_type_list: type_list + final { + $$->typeRefVect = $1->typeRefVect; + }; + +opt_type_list: + final { + $$->typeRefVect = 0; + }; + + +# +# Variable reference +# + +nonterm var_ref +{ + LangVarRef *varRef; +}; + +var_ref: qual TK_Word + final { + $$->varRef = new LangVarRef( $2->loc, $1->qual, $2->data ); + }; + +nonterm qual +{ + QualItemVect *qual; +}; + +qual: qual TK_Word '.' 
+ final { + $$->qual = $1->qual; + $$->qual->append( QualItem( $2->loc, $2->data, QualItem::Dot ) ); + }; +qual: qual TK_Word TK_RightArrow + final { + $$->qual = $1->qual; + $$->qual->append( QualItem( $2->loc, $2->data, QualItem::Arrow ) ); + }; +qual: + final { + $$->qual = new QualItemVect; + }; + +# +# Code expression +# + +nonterm code_expr +{ + LangExpr *expr; +}; + +code_expr: code_expr TK_AmpAmp code_relational + final { + $$->expr = new LangExpr( $2->loc, $1->expr, OP_LogicalAnd, $3->expr ); + }; + +code_expr: code_expr TK_BarBar code_relational + final { + $$->expr = new LangExpr( $2->loc, $1->expr, OP_LogicalOr, $3->expr ); + }; + +code_expr: code_relational + final { + $$->expr = $1->expr; + }; + +nonterm code_relational uses code_expr; + +code_relational: code_relational TK_DoubleEql code_additive + final { + $$->expr = new LangExpr( $2->loc, $1->expr, OP_DoubleEql, $3->expr ); + }; + +code_relational: code_relational TK_NotEql code_additive + final { + $$->expr = new LangExpr( $2->loc, $1->expr, OP_NotEql, $3->expr ); + }; + +code_relational: code_relational '<' code_additive + final { + $$->expr = new LangExpr( $2->loc, $1->expr, '<', $3->expr ); + }; + +code_relational: code_relational '>' code_additive + final { + $$->expr = new LangExpr( $2->loc, $1->expr, '>', $3->expr ); + }; + +code_relational: code_relational TK_LessEql code_additive + final { + $$->expr = new LangExpr( $2->loc, $1->expr, OP_LessEql, $3->expr ); + }; + +code_relational: code_relational TK_GrtrEql code_additive + final { + $$->expr = new LangExpr( $2->loc, $1->expr, OP_GrtrEql, $3->expr ); + }; + + +code_relational: code_additive + final { + $$->expr = $1->expr; + }; + +nonterm code_additive uses code_expr; + +code_additive: code_additive '+' code_multiplicitive + final { + $$->expr = new LangExpr( $2->loc, $1->expr, '+', $3->expr ); + }; + +code_additive: code_additive '-' code_multiplicitive + final { + $$->expr = new LangExpr( $2->loc, $1->expr, '-', $3->expr ); + }; + 
+code_additive: code_multiplicitive + final { + $$->expr = $1->expr; + }; + +nonterm code_multiplicitive uses code_expr; + +code_multiplicitive: code_multiplicitive '*' code_unary + final { + $$->expr = new LangExpr( $2->loc, $1->expr, '*', $3->expr ); + }; + +code_multiplicitive: code_multiplicitive '/' code_unary + final { + $$->expr = new LangExpr( $2->loc, $1->expr, '/', $3->expr ); + }; + +code_multiplicitive: code_unary + final { + $$->expr = $1->expr; + }; + +nonterm code_unary uses code_expr; +code_unary: '!' code_factor + final { + $$->expr = new LangExpr( $1->loc, '!', $2->expr ); + }; +code_unary: '$' code_factor + final { + $$->expr = new LangExpr( $1->loc, '$', $2->expr ); + }; +code_unary: '^' code_factor + final { + $$->expr = new LangExpr( $1->loc, '^', $2->expr ); + }; +code_unary: '%' code_factor + final { + $$->expr = new LangExpr( $1->loc, '%', $2->expr ); + }; +code_unary: code_factor + final { + $$->expr = $1->expr; + }; + +nonterm opt_capture uses var_def; + +opt_capture: TK_Word ':' + final { + $$->objField = new ObjField( $1->loc, 0, $1->data ); + }; +opt_capture: + final { + $$->objField = 0; + }; + +nonterm code_factor uses code_expr; + +code_factor: TK_Number + final { + $$->expr = new LangExpr( new LangTerm( LangTerm::NumberType, $1->data ) ); + }; +code_factor: TK_Literal + final { + $$->expr = new LangExpr( new LangTerm( LangTerm::StringType, $1->data ) ); + }; +code_factor: var_ref '(' opt_code_expr_list ')' + final { + $$->expr = new LangExpr( new LangTerm( $1->varRef, $3->exprVect ) ); + }; +code_factor: var_ref + final { + $$->expr = new LangExpr( new LangTerm( LangTerm::VarRefType, $1->varRef ) ); + }; +code_factor: KW_Match var_ref pattern_list + final { + Namespace *nspace = namespaceStack.top(); + TokenRegion *region = regionStack.top(); + Pattern *pattern = new Pattern( $1->loc, nspace, region, + patternItemList, pd->nextPatReplId++ ); + pd->patternList.append( pattern ); + + $$->expr = new LangExpr( new LangTerm( 
LangTerm::MatchType, $2->varRef, pattern ) ); + }; +code_factor: KW_New code_factor + final { + $$->expr = new LangExpr( new LangTerm( LangTerm::NewType, $2->expr ) ); + }; +code_factor: + KW_Construct opt_capture type_ref opt_field_init repl_list + final { + Namespace *nspace = namespaceStack.top(); + TokenRegion *region = regionStack.top(); + Replacement *replacement = new Replacement( $1->loc, nspace, region, + replItemList, pd->nextPatReplId++ ); + pd->replList.append( replacement ); + + LangVarRef *varRef = 0; + if ( $2->objField != 0 ) + varRef = new LangVarRef( $2->objField->loc, new QualItemVect, $2->objField->name ); + + $$->expr = new LangExpr( new LangTerm( $1->loc, LangTerm::ConstructType, + varRef, $2->objField, $3->typeRef, $4->fieldInitVect, replacement ) ); + + /* Check for redeclaration. */ + if ( $2->objField != 0 ) { + if ( pd->curLocalFrame->checkRedecl( $2->objField->name ) != 0 ) { + error( $2->objField->loc ) << "variable " << $2->objField->name << + " redeclared" << endp; + } + + /* Insert it into the field map. */ + $2->objField->typeRef = $3->typeRef; + pd->curLocalFrame->insertField( $2->objField->name, $2->objField ); + } + }; +code_factor: KW_Parse opt_capture type_ref '(' opt_code_expr_list ')' + final { + String parserName = $3->typeRef->typeName + "_parser"; + + /* Get the language element. 
*/ + Namespace *nspace = namespaceStack.top(); + + GenericType *generic = 0; + + NamespaceQual *nspaceQual = new NamespaceQual( + namespaceStack.top(), regionStack.top() ); + TypeRef *parserTypeRef = new TypeRef( TypeRef::Parser, + InputLoc(), nspaceQual, $3->typeRef, 0 ); + + Replacement *replacement = new Replacement( $1->loc, nspace, pd->rootRegion, + new ReplItemList, pd->nextPatReplId++ ); + pd->replList.append( replacement ); + + LangVarRef *varRef = 0; + if ( $2->objField != 0 ) + varRef = new LangVarRef( $2->objField->loc, new QualItemVect, $2->objField->name ); + + $$->expr = new LangExpr( new LangTerm( $1->loc, LangTerm::ParseType, + varRef, $2->objField, $3->typeRef, generic, parserTypeRef, replacement ) ); + $$->expr->term->args = $5->exprVect; + + /* Check for redeclaration. */ + if ( $2->objField != 0 ) { + if ( pd->curLocalFrame->checkRedecl( $2->objField->name ) != 0 ) { + error( $2->objField->loc ) << "variable " << $2->objField->name << + " redeclared" << endp; + } + + /* Insert it into the field map. */ + $2->objField->typeRef = $3->typeRef; + pd->curLocalFrame->insertField( $2->objField->name, $2->objField ); + } + }; +code_factor: KW_ParseStop opt_capture type_ref '(' opt_code_expr_list ')' + final { + /* This is a silly clone. To be fixed later. */ + String parserName = $3->typeRef->typeName + "_parser"; + + /* Get the language element. 
*/ + Namespace *nspace = namespaceStack.top(); + + GenericType *generic = 0; + + NamespaceQual *nspaceQual = new NamespaceQual( + namespaceStack.top(), regionStack.top() ); + TypeRef *parserTypeRef = new TypeRef( TypeRef::Parser, + InputLoc(), nspaceQual, $3->typeRef, 0 ); + + Replacement *replacement = new Replacement( $1->loc, nspace, pd->rootRegion, + new ReplItemList, pd->nextPatReplId++ ); + pd->replList.append( replacement ); + + LangVarRef *varRef = 0; + if ( $2->objField != 0 ) + varRef = new LangVarRef( $2->objField->loc, new QualItemVect, $2->objField->name ); + + $$->expr = new LangExpr( new LangTerm( $1->loc, LangTerm::ParseStopType, + varRef, $2->objField, $3->typeRef, generic, parserTypeRef, replacement ) ); + $$->expr->term->args = $5->exprVect; + + /* Check for redeclaration. */ + if ( $2->objField != 0 ) { + if ( pd->curLocalFrame->checkRedecl( $2->objField->name ) != 0 ) { + error( $2->objField->loc ) << "variable " << $2->objField->name << + " redeclared" << endp; + } + + /* Insert it into the field map. 
*/ + $2->objField->typeRef = $3->typeRef; + pd->curLocalFrame->insertField( $2->objField->name, $2->objField ); + } + + }; +code_factor: KW_TypeId '<' type_ref '>' + final { + $$->expr = new LangExpr( new LangTerm( $1->loc, + LangTerm::TypeIdType, $3->typeRef ) ); + }; +code_factor: type_ref KW_In var_ref + final { + $$->expr = new LangExpr( new LangTerm( $2->loc, + LangTerm::SearchType, $1->typeRef, $3->varRef ) ); + }; +code_factor: KW_Nil + final { + $$->expr = new LangExpr( new LangTerm( $1->loc, + LangTerm::NilType ) ); + }; +code_factor: KW_True + final { + $$->expr = new LangExpr( new LangTerm( $1->loc, + LangTerm::TrueType ) ); + }; +code_factor: KW_False + final { + $$->expr = new LangExpr( new LangTerm( $1->loc, + LangTerm::FalseType ) ); + }; +code_factor: '(' code_expr ')' + final { + $$->expr = $2->expr; + }; +code_factor: KW_MakeTree '(' opt_code_expr_list ')' + final { + $$->expr = new LangExpr( new LangTerm( $1->loc, + LangTerm::MakeTreeType, $3->exprVect ) ); + }; +code_factor: KW_MakeToken '(' opt_code_expr_list ')' + final { + $$->expr = new LangExpr( new LangTerm( $1->loc, + LangTerm::MakeTokenType, $3->exprVect ) ); + }; +code_factor: KW_Deref code_expr + final { + $$->expr = new LangExpr( $1->loc, OP_Deref, $2->expr ); + }; +code_factor: string_list + final { + $$->expr = new LangExpr( new LangTerm( replItemList ) ); + }; + +nonterm opt_field_init uses field_init_list; + +opt_field_init: '(' opt_field_init_list ')' + final { + $$->fieldInitVect = $2->fieldInitVect; + }; +opt_field_init: + final { + $$->fieldInitVect = 0; + }; + +nonterm opt_field_init_list uses field_init_list; + +opt_field_init_list: field_init_list + final { + $$->fieldInitVect = $1->fieldInitVect; + }; +opt_field_init_list: + final { + $$->fieldInitVect = 0; + }; + +nonterm field_init_list +{ + FieldInitVect *fieldInitVect; +}; + +field_init_list: field_init_list field_init + final { + $$->fieldInitVect = $1->fieldInitVect; + $$->fieldInitVect->append( $2->fieldInit ); + }; 
+field_init_list: field_init + final { + $$->fieldInitVect = new FieldInitVect; + $$->fieldInitVect->append( $1->fieldInit ); + }; + +nonterm field_init +{ + FieldInit *fieldInit; +}; + +field_init: code_expr + final { + $$->fieldInit = new FieldInit( InputLoc(), "_name", $1->expr ); + }; + +# +# Regular Expressions +# + +nonterm opt_rl_join uses rl_join; + +opt_rl_join: rl_join opt_context + final { + $$->join = $1->join; + $$->context = $2->context; + + if ( $2->context != 0 ) { + /* Create the enter and leaving actions that will mark the substring. */ + Action *mark = new Action( MarkMark, pd->nextMatchEndNum++ ); + pd->actionList.append( mark ); + + $$->join->context = $2->context; + $$->join->mark = mark; + } + }; + +opt_rl_join: + final { + $$->join = 0; + $$->context = 0; + }; + +nonterm rl_join +{ + Join *join; + Join *context; +}; + +rl_join: + rl_join ',' rl_expr + final { + /* Append the expression to the list and return it. */ + $1->join->exprList.append( $3->expression ); + $$->join = $1->join; + }; +rl_join: + rl_expr + final { + $$->join = new Join( $1->expression ); + }; + +# Context at the end of a pattern that is not included in the match +nonterm opt_context uses rl_join; + +opt_context: '@' rl_join final { $$->context = $2->join; }; +opt_context: final { $$->context = 0; }; + +nonterm rl_expr +{ + Expression *expression; +}; + +rl_expr: + rl_expr '|' rl_term_short final { + $$->expression = new Expression( $1->expression, + $3->term, Expression::OrType ); + }; +rl_expr: + rl_expr '&' rl_term_short final { + $$->expression = new Expression( $1->expression, + $3->term, Expression::IntersectType ); + }; +# This priority specification overrides the innermost parsing strategy which +# results ordered choice interpretation of the grammar. 
+rl_expr: + rl_expr '-' rl_term_short final { + $$->expression = new Expression( $1->expression, + $3->term, Expression::SubtractType ); + }; +rl_expr: + rl_expr TK_DashDash rl_term_short final { + $$->expression = new Expression( $1->expression, + $3->term, Expression::StrongSubtractType ); + }; +rl_expr: + rl_term_short final { + $$->expression = new Expression( $1->term ); + }; + +nonterm rl_term_short +{ + Term *term; +}; + +shortest rl_term_short; + +rl_term_short: rl_term + final { $$->term = $1->term; }; + +nonterm rl_term +{ + Term *term; +}; + +rl_term: + rl_term factor_with_label final { + $$->term = new Term( $1->term, $2->factorWithAug ); + }; +rl_term: + rl_term '.' factor_with_label final { + $$->term = new Term( $1->term, $3->factorWithAug ); + }; +rl_term: + rl_term TK_ColonGt factor_with_label final { + $$->term = new Term( $1->term, $3->factorWithAug, Term::RightStartType ); + }; +rl_term: + rl_term TK_ColonGtGt factor_with_label final { + $$->term = new Term( $1->term, $3->factorWithAug, Term::RightFinishType ); + }; +rl_term: + rl_term TK_LtColon factor_with_label final { + $$->term = new Term( $1->term, + $3->factorWithAug, Term::LeftType ); + }; +rl_term: + factor_with_label final { + $$->term = new Term( $1->factorWithAug ); + }; + +nonterm factor_with_label +{ + FactorWithAug *factorWithAug; +}; + +factor_with_label: + factor_with_ep final { + $$->factorWithAug = $1->factorWithAug; + }; + +factor_with_label: + TK_Word ':' factor_with_label final { + $$->factorWithAug = $3->factorWithAug; + + if ( pd->objectDef->checkRedecl( $1->data ) != 0 ) + error($1->loc) << "label name \"" << $1->data << "\" already in use" << endp; + + /* Create the object field. */ + NamespaceQual *qual = new NamespaceQual( namespaceStack.top(), regionStack.top() ); + TypeRef *typeRef = new TypeRef( $1->loc, qual, "str" ); + ObjField *objField = new ObjField( $1->loc, typeRef, $1->data ); + + /* Insert it into the map. 
*/ + pd->objectDef->insertField( $1->data, objField ); + + /* Create the enter and leaving actions that will mark the substring. */ + Action *enter = new Action( MarkMark, pd->nextMatchEndNum++ ); + Action *leave = new Action( MarkMark, pd->nextMatchEndNum++ ); + pd->actionList.append( enter ); + pd->actionList.append( leave ); + + /* Add entering and leaving actions. */ + $$->factorWithAug->actions.append( ParserAction( $1->loc, at_start, 0, enter ) ); + $$->factorWithAug->actions.append( ParserAction( $1->loc, at_leave, 0, leave ) ); + + reCaptureVect.append( ReCapture( enter, leave, objField ) ); + }; + +nonterm factor_with_ep +{ + FactorWithAug *factorWithAug; +}; + +factor_with_ep: + factor_with_aug final { + $$->factorWithAug = $1->factorWithAug; + }; + +nonterm factor_with_aug +{ + FactorWithAug *factorWithAug; +}; + +factor_with_aug: + factor_with_rep final { + $$->factorWithAug = new FactorWithAug( $1->factorWithRep ); + }; + + +# The fourth level of precedence. These are the trailing unary operators that +# allow for repetition. + +nonterm factor_with_rep +{ + FactorWithRep *factorWithRep; +}; + +factor_with_rep: + factor_with_rep '*' final { + $$->factorWithRep = new FactorWithRep( $2->loc, $1->factorWithRep, + 0, 0, FactorWithRep::StarType ); + }; +factor_with_rep: + factor_with_rep TK_StarStar final { + $$->factorWithRep = new FactorWithRep( $2->loc, $1->factorWithRep, + 0, 0, FactorWithRep::StarStarType ); + }; +factor_with_rep: + factor_with_rep '?' 
final { + $$->factorWithRep = new FactorWithRep( $2->loc, $1->factorWithRep, + 0, 0, FactorWithRep::OptionalType ); + }; +factor_with_rep: + factor_with_rep '+' final { + $$->factorWithRep = new FactorWithRep( $2->loc, $1->factorWithRep, + 0, 0, FactorWithRep::PlusType ); + }; +factor_with_rep: + factor_with_rep '{' factor_rep_num '}' final { + $$->factorWithRep = new FactorWithRep( $2->loc, $1->factorWithRep, + $3->rep, 0, FactorWithRep::ExactType ); + }; +factor_with_rep: + factor_with_rep '{' ',' factor_rep_num '}' final { + $$->factorWithRep = new FactorWithRep( $2->loc, $1->factorWithRep, + 0, $4->rep, FactorWithRep::MaxType ); + }; +factor_with_rep: + factor_with_rep '{' factor_rep_num ',' '}' final { + $$->factorWithRep = new FactorWithRep( $2->loc, $1->factorWithRep, + $3->rep, 0, FactorWithRep::MinType ); + }; +factor_with_rep: + factor_with_rep '{' factor_rep_num ',' factor_rep_num '}' final { + $$->factorWithRep = new FactorWithRep( $2->loc, $1->factorWithRep, + $3->rep, $5->rep, FactorWithRep::RangeType ); + }; +factor_with_rep: + factor_with_neg final { + $$->factorWithRep = new FactorWithRep( + $1->factorWithNeg->loc, $1->factorWithNeg ); + }; + +nonterm factor_rep_num +{ + int rep; +}; + +factor_rep_num: + TK_UInt final { + // Convert the priority number to a long. Check for overflow. + errno = 0; + int rep = strtol( $1->data, 0, 10 ); + if ( errno == ERANGE && rep == LONG_MAX ) { + // Repetition too large. Recover by returing repetition 1. */ + error($1->loc) << "repetition number " << $1->data << " overflows" << endl; + $$->rep = 1; + } + else { + // Cannot be negative, so no overflow. + $$->rep = rep; + } + }; + + +# +# The fifth level up in precedence. Negation. +# + +nonterm factor_with_neg +{ + FactorWithNeg *factorWithNeg; +}; + +factor_with_neg: + '!' 
factor_with_neg final { + $$->factorWithNeg = new FactorWithNeg( $1->loc, + $2->factorWithNeg, FactorWithNeg::NegateType ); + }; +factor_with_neg: + '^' factor_with_neg final { + $$->factorWithNeg = new FactorWithNeg( $1->loc, + $2->factorWithNeg, FactorWithNeg::CharNegateType ); + }; +factor_with_neg: + rl_factor final { + $$->factorWithNeg = new FactorWithNeg( $1->factor->loc, $1->factor ); + }; + +nonterm rl_factor +{ + Factor *factor; +}; + +rl_factor: + TK_Literal final { + /* Create a new factor node going to a concat literal. */ + $$->factor = new Factor( new Literal( $1->loc, $1->data, Literal::LitString ) ); + }; +rl_factor: + alphabet_num final { + /* Create a new factor node going to a literal number. */ + $$->factor = new Factor( new Literal( $1->loc, + $1->data, Literal::Number ) ); + }; +rl_factor: + TK_Word final { + /* Find the named graph. */ + Namespace *nspace = namespaceStack.top(); + + while ( nspace != 0 ) { + GraphDictEl *gdNode = nspace->rlMap.find( $1->data ); + if ( gdNode != 0 ) { + if ( gdNode->isInstance ) { + /* Recover by retuning null as the factor node. */ + error($1->loc) << "references to graph instantiations not allowed " + "in expressions" << endl; + $$->factor = 0; + } + else { + /* Create a factor node that is a lookup of an expression. */ + $$->factor = new Factor( $1->loc, gdNode->value ); + } + break; + } + + nspace = nspace->parentNamespace; + } + + if ( nspace == 0 ) { + /* Recover by returning null as the factor node. */ + error($1->loc) << "graph lookup of \"" << $1->data << "\" failed" << endl; + $$->factor = 0; + } + }; +rl_factor: + TK_SqOpen regular_expr_or_data TK_SqClose final { + /* Create a new factor node going to an OR expression. */ + $$->factor = new Factor( new ReItem( $1->loc, $2->reOrBlock, ReItem::OrBlock ) ); + }; +rl_factor: + TK_SqOpenNeg regular_expr_or_data TK_SqClose final { + /* Create a new factor node going to a negated OR expression. 
*/ + $$->factor = new Factor( new ReItem( $1->loc, $2->reOrBlock, ReItem::NegOrBlock ) ); + }; +rl_factor: + range_lit TK_DotDot range_lit final { + /* Create a new factor node going to a range. */ + $$->factor = new Factor( new Range( $1->literal, $3->literal ) ); + }; +rl_factor: + '(' rl_join ')' final { + /* Create a new factor going to a parenthesized join. */ + $$->factor = new Factor( $2->join ); + }; + +nonterm range_lit +{ + Literal *literal; +}; + +# Literals which can be the end points of ranges. +range_lit: + TK_Literal final { + /* Range literals must have only one char. We restrict this in the parse tree. */ + $$->literal = new Literal( $1->loc, $1->data, Literal::LitString ); + }; +range_lit: + alphabet_num final { + /* Create a new literal number. */ + $$->literal = new Literal( $1->loc, $1->data, Literal::Number ); + }; + +nonterm alphabet_num uses token_data; + +# Any form of a number that can be used as a basic machine. +alphabet_num: + TK_UInt final { + $$->loc = $1->loc; + $$->data = $1->data; + }; +alphabet_num: + '-' TK_UInt final { + /* Negative number: keep the minus sign in front of the digits so the + * literal text carries the sign that was written in the source. */ + $$->loc = $1->loc; + $$->data = '-'; + $$->data += $2->data; + }; +alphabet_num: + TK_Hex final { + $$->loc = $1->loc; + $$->data = $1->data; + }; + +# +# Regular Expressions. +# + + +# The data inside of a [] expression in a regular expression. Accepts any +# number of characters or ranges. +nonterm regular_expr_or_data +{ + ReOrBlock *reOrBlock; +}; + +regular_expr_or_data: + regular_expr_or_data regular_expr_or_char final { + /* An optimization to lessen the tree size. If an or char is directly + * under the left side on the right and the right side is another or + * char then paste them together and return the left side. Otherwise + * just put the two under a new or data node. 
*/ + if ( $2->reOrItem->type == ReOrItem::Data && + $1->reOrBlock->type == ReOrBlock::RecurseItem && + $1->reOrBlock->item->type == ReOrItem::Data ) + { + /* Append the right side to right side of the left and toss the + * right side. */ + $1->reOrBlock->item->data += $2->reOrItem->data; + delete $2->reOrItem; + $$->reOrBlock = $1->reOrBlock; + } + else { + /* Can't optimize, put the left and right under a new node. */ + $$->reOrBlock = new ReOrBlock( $1->reOrBlock, $2->reOrItem ); + } + }; +regular_expr_or_data: + final { + $$->reOrBlock = new ReOrBlock(); + }; + +# A single character inside of an or expression. Can either be a character or a +# set of characters. +nonterm regular_expr_or_char +{ + ReOrItem *reOrItem; +}; + +regular_expr_or_char: + TK_ReChar final { + $$->reOrItem = new ReOrItem( $1->loc, $1->data ); + }; +regular_expr_or_char: + TK_ReChar TK_Dash TK_ReChar final { + $$->reOrItem = new ReOrItem( $2->loc, $1->data[0], $3->data[0] ); + }; + +# A local state reference. Cannot have :: prefix. +local_state_ref: + no_name_sep state_ref_names; + +# Clear the name ref structure. +no_name_sep: + final { + nameRef.empty(); + }; + +# A qualified state reference. +state_ref: opt_name_sep state_ref_names; + +# Optional leading name separator. +opt_name_sep: + TK_NameSep + final { + /* Insert an initial null pointer val to indicate the existence of the + * initial name separator. */ + nameRef.setAs( 0 ); + }; +opt_name_sep: + final { + nameRef.empty(); + }; + +# List of names separated by :: +state_ref_names: + state_ref_names TK_NameSep TK_Word + final { + nameRef.append( $3->data ); + }; +state_ref_names: + TK_Word + final { + nameRef.append( $1->data ); + }; + +nonterm opt_commit +{ + bool commit; +}; + +opt_commit: final { $$->commit = false; }; +opt_commit: KW_Commit final { $$->commit = true; }; + +# +# Grammar Finished +# + + write types; + write data; +}%% + +void ColmParser::init() +{ + /* Set up the root namespace. 
*/ + const char *rootNamespaceName = "___ROOT_NAMESPACE"; + Namespace *rootNamespace = new Namespace( InputLoc(), + rootNamespaceName, pd->namespaceList.length(), 0 ); + pd->namespaceList.append( rootNamespace ); + namespaceStack.push( rootNamespace ); + pd->rootNamespace = rootNamespace; + + /* Set up the root token region. */ + const char *rootRegionName = "___ROOT_REGION"; + + TokenRegion *rootRegion = new TokenRegion( InputLoc(), rootRegionName, + pd->regionList.length(), 0 ); + pd->regionList.append( rootRegion ); + addRegionDef( InputLoc(), namespaceStack.top(), rootRegionName, rootRegion ); + + regionStack.push( rootRegion ); + + pd->rootRegion = rootRegion; + + /* Set up the global object. */ + String global = "global"; + pd->globalObjectDef = new ObjectDef( ObjectDef::UserType, + global, pd->nextObjectId++ ); + + /* The eofTokenRegion defaults to the root region. */ + pd->eofTokenRegion = rootRegion; + + /* Initialize the dictionary of graphs. This is our symbol table. The + * initialization needs to be done on construction which happens at the + * beginning of a machine spec so any assignment operators can reference + * the builtins. */ + pd->initGraphDict(); + + pd->rootLocalFrame = new ObjectDef( ObjectDef::FrameType, + "local", pd->nextObjectId++ ); + pd->curLocalFrame = pd->rootLocalFrame; + + %% write init; + + addArgvList(); +} + +void ColmParser::addArgvList() +{ + NamespaceQual *nspaceQual1 = new NamespaceQual( + namespaceStack.top(), regionStack.top() ); + TypeRef *typeRef = new TypeRef( InputLoc(), nspaceQual1, "str" ); + + NamespaceQual *nspaceQual2 = new NamespaceQual( + namespaceStack.top(), regionStack.top() ); + + pd->argvTypeRef = new TypeRef( TypeRef::List, InputLoc(), + nspaceQual2, typeRef, 0 ); +} + +int ColmParser::parseLangEl( int type, const Token *token ) +{ + %% write exec; + return errCount == 0 ? 
0 : -1; +} + +void ColmParser::addRegularDef( const InputLoc &loc, Namespace *nspace, + const String &name, Join *join ) +{ + GraphDictEl *newEl = nspace->rlMap.insert( name ); + if ( newEl != 0 ) { + /* New element in the dict, all good. */ + newEl->value = new VarDef( name, join ); + newEl->isInstance = false; + newEl->loc = loc; + } + else { + // Recover by ignoring the duplicate. + error(loc) << "regular definition \"" << name << "\" already exists" << endl; + } +} + +TokenRegion *ColmParser::createRegion( String &scannerName ) +{ + TokenRegion *tokenRegion = new TokenRegion( InputLoc(), scannerName, + pd->regionList.length(), regionStack.top() ); + + regionStack.top()->childRegions.append( tokenRegion ); + + pd->regionList.append( tokenRegion ); + + addRegionDef( InputLoc(), namespaceStack.top(), scannerName, tokenRegion ); + + return tokenRegion; +} + + +void ColmParser::addRegionDef( const InputLoc &loc, Namespace *nspace, + const String &name, TokenRegion *tokenRegion ) +{ + RegionGraphDictEl *newEl = nspace->graphDict.insert( name ); + if ( newEl != 0 ) { + /* New element in the dict, all good. */ + newEl->value = new RegionDef( name, tokenRegion ); + newEl->isInstance = true; + newEl->loc = loc; + + /* It it is an instance, put on the instance list. */ + pd->instanceList.append( newEl ); + } + else { + // Recover by ignoring the duplicate. + error(loc) << "regular definition \"" << name << "\" already exists" << endl; + } +} + +ostream &ColmParser::parse_error( int tokId, Token &token ) +{ + /* Maintain the error count. 
*/ + gblErrorCount += 1; + + cerr << token.loc.fileName << ":" << token.loc.line << ":" << token.loc.col << ": "; + cerr << "at token "; + if ( tokId < 128 ) + cerr << "\"" << ColmParser_lelNames[tokId] << "\""; + else + cerr << ColmParser_lelNames[tokId]; + if ( token.data != 0 ) + cerr << " with data \"" << token.data << "\""; + cerr << ": "; + + return cerr; +} + +int ColmParser::token( InputLoc &loc, int tokId, char *tokstart, int toklen ) +{ + Token token; + + if ( toklen > 0 ) + token.data.setAs( tokstart, toklen ); + + token.loc = loc; + int res = parseLangEl( tokId, &token ); + if ( res < 0 ) { + parse_error(tokId, token) << "parse error" << endl; + exit(1); + } + return res; +} diff --git a/src/lmscan.h b/src/lmscan.h new file mode 100644 index 00000000..5badaed5 --- /dev/null +++ b/src/lmscan.h @@ -0,0 +1,118 @@ +/* + * Copyright 2007-2012 Adrian Thurston <thurston@complang.org> + */ + +/* This file is part of Colm. + * + * Colm is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Colm is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with Colm; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _RLSCAN_H +#define _RLSCAN_H + +#include <iostream> +#include <fstream> +#include <string.h> + +#include "global.h" +#include "lmparse.h" +#include "parsedata.h" +#include "avltree.h" +#include "vector.h" +#include "buffer.h" + +using std::ifstream; +using std::istream; +using std::ostream; +using std::cout; +using std::cerr; +using std::endl; + +extern char *Parser_lelNames[]; + +/* This is used for tracking the current stack of include file/machine pairs. It is + * is used to detect and recursive include structure. */ +struct IncludeStackItem +{ + IncludeStackItem( const char *fileName ) + : fileName(fileName) {} + + const char *fileName; +}; + +typedef Vector<IncludeStackItem> IncludeStack; +typedef Vector<const char *> ArgsVector; + +extern ArgsVector includePaths; + +struct ColmScanner +{ + ColmScanner( const char *fileName, istream &input, + ostream &output, ColmParser *parser, int includeDepth ) + : + fileName(fileName), input(input), output(output), + includeDepth(includeDepth), + line(1), column(1), lastnl(0), + parser(parser), + parserExistsError(false), + whitespaceOn(true) + { + } + + ifstream *tryOpenInclude( char **pathChecks, long &found ); + char **makeIncludePathChecks( const char *thisFileName, const char *fileName ); + bool recursiveInclude( const char *inclFileName ); + + void sectionParseInit(); + void token( int type, char *start, char *end ); + void token( int type, char c ); + void token( int type ); + void updateCol(); + void endSection(); + void scan(); + void eof(); + ostream &scan_error(); + + const char *fileName; + istream &input; + ostream &output; + int includeDepth; + + int cs; + int line; + char *word, *lit; + int word_len, lit_len; + InputLoc sectionLoc; + char *ts, *te; + int column; + char *lastnl; + + /* Set 
by machine statements, these persist from section to section + * allowing for unnamed sections. */ + ColmParser *parser; + IncludeStack includeStack; + + /* This is set if ragel has already emitted an error stating that + * no section name has been seen and thus no parser exists. */ + bool parserExistsError; + + /* This is for inline code. By default it is on. It goes off for + * statements and values in inline blocks which are parsed. */ + bool whitespaceOn; + + Buffer litBuf; +}; + +#endif /* _RLSCAN_H */ diff --git a/src/lmscan.rl b/src/lmscan.rl new file mode 100644 index 00000000..070a1e66 --- /dev/null +++ b/src/lmscan.rl @@ -0,0 +1,636 @@ +/* + * Copyright 2006-2012 Adrian Thurston <thurston@complang.org> + */ + +/* This file is part of Colm. + * + * Colm is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Colm is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with Colm; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <iostream> +#include <fstream> +#include <string.h> + +#include "global.h" +#include "lmscan.h" +#include "lmparse.h" +#include "parsedata.h" +#include "avltree.h" +#include "vector.h" + +//#define PRINT_TOKENS + +using std::ifstream; +using std::istream; +using std::ostream; +using std::cout; +using std::cerr; +using std::endl; + +%%{ + machine section_parse; + alphtype int; + write data; +}%% + +void ColmScanner::sectionParseInit() +{ + %% write init; +} + +ostream &ColmScanner::scan_error() +{ + /* Maintain the error count. */ + gblErrorCount += 1; + cerr << fileName << ":" << line << ":" << column << ": "; + return cerr; +} + +bool ColmScanner::recursiveInclude( const char *inclFileName ) +{ + for ( IncludeStack::Iter si = includeStack; si.lte(); si++ ) { + if ( strcmp( si->fileName, inclFileName ) == 0 ) + return true; + } + return false; +} + +void ColmScanner::updateCol() +{ + char *from = lastnl; + if ( from == 0 ) + from = ts; + //cerr << "adding " << te - from << " to column" << endl; + column += te - from; + lastnl = 0; +} + +void ColmScanner::token( int type, char c ) +{ + token( type, &c, &c + 1 ); +} + +void ColmScanner::token( int type ) +{ + token( type, 0, 0 ); +} + +bool isAbsolutePath( const char *path ) +{ + return path[0] == '/'; +} + +ifstream *ColmScanner::tryOpenInclude( char **pathChecks, long &found ) +{ + char **check = pathChecks; + ifstream *inFile = new ifstream; + + while ( *check != 0 ) { + inFile->open( *check ); + if ( inFile->is_open() ) { + found = check - pathChecks; + return inFile; + } + check += 1; + } + + found = -1; + delete inFile; + return 0; +} + +char **ColmScanner::makeIncludePathChecks( const char *thisFileName, const char *fileName ) +{ + char **checks = 0; + long nextCheck = 0; + char *data = 
strdup(fileName); + long length = strlen(fileName); + + /* Absolute path? */ + if ( isAbsolutePath( data ) ) { + checks = new char*[2]; + checks[nextCheck++] = data; + } + else { + /* Search from the the location of the current file. */ + checks = new char *[2 + includePaths.length()]; + const char *lastSlash = strrchr( thisFileName, '/' ); + if ( lastSlash == 0 ) + checks[nextCheck++] = data; + else { + long givenPathLen = (lastSlash - thisFileName) + 1; + long checklen = givenPathLen + length; + char *check = new char[checklen+1]; + memcpy( check, thisFileName, givenPathLen ); + memcpy( check+givenPathLen, data, length ); + check[checklen] = 0; + checks[nextCheck++] = check; + } + + /* Search from the include paths given on the command line. */ + for ( ArgsVector::Iter incp = includePaths; incp.lte(); incp++ ) { + long pathLen = strlen( *incp ); + long checkLen = pathLen + 1 + length; + char *check = new char[checkLen+1]; + memcpy( check, *incp, pathLen ); + check[pathLen] = '/'; + memcpy( check+pathLen+1, data, length ); + check[checkLen] = 0; + checks[nextCheck++] = check; + } + } + + checks[nextCheck] = 0; + return checks; +} + + +%%{ + machine section_parse; + import "lmparse.h"; + + action clear_words { word = lit = 0; word_len = lit_len = 0; } + action store_lit { lit = tokdata; lit_len = toklen; } + + action mach_err { scan_error() << "bad machine statement" << endl; } + action incl_err { scan_error() << "bad include statement" << endl; } + action write_err { scan_error() << "bad write statement" << endl; } + + action handle_include + { + String src( lit, lit_len ); + String fileName; + bool unused; + + /* Need a location. */ + InputLoc here; + here.fileName = fileName; + here.line = line; + here.col = column; + + prepareLitString( fileName, unused, src, here ); + char **checks = makeIncludePathChecks( this->fileName, fileName ); + + /* Open the input file for reading. 
*/ + long found = 0; + ifstream *inFile = tryOpenInclude( checks, found ); + if ( inFile == 0 ) { + scan_error() << "include: could not open " << + fileName << " for reading" << endl; + } + else { + /* Only proceed with the include if it was found. */ + if ( recursiveInclude( checks[found] ) ) + scan_error() << "include: this is a recursive include operation" << endl; + + /* Check for a recursive include structure. Add the current file/section + * name then check if what we are including is already in the stack. */ + includeStack.append( IncludeStackItem( checks[found] ) ); + + ColmScanner *scanner = new ColmScanner( fileName, *inFile, output, parser, includeDepth+1 ); + scanner->scan(); + delete inFile; + + /* Remove the last element (len-1) */ + includeStack.remove( -1 ); + + delete scanner; + } + } + + include_target = + TK_Literal >clear_words @store_lit; + + include_stmt = + ( KW_Include include_target ) @handle_include + <>err incl_err <>eof incl_err; + + action handle_token + { +// cout << Parser_lelNames[type] << " "; +// if ( start != 0 ) { +// cout.write( start, end-start ); +// } +// cout << endl; + + InputLoc loc; + + #ifdef PRINT_TOKENS + cerr << "scanner:" << line << ":" << column << + ": sending token to the parser " << Parser_lelNames[*p]; + cerr << " " << toklen; + if ( tokdata != 0 ) + cerr << " " << tokdata; + cerr << endl; + #endif + + loc.fileName = fileName; + loc.line = line; + loc.col = column; + + if ( tokdata != 0 && tokdata[toklen-1] == '\n' ) + loc.line -= 1; + + parser->token( loc, type, tokdata, toklen ); + } + + # Catch everything else. 
+ everything_else = ^( KW_Include ) @handle_token; + + main := ( + include_stmt | + everything_else + )*; +}%% + +void ColmScanner::token( int type, char *start, char *end ) +{ + char *tokdata = 0; + int toklen = 0; + int *p = &type; + int *pe = &type + 1; + int *eof = 0; + + if ( start != 0 ) { + toklen = end-start; + tokdata = new char[toklen+1]; + memcpy( tokdata, start, toklen ); + tokdata[toklen] = 0; + } + + %%{ + machine section_parse; + write exec; + }%% + + updateCol(); +} + +void ColmScanner::endSection( ) +{ + /* Execute the eof actions for the section parser. */ + /* Probably use: token( -1 ); */ +} + +%%{ + machine rlscan; + + # This is sent by the driver code. + EOF = 0; + + action inc_nl { + lastnl = p; + column = 0; + line++; + } + NL = '\n' @inc_nl; + + # Identifiers, numbers, commetns, and other common things. + ident = ( alpha | '_' ) ( alpha |digit |'_' )*; + number = digit+; + hex_number = '0x' [0-9a-fA-F]+; + + # These literal forms are common to C-like host code and ragel. + s_literal = "'" ([^'\\] | NL | '\\' (any | NL))* "'"; + d_literal = '"' ([^"\\] | NL | '\\' (any | NL))* '"'; + + whitespace = [ \t] | NL; + pound_comment = '#' [^\n]* NL; + + or_literal := |* + # Escape sequences in OR expressions. + '\\0' => { token( TK_ReChar, '\0' ); }; + '\\a' => { token( TK_ReChar, '\a' ); }; + '\\b' => { token( TK_ReChar, '\b' ); }; + '\\t' => { token( TK_ReChar, '\t' ); }; + '\\n' => { token( TK_ReChar, '\n' ); }; + '\\v' => { token( TK_ReChar, '\v' ); }; + '\\f' => { token( TK_ReChar, '\f' ); }; + '\\r' => { token( TK_ReChar, '\r' ); }; + '\\\n' => { updateCol(); }; + '\\' any => { token( TK_ReChar, ts+1, te ); }; + + # Range dash in an OR expression. + '-' => { token( TK_Dash, 0, 0 ); }; + + # Terminate an OR expression. + ']' => { token( TK_SqClose ); fret; }; + + EOF => { + scan_error() << "unterminated OR literal" << endl; + }; + + # Characters in an OR expression. 
+ [^\]] => { token( TK_ReChar, ts, te ); }; + + *|; + + regular_type := |* + # Identifiers. + ident => { token( TK_Word, ts, te ); } ; + + # Numbers + number => { token( TK_UInt, ts, te ); }; + hex_number => { token( TK_Hex, ts, te ); }; + + # Literals, with optionals. + ( s_literal | d_literal ) [i]? + => { token( TK_Literal, ts, te ); }; + + '[' => { token( TK_SqOpen ); fcall or_literal; }; + '[^' => { token( TK_SqOpenNeg ); fcall or_literal; }; + + '/' => { token( '/'); fret; }; + + # Ignore. + pound_comment => { updateCol(); }; + + '..' => { token( TK_DotDot ); }; + '**' => { token( TK_StarStar ); }; + '--' => { token( TK_DashDash ); }; + + ':>' => { token( TK_ColonGt ); }; + ':>>' => { token( TK_ColonGtGt ); }; + '<:' => { token( TK_LtColon ); }; + + # Whitespace other than newline. + [ \t\r]+ => { updateCol(); }; + + # If we are in a single line machine then newline may end the spec. + NL => { updateCol(); }; + + # Consume eof. + EOF; + + any => { token( *ts ); } ; + *|; + + literal_pattern := |* + '\\' '0' { litBuf.append( '\0' ); }; + '\\' 'a' { litBuf.append( '\a' ); }; + '\\' 'b' { litBuf.append( '\b' ); }; + '\\' 't' { litBuf.append( '\t' ); }; + '\\' 'n' { litBuf.append( '\n' ); }; + '\\' 'v' { litBuf.append( '\v' ); }; + '\\' 'f' { litBuf.append( '\f' ); }; + '\\' 'r' { litBuf.append( '\r' ); }; + + '\\' any { + litBuf.append( ts[1] ); + }; + '"' => { + if ( litBuf.length > 0 ) { + token( TK_LitPat, litBuf.data, litBuf.data+litBuf.length ); + litBuf.clear(); + } + token( '"' ); + fret; + }; + NL => { + litBuf.append( '\n' ); + token( TK_LitPat, litBuf.data, litBuf.data+litBuf.length ); + litBuf.clear(); + token( '"' ); + fret; + }; + '[' => { + if ( litBuf.length > 0 ) { + token( TK_LitPat, litBuf.data, litBuf.data+litBuf.length ); + litBuf.clear(); + } + token( '[' ); + fcall main; + }; + any => { + litBuf.append( *ts ); + }; + *|; + + # Parser definitions. 
+ main := |* + 'lex' => { token( KW_Lex ); }; + 'commit' => { token( KW_Commit ); }; + 'token' => { token( KW_Token ); }; + 'literal' => { token( KW_Literal ); }; + 'rl' => { token( KW_Rl ); }; + 'def' => { token( KW_Def ); }; + 'ignore' => { token( KW_Ignore ); }; + 'construct' => { token( KW_Construct ); }; + 'cons' => { token( KW_Construct ); }; + 'new' => { token( KW_New ); }; + 'if' => { token( KW_If ); }; + 'reject' => { token( KW_Reject ); }; + 'while' => { token( KW_While ); }; + 'else' => { token( KW_Else ); }; + 'elsif' => { token( KW_Elsif ); }; + 'match' => { token( KW_Match ); }; + 'for' => { token( KW_For ); }; + 'iter' => { token( KW_Iter ); }; + 'prints' => { token( KW_PrintStream ); }; + 'print' => { token( KW_Print ); }; + 'print_xml_ac' => { token( KW_PrintXMLAC ); }; + 'print_xml' => { token( KW_PrintXML ); }; + 'namespace' => { token( KW_Namespace ); }; + 'lex' => { token( KW_Lex ); }; + 'map' => { token( KW_Map ); }; + 'list' => { token( KW_List ); }; + 'vector' => { token( KW_Vector ); }; + 'accum' => { token( KW_Accum ); }; + 'parser' => { token( KW_Accum ); }; + 'return' => { token( KW_Return ); }; + 'break' => { token( KW_Break ); }; + 'yield' => { token( KW_Yield ); }; + 'typeid' => { token( KW_TypeId ); }; + 'make_token' => { token( KW_MakeToken ); }; + 'make_tree' => { token( KW_MakeTree ); }; + 'reducefirst' => { token( KW_ReduceFirst ); }; + 'for' => { token( KW_For ); }; + 'in' => { token( KW_In ); }; + 'nil' => { token( KW_Nil ); }; + 'true' => { token( KW_True ); }; + 'false' => { token( KW_False ); }; + 'parse' => { token( KW_Parse ); }; + 'parse_stop' => { token( KW_ParseStop ); }; + 'global' => { token( KW_Global ); }; + 'export' => { token( KW_Export ); }; + 'ptr' => { token( KW_Ptr ); }; + 'ref' => { token( KW_Ref ); }; + 'deref' => { token( KW_Deref ); }; + 'require' => { token( KW_Require ); }; + 'preeof' => { token( KW_Preeof ); }; + 'left' => { token( KW_Left ); }; + 'right' => { token( KW_Right ); }; + 'nonassoc' => { 
token( KW_Nonassoc ); }; + 'prec' => { token( KW_Prec ); }; + 'include' => { token( KW_Include ); }; + 'context' => { token( KW_Context ); }; + 'alias' => { token( KW_Alias ); }; + 'send' => { token( KW_Send ); }; + 'ni' => { token( KW_Ni ); }; + 'ci' => { token( KW_Ci ); }; + + # Identifiers. + ident => { token( TK_Word, ts, te ); } ; + + number => { token( TK_Number, ts, te ); }; + + '/' => { + token( '/' ); + if ( parser->enterRl ) + fcall regular_type; + }; + + "~" [^\n]* NL => { + token( '"' ); + token( TK_LitPat, ts+1, te ); + token( '"' ); + }; + + "'" ([^'\\\n] | '\\' (any | NL))* ( "'" | NL ) => { + token( TK_Literal, ts, te ); + }; + + '"' => { + token( '"' ); + litBuf.clear(); + fcall literal_pattern; + }; + '[' => { + token( '[' ); + fcall main; + }; + + ']' => { + token( ']' ); + if ( top > 0 ) + fret; + }; + + # Ignore. + pound_comment => { updateCol(); }; + + '=>' => { token( TK_DoubleArrow ); }; + '==' => { token( TK_DoubleEql ); }; + '!=' => { token( TK_NotEql ); }; + '::' => { token( TK_DoubleColon ); }; + '<=' => { token( TK_LessEql ); }; + '>=' => { token( TK_GrtrEql ); }; + '->' => { token( TK_RightArrow ); }; + '&&' => { token( TK_AmpAmp ); }; + '||' => { token( TK_BarBar ); }; + '<<' => { token( TK_LtLt ); }; + + ('+' | '-' | '*' | '/' | '(' | ')' | '@' | '$' | '^' ) => { token( *ts ); }; + + + # Whitespace other than newline. + [ \t\r]+ => { updateCol(); }; + NL => { updateCol(); }; + + # Consume eof. + EOF; + + any => { token( *ts ); } ; + *|; +}%% + +%% write data; + +void ColmScanner::scan() +{ + int bufsize = 8; + char *buf = new char[bufsize]; + const char last_char = 0; + int cs, act, have = 0; + int top, stack[32]; + bool execute = true; + + sectionParseInit(); + %% write init; + + while ( execute ) { + char *p = buf + have; + int space = bufsize - have; + + if ( space == 0 ) { + /* We filled up the buffer trying to scan a token. Grow it. */ + bufsize = bufsize * 2; + char *newbuf = new char[bufsize]; + + /* Recompute p and space. 
*/ + p = newbuf + have; + space = bufsize - have; + + /* Patch up pointers possibly in use. */ + if ( ts != 0 ) + ts = newbuf + ( ts - buf ); + te = newbuf + ( te - buf ); + + /* Copy the new buffer in. */ + memcpy( newbuf, buf, have ); + delete[] buf; + buf = newbuf; + } + + input.read( p, space ); + int len = input.gcount(); + + /* If we see eof then append the EOF char. */ + if ( len == 0 ) { + p[0] = last_char, len = 1; + execute = false; + } + + char *pe = p + len; + char *eof = 0; + %% write exec; + + /* Check if we failed. */ + if ( cs == rlscan_error ) { + /* Machine failed before finding a token. I'm not yet sure if this + * is reachable. */ + scan_error() << "colm scanner error (metalanguage)" << endl; + exit(1); + } + + /* Decide if we need to preserve anything. */ + char *preserve = ts; + + /* Now set up the prefix. */ + if ( preserve == 0 ) + have = 0; + else { + /* There is data that needs to be shifted over. */ + have = pe - preserve; + memmove( buf, preserve, have ); + unsigned int shiftback = preserve - buf; + if ( ts != 0 ) + ts -= shiftback; + te -= shiftback; + + preserve = buf; + } + } + delete[] buf; +} + +void ColmScanner::eof() +{ + InputLoc loc; + loc.fileName = "<EOF>"; + loc.line = line; + loc.col = 1; + parser->token( loc, ColmParser_tk_eof, 0, 0 ); +} diff --git a/src/main.cc b/src/main.cc new file mode 100644 index 00000000..6856da27 --- /dev/null +++ b/src/main.cc @@ -0,0 +1,623 @@ +/* + * Copyright 2006-2012 Adrian Thurston <thurston@complang.org> + */ + +/* This file is part of Colm. + * + * Colm is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Colm is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Colm; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <stdlib.h> +#include <string.h> +#include <stdio.h> +#include <iostream> +#include <fstream> +#include <unistd.h> +#include <sstream> +#include <sys/types.h> +#include <sys/stat.h> +#include <unistd.h> + +#include "global.h" +#include "debug.h" +#include "lmscan.h" +#include "pcheck.h" +#include "vector.h" +#include "version.h" +#include "keyops.h" +#include "parsedata.h" +#include "vector.h" +#include "version.h" +#include "fsmcodegen.h" + +using std::istream; +using std::ifstream; +using std::ostream; +using std::ios; +using std::cin; +using std::cout; +using std::cerr; +using std::endl; + +/* Graphviz dot file generation. */ +bool genGraphviz = false; + +using std::ostream; +using std::istream; +using std::ifstream; +using std::ofstream; +using std::ios; +using std::cout; +using std::cerr; +using std::cin; +using std::endl; + +/* Io globals. */ +istream *inStream = 0; +ostream *outStream = 0; +const char *inputFileName = 0; +const char *outputFileName = 0; +const char *gblExportTo = 0; +const char *gblExpImplTo = 0; +bool exportCode = false; + +bool generateGraphviz = false; +bool verbose = false; +bool logging = false; +bool branchPointInfo = false; +bool addUniqueEmptyProductions = false; +bool gblLibrary = false; + +ArgsVector includePaths; + +/* Print version information. */ +void version(); + +/* Total error count. */ +int gblErrorCount = 0; + +HostType hostTypesC[] = +{ + { "char", 0, true, CHAR_MIN, CHAR_MAX, sizeof(char) }, +}; + +HostLang hostLangC = { hostTypesC, 8, hostTypesC+0, true }; + +HostLang *hostLang = &hostLangC; +HostLangType hostLangType = CCode; + +/* Print the opening to an error in the input, then return the error ostream. 
*/
+ostream &error( const InputLoc &loc )
+{
+	/* Bump the global error count, then hand back cerr already primed with
+	 * the "error: <file>:<line>:<col>: " prefix so the caller can append the
+	 * message text. */
+	++gblErrorCount;
+	return cerr << "error: " << inputFileName << ":" <<
+			loc.line << ":" << loc.col << ": ";
+}
+
+/* Program-level error (no input location): count it, write the
+ * "error: PROGNAME: " prefix and return the error stream. */
+ostream &error()
+{
+	++gblErrorCount;
+	return cerr << "error: " PROGNAME ": ";
+}
+
+
+/* Warning without a location: writes "warning: <file>: " and returns the
+ * error stream. Does not touch the error count. */
+ostream &warning( )
+{
+	return cerr << "warning: " << inputFileName << ": ";
+}
+
+/* Warning tied to an input location: writes "warning: <file>:<line>:<col>: "
+ * and returns the error stream. Does not touch the error count. */
+ostream &warning( const InputLoc &loc )
+{
+	assert( inputFileName != 0 );
+	return cerr << "warning: " << inputFileName << ":" <<
+			loc.line << ":" << loc.col << ": ";
+}
+
+/* Copy path to out, writing every backslash as two backslashes. */
+void escapeLineDirectivePath( std::ostream &out, char *path )
+{
+	char *cursor = path;
+	while ( *cursor != 0 ) {
+		if ( *cursor != '\\' )
+			out << *cursor;
+		else
+			out << "\\\\";
+		cursor++;
+	}
+}
+
+void escapeLineDirectivePath( std::ostream &out, char *path );
+void scan( char *fileName, istream &input );
+
+bool printStatistics = false;
+
+/* Write the option summary to standard output. */
+void usage()
+{
+	const char *summary =
+"usage: colm [options] file\n"
+"general:\n"
+" -h, -H, -?, --help print this usage and exit\n"
+" -v --version print version information and exit\n"
+" -o <file> write output to <file>\n"
+" -i show conflict information\n"
+" -d make colm verbose\n"
+" -l compile logging into the output executable\n"
+	;
+	cout << summary;
+}
+
+/* Write the version banner and copyright line to standard output. */
+void version()
+{
+	cout << "Colm version " VERSION << " " PUBDATE << endl;
+	cout << "Copyright (c) 2007-2012 by Adrian D. Thurston" << endl;
+}
+
+/* Scans a string looking for the file extension. If there is a file
+ * extension then pointer returned points to inside the string
+ * passed in. Otherwise returns null. 
*/
+const char *findFileExtension( const char *stemFile )
+{
+	/* Start one past the last character rather than on it. For an empty
+	 * string the old "strlen - 1" start pointed before the buffer and was
+	 * then dereferenced; this form never leaves the string. */
+	const char *ppos = stemFile + strlen(stemFile);
+
+	/* Scan backwards from the end looking for the first dot. */
+	while ( ppos > stemFile ) {
+		ppos--;
+
+		/* If we hit a / before any dot then the last path component has
+		 * no extension. Done. */
+		if ( *ppos == '/' )
+			return 0;
+
+		/* A dot in the very first position is not reported as an
+		 * extension; the original scan stopped at the front of the
+		 * string without looking at that character. */
+		if ( *ppos == '.' )
+			return ppos == stemFile ? 0 : ppos;
+	}
+
+	/* Got to the front of the string (or it was empty) without finding
+	 * an extension. */
+	return 0;
+}
+
+/* Make a file name from a stem. Removes the old filename suffix and
+ * replaces it with a new one. Returns a newed up string (allocated with
+ * new[], so the caller releases it with delete[]). The stem must not be
+ * empty. */
+char *fileNameFromStem( const char *stemFile, const char *suffix )
+{
+	int len = strlen( stemFile );
+	assert( len > 0 );
+
+	/* Get the extension. */
+	const char *ppos = findFileExtension( stemFile );
+
+	/* If an extension was found, then shorten what we think the len is. */
+	if ( ppos != 0 )
+		len = ppos - stemFile;
+
+	/* Make the return string from the stem and the suffix. The strncpy
+	 * leaves no terminator; the strcpy of the suffix supplies it. */
+	char *retVal = new char[ len + strlen( suffix ) + 1 ];
+	strncpy( retVal, stemFile, len );
+	strcpy( retVal + len, suffix );
+
+	return retVal;
+}
+
+
+/* Invoked by the parser when the root element is opened. */
+void openOutput( )
+{
+	/* If the output format is code and no output file name is given, then
+	 * make a default. 
*/ + if ( outputFileName == 0 ) { + const char *ext = findFileExtension( inputFileName ); + if ( ext != 0 && strcmp( ext, ".rh" ) == 0 ) + outputFileName = fileNameFromStem( inputFileName, ".h" ); + else { + const char *defExtension = ".c"; + outputFileName = fileNameFromStem( inputFileName, defExtension ); + } + } + + if ( colm_log_compile ) { + cerr << "opening output file: " << outputFileName << endl; + } + + /* Make sure we are not writing to the same file as the input file. */ + if ( outputFileName != 0 && strcmp( inputFileName, outputFileName ) == 0 ) { + error() << "output file \"" << outputFileName << + "\" is the same as the input file" << endl; + } + + if ( outputFileName != 0 ) { + /* Open the output stream, attaching it to the filter. */ + ofstream *outFStream = new ofstream( outputFileName ); + + if ( !outFStream->is_open() ) { + error() << "error opening " << outputFileName << " for writing" << endl; + exit(1); + } + + outStream = outFStream; + } + else { + /* Writing out ot std out. */ + outStream = &cout; + } +} + +void openExports( ) +{ + /* Make sure we are not writing to the same file as the input file. */ + if ( gblExportTo != 0 && strcmp( inputFileName, gblExportTo ) == 0 ) { + error() << "output file \"" << gblExportTo << + "\" is the same as the input file" << endl; + } + + if ( gblExportTo != 0 ) { + /* Open the output stream, attaching it to the filter. */ + ofstream *outFStream = new ofstream( gblExportTo ); + + if ( !outFStream->is_open() ) { + error() << "error opening " << outputFileName << " for writing" << endl; + exit(1); + } + + outStream = outFStream; + } + else { + /* Writing out ot std out. */ + outStream = &cout; + } +} + +void openExportsImpl( ) +{ + /* Make sure we are not writing to the same file as the input file. 
*/ + if ( gblExpImplTo != 0 && strcmp( inputFileName, gblExpImplTo ) == 0 ) { + error() << "output file \"" << gblExpImplTo << + "\" is the same as the input file" << endl; + } + + if ( gblExpImplTo != 0 ) { + /* Open the output stream, attaching it to the filter. */ + ofstream *outFStream = new ofstream( gblExpImplTo ); + + if ( !outFStream->is_open() ) { + error() << "error opening " << outputFileName << " for writing" << endl; + exit(1); + } + + outStream = outFStream; + } + else { + /* Writing out ot std out. */ + outStream = &cout; + } +} + +void compileOutputCommand( const char *command ) +{ + if ( colm_log_compile ) + cout << "compiling with: " << command << endl; + int res = system( command ); + if ( res != 0 ) + cout << "there was a problem compiling the output" << endl; +} + +void compileOutputInstalled( const char *argv0 ) +{ + /* Find the location of the colm program that is executing. */ + char *location = strdup( argv0 ); + char *last = location + strlen(location) - 1; + while ( true ) { + if ( last == location ) { + last[0] = '.'; + last[1] = 0; + break; + } + if ( *last == '/' ) { + last[0] = 0; + break; + } + last -= 1; + } + + char *exec = fileNameFromStem( outputFileName, ".bin" ); + + int length = 1024 + 3*strlen(location) + strlen(outputFileName) + strlen(exec); + char command[length]; + sprintf( command, + "gcc -Wall -Wwrite-strings" + " -I" PREFIX "/include/colm" + " -g" + " -o %s" + " %s" + " -L" PREFIX "/lib" + " -lcolm%c", + exec, outputFileName, logging ? 'd' : 'p' ); + + compileOutputCommand( command ); +} + +void compileOutputInSource( const char *argv0 ) +{ + /* Find the location of the colm program that is executing. 
*/ + char *location = strdup( argv0 ); + char *last = strrchr( location, '/' ); + assert( last != 0 ); + last[1] = 0; + + char *exec = fileNameFromStem( outputFileName, ".bin" ); + + int length = 1024 + 3*strlen(location) + strlen(outputFileName) + strlen(exec); + char command[length]; + sprintf( command, + "gcc -Wall -Wwrite-strings" + " -I%s." + " -I%s../aapl" + " -g" + " -o %s" + " %s" + " -L%s" + " -lcolm%c", + location, location, + exec, outputFileName, location, logging ? 'd' : 'p' ); + + compileOutputCommand( command ); +} + +bool inSourceTree( const char *argv0 ) +{ + const char *lastSlash = strrchr( argv0, '/' ); + if ( lastSlash != 0 ) { + int rootLen = lastSlash - argv0 + 1; + char *mainPath = new char[rootLen + 16]; + memcpy( mainPath, argv0, rootLen ); + strcpy( mainPath + rootLen, "main.cc" ); + + struct stat sb; + int res = stat( mainPath, &sb ); + delete[] mainPath; + + if ( res == 0 && S_ISREG( sb.st_mode ) ) + return true; + } + + return false; +} + +void processArgs( int argc, const char **argv ) +{ + ParamCheck pc( "D:e:c:LI:vdlio:S:M:vHh?-:sV", argc, argv ); + + while ( pc.check() ) { + switch ( pc.state ) { + case ParamCheck::match: + switch ( pc.parameter ) { + case 'I': + includePaths.append( pc.parameterArg ); + break; + case 'v': + version(); + exit(0); + break; + case 'd': + verbose = true; + break; + case 'l': + logging = true; + break; + case 'i': + branchPointInfo = true; + break; + /* Output. */ + case 'o': + if ( *pc.parameterArg == 0 ) + error() << "a zero length output file name was given" << endl; + else if ( outputFileName != 0 ) + error() << "more than one output file name was given" << endl; + else { + /* Ok, remember the output file name. 
*/ + outputFileName = pc.parameterArg; + } + break; + + case 'H': case 'h': case '?': + usage(); + exit(0); + case 's': + printStatistics = true; + break; + case 'V': + generateGraphviz = true; + break; + case '-': + if ( strcasecmp(pc.parameterArg, "help") == 0 ) { + usage(); + exit(0); + } + else if ( strcasecmp(pc.parameterArg, "version") == 0 ) { + version(); + exit(0); + } + else { + error() << "--" << pc.parameterArg << + " is an invalid argument" << endl; + } + break; + case 'L': + gblLibrary = true; + break; + case 'e': + gblExportTo = pc.parameterArg; + break; + case 'c': + gblExpImplTo = pc.parameterArg; + break; + case 'D': +#if DEBUG + if ( strcmp( pc.parameterArg, "BYTECODE" ) == 0 ) + colmActiveRealm |= REALM_BYTECODE; + else if ( strcmp( pc.parameterArg, "PARSE" ) == 0 ) + colmActiveRealm |= REALM_PARSE; + else if ( strcmp( pc.parameterArg, "MATCH" ) == 0 ) + colmActiveRealm |= REALM_MATCH; + else if ( strcmp( pc.parameterArg, "COMPILE" ) == 0 ) + colmActiveRealm |= REALM_COMPILE; + else if ( strcmp( pc.parameterArg, "POOL" ) == 0 ) + colmActiveRealm |= REALM_POOL; + else if ( strcmp( pc.parameterArg, "PRINT" ) == 0 ) + colmActiveRealm |= REALM_PRINT; + else if ( strcmp( pc.parameterArg, "INPUT" ) == 0 ) + colmActiveRealm |= REALM_INPUT; + else if ( strcmp( pc.parameterArg, "SCAN" ) == 0 ) + colmActiveRealm |= REALM_SCAN; + else + fatal( "unknown argument to -D %s\n", pc.parameterArg ); +#else + fatal("-D option specified but debugging messsages not compiled in"); +#endif + + } + break; + + case ParamCheck::invalid: + error() << "-" << pc.parameter << " is an invalid argument" << endl; + break; + + case ParamCheck::noparam: + /* It is interpreted as an input file. */ + if ( *pc.curArg == 0 ) + error() << "a zero length input file name was given" << endl; + else if ( inputFileName != 0 ) + error() << "more than one input file name was given" << endl; + else { + /* OK, Remember the filename. 
*/ + inputFileName = pc.curArg; + } + break; + } + } +} + +/* Main, process args and call yyparse to start scanning input. */ +int main(int argc, const char **argv) +{ + processArgs( argc, argv ); + + if ( verbose ) { + colm_log_bytecode = 1; + colm_log_parse = 1; + colm_log_match = 1; + colm_log_compile = 1; + colm_log_conds = 1; + colmActiveRealm = 0xffffffff; + } + initInputFuncs(); + + /* Bail on above errors. */ + if ( gblErrorCount > 0 ) + exit(1); + + /* Make sure we are not writing to the same file as the input file. */ + if ( inputFileName != 0 && outputFileName != 0 && + strcmp( inputFileName, outputFileName ) == 0 ) + { + error() << "output file \"" << outputFileName << + "\" is the same as the input file" << endl; + } + + /* Open the input file for reading. */ + istream *inStream; + if ( inputFileName != 0 ) { + /* Open the input file for reading. */ + ifstream *inFile = new ifstream( inputFileName ); + inStream = inFile; + if ( ! inFile->is_open() ) + error() << "could not open " << inputFileName << " for reading" << endl; + } + else { + inputFileName = "<stdin>"; + inStream = &cin; + } + + /* Bail on above errors. */ + if ( gblErrorCount > 0 ) + exit(1); + + Compiler *pd = new Compiler( inputFileName, "machine", InputLoc(), std::cout ); + ColmParser *parser = new ColmParser( pd, inputFileName, "machine", InputLoc() ); + ColmScanner *scanner = new ColmScanner( inputFileName, *inStream, cout, parser, 0 ); + + parser->init(); + scanner->scan(); + scanner->eof(); + + /* Parsing complete, check for errors.. */ + if ( gblErrorCount > 0 ) + return 1; + + /* Initiate a compile following a parse. */ + pd->compile(); + + /* + * Write output. 
+ */ + if ( generateGraphviz ) { + outStream = &cout; + pd->writeDotFile(); + } + else { + openOutput(); + pd->generateOutput(); + + if ( outStream != 0 ) + delete outStream; + + if ( !gblLibrary ) { + if ( inSourceTree( argv[0] ) ) + compileOutputInSource( argv[0] ); + else + compileOutputInstalled( argv[0] ); + } + + if ( gblExportTo != 0 ) { + openExports(); + pd->generateExports(); + delete outStream; + } + if ( gblExpImplTo != 0 ) { + openExportsImpl(); + scanner->parser->pd->generateExportsImpl(); + delete outStream; + } + } + + delete scanner; + delete parser; + delete pd; + + return 0; +} diff --git a/src/map.c b/src/map.c new file mode 100644 index 00000000..72f4a18c --- /dev/null +++ b/src/map.c @@ -0,0 +1,763 @@ +/* + * Copyright 2010-2012 Adrian Thurston <thurston@complang.org> + */ + +/* This file is part of Colm. + * + * Colm is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Colm is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Colm; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <assert.h> +#include <pdarun.h> +#include <map.h> +#include <pool.h> + +#define true 1 +#define false 0 + +void mapListAbandon( Map *map ) +{ + map->head = map->tail = 0; +} + +void mapListAddBefore( Map *map, MapEl *next_el, MapEl *new_el ) +{ + /* Set the next pointer of the new element to next_el. We do + * this regardless of the state of the list. */ + new_el->next = next_el; + + /* Set reverse pointers. 
*/ + if ( next_el == 0 ) { + /* There is no next element. We are inserting at the tail. */ + new_el->prev = map->tail; + map->tail = new_el; + } + else { + /* There is a next element and we can access next's previous. */ + new_el->prev = next_el->prev; + next_el->prev = new_el; + } + + /* Set forward pointers. */ + if ( new_el->prev == 0 ) { + /* There is no previous element. Set the head pointer. */ + map->head = new_el; + } + else { + /* There is a previous element, set its next pointer to new_el. */ + new_el->prev->next = new_el; + } +} + +/* Link new_el into the map's element list directly after prev_el. A null + * prev_el makes new_el the new head of the list. */ +void mapListAddAfter( Map *map, MapEl *prev_el, MapEl *new_el ) +{ + /* Set the previous pointer of new_el to prev_el. We do + * this regardless of the state of the list. */ + new_el->prev = prev_el; + + /* Set forward pointers. */ + if (prev_el == 0) { + /* There was no prev_el, we are inserting at the head. */ + new_el->next = map->head; + map->head = new_el; + } + else { + /* There was a prev_el, we can access previous next. */ + new_el->next = prev_el->next; + prev_el->next = new_el; + } + + /* Set reverse pointers. */ + if (new_el->next == 0) { + /* There is no next element. Set the tail pointer. */ + map->tail = new_el; + } + else { + /* There is a next element. Set its prev pointer. */ + new_el->next->prev = new_el; + } +} + + +/* Unlink el from the map's element list, moving head/tail when el was at + * either end. el's own prev/next pointers are left as they were. */ +MapEl *mapListDetach( Map *map, MapEl *el ) +{ + /* Set forward pointers to skip over el. */ + if ( el->prev == 0 ) + map->head = el->next; + else + el->prev->next = el->next; + + /* Set reverse pointers to skip over el. */ + if ( el->next == 0 ) + map->tail = el->prev; + else + el->next->prev = el->prev; + + /* Return the element we detached. No list length is kept here. */ + return el; +} + + +/* Once an insertion position is found, attach an element to the tree. + * parentEl is the node the element hangs under (null when it becomes the + * root); lastLess equals parentEl when the last step of the search went left. */ +void mapAttachRebal( Map *map, MapEl *element, MapEl *parentEl, MapEl *lastLess ) +{ + /* Increment the number of elements in the tree. */ + map->treeSize += 1; + + /* Set element's parent. 
*/ + element->parent = parentEl; + + /* New element always starts as a leaf with height 1. */ + element->left = 0; + element->right = 0; + element->height = 1; + + /* Are we inserting in the tree somewhere? */ + if ( parentEl != 0 ) { + /* We have a parent so we are somewhere in the tree. If the parent + * equals lastLess, then the last traversal in the insertion went + * left, otherwise it went right. */ + if ( lastLess == parentEl ) { + parentEl->left = element; + + mapListAddBefore( map, parentEl, element ); + } + else { + parentEl->right = element; + + mapListAddAfter( map, parentEl, element ); + } + } + else { + /* No parent element so we are inserting the root. */ + map->root = element; + + mapListAddAfter( map, map->tail, element ); + } + + /* Recalculate the heights. */ + mapRecalcHeights( map, parentEl ); + + /* Find the first unbalance. */ + MapEl *ub = mapFindFirstUnbalGP( map, element ); + + /* rebalance. */ + if ( ub != 0 ) + { + /* We assert that after this single rotation the + * tree is now properly balanced. */ + mapRebalance( map, ub ); + } +} + +#if 0 +/* Recursively delete all the children of an element. The element itself is + * not deleted. This block is compiled out and still uses the C++ delete + * operator. */ +void mapDeleteChildrenOf( Map *map, MapEl *element ) +{ + /* Recurse left. */ + if ( element->left ) { + mapDeleteChildrenOf( map, element->left ); + + /* Delete left element. */ + delete element->left; + element->left = 0; + } + + /* Recurse right. */ + if ( element->right ) { + mapDeleteChildrenOf( map, element->right ); + + /* Delete right element and clear the right pointer (the left one was + * already handled above). */ + delete element->right; + element->right = 0; + } +} + +/* Delete every element of the tree, then reset root, treeSize and the + * element list. Does nothing when the tree has no root. */ +void mapEmpty( Map *map ) +{ + if ( map->root ) { + /* Recursively delete from the tree structure. */ + mapDeleteChildrenOf( map, map->root ); + delete map->root; + map->root = 0; + map->treeSize = 0; + + mapListAbandon( map ); + } +} +#endif + +/* rebalance from an element whose grandparent is unbalanced. Only + * call on an element that has a grandparent. 
*/ +MapEl *mapRebalance( Map *map, MapEl *n ) +{ + long lheight, rheight; + MapEl *a, *b, *c; + MapEl *t1, *t2, *t3, *t4; + + MapEl *p = n->parent; /* parent (Non-NULL). */ + MapEl *gp = p->parent; /* Grand-parent (Non-NULL). */ + MapEl *ggp = gp->parent; /* Great grand-parent (may be NULL). */ + + /* Label the three nodes a, b, c and their four outer subtrees t1..t4 + * according to which of the four shapes n, p and gp form. After the + * rotation b is the subtree root with a on its left and c on its right. */ + if (gp->right == p) + { + /* gp + * \ + * p + */ + if (p->right == n) + { + /* gp + * \ + * p + * \ + * n + */ + a = gp; + b = p; + c = n; + t1 = gp->left; + t2 = p->left; + t3 = n->left; + t4 = n->right; + } + else + { + /* gp + * \ + * p + * / + * n + */ + a = gp; + b = n; + c = p; + t1 = gp->left; + t2 = n->left; + t3 = n->right; + t4 = p->right; + } + } + else + { + /* gp + * / + * p + */ + if (p->right == n) + { + /* gp + * / + * p + * \ + * n + */ + a = p; + b = n; + c = gp; + t1 = p->left; + t2 = n->left; + t3 = n->right; + t4 = gp->right; + } + else + { + /* gp + * / + * p + * / + * n + */ + a = n; + b = p; + c = gp; + t1 = n->left; + t2 = n->right; + t3 = p->right; + t4 = gp->right; + } + } + + /* Perform rotation. + */ + + /* Tie b to the great grandparent. */ + if ( ggp == 0 ) + map->root = b; + else if ( ggp->left == gp ) + ggp->left = b; + else + ggp->right = b; + b->parent = ggp; + + /* Tie a as a leftchild of b. */ + b->left = a; + a->parent = b; + + /* Tie c as a rightchild of b. */ + b->right = c; + c->parent = b; + + /* Tie t1 as a leftchild of a. */ + a->left = t1; + if ( t1 != 0 ) t1->parent = a; + + /* Tie t2 as a rightchild of a. */ + a->right = t2; + if ( t2 != 0 ) t2->parent = a; + + /* Tie t3 as a leftchild of c. */ + c->left = t3; + if ( t3 != 0 ) t3->parent = c; + + /* Tie t4 as a rightchild of c. */ + c->right = t4; + if ( t4 != 0 ) t4->parent = c; + + /* The heights are all recalculated manually and the great + * grand-parent is passed to recalcHeights() to ensure + * the heights are correct up the tree. + * + * Note that recalcHeights() cuts out when it comes across + * a height that hasn't changed. + */ + + /* Fix height of a. 
*/ + lheight = a->left ? a->left->height : 0; + rheight = a->right ? a->right->height : 0; + a->height = (lheight > rheight ? lheight : rheight) + 1; + + /* Fix height of c. */ + lheight = c->left ? c->left->height : 0; + rheight = c->right ? c->right->height : 0; + c->height = (lheight > rheight ? lheight : rheight) + 1; + + /* Fix height of b. */ + lheight = a->height; + rheight = c->height; + b->height = (lheight > rheight ? lheight : rheight) + 1; + + /* Fix height of b's parents. */ + mapRecalcHeights( map, ggp ); + return ggp; +} + +/* Recalculates the height of element and then of each of its ancestors in + * turn, stopping at the first node whose height does not change. A missing + * child counts as height 0. */ +void mapRecalcHeights( Map *map, MapEl *element ) +{ + while ( element != 0 ) + { + long lheight = element->left ? element->left->height : 0; + long rheight = element->right ? element->right->height : 0; + + long new_height = (lheight > rheight ? lheight : rheight) + 1; + + /* If there is no change in the height, then there will be no + * change in any of the ancestor's height. We can stop going up. + * If there was a change, continue upward. */ + if (new_height == element->height) + return; + else + element->height = new_height; + + element = element->parent; + } +} + +/* Finds the first element whose grandparent is unbalanced, walking upward + * from element. Returns null when element has no grandparent or when no + * unbalanced grandparent is found. */ +MapEl *mapFindFirstUnbalGP( Map *map, MapEl *element ) +{ + long lheight, rheight, balanceProp; + MapEl *gp; + + /* Don't do anything if we have no grandparent. */ + if ( element == 0 || element->parent == 0 || + element->parent->parent == 0 ) + return 0; + + /* Walk element and its grandparent upward in step. */ + gp = element->parent->parent; + while ( gp != 0 ) + { + lheight = gp->left ? gp->left->height : 0; + rheight = gp->right ? gp->right->height : 0; + balanceProp = lheight - rheight; + + if ( balanceProp < -1 || balanceProp > 1 ) + return element; + + element = element->parent; + gp = gp->parent; + } + return 0; +} + + + +/* Finds the first element that is unbalanced. 
*/ +MapEl *mapFindFirstUnbalEl( Map *map, MapEl *element ) +{ + if ( element == 0 ) + return 0; + + /* Check element itself first, then each ancestor in turn. */ + while ( element != 0 ) + { + long lheight = element->left ? + element->left->height : 0; + long rheight = element->right ? + element->right->height : 0; + long balanceProp = lheight - rheight; + + if ( balanceProp < -1 || balanceProp > 1 ) + return element; + + element = element->parent; + } + return 0; +} + +/* Replace an element in the tree with another element not in the tree. The + * replacement takes over element's children, parent link and height; + * element's own pointers are not cleared. */ +void mapReplaceEl( Map *map, MapEl *element, MapEl *replacement ) +{ + MapEl *parent = element->parent, + *left = element->left, + *right = element->right; + + replacement->left = left; + if (left) + left->parent = replacement; + replacement->right = right; + if (right) + right->parent = replacement; + + replacement->parent = parent; + if (parent) + { + if (parent->left == element) + parent->left = replacement; + else + parent->right = replacement; + } + else { + map->root = replacement; + } + + replacement->height = element->height; +} + + +/* Removes an element from a tree and puts filler in its place. + * Filler should be null or a child of element. */ +void mapRemoveEl( Map *map, MapEl *element, MapEl *filler ) +{ + MapEl *parent = element->parent; + + if ( parent ) + { + if ( parent->left == element ) + parent->left = filler; + else + parent->right = filler; + } + else { + map->root = filler; + } + + if ( filler ) + filler->parent = parent; + + return; +} + +/* Recursive worker for tree copying. */ +MapEl *mapCopyBranch( Program *prg, Map *map, MapEl *el, Kid *oldNextDown, Kid **newNextDown ) +{ + /* Duplicate element. Either the base element's copy constructor or default + * constructor will get called. Both will suffice for initting the + * pointers to null when they need to be. + * + * NOTE(review): the new element comes from mapElAllocate() and nothing + * in this function copies el's key, tree or child pointers into it; + * the constructor wording above looks inherited from a C++ version. + * Confirm whether a copy from el is missing here. */ + MapEl *newEl = mapElAllocate( prg ); + + if ( (Kid*)el == oldNextDown ) + *newNextDown = (Kid*)newEl; + + /* If the left tree is there, copy it. 
*/ + /* NOTE(review): this tests and recurses into newEl's own children, not + * el's. Unless mapElAllocate() fills in left/right (not visible here), + * neither branch below can run -- confirm against the intended copy. */ + if ( newEl->left ) { + newEl->left = mapCopyBranch( prg, map, newEl->left, oldNextDown, newNextDown ); + newEl->left->parent = newEl; + } + + /* Appended between the two subtrees, so the list is built in in-order. */ + mapListAddAfter( map, map->tail, newEl ); + + /* If the right tree is there, copy it. */ + if ( newEl->right ) { + newEl->right = mapCopyBranch( prg, map, newEl->right, oldNextDown, newNextDown ); + newEl->right->parent = newEl; + } + + return newEl; +} + +/* Insert element into the map, ordered by element->key via cmpTree(). + * Returns element when it was attached, or null when an element with an + * equal key is already present. If lastFound is non-null it receives the + * element that now holds the key (the new one or the existing one). */ +MapEl *mapInsertEl( Program *prg, Map *map, MapEl *element, MapEl **lastFound ) +{ + long keyRelation; + MapEl *curEl = map->root, *parentEl = 0; + MapEl *lastLess = 0; + + while ( true ) { + if ( curEl == 0 ) { + /* We are at an external element and did not find the key we were + * looking for. Attach underneath the leaf and rebalance. */ + mapAttachRebal( map, element, parentEl, lastLess ); + + if ( lastFound != 0 ) + *lastFound = element; + return element; + } + + keyRelation = cmpTree( prg, + element->key, curEl->key ); + + /* Do we go left? */ + if ( keyRelation < 0 ) { + parentEl = lastLess = curEl; + curEl = curEl->left; + } + /* Do we go right? */ + else if ( keyRelation > 0 ) { + parentEl = curEl; + curEl = curEl->right; + } + /* We have hit the target. */ + else { + if ( lastFound != 0 ) + *lastFound = curEl; + return 0; + } + } +} + +/* Same search as mapInsertEl, but the element is allocated here once the + * insertion point is known: it is given the supplied key and a null tree. */ +MapEl *mapInsertKey( Program *prg, Map *map, Tree *key, MapEl **lastFound ) +{ + long keyRelation; + MapEl *curEl = map->root, *parentEl = 0; + MapEl *lastLess = 0; + + while ( true ) { + if ( curEl == 0 ) { + /* We are at an external element and did not find the key we were + * looking for. Create the new element, attach it underneath the leaf + * and rebalance. */ + MapEl *element = mapElAllocate( prg ); + element->key = key; + element->tree = 0; + mapAttachRebal( map, element, parentEl, lastLess ); + + if ( lastFound != 0 ) + *lastFound = element; + return element; + } + + keyRelation = cmpTree( prg, key, curEl->key ); + + /* Do we go left? 
*/ + if ( keyRelation < 0 ) { + parentEl = lastLess = curEl; + curEl = curEl->left; + } + /* Do we go right? */ + else if ( keyRelation > 0 ) { + parentEl = curEl; + curEl = curEl->right; + } + /* We have hit the target: the key already exists, nothing is inserted. */ + else { + if ( lastFound != 0 ) + *lastFound = curEl; + return 0; + } + } +} + + +/** + * \brief Find an element in the tree with the given key. + * + * Keys are compared with cmpTree(). + * + * \returns The element if key exists, null if the key does not exist. + */ +MapEl *mapImplFind( Program *prg, Map *map, Tree *key ) +{ + MapEl *curEl = map->root; + long keyRelation; + + while ( curEl != 0 ) { + keyRelation = cmpTree( prg, key, curEl->key ); + + /* Do we go left? */ + if ( keyRelation < 0 ) + curEl = curEl->left; + /* Do we go right? */ + else if ( keyRelation > 0 ) + curEl = curEl->right; + /* We have hit the target. */ + else { + return curEl; + } + } + return 0; +} + + +/** + * \brief Find an element, then detach it from the tree. + * + * The element is not deleted. + * + * \returns The element detached if the key is found, otherwise returns null. + */ +MapEl *mapDetachByKey( Program *prg, Map *map, Tree *key ) +{ + MapEl *element = mapImplFind( prg, map, key ); + if ( element ) + mapDetach( prg, map, element ); + + return element; +} + +/** + * \brief Detach an element from the tree. + * + * The element is also removed from the ordered list and treeSize is + * decremented. If the element is not in the tree then undefined behaviour + * results. + * + * \returns The element given. + */ +MapEl *mapDetach( Program *prg, Map *map, MapEl *element ) +{ + MapEl *replacement, *fixfrom; + long lheight, rheight; + + /* Remove the element from the ordered list. */ + mapListDetach( map, element ); + + /* Update treeSize. */ + map->treeSize--; + + /* Find a replacement element. */ + if (element->right) + { + /* Find the leftmost element of the right subtree. 
*/ + replacement = element->right; + while (replacement->left) + replacement = replacement->left; + + /* If replacing the element with its child then we need to start + * fixing at the replacement, otherwise we start fixing at the + * parent of the replacement. */ + if (replacement->parent == element) + fixfrom = replacement; + else + fixfrom = replacement->parent; + + /* Unhook the replacement from its old position (its right child fills + * the gap), then put it where element was. */ + mapRemoveEl( map, replacement, replacement->right ); + mapReplaceEl( map, element, replacement ); + } + else if (element->left) + { + /* Find the rightmost element of the left subtree. */ + replacement = element->left; + while (replacement->right) + replacement = replacement->right; + + /* If replacing the element with its child then we need to start + * fixing at the replacement, otherwise we start fixing at the + * parent of the replacement. */ + if (replacement->parent == element) + fixfrom = replacement; + else + fixfrom = replacement->parent; + + /* Mirror image of the case above. */ + mapRemoveEl( map, replacement, replacement->left ); + mapReplaceEl( map, element, replacement ); + } + else + { + /* We need to start fixing at the parent of the element. */ + fixfrom = element->parent; + + /* The element we are deleting is a leaf element. */ + mapRemoveEl( map, element, 0 ); + } + + /* If fixfrom is null it means we just deleted + * the root of the tree. */ + if ( fixfrom == 0 ) + return element; + + /* Fix the heights after the deletion. */ + mapRecalcHeights( map, fixfrom ); + + /* Fix every unbalanced element going up in the tree. */ + MapEl *ub = mapFindFirstUnbalEl( map, fixfrom ); + while ( ub ) + { + /* Find the element to rebalance by moving down from the first unbalanced + * element 2 levels in the direction of the greatest heights. On the + * second move down, the heights may be equal ( but not on the first ). + * In which case go in the direction of the first move. */ + lheight = ub->left ? ub->left->height : 0; + rheight = ub->right ? 
ub->right->height : 0; + assert( lheight != rheight ); + if (rheight > lheight) + { + ub = ub->right; + lheight = ub->left ? + ub->left->height : 0; + rheight = ub->right ? + ub->right->height : 0; + if (rheight > lheight) + ub = ub->right; + else if (rheight < lheight) + ub = ub->left; + else + ub = ub->right; + } + else + { + ub = ub->left; + lheight = ub->left ? + ub->left->height : 0; + rheight = ub->right ? + ub->right->height : 0; + if (rheight > lheight) + ub = ub->right; + else if (rheight < lheight) + ub = ub->left; + else + ub = ub->left; + } + + + /* rebalance returns the grandparant of the subtree formed + * by the element that were rebalanced. + * We must continue upward from there rebalancing. */ + fixfrom = mapRebalance( map, ub ); + + /* Find the next unbalaced element. */ + ub = mapFindFirstUnbalEl( map, fixfrom ); + } + + return element; +} + + + diff --git a/src/map.cc b/src/map.cc new file mode 100644 index 00000000..52dd2697 --- /dev/null +++ b/src/map.cc @@ -0,0 +1,26 @@ +/* + * Copyright 2008-2012 Adrian Thurston <thurston@complang.org> + */ + +/* This file is part of Colm. + * + * Colm is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Colm is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with Colm; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "pdarun.h" +#include <assert.h> + + + diff --git a/src/map.h b/src/map.h new file mode 100644 index 00000000..acb415b9 --- /dev/null +++ b/src/map.h @@ -0,0 +1,108 @@ +/* + * Copyright 2010-2012 Adrian Thurston <thurston@complang.org> + */ + +/* This file is part of Colm. + * + * Colm is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Colm is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Colm; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _MAP_H +#define _MAP_H + +#if defined(__cplusplus) +extern "C" { +#endif + +#include <program.h> + +typedef struct _MapEl +{ + /* Must overlay Kid. */ + Tree *tree; + struct _MapEl *next; + struct _MapEl *prev; + + struct _MapEl *left, *right, *parent; + long height; + Tree *key; +} MapEl; + +typedef struct _Map +{ + /* Must overlay Tree. 
*/ + short id; + unsigned short flags; + long refs; + MapEl *head; + + MapEl *tail; + MapEl *root; + long treeSize; + GenericInfo *genericInfo; +} Map; + +void mapListAbandon( Map *map ); + +void mapListAddBefore( Map *map, MapEl *next_el, MapEl *new_el ); +void mapListAddAfter( Map *map, MapEl *prev_el, MapEl *new_el ); +MapEl *mapListDetach( Map *map, MapEl *el ); +void mapAttachRebal( Map *map, MapEl *element, MapEl *parentEl, MapEl *lastLess ); +void mapDeleteChildrenOf( Map *map, MapEl *element ); +void mapEmpty( Map *map ); +MapEl *mapRebalance( Map *map, MapEl *n ); +void mapRecalcHeights( Map *map, MapEl *element ); +MapEl *mapFindFirstUnbalGP( Map *map, MapEl *element ); +MapEl *mapFindFirstUnbalEl( Map *map, MapEl *element ); +void mapRemoveEl( Map *map, MapEl *element, MapEl *filler ); +void mapReplaceEl( Map *map, MapEl *element, MapEl *replacement ); +MapEl *mapInsertEl( Program *prg, Map *map, MapEl *element, MapEl **lastFound ); +MapEl *mapInsertKey( Program *prg, Map *map, Tree *key, MapEl **lastFound ); +MapEl *mapImplFind( Program *prg, Map *map, Tree *key ); +MapEl *mapDetachByKey( Program *prg, Map *map, Tree *key ); +MapEl *mapDetach( Program *prg, Map *map, MapEl *element ); +MapEl *mapCopyBranch( Program *prg, Map *map, MapEl *el, Kid *oldNextDown, Kid **newNextDown ); + +long cmpTree( Program *prg, const Tree *tree1, const Tree *tree2 ); + +void mapImplRemoveEl( Program *prg, Map *map, MapEl *element ); +int mapImplRemoveKey( Program *prg, Map *map, Tree *key ); + +/* + * Iterators. 
+ */ + +void initTreeIter( TreeIter *treeIter, const Ref *rootRef, int searchId, Tree **stackRoot ); +void initRevTreeIter( RevTreeIter *revTriter, const Ref *rootRef, + int searchId, Tree **stackRoot, int children ); + + +void initUserIter( UserIter *userIter, Tree **stackRoot, long argSize, long searchId ); + +Tree *mapFind( Program *prg, Map *map, Tree *key ); +long mapLength( Map *map ); +Tree *mapUnstore( Program *prg, Map *map, Tree *key, Tree *existing ); +int mapInsert( Program *prg, Map *map, Tree *key, Tree *element ); +void mapUnremove( Program *prg, Map *map, Tree *key, Tree *element ); +Tree *mapUninsert( Program *prg, Map *map, Tree *key ); +Tree *mapStore( Program *prg, Map *map, Tree *key, Tree *element ); + + +#if defined(__cplusplus) +} +#endif + +#endif + diff --git a/src/parsedata.h b/src/parsedata.h new file mode 100644 index 00000000..79ba08c1 --- /dev/null +++ b/src/parsedata.h @@ -0,0 +1,1063 @@ +/* + * Copyright 2001-2012 Adrian Thurston <thurston@complang.org> + */ + +/* This file is part of Colm. + * + * Colm is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Colm is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with Colm; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _PARSEDATA_H +#define _PARSEDATA_H + +#include <iostream> +#include <limits.h> +#include "bstset.h" +#include "global.h" +#include "avlmap.h" +#include "avlset.h" +#include "bstmap.h" +#include "vector.h" +#include "dlist.h" +#include "dlistmel.h" +#include "fsmgraph.h" +#include "compare.h" +#include "vector.h" +#include "keyops.h" +#include "parsetree.h" +#include "astring.h" +#include "pdagraph.h" +#include "compare.h" +#include "pdarun.h" +#include "bytecode.h" +#include "program.h" + +using std::ostream; + +struct exit_object { }; +extern exit_object endp; +void operator<<( std::ostream &out, exit_object & ); + +/* Forwards. */ +struct RedFsm; +struct LangEl; +struct Compiler; +struct PdaCodeGen; +struct FsmCodeGen; + +#define SHIFT_CODE 0x1 +#define REDUCE_CODE 0x2 +#define SHIFT_REDUCE_CODE 0x3 + +inline long makeReduceCode( long reduction, bool isShiftReduce ) +{ + return ( isShiftReduce ? SHIFT_REDUCE_CODE : REDUCE_CODE ) | + ( reduction << 2 ); +} + +struct ProdEl; +struct ProdElList; +struct PdaLiteral; +struct Definition; + +/* A pointer to this is in PdaRun, but it's specification is not known by the + * runtime code. 
The runtime functions that access it are defined in + * ctinput.cpp and stubbed in fsmcodegen.cpp */ +struct Bindings + : public Vector<ParseTree*> +{}; + +struct DefListEl { Definition *prev, *next; }; +struct LelDefListEl { Definition *prev, *next; }; +typedef Vector< LangEl* > LangElVect; +typedef Vector< ProdEl* > FactorVect; + +typedef AvlMap<String, long, CmpStr> StringMap; +typedef AvlMapEl<String, long> StringMapEl; + +enum PredType { + PredLeft, + PredRight, + PredNonassoc, + PredNone +}; + +struct PredDecl +{ + PredDecl( TypeRef *typeRef, PredType predType, long predValue ) + : typeRef(typeRef), predType(predType), predValue(predValue) + {} + + TypeRef *typeRef; + PredType predType; + long predValue; + + PredDecl *prev, *next; +}; + +typedef DList<PredDecl> PredDeclList; + +/* Graph dictionary. */ +struct Definition +: + public DefListEl, public LelDefListEl +{ + enum Type { Production }; + + Definition( const InputLoc &loc, LangEl *prodName, ProdElList *prodElList, + bool prodCommit, CodeBlock *redBlock, int prodId, int prodNum, Type type ) + : + loc(loc), prodName(prodName), prodElList(prodElList), + prodCommit(prodCommit), redBlock(redBlock), prodId(prodId), prodNum(prodNum), + type(type), fsm(0), fsmLength(0), uniqueEmptyLeader(0), + isLeftRec(false), localFrame(0), lhsField(0), predOf(0), + collectIgnoreRegion(0) {} + + InputLoc loc; + LangEl *prodName; + ProdElList *prodElList; + bool prodCommit; + + CodeBlock *redBlock; + + int prodId; + int prodNum; + Type type; + + PdaGraph *fsm; + int fsmLength; + String data; + LongSet reducesTo; + + LangEl *uniqueEmptyLeader; + + ProdIdSet nonTermFirstSet; + AlphSet firstSet; + bool isLeftRec; + + ObjectDef *localFrame; + ObjField *lhsField; + + LangEl *predOf; + + UnsignedCharVect copy; + + TokenRegion *collectIgnoreRegion; +}; + +struct CmpDefById +{ + static int compare( Definition *d1, Definition *d2 ) + { + if ( d1->prodId < d2->prodId ) + return -1; + else if ( d1->prodId > d2->prodId ) + return 1; + 
else + return 0; + } +}; + + +/* Map dotItems to productions. */ +typedef BstMap< int, Definition*, CmpOrd<int> > DotItemIndex; +typedef BstMapEl< int, Definition*> DotItemIndexEl; + +struct DefList +: + public DListMel<Definition, DefListEl> +{}; + +/* A vector of production vectors. Each non terminal can have many productions. */ +struct LelDefList +: + public DListMel<Definition, LelDefListEl> +{}; + +/* A set of machines made during a closure round. */ +typedef Vector< PdaGraph* > Machines; + +/* List of language elements. */ +typedef DList<LangEl> LelList; + +typedef Vector< TokenDef* > TokenDefVect; + +struct UniqueType; + +typedef Vector<LangEl*> LangElVect; +typedef BstSet<LangEl*> LangElSet; + +/* A language element class. Can be a nonTerm or a term. */ +struct LangEl : public DListEl<LangEl> +{ + enum Type { Unknown, Term, NonTerm }; + + LangEl( Namespace *nspace, const String &name, Type type ); + ~LangEl(); + + /* The region the language element was defined in. */ + Namespace *nspace; + + String name; + String lit; + + String fullName; + String fullLit; + + /* For referencing the type. */ + String refName; + + /* For declaring things inside the type. */ + String declName; + + String xmlTag; + + Type type; + long id; + bool isUserTerm; + bool isContext; + String displayString; + long numAppearances; + bool commit; + bool ignore; + bool reduceFirst; + bool isLiteral; + bool isRepeat; + bool isList; + bool isOpt; + bool parseStop; + bool isEOF; + + LangEl *repeatOf; + + /* Productions from the language element if it is a non-terminal. 
*/ + LelDefList defList; + + TokenDef *tokenDef; + Definition *rootDef; + LangEl *termDup; + LangEl *eofLel; + + PdaGraph *pdaGraph; + PdaTables *pdaTables; + + PdaState *startState; + + CodeBlock *transBlock; + + ObjectDef *objectDef; + NamespaceQual *objectDefUsesQual; + String objectDefUses; + + long thisSize; + long ofiOffset; + + GenericType *generic; + + long parserId; + + PredType predType; + long predValue; + + Context *contextDef; + Context *contextIn; + bool noPreIgnore; + bool noPostIgnore; + bool isCI; + TokenRegion *ciRegion; +}; + +struct ProdEl +{ + /* Language elements a factor node can be. */ + enum Type { + LiteralType, + ReferenceType + }; + + /* Construct with a reference to a var def. */ + ProdEl( Type type, const InputLoc &loc, ObjField *captureField, bool commit, TypeRef *typeRef, int priorVal ) + : + captureField(captureField), + commit(commit), + typeRef(typeRef), + langEl(0), + priorVal(priorVal), + type(type), + objField(0) + {} + + ProdEl( const InputLoc &loc, TypeRef *typeRef ) + : + captureField(0), + commit(false), + typeRef(typeRef), + langEl(0), + priorVal(0), + type(ReferenceType), + objField(0) + {} + + ObjField *captureField; + bool commit; + + TypeRef *typeRef; + + LangEl *langEl; + int priorVal; + Type type; + ObjField *objField; + ProdEl *prev, *next; +}; + +struct ProdElList : public DList<ProdEl> +{ + PdaGraph *walk( Compiler *pd, Definition *prod ); +}; + +/* This should be renamed. It is a literal string in a type reference. */ +struct PdaLiteral +{ + PdaLiteral( const InputLoc &loc, const Token &token ) + : loc(loc), token(token), value(0) { } + + InputLoc loc; + Token token; + long value; +}; + +/* Nodes in the tree that use this action. */ +typedef Vector<NameInst*> ActionRefs; + +/* Element in list of actions. Contains the string for the code to exectute. 
*/ +struct Action +: + public DListEl<Action>, + public AvlTreeEl<Action> +{ +public: + + /* Construct a named action from parsed inline code. All reference + * counters start at zero; markType is MarkNone and markId/actionId are -1. */ + Action( const InputLoc &loc, const String &name, InlineList *inlineList ) + : + loc(loc), + name(name), + markType(MarkNone), + objField(0), + markId(-1), + inlineList(inlineList), + actionId(-1), + numTransRefs(0), + numToStateRefs(0), + numFromStateRefs(0), + numEofRefs(0), + numCondRefs(0), + anyCall(false), + isLmAction(false) + { + } + + /* Construct a mark action: it is named "mark", gets a fresh empty + * InlineList, and loc is left default-constructed. */ + Action( MarkType markType, long markId ) + : + name("mark"), + markType(markType), + objField(0), + markId(markId), + inlineList(new InlineList), + actionId(-1), + numTransRefs(0), + numToStateRefs(0), + numFromStateRefs(0), + numEofRefs(0), + numCondRefs(0), + anyCall(false), + isLmAction(false) + { + } + + /* Key for action dictionary. */ + const String &getKey() const { return name; } + + /* Data collected during parse. */ + InputLoc loc; + String name; + + MarkType markType; + ObjField *objField; + long markId; + + InlineList *inlineList; + int actionId; + + /* Write the action's name to out, or its source line:col when the name + * compares equal to 0. */ + void actionName( ostream &out ) + { + if ( name != 0 ) + out << name; + else + out << loc.line << ":" << loc.col; + } + + /* Places in the input text that reference the action. */ + ActionRefs actionRefs; + + /* Number of references in the final machine. + * NOTE(review): the return type is bool, so callers only see whether + * the sum is non-zero, and numCondRefs is not part of the sum. Confirm + * both are intended. */ + bool numRefs() + { return numTransRefs + numToStateRefs + numFromStateRefs + numEofRefs; } + int numTransRefs; + int numToStateRefs; + int numFromStateRefs; + int numEofRefs; + int numCondRefs; + bool anyCall; + + bool isLmAction; +}; + +/* A list of actions. */ +typedef DList<Action> ActionList; +typedef AvlTree<Action, String, CmpStr> ActionDict; + +struct VarDef; +struct Join; +struct Expression; +struct Term; +struct FactorWithAug; +struct FactorWithLabel; +struct FactorWithRep; +struct FactorWithNeg; +struct Factor; +struct Literal; +struct Range; +struct RegExpr; +struct ReItem; +struct ReOrBlock; +struct ReOrItem; +struct TokenRegion; + +/* Priority name dictionary. 
*/ +typedef AvlMapEl<String, int> PriorDictEl; +typedef AvlMap<String, int, CmpStr> PriorDict; + +/* Local error name dictionary. */ +typedef AvlMapEl<String, int> LocalErrDictEl; +typedef AvlMap<String, int, CmpStr> LocalErrDict; + +/* Tree of instantiated names. */ +typedef BstMapEl<String, NameInst*> NameMapEl; +typedef BstMap<String, NameInst*, CmpStr> NameMap; +typedef Vector<NameInst*> NameVect; +typedef BstSet<NameInst*> NameSet; + +/* Node in the tree of instantiated names. The constructor records loc, parent, name, id and isLabel, clears isLongestMatch, and zeroes numRefs, numUses, start and final; prev and next are not initialised by it. */ +struct NameInst +{ + NameInst( const InputLoc &loc, NameInst *parent, const String &name, + int id, bool isLabel ) : + loc(loc), parent(parent), name(name), id(id), isLabel(isLabel), + isLongestMatch(false), numRefs(0), numUses(0), start(0), final(0) {} + + InputLoc loc; + + /* Keep parent pointers in the name tree to retrieve + * fully qualified names. */ + NameInst *parent; + + String name; + int id; + bool isLabel; + bool isLongestMatch; + + int numRefs; + int numUses; + + /* Names underneath us, excludes anonymous names. */ + NameMap children; + + /* All names underneath us in order of appearance. */ + NameVect childVect; + + /* Join scopes need an implicit "final" target. */ + NameInst *start, *final; + + /* During a fsm generation walk, lists the names that are referenced by + * epsilon operations in the current scope. After the link is made by the + * epsilon reference and the join operation is complete, the label can + * have its refcount decremented. Once there are no more references the + * entry point can be removed from the fsm returned. */ + NameVect referencedNames; + + /* Pointers for the name search queue. */ + NameInst *prev, *next; + + /* Check if this name inst or any name inst below is referenced. */ + bool anyRefsRec(); +}; + +typedef DList<NameInst> NameInstList; + +/* Stack frame used in walking the name tree. 
*/ +struct NameFrame +{ + NameInst *prevNameInst; + int prevNameChild; + NameInst *prevLocalScope; +}; + +/* Class to collect information about the machine during the + * parse of input. */ +struct Compiler +{ + /* Create a new parse data object. This is done at the beginning of every + * fsm specification. */ + Compiler( const String &fileName, const String §ionName, + const InputLoc §ionLoc, ostream &out ); + ~Compiler(); + + /* + * Setting up the graph dict. + */ + + void compileLiteralTokens(); + void initEmptyScanners(); + void initUniqueTypes(); + + /* Initialize a graph dict with the basic fsms. */ + void initGraphDict(); + void createBuiltin( const char *name, BuiltinMachine builtin ); + + /* Make a name id in the current name instantiation scope if it is not + * already there. */ + NameInst *addNameInst( const InputLoc &loc, char *data, bool isLabel ); + NameInst *makeJoinNameTree( Join *join ); + NameInst *makeNameTree( ); + void fillNameIndex( NameInst **nameIndex, NameInst *from ); + NameInst **makeNameIndex( NameInst *rootName ); + + + void printNameTree( NameInst *rootName ); + void printNameIndex( NameInst **nameIndex ); + + /* Increments the usage count on entry names. Names that are no longer + * needed will have their entry points unset. */ + void unsetObsoleteEntries( FsmGraph *graph ); + + /* Resove name references in action code and epsilon transitions. */ + NameSet resolvePart( NameInst *refFrom, const char *data, bool recLabelsOnly ); + void resolveFrom( NameSet &result, NameInst *refFrom, + const NameRef &nameRef, int namePos ); + void referenceRegions( NameInst *root ); + + /* Set the alphabet type. If type types are not valid returns false. */ + bool setAlphType( char *s1, char *s2 ); + bool setAlphType( char *s1 ); + + /* Unique actions. */ + void removeDups( ActionTable &actionTable ); + void removeActionDups( FsmGraph *graph ); + + /* Dumping the name instantiation tree. 
*/ + void printNameInst( NameInst *nameInst, int level ); + + /* Make the graph from a graph dict node. Does minimization. */ + void finishGraphBuild( FsmGraph *graph ); + FsmGraph *makeAllRegions(); + FsmGraph *makeScanner(); + + void analyzeAction( Action *action, InlineList *inlineList ); + void analyzeGraph( FsmGraph *graph ); + void resolvePrecedence( PdaGraph *pdaGraph ); + LangEl *predOf( PdaTrans *trans, long action ); + bool precedenceSwap( long action1, long action2, LangEl *l1, LangEl *l2 ); + bool precedenceRemoveBoth( LangEl *l1, LangEl *l2 ); + + void initKeyOps(); + + /* + * Data collected during the parse. + */ + + /* The list of instances. */ + RegionGraphList instanceList; + + /* Dictionary of actions. Lets actions be defined and then referenced. */ + ActionDict actionDict; + + /* Dictionary of named priorities. */ + PriorDict priorDict; + + /* Dictionary of named local errors. */ + LocalErrDict localErrDict; + + /* List of actions. Will be pasted into a switch statement. */ + ActionList actionList; + + /* The id of the next priority name and label. */ + int nextPriorKey, nextLocalErrKey, nextNameId; + + /* The default priority number key for a machine. This is active during + * the parse of the rhs of a machine assignment. */ + int curDefPriorKey; + + int curDefLocalErrKey; + + /* Alphabet type. */ + HostType *userAlphType; + bool alphTypeSet; + + /* Element type and get key expression. */ + InlineList *getKeyExpr; + InlineList *accessExpr; + InlineList *curStateExpr; + + /* The alphabet range. */ + char *lowerNum, *upperNum; + Key lowKey, highKey; + InputLoc rangeLowLoc, rangeHighLoc; + + /* The name of the file the fsm is from, and the spec name. */ + String fileName; + String sectionName; + InputLoc sectionLoc; + + /* Number of errors encountered parsing the fsm spec. */ + int errorCount; + + /* Counting the action and priority ordering. */ + int curActionOrd; + int curPriorOrd; + + /* Root of the name tree. 
*/ + NameInst *curNameInst; + int curNameChild; + + /* The place where resolved epsilon transitions go. These cannot go into + * the parse tree because a single epsilon op can resolve more than once + * to different nameInsts if the machine it's in is used more than once. */ + NameVect epsilonResolvedLinks; + int nextEpsilonResolvedLink; + + /* Root of the name tree used for doing local name searches. */ + NameInst *localNameScope; + + void setLmInRetLoc( InlineList *inlineList ); + void initLongestMatchData(); + void initNameWalk( NameInst *rootName ); + NameInst *nextNameScope() { return curNameInst->childVect[curNameChild]; } + NameFrame enterNameScope( bool isLocal, int numScopes ); + void popNameScope( const NameFrame &frame ); + void resetNameScope( const NameFrame &frame ); + + /* Counter for assigning ids to longest match items. */ + int nextTokenId; + + /* List of all longest match parse tree items. */ + RegionList regionList; + + NamespaceList namespaceList; + + Action *newAction( const String &name, InlineList *inlineList ); + + Action *setTokStart; + int setTokStartOrd; + + Action *initActId; + int initActIdOrd; + + Action *setTokEnd; + int setTokEndOrd; + + CodeBlock *rootCodeBlock; + + void beginProcessing() + { + ::condData = &thisCondData; + ::keyOps = &thisKeyOps; + } + + CondData thisCondData; + KeyOps thisKeyOps; + + UniqueType *mainReturnUT; + + /* CONTEXT FREE */ + ProdElList *makeProdElList( LangEl *langEl ); + void wrapNonTerminals(); + void makeDefinitionNames(); + void noUndefindLangEls(); + void declareBaseLangEls(); + void makeLangElIds(); + void makeLangElNames(); + void makeTerminalWrappers(); + void makeEofElements(); + void makeIgnoreCollectors(); + void setPrecedence(); + + void typeDeclaration(); + void typeResolve(); + + /* Parser generation. 
*/ + void advanceReductions( PdaGraph *pdaGraph ); + void sortActions( PdaGraph *pdaGraph ); + void addDupTerms( PdaGraph *pdaGraph ); + void linkExpansions( PdaGraph *pdaGraph ); + void lalr1FollowEpsilonOp( PdaGraph *pdaGraph ); + + void transferCommits( PdaGraph *pdaGraph, PdaTrans *trans, PdaState *state, long prodId ); + + void lalr1AddFollow2( PdaGraph *pdaGraph, PdaTrans *trans, FollowToAdd &followKeys ); + void lalr1AddFollow1( PdaGraph *pdaGraph, PdaState *state ); + + void lalr1AddFollow2( PdaGraph *pdaGraph, PdaTrans *trans, long followKey, long prior ); + void lalr1AddFollow1( PdaGraph *pdaGraph, PdaTrans *trans ); + + void lalr1AddFollowSets( PdaGraph *pdaGraph, LangElSet &parserEls ); + + void lr0BringInItem( PdaGraph *pdaGraph, PdaState *dest, PdaState *prodState, + PdaTrans *expandFrom, Definition *prod ); + void lr0InvokeClosure( PdaGraph *pdaGraph, PdaState *state ); + void lr0CloseAllStates( PdaGraph *pdaGraph ); + + void lalr1GenerateParser( PdaGraph *pdaGraph, LangElSet &parserEls ); + + void reduceActions( PdaGraph *pdaGraph ); + + bool makeNonTermFirstSetProd( Definition *prod, PdaState *state ); + void makeNonTermFirstSets(); + + bool makeFirstSetProd( Definition *prod, PdaState *state ); + void makeFirstSets(); + + int findIndexOff( PdaTables *pdaTables, PdaGraph *pdaGraph, PdaState *state, int &currLen ); + void trySetTime( PdaTrans *trans, long code, long &time ); + void addRegion( PdaState *tabState, PdaTrans *pdaTrans, long pdaKey, + bool noPreIgnore, bool noPostIgnore ); + PdaState *followProd( PdaState *tabState, PdaState *prodState ); + void findFollow( AlphSet &result, PdaState *overTab, + PdaState *overSrc, Definition *parentDef ); + void pdaActionOrder( PdaGraph *pdaGraph, LangElSet &parserEls ); + void pdaOrderFollow( LangEl *rootEl, PdaState *tabState, + PdaTrans *tabTrans, PdaTrans *srcTrans, + Definition *parentDef, Definition *definition, long &time ); + void pdaOrderProd( LangEl *rootEl, PdaState *tabState, + PdaState 
*srcState, Definition *parentDef, long &time ); + void analyzeMachine( PdaGraph *pdaGraph, LangElSet &parserEls ); + + void makeProdFsms(); + void insertUniqueEmptyProductions(); + void printNonTermFirstSets(); + void printFirstSets(); + + LangEl *makeRepeatProd( Namespace *nspace, const String &repeatName, + NamespaceQual *nspaceQual, const String &name ); + LangEl *makeListProd( Namespace *nspace, const String &listName, + NamespaceQual *nspaceQual, const String &name ); + LangEl *makeOptProd( Namespace *nspace, const String &optName, + NamespaceQual *nspaceQual, const String &name ); + void resolveFactor( ProdEl *fact ); + void resolveProductionEls(); + void resolvePatternEls(); + void resolveReplacementEls(); + void resolveParserEls(); + + void addMatchText( ObjectDef *frame, LangEl *lel ); + void addMatchLength( ObjectDef *frame, LangEl *lel ); + void addInput( ObjectDef *frame ); + void addCtx( ObjectDef *frame ); + void addTransTokVar( ObjectDef *frame, LangEl *lel ); + void addProdRHSVars( ObjectDef *localFrame, ProdElList *prodElList ); + void addProdRedObjectVar( ObjectDef *localFrame, LangEl *langEl ); + void addProdObjects(); + + void addProdRHSLoads( Definition *prod, CodeVect &code, long &insertPos ); + void addProdLHSLoad( Definition *prod, CodeVect &code, long &insertPos ); + void addPushBackLHS( Definition *prod, CodeVect &code, long &insertPos ); + + void prepGrammar(); + void parsePatterns(); + + void collectParserEls( LangElSet &parserEls ); + void makeParser( LangElSet &parserEls ); + PdaGraph *makePdaGraph( BstSet<LangEl*> &parserEls ); + PdaTables *makePdaTables( PdaGraph *pdaGraph ); + + void fillInPatterns( Program *prg ); + void makeRuntimeData(); + + /* Generate and write out the fsm. 
*/ + void generateGraphviz(); + + void verifyParseStopGrammar( LangEl *langEl, PdaGraph *pdaGraph ); + void computeAdvanceReductions( LangEl *langEl, PdaGraph *pdaGraph ); + + void initFieldInstructions( ObjField *el ); + void initLocalInstructions( ObjField *el ); + void initLocalRefInstructions( ObjField *el ); + + void initMapFunctions( GenericType *gen ); + void initListField( GenericType *gen, const char *name, int offset ); + void initListFields( GenericType *gen ); + void initListFunctions( GenericType *gen ); + void initVectorFunctions( GenericType *gen ); + void initParserFunctions( GenericType *gen ); + void initParserFields( GenericType *gen ); + void initCtxField( GenericType *gen ); + + void addStdin(); + void addStdout(); + void addStderr(); + void addArgv(); + int argvOffset(); + void initGlobalFunctions(); + void makeDefaultIterators(); + void addLengthField( ObjectDef *objDef, Code getLength ); + ObjectDef *findObject( const String &name ); + void initAllLanguageObjects(); + void resolveListElementOf( ObjectDef *container, ObjectDef *obj, ElementOf *elof ); + void resolveMapElementOf( ObjectDef *container, ObjectDef *obj, ElementOf *elof ); + void resolveElementOf( ObjectDef *obj ); + void makeFuncVisible( Function *func, bool isUserIter ); + + void resolveFunction( Function *func ); + void resolveUserIter( Function *func ); + void resolvePreEof( TokenRegion *region ); + void resolveRootBlock(); + void resolveTranslateBlock( LangEl *langEl ); + void resolveReductionCode( Definition *prod ); + void resolveParseTree(); + void resolveGenericTypes(); + + void compileFunction( Function *func, CodeVect &code ); + void compileFunction( Function *func ); + void compileUserIter( Function *func, CodeVect &code ); + void compileUserIter( Function *func ); + void compilePreEof( TokenRegion *region ); + void compileRootBlock(); + void compileTranslateBlock( LangEl *langEl ); + void findLocalTrees( CharSet &trees ); + void makeProdCopies( Definition *prod ); + 
void compileReductionCode( Definition *prod ); + void initGenericTypes(); + void removeNonUnparsableRepls(); + void compileByteCode(); + + void resolveUses(); + void createDefaultScanner(); + void generateOutput(); + void compile(); + + void openNameSpace( ostream &out, Namespace *nspace ); + void closeNameSpace( ostream &out, Namespace *nspace ); + void refNameSpace( LangEl *lel, Namespace *nspace ); + void generateExports(); + void generateExportsImpl(); + + /* + * Graphviz Generation + */ + void writeTransList( PdaState *state ); + void writeDotFile( PdaGraph *graph ); + void writeDotFile( ); + + + /* + * Data collected during the parse. + */ + + LelList langEls; + DefList prodList; + + /* Dumping. */ + DotItemIndex dotItemIndex; + + PredDeclList predDeclList; + + /* The name of the file the fsm is from, and the spec name. */ + // EXISTS IN RL: char *fileName; + String parserName; + ostream &out; + // EXISTS IN RL: InputLoc sectionLoc; + + /* How to access the instance data. */ + String access; + + /* The name of the token structure. 
*/ + String tokenStruct; + + GenericType *anyList; + GenericType *anyMap; + GenericType *anyVector; + + LangEl *ptrLangEl; + LangEl *boolLangEl; + LangEl *intLangEl; + LangEl *strLangEl; + LangEl *streamLangEl; + LangEl *inputLangEl; + LangEl *anyLangEl; + LangEl *rootLangEl; + LangEl *noTokenLangEl; + LangEl *eofLangEl; + LangEl *errorLangEl; + LangEl *defaultCharLangEl; + LangEl *ignoreLangEl; + + TokenRegion *rootRegion; + TokenRegion *defaultRegion; + TokenRegion *eofTokenRegion; + + Namespace *defaultNamespace; + Namespace *rootNamespace; + + int nextSymbolId; + int firstNonTermId; + + LangEl **langElIndex; + PdaState *actionDestState; + DefSetSet prodSetSet; + + Definition **prodIdIndex; + AlphSet literalSet; + + PatternList patternList; + ReplList replList; + ParserTextList parserTextList; + + ObjectDef *globalObjectDef; + + VectorTypeIdMap vectorTypeIdMap; + ObjectDef *curLocalFrame; + + UniqueType *findUniqueType( int typeId ); + UniqueType *findUniqueType( int typeId, LangEl *langEl ); + UniqueType *findUniqueType( int typeId, IterDef *iterDef ); + + UniqueType *uniqueTypeNil; + UniqueType *uniqueTypePtr; + UniqueType *uniqueTypeBool; + UniqueType *uniqueTypeInt; + UniqueType *uniqueTypeStr; + UniqueType *uniqueTypeStream; + UniqueType *uniqueTypeInput; + UniqueType *uniqueTypeIgnore; + UniqueType *uniqueTypeAny; + + UniqueTypeMap uniqeTypeMap; + UniqueRepeatMap uniqeRepeatMap; + UniqueMapMap uniqueMapMap; + UniqueListMap uniqueListMap; + UniqueVectorMap uniqueVectorMap; + UniqueParserMap uniqueParserMap; + + void initStrObject(); + void initStreamObject(); + void initInputObject(); + void initIntObject(); + void initTokenObjects(); + + ObjectDef *intObj; + ObjectDef *strObj; + ObjectDef *streamObj; + ObjectDef *inputObj; + ObjectDef *tokenObj; + + FsmTables *fsmTables; + RuntimeData *runtimeData; + + int nextPatReplId; + int nextGenericId; + + FunctionList functionList; + int nextFuncId; + + enum CompileContext { + CompileTranslation, + CompileReduction, 
+ CompileFunction, + CompileRoot + }; + + CompileContext compileContext; + LongVect returnJumps; + LongVect breakJumps; + Function *curFunction; + + /* Loops fill this in for return statements to use. */ + CodeVect *loopCleanup; + + ObjField *makeDataEl(); + ObjField *makePosEl(); + ObjField *makeLineEl(); + + IterDef *findIterDef( IterDef::Type type, GenericType *generic ); + IterDef *findIterDef( IterDef::Type type, Function *func ); + IterDef *findIterDef( IterDef::Type type ); + IterDefSet iterDefSet; + + enum GeneratesType { GenToken, GenIgnore, GenCfl }; + + int nextObjectId; + GeneratesType generatesType; + bool generatesIgnore; + bool insideRegion; + + StringMap literalStrings; + + long nextFrameId; + long nextParserId; + + ObjectDef *rootLocalFrame; + + long nextLabelId; + ObjectDef *objectDef; + + bool revertOn; + + RedFsm *redFsm; + + PdaGraph *pdaGraph; + PdaTables *pdaTables; + + long predValue; + long nextMatchEndNum; + + TypeRef *argvTypeRef; + + Context *context; +}; + +void afterOpMinimize( FsmGraph *fsm, bool lastInSeq = true ); +Key makeFsmKeyHex( char *str, const InputLoc &loc, Compiler *pd ); +Key makeFsmKeyDec( char *str, const InputLoc &loc, Compiler *pd ); +Key makeFsmKeyNum( char *str, const InputLoc &loc, Compiler *pd ); +Key makeFsmKeyChar( char c, Compiler *pd ); +void makeFsmKeyArray( Key *result, char *data, int len, Compiler *pd ); +void makeFsmUniqueKeyArray( KeySet &result, char *data, int len, + bool caseInsensitive, Compiler *pd ); +FsmGraph *makeBuiltin( BuiltinMachine builtin, Compiler *pd ); +FsmGraph *dotFsm( Compiler *pd ); +FsmGraph *dotStarFsm( Compiler *pd ); + +void errorStateLabels( const NameSet &locations ); + +struct ColmParser; + +typedef AvlMap<String, ColmParser *, CmpStr> ParserDict; +typedef AvlMapEl<String, ColmParser *> ParserDictEl; + +LangEl *declareLangEl( Compiler *pd, Namespace *nspace, const String &data, LangEl::Type type ); +LangEl *addLangEl( Compiler *pd, Namespace *nspace, const String &data, 
LangEl::Type type ); +void declareTypeAlias( Compiler *pd, Namespace *nspace, const String &data, TypeRef *typeRef ); +LangEl *findType( Compiler *pd, Namespace *nspace, const String &data ); + +#endif /* _PARSEDATA_H */ diff --git a/src/parsetree.cc b/src/parsetree.cc new file mode 100644 index 00000000..084ffbb8 --- /dev/null +++ b/src/parsetree.cc @@ -0,0 +1,1776 @@ +/* + * Copyright 2006-2012 Adrian Thurston <thurston@complang.org> + */ + +/* This file is part of Colm. + * + * Colm is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Colm is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Colm; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "lmparse.h" +#include "parsetree.h" +#include "input.h" +#include "fsmrun.h" + +#include <iostream> +#include <iomanip> +#include <errno.h> +#include <limits.h> +#include <stdlib.h> + + +using namespace std; +ostream &operator<<( ostream &out, const NameRef &nameRef ); +ostream &operator<<( ostream &out, const NameInst &nameInst ); +ostream &operator<<( ostream &out, const Token &token ); + +/* Convert the literal string which comes in from the scanner into an array of + * characters with escapes and options interpreted. Also null terminates the + * string. 
Though this null termination should not be relied on for + * interpreting literals in the parser because the string may contain a + * literal string with \0 */ +void prepareLitString( String &result, bool &caseInsensitive, + const String &srcString, const InputLoc &loc ) +{ + /* Size the output to the raw source length; it is trimmed to the decoded length by chop() at the end. */ result.setAs( String::Fresh(), srcString.length() ); + caseInsensitive = false; + + /* src starts one past the first character (the opening quote); end starts on the last character. */ char *src = srcString.data + 1; + char *end = srcString.data + srcString.length() - 1; + + /* Walk end backwards over trailing option characters until a quote or newline is found. Only the i option is accepted; anything else is reported through error( loc ). */ while ( *end != '\'' && *end != '\"' && *end != '\n' ) { + if ( *end == 'i' ) + caseInsensitive = true; + else { + error( loc ) << "literal string '" << *end << + "' option not supported" << endl; + } + end -= 1; + } + + /* When the literal is terminated by a newline, step past it so the newline is copied as part of the content. */ if ( *end == '\n' ) + end++; + + char *dest = result.data; + int len = 0; + /* Copy up to end, translating backslash escapes. NOTE(review): the loop ends only when src equals end exactly, and an escape advances src by two, so this relies on every backslash being followed by a character that lies before end. */ while ( src != end ) { + if ( *src == '\\' ) { + switch ( src[1] ) { + case '0': dest[len++] = '\0'; break; + case 'a': dest[len++] = '\a'; break; + case 'b': dest[len++] = '\b'; break; + case 't': dest[len++] = '\t'; break; + case 'n': dest[len++] = '\n'; break; + case 'v': dest[len++] = '\v'; break; + case 'f': dest[len++] = '\f'; break; + case 'r': dest[len++] = '\r'; break; + /* A backslash followed by a newline emits nothing. */ case '\n': break; + /* Any other escaped character is copied through unchanged. */ default: dest[len++] = src[1]; break; + } + src += 2; + } + else { + dest[len++] = *src++; + } + } + + result.chop( len ); +} + +int CmpUniqueType::compare( const UniqueType &ut1, const UniqueType &ut2 ) +{ + /* Order by typeId first. On a tie, TYPE_TREE, TYPE_PTR and TYPE_REF are ordered by the langEl pointer and TYPE_ITER by the iterDef pointer; a tie on any other typeId reaches the assert below. */ if ( ut1.typeId < ut2.typeId ) + return -1; + else if ( ut1.typeId > ut2.typeId ) + return 1; + else if ( ut1.typeId == TYPE_TREE || + ut1.typeId == TYPE_PTR || + ut1.typeId == TYPE_REF ) + { + if ( ut1.langEl < ut2.langEl ) + return -1; + else if ( ut1.langEl > ut2.langEl ) + return 1; + } + else if ( ut1.typeId == TYPE_ITER ) { + if ( ut1.iterDef < ut2.iterDef ) + return -1; + else if ( ut1.iterDef > ut2.iterDef ) + return 1; + } + else { + /* Fail on anything unimplemented. 
*/ + assert( false ); + } + + return 0; +} + +int CmpUniqueRepeat::compare( const UniqueRepeat &ut1, const UniqueRepeat &ut2 ) +{ + /* Order by repeatType, then by the langEl pointer. */ if ( ut1.repeatType < ut2.repeatType ) + return -1; + else if ( ut1.repeatType > ut2.repeatType ) + return 1; + else { + if ( ut1.langEl < ut2.langEl ) + return -1; + else if ( ut1.langEl > ut2.langEl ) + return 1; + } + + return 0; +} + +int CmpUniqueMap::compare( const UniqueMap &ut1, const UniqueMap &ut2 ) +{ + /* Order by key, then by value. */ if ( ut1.key < ut2.key ) + return -1; + else if ( ut1.key > ut2.key ) + return 1; + else { + if ( ut1.value < ut2.value ) + return -1; + else if ( ut1.value > ut2.value ) + return 1; + } + + return 0; +} + +int CmpUniqueList::compare( const UniqueList &ut1, const UniqueList &ut2 ) +{ + /* Order by value only. */ if ( ut1.value < ut2.value ) + return -1; + else if ( ut1.value > ut2.value ) + return 1; + + return 0; +} + +int CmpUniqueVector::compare( const UniqueVector &ut1, const UniqueVector &ut2 ) +{ + /* Order by value only. */ if ( ut1.value < ut2.value ) + return -1; + else if ( ut1.value > ut2.value ) + return 1; + + return 0; +} + +int CmpUniqueParser::compare( const UniqueParser &ut1, const UniqueParser &ut2 ) +{ + /* Order by parseType only. */ if ( ut1.parseType < ut2.parseType ) + return -1; + else if ( ut1.parseType > ut2.parseType ) + return 1; + + return 0; +} + +FsmGraph *VarDef::walk( Compiler *pd ) +{ + /* Recurse on the expression. */ + FsmGraph *rtnVal = join->walk( pd ); + + /* Do the transfer of local error actions. */ + LocalErrDictEl *localErrDictEl = pd->localErrDict.find( name ); + if ( localErrDictEl != 0 ) { + for ( StateList::Iter state = rtnVal->stateList; state.lte(); state++ ) + rtnVal->transferErrorActions( state, localErrDictEl->value ); + } + + /* If the expression below is a join operation with multiple expressions + * then it just had epsilon transitions resolved. If it is a join + * with only a single expression then run the epsilon op now. */ + if ( join->exprList.length() == 1 ) + rtnVal->epsilonOp(); + + /* We can now unset entry points that are not longer used. 
*/ + pd->unsetObsoleteEntries( rtnVal ); + + return rtnVal; +} + + +FsmGraph *RegionDef::walk( Compiler *pd ) +{ + /* We enter into a new name scope. */ + NameFrame nameFrame = pd->enterNameScope( true, 1 ); + + /* Recurse on the expression. */ + FsmGraph *rtnVal = tokenRegion->walk( pd ); + + /* Do the tranfer of local error actions. */ + LocalErrDictEl *localErrDictEl = pd->localErrDict.find( name ); + if ( localErrDictEl != 0 ) { + for ( StateList::Iter state = rtnVal->stateList; state.lte(); state++ ) + rtnVal->transferErrorActions( state, localErrDictEl->value ); + } + + /* We can now unset entry points that are not longer used. */ + pd->unsetObsoleteEntries( rtnVal ); + + /* If the name of the variable is referenced then add the entry point to + * the graph. */ + if ( pd->curNameInst->numRefs > 0 ) + rtnVal->setEntry( pd->curNameInst->id, rtnVal->startState ); + + /* Pop the name scope. */ + pd->popNameScope( nameFrame ); + return rtnVal; +} + +void RegionDef::makeNameTree( const InputLoc &loc, Compiler *pd ) +{ + /* The variable definition enters a new scope. */ + NameInst *prevNameInst = pd->curNameInst; + pd->curNameInst = pd->addNameInst( loc, name, false ); + + /* Guess we do this now. */ + tokenRegion->makeActions( pd ); + + /* Save off the name inst into the token region. This is only legal for + * token regions because they are only ever referenced once (near the root + * of the name tree). They cannot have more than one corresponding name + * inst. */ + assert( tokenRegion->regionNameInst == 0 ); + tokenRegion->regionNameInst = pd->curNameInst; + + /* The name scope ends, pop the name instantiation. */ + pd->curNameInst = prevNameInst; +} + +InputLoc TokenDef::getLoc() +{ + return action != 0 ? action->loc : semiLoc; +} + +/* + * If there are any LMs then all of the following entry points must reset + * tokstart: + * + * 1. fentry(StateRef) + * 2. ftoto(StateRef), fcall(StateRef), fnext(StateRef) + * 3. 
targt of any transition that has an fcall (the return loc). + * 4. start state of all longest match routines. + */ + +Action *TokenRegion::newAction( Compiler *pd, const InputLoc &loc, + const String &name, InlineList *inlineList ) +{ + Action *action = new Action( loc, name, inlineList ); + pd->actionList.append( action ); + action->isLmAction = true; + return action; +} + +void TokenRegion::makeActions( Compiler *pd ) +{ + /* Make actions that set the action id. */ + for ( TokenDefListReg::Iter lmi = tokenDefList; lmi.lte(); lmi++ ) { + /* For each part create actions for setting the match type. We need + * to do this so that the actions will go into the actionIndex. */ + InlineList *inlineList = new InlineList; + inlineList->append( new InlineItem( lmi->getLoc(), this, lmi, + InlineItem::LmSetActId ) ); + char *actName = new char[50]; + sprintf( actName, "store%i", lmi->longestMatchId ); + lmi->setActId = newAction( pd, lmi->getLoc(), actName, inlineList ); + } + + /* Make actions that execute the user action and restart on the last character. */ + for ( TokenDefListReg::Iter lmi = tokenDefList; lmi.lte(); lmi++ ) { + /* For each part create actions for setting the match type. We need + * to do this so that the actions will go into the actionIndex. */ + InlineList *inlineList = new InlineList; + inlineList->append( new InlineItem( lmi->getLoc(), this, lmi, + InlineItem::LmOnLast ) ); + char *actName = new char[50]; + sprintf( actName, "imm%i", lmi->longestMatchId ); + lmi->actOnLast = newAction( pd, lmi->getLoc(), actName, inlineList ); + } + + /* Make actions that execute the user action and restart on the next + * character. These actions will set tokend themselves (it is the current + * char). */ + for ( TokenDefListReg::Iter lmi = tokenDefList; lmi.lte(); lmi++ ) { + /* For each part create actions for setting the match type. We need + * to do this so that the actions will go into the actionIndex. 
*/ + InlineList *inlineList = new InlineList; + inlineList->append( new InlineItem( lmi->getLoc(), this, lmi, + InlineItem::LmOnNext ) ); + char *actName = new char[50]; + sprintf( actName, "lagh%i", lmi->longestMatchId ); + lmi->actOnNext = newAction( pd, lmi->getLoc(), actName, inlineList ); + } + + /* Make actions that execute the user action and restart at tokend. These + * actions execute some time after matching the last char. */ + for ( TokenDefListReg::Iter lmi = tokenDefList; lmi.lte(); lmi++ ) { + /* For each part create actions for setting the match type. We need + * to do this so that the actions will go into the actionIndex. */ + InlineList *inlineList = new InlineList; + inlineList->append( new InlineItem( lmi->getLoc(), this, lmi, + InlineItem::LmOnLagBehind ) ); + char *actName = new char[50]; + sprintf( actName, "lag%i", lmi->longestMatchId ); + lmi->actLagBehind = newAction( pd, lmi->getLoc(), actName, inlineList ); + } + + InputLoc loc; + loc.line = 1; + loc.col = 1; + + /* Create the error action. */ + InlineList *il6 = new InlineList; + il6->append( new InlineItem( loc, this, 0, InlineItem::LmSwitch ) ); + lmActSelect = newAction( pd, loc, "lagsel", il6 ); +} + +void TokenRegion::restart( FsmGraph *graph, FsmTrans *trans ) +{ + FsmState *fromState = trans->fromState; + graph->detachTrans( fromState, trans->toState, trans ); + graph->attachTrans( fromState, graph->startState, trans ); +} + +void TokenRegion::runLongestMatch( Compiler *pd, FsmGraph *graph ) +{ + graph->markReachableFromHereStopFinal( graph->startState ); + for ( StateList::Iter ms = graph->stateList; ms.lte(); ms++ ) { + if ( ms->stateBits & SB_ISMARKED ) { + ms->lmItemSet.insert( 0 ); + ms->stateBits &= ~ SB_ISMARKED; + } + } + + /* Transfer the first item of non-empty lmAction tables to the item sets + * of the states that follow. Exclude states that have no transitions out. 
+ * This must happen on a separate pass so that on each iteration of the + * next pass we have the item set entries from all lmAction tables. */ + for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) { + for ( TransList::Iter trans = st->outList; trans.lte(); trans++ ) { + if ( trans->lmActionTable.length() > 0 ) { + LmActionTableEl *lmAct = trans->lmActionTable.data; + FsmState *toState = trans->toState; + assert( toState ); + + /* Check if there are transitions out, this may be a very + * close approximation? Out transitions going nowhere? + * FIXME: Check. */ + if ( toState->outList.length() > 0 ) { + /* Fill the item sets. */ + graph->markReachableFromHereStopFinal( toState ); + for ( StateList::Iter ms = graph->stateList; ms.lte(); ms++ ) { + if ( ms->stateBits & SB_ISMARKED ) { + ms->lmItemSet.insert( lmAct->value ); + ms->stateBits &= ~ SB_ISMARKED; + } + } + } + } + } + } + + /* The lmItem sets are now filled, telling us which longest match rules + * can succeed in which states. First determine if we need to make sure + * act is defaulted to zero. */ + int maxItemSetLength = 0; + graph->markReachableFromHereStopFinal( graph->startState ); + for ( StateList::Iter ms = graph->stateList; ms.lte(); ms++ ) { + if ( ms->stateBits & SB_ISMARKED ) { + if ( ms->lmItemSet.length() > maxItemSetLength ) + maxItemSetLength = ms->lmItemSet.length(); + ms->stateBits &= ~ SB_ISMARKED; + } + } + + /* The actions executed on starting to match a token. */ + graph->isolateStartState(); + graph->startState->fromStateActionTable.setAction( pd->setTokStartOrd, pd->setTokStart ); + if ( maxItemSetLength > 1 ) { + /* The longest match action switch may be called when tokens are + * matched, in which case act must be initialized, there must be a + * case to handle the error, and the generated machine will require an + * error state. 
*/ + lmSwitchHandlesError = true; + graph->startState->toStateActionTable.setAction( pd->initActIdOrd, pd->initActId ); + } + + /* The place to store transitions to restart. It may be possible for the + * restarting to affect the searching through the graph that follows. For + * now take the safe route and save the list of transitions to restart + * until after all searching is done. */ + Vector<FsmTrans*> restartTrans; + + /* Set actions that do immediate token recognition, set the longest match part + * id and set the token ending. */ + for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) { + for ( TransList::Iter trans = st->outList; trans.lte(); trans++ ) { + if ( trans->lmActionTable.length() > 0 ) { + LmActionTableEl *lmAct = trans->lmActionTable.data; + FsmState *toState = trans->toState; + assert( toState ); + + /* Check if there are transitions out, this may be a very + * close approximation? Out transitions going nowhere? + * FIXME: Check. */ + if ( toState->outList.length() == 0 ) { + /* Can execute the immediate action for the longest match + * part. Redirect the action to the start state. */ + trans->actionTable.setAction( lmAct->key, + lmAct->value->actOnLast ); + restartTrans.append( trans ); + } + else { + /* Look for non final states that have a non-empty item + * set. If these are present then we need to record the + * end of the token. Also find the highest item set + * length reachable from here (excluding transitions to + * final states). 
*/ + bool nonFinalNonEmptyItemSet = false; + maxItemSetLength = 0; + graph->markReachableFromHereStopFinal( toState ); + for ( StateList::Iter ms = graph->stateList; ms.lte(); ms++ ) { + if ( ms->stateBits & SB_ISMARKED ) { + if ( ms->lmItemSet.length() > 0 && !ms->isFinState() ) + nonFinalNonEmptyItemSet = true; + if ( ms->lmItemSet.length() > maxItemSetLength ) + maxItemSetLength = ms->lmItemSet.length(); + ms->stateBits &= ~ SB_ISMARKED; + } + } + + /* If there are reachable states that are not final and + * have non empty item sets or that have an item set + * length greater than one then we need to set tokend + * because the error action that matches the token will + * require it. */ + if ( nonFinalNonEmptyItemSet || maxItemSetLength > 1 ) + trans->actionTable.setAction( pd->setTokEndOrd, pd->setTokEnd ); + + /* Some states may not know which longest match item to + * execute, must set it. */ + if ( maxItemSetLength > 1 ) { + /* There are transitions out, another match may come. */ + trans->actionTable.setAction( lmAct->key, + lmAct->value->setActId ); + } + } + } + } + } + + /* Now that all graph searching is done it is certainly safe to do the + * restarting. It may be safe above, however this must be verified. */ + for ( Vector<FsmTrans*>::Iter rs = restartTrans; rs.lte(); rs++ ) + restart( graph, *rs ); + + /* One ordering value, taken from the compiler counter, is shared by every error and eof action embedded below. */ int lmErrActionOrd = pd->curActionOrd++; + + /* Embed the error for recognizing a char. */ + for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) { + if ( st->lmItemSet.length() == 1 && st->lmItemSet[0] != 0 ) { + if ( st->isFinState() ) { + /* On error execute the actOnNext action, which knows that + * the last character of the token was one back and restart. 
*/ + graph->setErrorTarget( st, graph->startState, &lmErrActionOrd, + &st->lmItemSet[0]->actOnNext, 1 ); + st->eofActionTable.setAction( lmErrActionOrd, + st->lmItemSet[0]->actOnNext ); + st->eofTarget = graph->startState; + } + else { + /* Not a final state: use the actLagBehind action for both the error and the eof case instead. */ graph->setErrorTarget( st, graph->startState, &lmErrActionOrd, + &st->lmItemSet[0]->actLagBehind, 1 ); + st->eofActionTable.setAction( lmErrActionOrd, + st->lmItemSet[0]->actLagBehind ); + st->eofTarget = graph->startState; + } + } + else if ( st->lmItemSet.length() > 1 ) { + /* Need to use the select. Take note of which items the select + * is needed for so only the necessary actions are included. */ + for ( LmItemSet::Iter plmi = st->lmItemSet; plmi.lte(); plmi++ ) { + if ( *plmi != 0 ) + (*plmi)->inLmSelect = true; + } + /* On error, execute the action select and go to the start state. */ + graph->setErrorTarget( st, graph->startState, &lmErrActionOrd, + &lmActSelect, 1 ); + st->eofActionTable.setAction( lmErrActionOrd, lmActSelect ); + st->eofTarget = graph->startState; + } + } + + /* Finally, the start state should be made final. */ + graph->setFinState( graph->startState ); +} + +void TokenRegion::transferScannerLeavingActions( FsmGraph *graph ) +{ + /* For every state with a non-empty out action table, hand that table to setErrorActions on the graph. */ for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) { + if ( st->outActionTable.length() > 0 ) + graph->setErrorActions( st, st->outActionTable ); + } +} + +FsmGraph *TokenRegion::walk( Compiler *pd ) +{ + /* Make each part of the longest match. */ + int numParts = 0; + FsmGraph **parts = new FsmGraph*[tokenDefList.length()]; + for ( TokenDefListReg::Iter lmi = tokenDefList; lmi.lte(); lmi++ ) { + /* Watch out for patternless tokens. */ + if ( lmi->join != 0 ) { + /* Create the machine and embed the setting of the longest match id. */ + parts[numParts] = lmi->join->walk( pd ); + parts[numParts]->longMatchAction( pd->curActionOrd++, lmi ); + + /* Look for tokens that accept the zero length-word. The first one found + * will be used as the default token. 
*/ + if ( defaultTokenDef == 0 && parts[numParts]->startState->isFinState() ) + defaultTokenDef = lmi; + + numParts += 1; + } + } + FsmGraph *retFsm = parts[0]; + + if ( defaultTokenDef != 0 && defaultTokenDef->tdLangEl->ignore ) + error() << "ignore token cannot be a scanner's zero-length token" << endp; + + /* The region is empty. Return the empty set. */ + if ( numParts == 0 ) { + retFsm = new FsmGraph(); + retFsm->lambdaFsm(); + } + else { + /* Before we union the patterns we need to deal with leaving actions. They + * are transfered to error transitions out of the final states (like local + * error actions) and to eof actions. In the scanner we need to forbid + * on_last for any final state that has an leaving action. */ + for ( int i = 0; i < numParts; i++ ) + transferScannerLeavingActions( parts[i] ); + + /* Union machines one and up with machine zero. */ + FsmGraph *retFsm = parts[0]; + for ( int i = 1; i < numParts; i++ ) { + retFsm->unionOp( parts[i] ); + afterOpMinimize( retFsm ); + } + + runLongestMatch( pd, retFsm ); + delete[] parts; + } + + return retFsm; +} + +/* Construct with a location and the first expression. */ +Join::Join( Expression *expr ) +: + context(0), + mark(0) +{ + exprList.append( expr ); +} + +/* Walk an expression node. */ +FsmGraph *Join::walk( Compiler *pd ) +{ + assert( exprList.length() == 1 ); + + FsmGraph *retFsm = exprList.head->walk( pd ); + + /* Maybe the the context. */ + if ( context != 0 ) { + retFsm->leaveFsmAction( pd->curActionOrd++, mark ); + FsmGraph *contextGraph = context->walk( pd ); + retFsm->concatOp( contextGraph ); + } + + return retFsm; +} + +/* Clean up after an expression node. */ +Expression::~Expression() +{ + switch ( type ) { + case OrType: case IntersectType: case SubtractType: + case StrongSubtractType: + delete expression; + delete term; + break; + case TermType: + delete term; + break; + case BuiltinType: + break; + } +} + +/* Evaluate a single expression node. 
*/ +FsmGraph *Expression::walk( Compiler *pd, bool lastInSeq ) +{ + FsmGraph *rtnVal = 0; + switch ( type ) { + case OrType: { + /* Evaluate the left-hand expression. Note that lastInSeq is passed as + * false here, whereas the operators below call walk without it. */ + rtnVal = expression->walk( pd, false ); + /* Evaluate the term. */ + FsmGraph *rhs = term->walk( pd ); + /* Perform union. */ + rtnVal->unionOp( rhs ); + afterOpMinimize( rtnVal, lastInSeq ); + break; + } + case IntersectType: { + /* Evaluate the expression. */ + rtnVal = expression->walk( pd ); + /* Evaluate the term. */ + FsmGraph *rhs = term->walk( pd ); + /* Perform intersection. */ + rtnVal->intersectOp( rhs ); + afterOpMinimize( rtnVal, lastInSeq ); + break; + } + case SubtractType: { + /* Evaluate the expression. */ + rtnVal = expression->walk( pd ); + /* Evaluate the term. */ + FsmGraph *rhs = term->walk( pd ); + /* Perform subtraction. */ + rtnVal->subtractOp( rhs ); + afterOpMinimize( rtnVal, lastInSeq ); + break; + } + case StrongSubtractType: { + /* Evaluate the expression. */ + rtnVal = expression->walk( pd ); + + /* Evaluate the term and pad it on both sides with any* machines, so + * that what gets subtracted below is: any* term any*. */ + FsmGraph *rhs = dotStarFsm( pd ); + FsmGraph *termFsm = term->walk( pd ); + FsmGraph *trailAnyStar = dotStarFsm( pd ); + rhs->concatOp( termFsm ); + rhs->concatOp( trailAnyStar ); + + /* Perform subtraction. */ + rtnVal->subtractOp( rhs ); + afterOpMinimize( rtnVal, lastInSeq ); + break; + } + case TermType: { + /* Return result of the term. No operator is applied at this level. */ + rtnVal = term->walk( pd ); + break; + } + case BuiltinType: { + /* Duplicate the builtin. */ + rtnVal = makeBuiltin( builtin, pd ); + break; + } + } + + return rtnVal; +} + +/* Clean up after a term node. The concatenation forms delete both the nested + * term and the factorWithAug; the plain form deletes only the factorWithAug. */ +Term::~Term() +{ + switch ( type ) { + case ConcatType: + case RightStartType: + case RightFinishType: + case LeftType: + delete term; + delete factorWithAug; + break; + case FactorWithAugType: + delete factorWithAug; + break; + } +} + +/* Evaluate a term node. 
*/ +FsmGraph *Term::walk( Compiler *pd, bool lastInSeq ) +{ + FsmGraph *rtnVal = 0; + switch ( type ) { + case ConcatType: { + /* Evaluate the Term. */ + rtnVal = term->walk( pd, false ); + /* Evaluate the FactorWithAug. */ + FsmGraph *rhs = factorWithAug->walk( pd ); + /* Perform concatenation. */ + rtnVal->concatOp( rhs ); + afterOpMinimize( rtnVal, lastInSeq ); + break; + } + case RightStartType: { + /* Evaluate the Term. */ + rtnVal = term->walk( pd ); + + /* Evaluate the FactorWithAug. */ + FsmGraph *rhs = factorWithAug->walk( pd ); + + /* Set up the priority descriptors. The left machine gets the + * lower priority whereas the right gets the higher start priority. */ + priorDescs[0].key = pd->nextPriorKey++; + priorDescs[0].priority = 0; + rtnVal->allTransPrior( pd->curPriorOrd++, &priorDescs[0] ); + + /* The start transitions of the right machine get the higher priority. + * Use the same unique key. */ + priorDescs[1].key = priorDescs[0].key; + priorDescs[1].priority = 1; + rhs->startFsmPrior( pd->curPriorOrd++, &priorDescs[1] ); + + /* Perform concatenation. */ + rtnVal->concatOp( rhs ); + afterOpMinimize( rtnVal, lastInSeq ); + break; + } + case RightFinishType: { + /* Evaluate the Term. */ + rtnVal = term->walk( pd ); + + /* Evaluate the FactorWithAug. */ + FsmGraph *rhs = factorWithAug->walk( pd ); + + /* Set up the priority descriptors. The left machine gets the + * lower priority whereas the finishing transitions to the right + * get the higher priority. */ + priorDescs[0].key = pd->nextPriorKey++; + priorDescs[0].priority = 0; + rtnVal->allTransPrior( pd->curPriorOrd++, &priorDescs[0] ); + + /* The finishing transitions of the right machine get the higher + * priority. Use the same unique key. */ + priorDescs[1].key = priorDescs[0].key; + priorDescs[1].priority = 1; + rhs->finishFsmPrior( pd->curPriorOrd++, &priorDescs[1] ); + + /* Perform concatenation. 
*/ + rtnVal->concatOp( rhs ); + afterOpMinimize( rtnVal, lastInSeq ); + break; + } + case LeftType: { + /* Evaluate the Term. */ + rtnVal = term->walk( pd ); + + /* Evaluate the FactorWithAug. */ + FsmGraph *rhs = factorWithAug->walk( pd ); + + /* Set up the priority descriptors. The left machine gets the + * higher priority. */ + priorDescs[0].key = pd->nextPriorKey++; + priorDescs[0].priority = 1; + rtnVal->allTransPrior( pd->curPriorOrd++, &priorDescs[0] ); + + /* The right machine gets the lower priority. Since + * startTransPrior might unnecessarily increase the number of + * states during the state machine construction process (due to + * isolation), we use allTransPrior instead, which has the same + * effect. */ + priorDescs[1].key = priorDescs[0].key; + priorDescs[1].priority = 0; + rhs->allTransPrior( pd->curPriorOrd++, &priorDescs[1] ); + + /* Perform concatenation. */ + rtnVal->concatOp( rhs ); + afterOpMinimize( rtnVal, lastInSeq ); + break; + } + case FactorWithAugType: { + rtnVal = factorWithAug->walk( pd ); + break; + } + } + return rtnVal; +} + +/* Clean up after a factor with augmentation node. */ +FactorWithAug::~FactorWithAug() +{ + delete factorWithRep; + + /* Walk the vector of parser actions, deleting function names. + * NOTE(review): no code implements this step here. */ + + /* Clean up priority descriptors. */ + if ( priorDescs != 0 ) + delete[] priorDescs; +} + +void FactorWithAug::assignActions( Compiler *pd, FsmGraph *graph, int *actionOrd ) +{ + /* Assign actions. Each entry of actions is embedded into graph at the + * place selected by its type, using the ordering stored at the same + * index of actionOrd. The start variants are followed by a minimize. */ + for ( int i = 0; i < actions.length(); i++ ) { + switch ( actions[i].type ) { + /* Transition actions. */ + case at_start: + graph->startFsmAction( actionOrd[i], actions[i].action ); + afterOpMinimize( graph ); + break; + case at_all: + graph->allTransAction( actionOrd[i], actions[i].action ); + break; + case at_finish: + graph->finishFsmAction( actionOrd[i], actions[i].action ); + break; + case at_leave: + graph->leaveFsmAction( actionOrd[i], actions[i].action ); + break; + + /* Global error actions. 
*/ + case at_start_gbl_error: + graph->startErrorAction( actionOrd[i], actions[i].action, 0 ); + afterOpMinimize( graph ); + break; + case at_all_gbl_error: + graph->allErrorAction( actionOrd[i], actions[i].action, 0 ); + break; + case at_final_gbl_error: + graph->finalErrorAction( actionOrd[i], actions[i].action, 0 ); + break; + case at_not_start_gbl_error: + graph->notStartErrorAction( actionOrd[i], actions[i].action, 0 ); + break; + case at_not_final_gbl_error: + graph->notFinalErrorAction( actionOrd[i], actions[i].action, 0 ); + break; + case at_middle_gbl_error: + graph->middleErrorAction( actionOrd[i], actions[i].action, 0 ); + break; + + /* Local error actions. These pass the action's localErrKey where the + * global variants above pass 0. */ + case at_start_local_error: + graph->startErrorAction( actionOrd[i], actions[i].action, + actions[i].localErrKey ); + afterOpMinimize( graph ); + break; + case at_all_local_error: + graph->allErrorAction( actionOrd[i], actions[i].action, + actions[i].localErrKey ); + break; + case at_final_local_error: + graph->finalErrorAction( actionOrd[i], actions[i].action, + actions[i].localErrKey ); + break; + case at_not_start_local_error: + graph->notStartErrorAction( actionOrd[i], actions[i].action, + actions[i].localErrKey ); + break; + case at_not_final_local_error: + graph->notFinalErrorAction( actionOrd[i], actions[i].action, + actions[i].localErrKey ); + break; + case at_middle_local_error: + graph->middleErrorAction( actionOrd[i], actions[i].action, + actions[i].localErrKey ); + break; + + /* EOF actions. 
*/ + case at_start_eof: + graph->startEOFAction( actionOrd[i], actions[i].action ); + afterOpMinimize( graph ); + break; + case at_all_eof: + graph->allEOFAction( actionOrd[i], actions[i].action ); + break; + case at_final_eof: + graph->finalEOFAction( actionOrd[i], actions[i].action ); + break; + case at_not_start_eof: + graph->notStartEOFAction( actionOrd[i], actions[i].action ); + break; + case at_not_final_eof: + graph->notFinalEOFAction( actionOrd[i], actions[i].action ); + break; + case at_middle_eof: + graph->middleEOFAction( actionOrd[i], actions[i].action ); + break; + + /* To State Actions. */ + case at_start_to_state: + graph->startToStateAction( actionOrd[i], actions[i].action ); + afterOpMinimize( graph ); + break; + case at_all_to_state: + graph->allToStateAction( actionOrd[i], actions[i].action ); + break; + case at_final_to_state: + graph->finalToStateAction( actionOrd[i], actions[i].action ); + break; + case at_not_start_to_state: + graph->notStartToStateAction( actionOrd[i], actions[i].action ); + break; + case at_not_final_to_state: + graph->notFinalToStateAction( actionOrd[i], actions[i].action ); + break; + case at_middle_to_state: + graph->middleToStateAction( actionOrd[i], actions[i].action ); + break; + + /* From State Actions. */ + case at_start_from_state: + graph->startFromStateAction( actionOrd[i], actions[i].action ); + afterOpMinimize( graph ); + break; + case at_all_from_state: + graph->allFromStateAction( actionOrd[i], actions[i].action ); + break; + case at_final_from_state: + graph->finalFromStateAction( actionOrd[i], actions[i].action ); + break; + case at_not_start_from_state: + graph->notStartFromStateAction( actionOrd[i], actions[i].action ); + break; + case at_not_final_from_state: + graph->notFinalFromStateAction( actionOrd[i], actions[i].action ); + break; + case at_middle_from_state: + graph->middleFromStateAction( actionOrd[i], actions[i].action ); + break; + + /* Remaining cases, prevented by the parser. 
*/ + default: + assert( false ); + break; + } + } +} + +void FactorWithAug::assignPriorities( FsmGraph *graph, int *priorOrd ) +{ + /* Assign priorities. Each entry of priorityAugs is applied to graph using + * the ordering in priorOrd and the descriptor in priorDescs at the same + * index. */ + for ( int i = 0; i < priorityAugs.length(); i++ ) { + switch ( priorityAugs[i].type ) { + case at_start: + graph->startFsmPrior( priorOrd[i], &priorDescs[i]); + /* Start fsm priorities are a special case that may require + * minimization afterwards. */ + afterOpMinimize( graph ); + break; + case at_all: + graph->allTransPrior( priorOrd[i], &priorDescs[i] ); + break; + case at_finish: + graph->finishFsmPrior( priorOrd[i], &priorDescs[i] ); + break; + case at_leave: + graph->leaveFsmPrior( priorOrd[i], &priorDescs[i] ); + break; + + default: + /* Parser prevents this case. */ + break; + } + } +} + +void FactorWithAug::assignConditions( FsmGraph *graph ) +{ + for ( int i = 0; i < conditions.length(); i++ ) { + switch ( conditions[i].type ) { + /* Transition conditions. Other types are ignored. */ + case at_start: + graph->startFsmCondition( conditions[i].action ); + afterOpMinimize( graph ); + break; + case at_all: + graph->allTransCondition( conditions[i].action ); + break; + case at_leave: + graph->leaveFsmCondition( conditions[i].action ); + break; + default: + break; + } + } +} + + +/* Evaluate a factor with augmentation node. */ +FsmGraph *FactorWithAug::walk( Compiler *pd ) +{ + /* Make the array of function orderings. It stays null when there are no + * actions. */ + int *actionOrd = 0; + if ( actions.length() > 0 ) + actionOrd = new int[actions.length()]; + + /* First walk the list of actions, assigning order to all starting + * actions. This happens before the nested machine is walked, so these + * orderings are lower than any handed out during that walk. */ + for ( int i = 0; i < actions.length(); i++ ) { + if ( actions[i].type == at_start || + actions[i].type == at_start_gbl_error || + actions[i].type == at_start_local_error || + actions[i].type == at_start_to_state || + actions[i].type == at_start_from_state || + actions[i].type == at_start_eof ) + actionOrd[i] = pd->curActionOrd++; + } + + /* Evaluate the factor with repetition. 
*/ + FsmGraph *rtnVal = factorWithRep->walk( pd ); + + /* Compute the remaining action orderings. */ + for ( int i = 0; i < actions.length(); i++ ) { + if ( actions[i].type != at_start && + actions[i].type != at_start_gbl_error && + actions[i].type != at_start_local_error && + actions[i].type != at_start_to_state && + actions[i].type != at_start_from_state && + actions[i].type != at_start_eof ) + actionOrd[i] = pd->curActionOrd++; + } + + assignConditions( rtnVal ); + + assignActions( pd, rtnVal , actionOrd ); + + /* Make the array of priority orderings. Orderings are local to this walk + * of the factor with augmentation. */ + int *priorOrd = 0; + if ( priorityAugs.length() > 0 ) + priorOrd = new int[priorityAugs.length()]; + + /* Walk all priorities, assigning the priority ordering. */ + for ( int i = 0; i < priorityAugs.length(); i++ ) + priorOrd[i] = pd->curPriorOrd++; + + /* If the priority descriptors have not been made, make them now. Make + * priority descriptors for each priority assignment that will be passed to + * the fsm. Used to keep track of the key, value and used bit. */ + if ( priorDescs == 0 && priorityAugs.length() > 0 ) { + priorDescs = new PriorDesc[priorityAugs.length()]; + for ( int i = 0; i < priorityAugs.length(); i++ ) { + /* Init the prior descriptor for the priority setting. */ + priorDescs[i].key = priorityAugs[i].priorKey; + priorDescs[i].priority = priorityAugs[i].priorValue; + } + } + + /* Assign priorities into the machine. */ + assignPriorities( rtnVal, priorOrd ); + + /* Assign epsilon transitions. */ + for ( int e = 0; e < epsilonLinks.length(); e++ ) { + /* Get the name, which may not exist. If it doesn't then silently + * ignore it because an error has already been reported. */ + NameInst *epTarg = pd->epsilonResolvedLinks[pd->nextEpsilonResolvedLink++]; + if ( epTarg != 0 ) { + /* Make the epsilon transitions. */ + rtnVal->epsilonTrans( epTarg->id ); + + /* Note that we have made a link to the name. 
*/ + pd->localNameScope->referencedNames.append( epTarg ); + } + } + + if ( priorOrd != 0 ) + delete[] priorOrd; + if ( actionOrd != 0 ) + delete[] actionOrd; + return rtnVal; +} + + +/* Clean up after a factor with repetition node. */ +FactorWithRep::~FactorWithRep() +{ + switch ( type ) { + case StarType: case StarStarType: case OptionalType: case PlusType: + case ExactType: case MaxType: case MinType: case RangeType: + delete factorWithRep; + break; + case FactorWithNegType: + delete factorWithNeg; + break; + } +} + +/* Evaluate a factor with repetition node. */ +FsmGraph *FactorWithRep::walk( Compiler *pd ) +{ + FsmGraph *retFsm = 0; + + switch ( type ) { + case StarType: { + /* Evaluate the FactorWithRep. */ + retFsm = factorWithRep->walk( pd ); + if ( retFsm->startState->isFinState() ) { + warning(loc) << "applying kleene star to a machine that " + "accepts zero length word" << endl; + } + + /* Shift over the start action orders then do the kleene star. */ + pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd ); + retFsm->starOp( ); + afterOpMinimize( retFsm ); + break; + } + case StarStarType: { + /* Evaluate the FactorWithRep. */ + retFsm = factorWithRep->walk( pd ); + if ( retFsm->startState->isFinState() ) { + warning(loc) << "applying kleene star to a machine that " + "accepts zero length word" << endl; + } + + /* Set up the prior descs. All gets priority one, whereas leaving gets + * priority zero. Make a unique key so that these priorities don't + * interfere with any priorities set by the user. */ + priorDescs[0].key = pd->nextPriorKey++; + priorDescs[0].priority = 1; + retFsm->allTransPrior( pd->curPriorOrd++, &priorDescs[0] ); + + /* Leaving gets priority 0. Use same unique key. */ + priorDescs[1].key = priorDescs[0].key; + priorDescs[1].priority = 0; + retFsm->leaveFsmPrior( pd->curPriorOrd++, &priorDescs[1] ); + + /* Shift over the start action orders then do the kleene star. 
*/ + pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd ); + retFsm->starOp( ); + afterOpMinimize( retFsm ); + break; + } + case OptionalType: { + /* Make the null fsm. */ + FsmGraph *nu = new FsmGraph(); + nu->lambdaFsm( ); + + /* Evaluate the FactorWithRep. */ + retFsm = factorWithRep->walk( pd ); + + /* Perform the question operator. */ + retFsm->unionOp( nu ); + afterOpMinimize( retFsm ); + break; + } + case PlusType: { + /* Evaluate the FactorWithRep. */ + retFsm = factorWithRep->walk( pd ); + if ( retFsm->startState->isFinState() ) { + warning(loc) << "applying plus operator to a machine that " + "accpets zero length word" << endl; + } + + /* Need a duplicate for the star end. */ + FsmGraph *dup = new FsmGraph( *retFsm ); + + /* The start func orders need to be shifted before doing the star. */ + pd->curActionOrd += dup->shiftStartActionOrder( pd->curActionOrd ); + + /* Star the duplicate. */ + dup->starOp( ); + afterOpMinimize( dup ); + + retFsm->concatOp( dup ); + afterOpMinimize( retFsm ); + break; + } + case ExactType: { + /* Get an int from the repetition amount. */ + if ( lowerRep == 0 ) { + /* No copies. Don't need to evaluate the factorWithRep. + * This defeats the purpose so give a warning. */ + warning(loc) << "exactly zero repetitions results " + "in the null machine" << endl; + + retFsm = new FsmGraph(); + retFsm->lambdaFsm(); + } + else { + /* Evaluate the first FactorWithRep. */ + retFsm = factorWithRep->walk( pd ); + if ( retFsm->startState->isFinState() ) { + warning(loc) << "applying repetition to a machine that " + "accepts zero length word" << endl; + } + + /* The start func orders need to be shifted before doing the + * repetition. */ + pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd ); + + /* Do the repetition on the machine. Already guarded against n == 0 */ + retFsm->repeatOp( lowerRep ); + afterOpMinimize( retFsm ); + } + break; + } + case MaxType: { + /* Get an int from the repetition amount. 
*/ + if ( upperRep == 0 ) { + /* No copies. Don't need to evaluate the factorWithRep. + * This defeats the purpose so give a warning. */ + warning(loc) << "max zero repetitions results " + "in the null machine" << endl; + + retFsm = new FsmGraph(); + retFsm->lambdaFsm(); + } + else { + /* Evaluate the first FactorWithRep. */ + retFsm = factorWithRep->walk( pd ); + if ( retFsm->startState->isFinState() ) { + warning(loc) << "applying max repetition to a machine that " + "accepts zero length word" << endl; + } + + /* The start func orders need to be shifted before doing the + * repetition. */ + pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd ); + + /* Do the repetition on the machine. Already guarded against n == 0 */ + retFsm->optionalRepeatOp( upperRep ); + afterOpMinimize( retFsm ); + } + break; + } + case MinType: { + /* Evaluate the repeated machine. */ + retFsm = factorWithRep->walk( pd ); + if ( retFsm->startState->isFinState() ) { + warning(loc) << "applying min repetition to a machine that " + "accepts zero length word" << endl; + } + + /* The start func orders need to be shifted before doing the repetition + * and the kleene star. */ + pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd ); + + if ( lowerRep == 0 ) { + /* Acts just like a star op on the machine to return. */ + retFsm->starOp( ); + afterOpMinimize( retFsm ); + } + else { + /* Take a duplicate for the plus. */ + FsmGraph *dup = new FsmGraph( *retFsm ); + + /* Do repetition on the first half. */ + retFsm->repeatOp( lowerRep ); + afterOpMinimize( retFsm ); + + /* Star the duplicate. */ + dup->starOp( ); + afterOpMinimize( dup ); + + /* Tack on the kleene star. */ + retFsm->concatOp( dup ); + afterOpMinimize( retFsm ); + } + break; + } + case RangeType: { + /* Check for bogus range. */ + if ( upperRep - lowerRep < 0 ) { + error(loc) << "invalid range repetition" << endl; + + /* Return null machine as recovery. 
*/ + retFsm = new FsmGraph(); + retFsm->lambdaFsm(); + } + else if ( lowerRep == 0 && upperRep == 0 ) { + /* No copies. Don't need to evaluate the factorWithRep. This + * defeats the purpose so give a warning. */ + warning(loc) << "zero to zero repetitions results " + "in the null machine" << endl; + + retFsm = new FsmGraph(); + retFsm->lambdaFsm(); + } + else { + /* Now need to evaluate the repeated machine. */ + retFsm = factorWithRep->walk( pd ); + if ( retFsm->startState->isFinState() ) { + warning(loc) << "applying range repetition to a machine that " + "accepts zero length word" << endl; + } + + /* The start func orders need to be shifted before doing both kinds + * of repetition. */ + pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd ); + + if ( lowerRep == 0 ) { + /* Just doing max repetition. Already guarded against n == 0. */ + retFsm->optionalRepeatOp( upperRep ); + afterOpMinimize( retFsm ); + } + else if ( lowerRep == upperRep ) { + /* Just doing exact repetition. Already guarded against n == 0. */ + retFsm->repeatOp( lowerRep ); + afterOpMinimize( retFsm ); + } + else { + /* This is the case that 0 < lowerRep < upperRep. Take a + * duplicate for the optional repeat. */ + FsmGraph *dup = new FsmGraph( *retFsm ); + + /* Do repetition on the first half. */ + retFsm->repeatOp( lowerRep ); + afterOpMinimize( retFsm ); + + /* Do optional repetition on the second half. */ + dup->optionalRepeatOp( upperRep - lowerRep ); + afterOpMinimize( dup ); + + /* Tack on the duplicate machine. */ + retFsm->concatOp( dup ); + afterOpMinimize( retFsm ); + } + } + break; + } + case FactorWithNegType: { + /* Evaluate the Factor. Pass it up. */ + retFsm = factorWithNeg->walk( pd ); + break; + }} + return retFsm; +} + + +/* Clean up after a factor with negation node. 
*/ +FactorWithNeg::~FactorWithNeg() +{ + /* Both negation forms own a nested factorWithNeg and the plain form owns + * a factor. Any other type owns nothing. */ + if ( type == NegateType || type == CharNegateType ) + delete factorWithNeg; + else if ( type == FactorType ) + delete factor; +} + +/* Build the machine for a factor with negation node. The negation forms + * subtract the operand's machine from a base machine; the plain form passes + * the factor's machine up unchanged. */ +FsmGraph *FactorWithNeg::walk( Compiler *pd ) +{ + /* Plain factor: nothing to negate. */ + if ( type == FactorType ) + return factor->walk( pd ); + + /* Unrecognized types produce no machine. */ + if ( type != NegateType && type != CharNegateType ) + return 0; + + /* Walk the operand first, then make the base machine: dot-star for + * negation, dot for character negation. */ + FsmGraph *operand = factorWithNeg->walk( pd ); + FsmGraph *result = ( type == NegateType ) ? dotStarFsm( pd ) : dotFsm( pd ); + + /* The result is the base machine minus the operand. */ + result->subtractOp( operand ); + afterOpMinimize( result ); + return result; +} + +/* Release whichever child a factor node owns. No delete is issued for a + * reference. */ +Factor::~Factor() +{ + switch ( type ) { + case LiteralType: delete literal; break; + case RangeType: delete range; break; + case OrExprType: delete reItem; break; + case RegExprType: delete regExp; break; + case ParenType: delete join; break; + case ReferenceType: break; + } +} + +/* Build the machine for a factor node by delegating to the child that matches + * the factor's type. Regular expression children are walked with a null root. */ +FsmGraph *Factor::walk( Compiler *pd ) +{ + switch ( type ) { + case LiteralType: return literal->walk( pd ); + case RangeType: return range->walk( pd ); + case OrExprType: return reItem->walk( pd, 0 ); + case RegExprType: return regExp->walk( pd, 0 ); + case ReferenceType: return varDef->walk( pd ); + case ParenType: return join->walk( pd ); + } + + /* Unrecognized types produce no machine. */ + return 0; +} + + +/* Clean up a range object. Must delete the two literals. 
*/ +Range::~Range() +{ + delete lowerLit; + delete upperLit; +} + +bool Range::verifyRangeFsm( FsmGraph *rangeEnd ) +{ + /* Must have two states. */ + if ( rangeEnd->stateList.length() != 2 ) + return false; + /* The start state cannot be final. */ + if ( rangeEnd->startState->isFinState() ) + return false; + /* There should be only one final state. */ + if ( rangeEnd->finStateSet.length() != 1 ) + return false; + /* The final state cannot have any transitions out. */ + if ( rangeEnd->finStateSet[0]->outList.length() != 0 ) + return false; + /* The start state should have only one transition out. */ + if ( rangeEnd->startState->outList.length() != 1 ) + return false; + /* The singe transition out of the start state should not be a range. */ + FsmTrans *startTrans = rangeEnd->startState->outList.head; + if ( startTrans->lowKey != startTrans->highKey ) + return false; + return true; +} + +/* Evaluate a range. Gets the lower an upper key and makes an fsm range. */ +FsmGraph *Range::walk( Compiler *pd ) +{ + /* Construct and verify the suitability of the lower end of the range. */ + FsmGraph *lowerFsm = lowerLit->walk( pd ); + if ( !verifyRangeFsm( lowerFsm ) ) { + error(lowerLit->loc) << + "bad range lower end, must be a single character" << endl; + } + + /* Construct and verify the upper end. */ + FsmGraph *upperFsm = upperLit->walk( pd ); + if ( !verifyRangeFsm( upperFsm ) ) { + error(upperLit->loc) << + "bad range upper end, must be a single character" << endl; + } + + /* Grab the keys from the machines, then delete them. */ + Key lowKey = lowerFsm->startState->outList.head->lowKey; + Key highKey = upperFsm->startState->outList.head->lowKey; + delete lowerFsm; + delete upperFsm; + + /* Validate the range. */ + if ( lowKey > highKey ) { + /* Recover by setting upper to lower; */ + error(lowerLit->loc) << "lower end of range is greater then upper end" << endl; + highKey = lowKey; + } + + /* Return the range now that it is validated. 
*/ + FsmGraph *retFsm = new FsmGraph(); + retFsm->rangeFsm( lowKey, highKey ); + return retFsm; +} + +/* Evaluate a literal object. */ +FsmGraph *Literal::walk( Compiler *pd ) +{ + /* FsmGraph to return, is the alphabet signed. */ + FsmGraph *rtnVal = 0; + + switch ( type ) { + case Number: { + /* Make the fsm key in int format. */ + Key fsmKey = makeFsmKeyNum( literal.data, loc, pd ); + /* Make the new machine. */ + rtnVal = new FsmGraph(); + rtnVal->concatFsm( fsmKey ); + break; + } + case LitString: { + /* Make the array of keys in int format. */ + String interp; + bool caseInsensitive; + prepareLitString( interp, caseInsensitive, literal, loc ); + Key *arr = new Key[interp.length()]; + makeFsmKeyArray( arr, interp.data, interp.length(), pd ); + + /* Make the new machine. */ + rtnVal = new FsmGraph(); + if ( caseInsensitive ) + rtnVal->concatFsmCI( arr, interp.length() ); + else + rtnVal->concatFsm( arr, interp.length() ); + delete[] arr; + break; + }} + return rtnVal; +} + +/* Clean up after a regular expression object. */ +RegExpr::~RegExpr() +{ + switch ( type ) { + case RecurseItem: + delete regExp; + delete item; + break; + case Empty: + break; + } +} + +/* Evaluate a regular expression object. */ +FsmGraph *RegExpr::walk( Compiler *pd, RegExpr *rootRegex ) +{ + /* This is the root regex, pass down a pointer to this. */ + if ( rootRegex == 0 ) + rootRegex = this; + + FsmGraph *rtnVal = 0; + switch ( type ) { + case RecurseItem: { + /* Walk both items. */ + FsmGraph *fsm1 = regExp->walk( pd, rootRegex ); + FsmGraph *fsm2 = item->walk( pd, rootRegex ); + if ( fsm1 == 0 ) + rtnVal = fsm2; + else { + fsm1->concatOp( fsm2 ); + rtnVal = fsm1; + } + break; + } + case Empty: { + /* FIXME: Return something here. */ + rtnVal = 0; + break; + } + } + return rtnVal; +} + +/* Clean up after an item in a regular expression. 
*/ +ReItem::~ReItem() +{ + switch ( type ) { + case Data: + case Dot: + break; + case OrBlock: + case NegOrBlock: + delete orBlock; + break; + } +} + +/* Evaluate a regular expression object. */ +FsmGraph *ReItem::walk( Compiler *pd, RegExpr *rootRegex ) +{ + /* The fsm to return, is the alphabet signed? */ + FsmGraph *rtnVal = 0; + + switch ( type ) { + case Data: { + /* Move the data into an integer array and make a concat fsm. */ + Key *arr = new Key[data.length()]; + makeFsmKeyArray( arr, data.data, data.length(), pd ); + + /* Make the concat fsm. */ + rtnVal = new FsmGraph(); + if ( rootRegex != 0 && rootRegex->caseInsensitive ) + rtnVal->concatFsmCI( arr, data.length() ); + else + rtnVal->concatFsm( arr, data.length() ); + delete[] arr; + break; + } + case Dot: { + /* Make the dot fsm. */ + rtnVal = dotFsm( pd ); + break; + } + case OrBlock: { + /* Get the or block and minmize it. */ + rtnVal = orBlock->walk( pd, rootRegex ); + rtnVal->minimizePartition2(); + break; + } + case NegOrBlock: { + /* Get the or block and minimize it. */ + FsmGraph *fsm = orBlock->walk( pd, rootRegex ); + fsm->minimizePartition2(); + + /* Make a dot fsm and subtract from it. */ + rtnVal = dotFsm( pd ); + rtnVal->subtractOp( fsm ); + rtnVal->minimizePartition2(); + break; + } + } + + /* If the item is followed by a star, then apply the star op. */ + if ( star ) { + if ( rtnVal->startState->isFinState() ) { + warning(loc) << "applying kleene star to a machine that " + "accpets zero length word" << endl; + } + + rtnVal->starOp(); + rtnVal->minimizePartition2(); + } + return rtnVal; +} + +/* Clean up after an or block of a regular expression. */ +ReOrBlock::~ReOrBlock() +{ + switch ( type ) { + case RecurseItem: + delete orBlock; + delete item; + break; + case Empty: + break; + } +} + + +/* Evaluate an or block of a regular expression. 
*/ +FsmGraph *ReOrBlock::walk( Compiler *pd, RegExpr *rootRegex ) +{ + FsmGraph *rtnVal = 0; + switch ( type ) { + case RecurseItem: { + /* Evaluate the two fsm. */ + FsmGraph *fsm1 = orBlock->walk( pd, rootRegex ); + FsmGraph *fsm2 = item->walk( pd, rootRegex ); + if ( fsm1 == 0 ) + rtnVal = fsm2; + else { + fsm1->unionOp( fsm2 ); + rtnVal = fsm1; + } + break; + } + case Empty: { + rtnVal = 0; + break; + } + } + return rtnVal;; +} + +/* Evaluate an or block item of a regular expression. */ +FsmGraph *ReOrItem::walk( Compiler *pd, RegExpr *rootRegex ) +{ + /* The return value, is the alphabet signed? */ + FsmGraph *rtnVal = 0; + switch ( type ) { + case Data: { + /* Make the or machine. */ + rtnVal = new FsmGraph(); + + /* Put the or data into an array of ints. Note that we find unique + * keys. Duplicates are silently ignored. The alternative would be to + * issue warning or an error but since we can't with [a0-9a] or 'a' | + * 'a' don't bother here. */ + KeySet keySet; + makeFsmUniqueKeyArray( keySet, data.data, data.length(), + rootRegex != 0 ? rootRegex->caseInsensitive : false, pd ); + + /* Run the or operator. */ + rtnVal->orFsm( keySet.data, keySet.length() ); + break; + } + case Range: { + /* Make the upper and lower keys. */ + Key lowKey = makeFsmKeyChar( lower, pd ); + Key highKey = makeFsmKeyChar( upper, pd ); + + /* Validate the range. */ + if ( lowKey > highKey ) { + /* Recover by setting upper to lower; */ + error(loc) << "lower end of range is greater then upper end" << endl; + highKey = lowKey; + } + + /* Make the range machine. */ + rtnVal = new FsmGraph(); + rtnVal->rangeFsm( lowKey, highKey ); + + if ( rootRegex != 0 && rootRegex->caseInsensitive ) { + if ( lowKey <= 'Z' && 'A' <= highKey ) { + Key otherLow = lowKey < 'A' ? Key('A') : lowKey; + Key otherHigh = 'Z' < highKey ? 
Key('Z') : highKey; + + otherLow = 'a' + ( otherLow - 'A' ); + otherHigh = 'a' + ( otherHigh - 'A' ); + + FsmGraph *otherRange = new FsmGraph(); + otherRange->rangeFsm( otherLow, otherHigh ); + rtnVal->unionOp( otherRange ); + rtnVal->minimizePartition2(); + } + else if ( lowKey <= 'z' && 'a' <= highKey ) { + Key otherLow = lowKey < 'a' ? Key('a') : lowKey; + Key otherHigh = 'z' < highKey ? Key('z') : highKey; + + otherLow = 'A' + ( otherLow - 'a' ); + otherHigh = 'A' + ( otherHigh - 'a' ); + + FsmGraph *otherRange = new FsmGraph(); + otherRange->rangeFsm( otherLow, otherHigh ); + rtnVal->unionOp( otherRange ); + rtnVal->minimizePartition2(); + } + } + + break; + }} + return rtnVal; +} diff --git a/src/parsetree.h b/src/parsetree.h new file mode 100644 index 00000000..c3a75df5 --- /dev/null +++ b/src/parsetree.h @@ -0,0 +1,2253 @@ +/* + * Copyright 2006-2012 Adrian Thurston <thurston@complang.org> + */ + +/* This file is part of Colm. + * + * Colm is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Colm is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with Colm; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _PARSETREE_H +#define _PARSETREE_H + +#include <iostream> +#include <string.h> +#include "global.h" +#include "avlmap.h" +#include "bstmap.h" +#include "bstset.h" +#include "vector.h" +#include "dlist.h" +#include "dlistval.h" +#include "dlistmel.h" +#include "astring.h" +#include "bytecode.h" +#include "avlbasic.h" +#include "fsmrun.h" + +/* Operators that are represented with single symbol characters. */ +#define OP_DoubleEql 'e' +#define OP_NotEql 'q' +#define OP_LessEql 'l' +#define OP_GrtrEql 'g' +#define OP_LogicalAnd 'a' +#define OP_LogicalOr 'o' +#define OP_Deref 'd' + +#if SIZEOF_LONG != 4 && SIZEOF_LONG != 8 + #error "SIZEOF_LONG contained an unexpected value" +#endif + +struct NameInst; +struct FsmGraph; +struct RedFsm; +struct _FsmRun; +struct ObjectDef; +struct ElementOf; +struct UniqueType; +struct ObjField; +struct TransBlock; +struct CodeBlock; +struct PdaLiteral; +struct TypeAlias; +typedef struct _PdaRun PdaRun; + +/* + * Code Vector + */ +struct CodeVect : public Vector<Code> +{ + void appendHalf( Half half ) + { + /* not optimal. */ + append( half & 0xff ); + append( (half>>8) & 0xff ); + } + + void appendWord( Word word ) + { + /* not optimal. */ + append( word & 0xff ); + append( (word>>8) & 0xff ); + append( (word>>16) & 0xff ); + append( (word>>24) & 0xff ); + #if SIZEOF_LONG == 8 + append( (word>>32) & 0xff ); + append( (word>>40) & 0xff ); + append( (word>>48) & 0xff ); + append( (word>>56) & 0xff ); + #endif + } + + void setHalf( long pos, Half half ) + { + /* not optimal. */ + data[pos] = half & 0xff; + data[pos+1] = (half>>8) & 0xff; + } + + void insertHalf( long pos, Half half ) + { + /* not optimal. 
*/ + insert( pos, half & 0xff ); + insert( pos+1, (half>>8) & 0xff ); + } + + void insertWord( long pos, Word word ) + { + /* not at all optimal. */ + insert( pos, word & 0xff ); + insert( pos+1, (word>>8) & 0xff ); + insert( pos+2, (word>>16) & 0xff ); + insert( pos+3, (word>>24) & 0xff ); + #if SIZEOF_LONG == 8 + insert( pos+4, (word>>32) & 0xff ); + insert( pos+5, (word>>40) & 0xff ); + insert( pos+6, (word>>48) & 0xff ); + insert( pos+7, (word>>56) & 0xff ); + #endif + } + + void insertTree( long pos, Tree *tree ) + { insertWord( pos, (Word) tree ); } +}; + + + +/* Types of builtin machines. */ +enum BuiltinMachine +{ + BT_Any, + BT_Ascii, + BT_Extend, + BT_Alpha, + BT_Digit, + BT_Alnum, + BT_Lower, + BT_Upper, + BT_Cntrl, + BT_Graph, + BT_Print, + BT_Punct, + BT_Space, + BT_Xdigit, + BT_Lambda, + BT_Empty +}; + +typedef BstSet<char> CharSet; +typedef Vector<unsigned char> UnsignedCharVect; + + +struct Compiler; +struct TypeRef; + +/* Leaf type. */ +struct Literal; + +/* Tree nodes. */ + +struct Term; +struct FactorWithAug; +struct FactorWithRep; +struct FactorWithNeg; +struct Factor; +struct Expression; +struct Join; +struct JoinOrLm; +struct RegionJoinOrLm; +struct TokenRegion; +struct Namespace; +struct Context; +struct TokenDef; +struct TokenDefListReg; +struct TokenDefListNs; +struct Range; +struct LangEl; + +/* Type of augmentation. Describes locations in the machine. */ +enum AugType +{ + /* Transition actions/priorities. */ + at_start, + at_all, + at_finish, + at_leave, + + /* Global error actions. */ + at_start_gbl_error, + at_all_gbl_error, + at_final_gbl_error, + at_not_start_gbl_error, + at_not_final_gbl_error, + at_middle_gbl_error, + + /* Local error actions. */ + at_start_local_error, + at_all_local_error, + at_final_local_error, + at_not_start_local_error, + at_not_final_local_error, + at_middle_local_error, + + /* To State Action embedding. 
*/ + at_start_to_state, + at_all_to_state, + at_final_to_state, + at_not_start_to_state, + at_not_final_to_state, + at_middle_to_state, + + /* From State Action embedding. */ + at_start_from_state, + at_all_from_state, + at_final_from_state, + at_not_start_from_state, + at_not_final_from_state, + at_middle_from_state, + + /* EOF Action embedding. */ + at_start_eof, + at_all_eof, + at_final_eof, + at_not_start_eof, + at_not_final_eof, + at_middle_eof +}; + +/* IMPORTANT: These must follow the same order as the state augs in AugType + * since we will be using this to compose AugType. */ +enum StateAugType +{ + sat_start = 0, + sat_all, + sat_final, + sat_not_start, + sat_not_final, + sat_middle +}; + +struct Action; +struct PriorDesc; +struct RegExpr; +struct ReItem; +struct ReOrBlock; +struct ReOrItem; +struct ExplicitMachine; +struct InlineItem; +struct InlineList; + +/* Reference to a named state. */ +typedef Vector<String> NameRef; +typedef Vector<NameRef*> NameRefList; +typedef Vector<NameInst*> NameTargList; + +/* Structure for storing location of epsilon transitons. */ +struct EpsilonLink +{ + EpsilonLink( const InputLoc &loc, NameRef &target ) + : loc(loc), target(target) { } + + InputLoc loc; + NameRef target; +}; + +struct Label +{ + Label( const InputLoc &loc, const String &data, ObjField *objField ) + : loc(loc), data(data), objField(objField) { } + + InputLoc loc; + String data; + ObjField *objField; +}; + +/* Structure represents an action assigned to some FactorWithAug node. The + * factor with aug will keep an array of these. 
*/ +struct ParserAction +{ + ParserAction( const InputLoc &loc, AugType type, int localErrKey, Action *action ) + : loc(loc), type(type), localErrKey(localErrKey), action(action) { } + + InputLoc loc; + AugType type; + int localErrKey; + Action *action; +}; + +struct Token +{ + String data; + InputLoc loc; +}; + +void prepareLitString( String &result, bool &caseInsensitive, + const String &srcString, const InputLoc &loc ); + +std::ostream &operator<<(std::ostream &out, const Token &token ); + +typedef AvlMap< String, TokenDef*, CmpStr > LiteralDict; +typedef AvlMapEl< String, TokenDef* > LiteralDictEl; + +/* Store the value and type of a priority augmentation. */ +struct PriorityAug +{ + PriorityAug( AugType type, int priorKey, int priorValue ) : + type(type), priorKey(priorKey), priorValue(priorValue) { } + + AugType type; + int priorKey; + int priorValue; +}; + +/* + * A Variable Definition + */ +struct VarDef +{ + VarDef( const String &name, Join *join ) + : name(name), join(join) { } + + /* Parse tree traversal. */ + FsmGraph *walk( Compiler *pd ); + void makeNameTree( const InputLoc &loc, Compiler *pd ); + + String name; + Join *join; +}; + +/* + * A Variable Definition + */ +struct RegionDef +{ + RegionDef( const String &name, TokenRegion *tokenRegion ) + : name(name), tokenRegion(tokenRegion) { } + + /* Parse tree traversal. 
*/ + FsmGraph *walk( Compiler *pd ); + void makeNameTree( const InputLoc &loc, Compiler *pd ); + + String name; + TokenRegion *tokenRegion; +}; + +typedef Vector<String> StringVect; +typedef CmpTable<String, CmpStr> CmpStrVect; + +struct NamespaceQual +{ + NamespaceQual( Namespace *declInNspace, TokenRegion *declInRegion ) : + cachedNspaceQual(0), declInNspace(declInNspace) {} + + Namespace *cachedNspaceQual; + Namespace *declInNspace; + + StringVect qualNames; + + Namespace *searchFrom( Namespace *from, StringVect::Iter &qualPart ); + Namespace *getQual( Compiler *pd ); +}; + +struct ReCapture +{ + ReCapture( Action *markEnter, Action *markLeave, ObjField *objField ) + : markEnter(markEnter), markLeave(markLeave), objField(objField) {} + + Action *markEnter; + Action *markLeave; + ObjField *objField; +}; + +typedef Vector<Context*> ContextVect; + +struct Context +{ + Context( InputLoc &loc, LangEl *lel ) + : + loc(loc), + lel(lel) + {} + + InputLoc loc; + LangEl *lel; + + ObjectDef *contextObjDef; +}; + +typedef Vector<ReCapture> ReCaptureVect; + +struct TokenDefPtr1 +{ + TokenDef *prev, *next; +}; + +struct TokenDefPtr2 +{ + TokenDef *prev, *next; +}; + +struct TokenDef +: + public TokenDefPtr1, + public TokenDefPtr2 +{ + TokenDef( const String &name, const String &literal, bool isLiteral, bool ignore, + Join *join, CodeBlock *codeBlock, InputLoc &semiLoc, + int longestMatchId, Namespace *nspace, TokenRegion *tokenRegion, + ReCaptureVect *pReCaptureVect, ObjectDef *objectDef, Context *contextIn ) + : + name(name), literal(literal), isLiteral(isLiteral), ignore(ignore), join(join), action(0), + codeBlock(codeBlock), tdLangEl(0), semiLoc(semiLoc), + longestMatchId(longestMatchId), inLmSelect(false), + nspace(nspace), tokenRegion(tokenRegion), objectDef(objectDef), + contextIn(contextIn), + dupOf(0), noPostIgnore(false), noPreIgnore(false), isZero(false) + { + if ( pReCaptureVect != 0 ) + reCaptureVect = *pReCaptureVect; + } + + InputLoc getLoc(); + + String name; + 
String literal; + bool isLiteral; + bool ignore; + Join *join; + Action *action; + CodeBlock *codeBlock; + LangEl *tdLangEl; + InputLoc semiLoc; + + Action *setActId; + Action *actOnLast; + Action *actOnNext; + Action *actLagBehind; + int longestMatchId; + bool inLmSelect; + Namespace *nspace; + TokenRegion *tokenRegion; + ReCaptureVect reCaptureVect; + ObjectDef *objectDef; + Context *contextIn; + + TokenDef *dupOf; + bool noPostIgnore; + bool noPreIgnore; + bool isZero; +}; + +struct LelDefList; + +struct NtDef +{ + NtDef( const String &name, Namespace *nspace, + LelDefList *defList, ObjectDef *objectDef, + Context *contextIn, bool reduceFirst ) + : + name(name), + nspace(nspace), + defList(defList), + objectDef(objectDef), + contextIn(contextIn), + reduceFirst(reduceFirst) + {} + + String name; + Namespace *nspace; + LelDefList *defList; + ObjectDef *objectDef; + Context *contextIn; + bool reduceFirst; + + NtDef *prev, *next; +}; + +struct NtDefList : DList<NtDef> {}; + +/* Declare a new type so that ptreetypes.h need not include dlist.h. */ +struct TokenDefListReg : DListMel<TokenDef, TokenDefPtr1> {}; +struct TokenDefListNs : DListMel<TokenDef, TokenDefPtr2> {}; + +struct ContextDef +{ + ContextDef( const String &name, Context *context, Namespace *nspace ) + : name(name), context(context), nspace(nspace) {} + + String name; + Context *context; + Namespace *nspace; + + ContextDef *prev, *next; +}; + +struct ContextDefList : DList<ContextDef> {}; + +struct TypeMapEl + : public AvlTreeEl<TypeMapEl> +{ + enum Type + { + TypeAliasType = 1, + LangElType + }; + + const String &getKey() { return key; } + + TypeMapEl( const String &key, TypeRef *typeRef ) + : type(TypeAliasType), key(key), value(0), typeRef(typeRef) {} + + TypeMapEl( const String &key, LangEl *value ) + : type(LangElType), key(key), value(value), typeRef(0) {} + + + Type type; + String key; + LangEl *value; + TypeRef *typeRef; + + TypeMapEl *prev, *next; +}; + +/* Symbol Map. 
*/ +typedef AvlTree< TypeMapEl, String, CmpStr > TypeMap; + +typedef Vector<TokenRegion*> RegionVect; + +struct TokenRegion +{ + /* Construct with a list of joins */ + TokenRegion( const InputLoc &loc, const String &name, int id, + TokenRegion *parentRegion ) : + loc(loc), name(name), id(id), + lmSwitchHandlesError(false), regionNameInst(0), + parentRegion(parentRegion), defaultTokenDef(0), + preEofBlock(0), + ignoreOnlyRegion(0), tokenOnlyRegion(0), ciRegion(0), + wasEmpty(false), + isFullRegion(false), + isIgnoreOnly(false), + isTokenOnly(false), + isCiOnly(false), + ciLel(0), + derivedFrom(0) + { } + + /* Tree traversal. */ + FsmGraph *walk( Compiler *pd ); + void makeNameTree( Compiler *pd ); + void runLongestMatch( Compiler *pd, FsmGraph *graph ); + void transferScannerLeavingActions( FsmGraph *graph ); + Action *newAction( Compiler *pd, const InputLoc &loc, const String &name, + InlineList *inlineList ); + void makeActions( Compiler *pd ); + void findName( Compiler *pd ); + void restart( FsmGraph *graph, FsmTrans *trans ); + + InputLoc loc; + TokenDefListReg tokenDefList; + String name; + int id; + + Action *lmActSelect; + bool lmSwitchHandlesError; + + /* This gets saved off during the name walk. Can save it off because token + * regions are referenced once only. */ + NameInst *regionNameInst; + + TokenRegion *parentRegion; + RegionVect childRegions; + + TokenDef *defaultTokenDef; + + CodeBlock *preEofBlock; + + /* Dupe of the region, containing only the ignore tokens. */ + TokenRegion *ignoreOnlyRegion; + TokenRegion *tokenOnlyRegion; + TokenRegion *ciRegion; + + /* We alway init empty scanners with a single token. If we had to do this + * then wasEmpty is true. 
*/ + bool wasEmpty; + + bool isFullRegion; + bool isIgnoreOnly; + bool isTokenOnly; + bool isCiOnly; + + LangEl *ciLel; + TokenRegion *derivedFrom; + + TokenRegion *next, *prev; +}; + +typedef DList<TokenRegion> RegionList; +typedef BstSet< TokenRegion*, CmpOrd<TokenRegion*> > RegionSet; + +typedef Vector<Namespace*> NamespaceVect; + +struct GenericType + : public DListEl<GenericType> +{ + GenericType( const String &name, long typeId, long id, + LangEl *langEl, TypeRef *typeArg ) + : + name(name), typeId(typeId), id(id), langEl(langEl), + typeArg(typeArg), keyTypeArg(0), + utArg(0), keyUT(0), + objDef(0) + {} + + const String &getKey() const + { return name; }; + + void declare( Compiler *pd, Namespace *nspace ); + + String name; + long typeId; + long id; + LangEl *langEl; + TypeRef *typeArg; + TypeRef *keyTypeArg; + UniqueType *utArg; + UniqueType *keyUT; + + ObjectDef *objDef; +}; + +typedef DList<GenericType> GenericList; + +typedef struct _UserIter UserIter; +typedef AvlMap<String, UserIter*, CmpStr> UserIterMap; +typedef AvlMapEl<String, UserIter*> UserIterMapEl; + +/* Graph dictionary. */ +struct GraphDictEl +: + public AvlTreeEl<GraphDictEl>, + public DListEl<GraphDictEl> +{ + GraphDictEl( const String &key ) + : key(key), value(0), isInstance(false) { } + GraphDictEl( const String &key, VarDef *value ) + : key(key), value(value), isInstance(false) { } + + const String &getKey() { return key; } + + String key; + VarDef *value; + bool isInstance; + + /* Location info of graph definition. Points to variable name of assignment. */ + InputLoc loc; +}; + +typedef AvlTree<GraphDictEl, String, CmpStr> GraphDict; +typedef DList<GraphDictEl> GraphList; + +/* Graph dictionary. 
*/ +struct RegionGraphDictEl +: + public AvlTreeEl<RegionGraphDictEl>, + public DListEl<RegionGraphDictEl> +{ + RegionGraphDictEl( const String &key ) + : key(key), value(0), isInstance(false) { } + RegionGraphDictEl( const String &key, RegionDef *value ) + : key(key), value(value), isInstance(false) { } + + const String &getKey() { return key; } + + String key; + RegionDef *value; + bool isInstance; + + /* Location info of graph definition. Points to variable name of assignment. */ + InputLoc loc; +}; + +typedef AvlTree<RegionGraphDictEl, String, CmpStr> RegionGraphDict; +typedef DList<RegionGraphDictEl> RegionGraphList; + +struct TypeAlias +{ + TypeAlias( const InputLoc &loc, Namespace *nspace, + const String &name, TypeRef *typeRef ) + : + loc(loc), + nspace(nspace), + name(name), + typeRef(typeRef) + {} + + InputLoc loc; + Namespace *nspace; + String name; + TypeRef *typeRef; + + TypeAlias *prev, *next; +}; + +typedef DList<TypeAlias> TypeAliasList; + +struct Namespace +{ + /* Construct with a list of joins */ + Namespace( const InputLoc &loc, const String &name, int id, + Namespace *parentNamespace ) : + loc(loc), name(name), id(id), + parentNamespace(parentNamespace) { } + + /* Tree traversal. */ + Namespace *findNamespace( const String &name ); + + InputLoc loc; + String name; + int id; + + /* Literal patterns and the dictionary mapping literals to the underlying + * tokens. */ + LiteralDict literalDict; + + /* List of tokens defs in the namespace. */ + TokenDefListNs tokenDefList; + + /* List of nonterminal defs in the namespace. */ + NtDefList ntDefList; + + /* List of context definitions for encapsulating the data of a parser. */ + ContextDefList contextDefList; + + /* Dictionary of symbols within the region. */ + TypeMap typeMap; + GenericList genericList; + + /* Dictionary of graphs. Both instances and non-instances go here. */ + RegionGraphDict graphDict; + + /* regular language definitions. 
*/ + GraphDict rlMap; + + TypeAliasList typeAliasList; + + Namespace *parentNamespace; + NamespaceVect childNamespaces; + + Namespace *next, *prev; + + void declare( Compiler *pd ); +}; + +typedef DList<Namespace> NamespaceList; +typedef BstSet< Namespace*, CmpOrd<Namespace*> > NamespaceSet; + +/* List of Expressions. */ +typedef DList<Expression> ExprList; + +struct JoinOrLm +{ + JoinOrLm( Join *join ) : + join(join) {} + + FsmGraph *walk( Compiler *pd ); + void makeNameTree( Compiler *pd ); + + Join *join; +}; + +struct RegionJoinOrLm +{ + enum Type { LongestMatchType }; + + RegionJoinOrLm( TokenRegion *tokenRegion ) : + tokenRegion(tokenRegion) {} + + FsmGraph *walk( Compiler *pd ); + void makeNameTree( Compiler *pd ); + + TokenRegion *tokenRegion; +}; + +/* + * Join + */ +struct Join +{ + /* Construct with the first expression. */ + Join( Expression *expr ); + + /* Tree traversal. */ + FsmGraph *walk( Compiler *pd ); + void makeNameTree( Compiler *pd ); + + /* Data. */ + ExprList exprList; + + Join *context; + Action *mark; +}; + +/* + * Expression + */ +struct Expression +{ + enum Type { + OrType, + IntersectType, + SubtractType, + StrongSubtractType, + TermType, + BuiltinType + }; + + /* Construct with an expression on the left and a term on the right. */ + Expression( Expression *expression, Term *term, Type type ) : + expression(expression), term(term), + builtin(builtin), type(type), prev(this), next(this) { } + + /* Construct with only a term. */ + Expression( Term *term ) : + expression(0), term(term), builtin(builtin), + type(TermType) , prev(this), next(this) { } + + /* Construct with a builtin type. */ + Expression( BuiltinMachine builtin ) : + expression(0), term(0), builtin(builtin), + type(BuiltinType), prev(this), next(this) { } + + ~Expression(); + + /* Tree traversal. */ + FsmGraph *walk( Compiler *pd, bool lastInSeq = true ); + void makeNameTree( Compiler *pd ); + + /* Node data. 
*/ + Expression *expression; + Term *term; + BuiltinMachine builtin; + Type type; + + Expression *prev, *next; +}; + +/* + * Term + */ +struct Term +{ + enum Type { + ConcatType, + RightStartType, + RightFinishType, + LeftType, + FactorWithAugType + }; + + Term( Term *term, FactorWithAug *factorWithAug ) : + term(term), factorWithAug(factorWithAug), type(ConcatType) { } + + Term( Term *term, FactorWithAug *factorWithAug, Type type ) : + term(term), factorWithAug(factorWithAug), type(type) { } + + Term( FactorWithAug *factorWithAug ) : + term(0), factorWithAug(factorWithAug), type(FactorWithAugType) { } + + ~Term(); + + FsmGraph *walk( Compiler *pd, bool lastInSeq = true ); + void makeNameTree( Compiler *pd ); + + Term *term; + FactorWithAug *factorWithAug; + Type type; + + /* Priority descriptor for RightFinish type. */ + PriorDesc priorDescs[2]; +}; + + +/* Third level of precedence. Augmenting nodes with actions and priorities. */ +struct FactorWithAug +{ + FactorWithAug( FactorWithRep *factorWithRep ) : + priorDescs(0), factorWithRep(factorWithRep) { } + ~FactorWithAug(); + + /* Tree traversal. */ + FsmGraph *walk( Compiler *pd ); + void makeNameTree( Compiler *pd ); + + void assignActions( Compiler *pd, FsmGraph *graph, int *actionOrd ); + void assignPriorities( FsmGraph *graph, int *priorOrd ); + + void assignConditions( FsmGraph *graph ); + + /* Actions and priorities assigned to the factor node. */ + Vector<ParserAction> actions; + Vector<PriorityAug> priorityAugs; + PriorDesc *priorDescs; + Vector<EpsilonLink> epsilonLinks; + Vector<ParserAction> conditions; + + FactorWithRep *factorWithRep; +}; + +/* Fourth level of precedence. Trailing unary operators. Provide kleen star, + * optional and plus. 
*/ +struct FactorWithRep +{ + enum Type { + StarType, + StarStarType, + OptionalType, + PlusType, + ExactType, + MaxType, + MinType, + RangeType, + FactorWithNegType + }; + + FactorWithRep( const InputLoc &loc, FactorWithRep *factorWithRep, + int lowerRep, int upperRep, Type type ) : + loc(loc), factorWithRep(factorWithRep), + factorWithNeg(0), lowerRep(lowerRep), + upperRep(upperRep), type(type) { } + + FactorWithRep( const InputLoc &loc, FactorWithNeg *factorWithNeg ) + : loc(loc), factorWithNeg(factorWithNeg), type(FactorWithNegType) { } + + ~FactorWithRep(); + + /* Tree traversal. */ + FsmGraph *walk( Compiler *pd ); + void makeNameTree( Compiler *pd ); + + InputLoc loc; + FactorWithRep *factorWithRep; + FactorWithNeg *factorWithNeg; + int lowerRep, upperRep; + Type type; + + /* Priority descriptor for StarStar type. */ + PriorDesc priorDescs[2]; +}; + +/* Fifth level of precedence. Provides Negation. */ +struct FactorWithNeg +{ + enum Type { + NegateType, + CharNegateType, + FactorType + }; + + FactorWithNeg( const InputLoc &loc, FactorWithNeg *factorWithNeg, Type type) : + loc(loc), factorWithNeg(factorWithNeg), factor(0), type(type) { } + + FactorWithNeg( const InputLoc &loc, Factor *factor ) : + loc(loc), factorWithNeg(0), factor(factor), type(FactorType) { } + + ~FactorWithNeg(); + + /* Tree traversal. */ + FsmGraph *walk( Compiler *pd ); + void makeNameTree( Compiler *pd ); + + InputLoc loc; + FactorWithNeg *factorWithNeg; + Factor *factor; + Type type; +}; + +/* + * Factor + */ +struct Factor +{ + /* Language elements a factor node can be. */ + enum Type { + LiteralType, + RangeType, + OrExprType, + RegExprType, + ReferenceType, + ParenType, + }; + + /* Construct with a literal fsm. */ + Factor( Literal *literal ) : + literal(literal), type(LiteralType) { } + + /* Construct with a range. */ + Factor( Range *range ) : + range(range), type(RangeType) { } + + /* Construct with the or part of a regular expression. 
*/ + Factor( ReItem *reItem ) : + reItem(reItem), type(OrExprType) { } + + /* Construct with a regular expression. */ + Factor( RegExpr *regExp ) : + regExp(regExp), type(RegExprType) { } + + /* Construct with a reference to a var def. */ + Factor( const InputLoc &loc, VarDef *varDef ) : + loc(loc), varDef(varDef), type(ReferenceType) {} + + /* Construct with a parenthesized join. */ + Factor( Join *join ) : + join(join), type(ParenType) {} + + /* Cleanup. */ + ~Factor(); + + /* Tree traversal. */ + FsmGraph *walk( Compiler *pd ); + void makeNameTree( Compiler *pd ); + + InputLoc loc; + Literal *literal; + Range *range; + ReItem *reItem; + RegExpr *regExp; + VarDef *varDef; + Join *join; + int lower, upper; + Type type; +}; + +/* A range machine. Only ever composed of two literals. */ +struct Range +{ + Range( Literal *lowerLit, Literal *upperLit ) + : lowerLit(lowerLit), upperLit(upperLit) { } + + ~Range(); + FsmGraph *walk( Compiler *pd ); + bool verifyRangeFsm( FsmGraph *rangeEnd ); + + Literal *lowerLit; + Literal *upperLit; +}; + +/* Some literal machine. Can be a number or literal string. */ +struct Literal +{ + enum LiteralType { Number, LitString }; + + Literal( const InputLoc &loc, const String &literal, LiteralType type ) + : loc(loc), literal(literal), type(type) { } + + FsmGraph *walk( Compiler *pd ); + + InputLoc loc; + String literal; + LiteralType type; +}; + +/* Regular expression. */ +struct RegExpr +{ + enum RegExpType { RecurseItem, Empty }; + + /* Constructors. */ + RegExpr() : + type(Empty), caseInsensitive(false) { } + RegExpr(RegExpr *regExp, ReItem *item) : + regExp(regExp), item(item), + type(RecurseItem), caseInsensitive(false) { } + + ~RegExpr(); + FsmGraph *walk( Compiler *pd, RegExpr *rootRegex ); + + RegExpr *regExp; + ReItem *item; + RegExpType type; + bool caseInsensitive; +}; + +/* An item in a regular expression. 
*/ +struct ReItem +{ + enum ReItemType { Data, Dot, OrBlock, NegOrBlock }; + + ReItem( const InputLoc &loc, const String &data ) + : loc(loc), data(data), star(false), type(Data) { } + ReItem( const InputLoc &loc, ReItemType type ) + : loc(loc), star(false), type(type) { } + ReItem( const InputLoc &loc, ReOrBlock *orBlock, ReItemType type ) + : loc(loc), orBlock(orBlock), star(false), type(type) { } + + ~ReItem(); + FsmGraph *walk( Compiler *pd, RegExpr *rootRegex ); + + InputLoc loc; + String data; + ReOrBlock *orBlock; + bool star; + ReItemType type; +}; + +/* An or block item. */ +struct ReOrBlock +{ + enum ReOrBlockType { RecurseItem, Empty }; + + /* Constructors. */ + ReOrBlock() + : type(Empty) { } + ReOrBlock(ReOrBlock *orBlock, ReOrItem *item) + : orBlock(orBlock), item(item), type(RecurseItem) { } + + ~ReOrBlock(); + FsmGraph *walk( Compiler *pd, RegExpr *rootRegex ); + + ReOrBlock *orBlock; + ReOrItem *item; + ReOrBlockType type; +}; + +/* An item in an or block. */ +struct ReOrItem +{ + enum ReOrItemType { Data, Range }; + + ReOrItem( const InputLoc &loc, const String &data ) + : loc(loc), data(data), type(Data) {} + ReOrItem( const InputLoc &loc, char lower, char upper ) + : loc(loc), lower(lower), upper(upper), type(Range) { } + + FsmGraph *walk( Compiler *pd, RegExpr *rootRegex ); + + InputLoc loc; + String data; + char lower; + char upper; + ReOrItemType type; +}; + + +/* + * Inline code tree + */ +struct InlineList; +struct InlineItem +{ + enum Type + { + Text, + LmSwitch, + LmSetActId, + LmSetTokEnd, + LmOnLast, + LmOnNext, + LmOnLagBehind, + LmInitAct, + LmInitTokStart, + LmSetTokStart + }; + + InlineItem( const InputLoc &loc, const String &data, Type type ) : + loc(loc), data(data), nameRef(0), children(0), type(type) { } + + InlineItem( const InputLoc &loc, NameRef *nameRef, Type type ) : + loc(loc), nameRef(nameRef), children(0), type(type) { } + + InlineItem( const InputLoc &loc, TokenRegion *tokenRegion, + TokenDef *longestMatchPart, Type 
type ) : loc(loc), + nameRef(0), children(0), tokenRegion(tokenRegion), + longestMatchPart(longestMatchPart), type(type) { } + + InlineItem( const InputLoc &loc, NameInst *nameTarg, Type type ) : + loc(loc), nameRef(0), nameTarg(nameTarg), children(0), + type(type) { } + + InlineItem( const InputLoc &loc, Type type ) : + loc(loc), nameRef(0), children(0), type(type) { } + + InputLoc loc; + String data; + NameRef *nameRef; + NameInst *nameTarg; + InlineList *children; + TokenRegion *tokenRegion; + TokenDef *longestMatchPart; + Type type; + + InlineItem *prev, *next; +}; + +/* Normally this would be a typedef, but that would entail including DList from + * ptreetypes, which should be just typedef forwards. */ +struct InlineList : public DList<InlineItem> { }; + +struct ProdEl; +struct LangVarRef; +struct ObjField; + +struct PatternItem +{ + enum Type { + FactorType, + InputText + }; + + PatternItem( const InputLoc &loc, const String &data, Type type ) : + loc(loc), factor(0), data(data), type(type), region(0), + varRef(0), bindId(0) {} + + PatternItem( const InputLoc &loc, ProdEl *factor, Type type ) : + loc(loc), factor(factor), type(type), region(0), + varRef(0), bindId(0) {} + + InputLoc loc; + ProdEl *factor; + String data; + Type type; + TokenRegion *region; + LangVarRef *varRef; + long bindId; + + PatternItem *prev, *next; +}; + +struct LangExpr; +typedef DList<PatternItem> PatternItemList; + +struct ReplItem +{ + enum Type { + InputText, + ExprType, + FactorType + }; + + ReplItem( const InputLoc &loc, Type type, const String &data ) : + loc(loc), type(type), data(data), expr(0), bindId(0) {} + + ReplItem( const InputLoc &loc, Type type, LangExpr *expr ) : + loc(loc), type(type), expr(expr), bindId(0) {} + + ReplItem( const InputLoc &loc, Type type, ProdEl *factor ) : + loc(loc), type(type), expr(0), factor(factor), bindId(0) {} /* no LangExpr is passed to this constructor, so expr starts out null rather than being initialized from itself */ + + InputLoc loc; + Type type; + String data; + LangExpr *expr; + LangEl *langEl; /* not set by any constructor */ + ProdEl *factor; /* only set by the constructor that takes a factor */ + long bindId; + + ReplItem 
*prev, *next; +}; + +typedef DList<ReplItem> ReplItemList; + + +struct Pattern +{ + Pattern( const InputLoc &loc, Namespace *nspace, TokenRegion *region, + PatternItemList *list, int patRepId ) : + loc(loc), nspace(nspace), region(region), list(list), patRepId(patRepId), + langEl(0), pdaRun(0), nextBindId(1) {} + + InputLoc loc; + Namespace *nspace; + TokenRegion *region; + PatternItemList *list; + long patRepId; /* the constructor takes an int; Replacement stores its patRepId as int */ + LangEl *langEl; + PdaRun *pdaRun; + long nextBindId; /* starts at 1 */ + + Pattern *prev, *next; +}; + +typedef DList<Pattern> PatternList; + +struct Replacement +{ + Replacement( const InputLoc &loc, Namespace *nspace, + TokenRegion *region, ReplItemList *list, int patRepId ) : + loc(loc), nspace(nspace), region(region), list(list), + patRepId(patRepId), langEl(0), pdaRun(0), nextBindId(1), parse(true) {} + + InputLoc loc; + Namespace *nspace; + TokenRegion *region; + ReplItemList *list; + int patRepId; + LangEl *langEl; + PdaRun *pdaRun; + long nextBindId; /* starts at 1 */ + bool parse; /* true after construction */ + + Replacement *prev, *next; +}; + +typedef DList<Replacement> ReplList; + +struct ParserText +{ + ParserText( const InputLoc &loc, Namespace *nspace, + TokenRegion *region, ReplItemList *list ) : + loc(loc), nspace(nspace), region(region), list(list), + langEl(0), pdaRun(0), nextBindId(1), parse(true) {} + + InputLoc loc; + Namespace *nspace; + TokenRegion *region; + ReplItemList *list; + LangEl *langEl; + PdaRun *pdaRun; + long nextBindId; /* starts at 1 */ + bool parse; /* true after construction */ + + ParserText *prev, *next; +}; + +typedef DList<ParserText> ParserTextList; + +struct Function; + +struct IterDef +{ + enum Type { Tree, Child, RevChild, Repeat, RevRepeat, User }; + + IterDef( Type type, Function *func ); + IterDef( Type type ); + + Type type; + + Function *func; /* CmpIterDef compares this only for User iterators */ + bool useFuncId; + bool useSearchUT; + + Code inCreateWV; + Code inCreateWC; + Code inDestroy; + Code inAdvance; + + Code inGetCurR; + Code inGetCurWC; + Code inSetCurWC; + + Code inRefFromCur; +}; + +struct CmpIterDef +{ + static int compare( const IterDef &id1, const 
IterDef &id2 ) + { + if ( id1.type < id2.type ) + return -1; + else if ( id1.type > id2.type ) + return 1; + else if ( id1.type == IterDef::User ) { + if ( id1.func < id2.func ) + return -1; + else if ( id1.func > id2.func ) + return 1; + } + + return 0; + } +}; + +typedef AvlSet<IterDef, CmpIterDef> IterDefSet; +typedef AvlSetEl<IterDef> IterDefSetEl; + + +/* + * Unique Types. + */ + +/* + * type_ref -> qualified_name + * type_ref -> '*' type_ref + * type_ref -> '&' type_ref + * type_ref -> list type_ref type_ref + * type_ref -> map type_ref type_ref + * type_ref -> vector type_ref + * type_ref -> parser type_ref + * type_ref -> iter_tree type_ref + * type_ref -> iter_child type_ref + * type_ref -> iter_revchild type_ref + * type_ref -> iter_repeat type_ref + * type_ref -> iter_revrepeat type_ref + * type_ref -> iter_user type_ref + * + * type -> nil + * type -> def term + * type -> def nonterm + * type -> '*' type + * type -> '&' type + * type -> list type + * type -> map type type + * type -> vector type + * type -> parser type + * type -> iter_tree type + * type -> iter_child type + * type -> iter_revchild type + * type -> iter_repeat type + * type -> iter_revrepeat type + * type -> iter_user type + */ + +struct UniqueType : public AvlTreeEl<UniqueType> /* a type id plus an optional LangEl or IterDef; whichever is not supplied is null */ +{ + UniqueType( int typeId ) : + typeId(typeId), + langEl(0), + iterDef(0) {} + + UniqueType( int typeId, LangEl *langEl ) : + typeId(typeId), + langEl(langEl), + iterDef(0) {} + + UniqueType( int typeId, IterDef *iterDef ) : + typeId(typeId), + langEl(0), /* no LangEl is passed to this constructor; it used to be initialized from itself */ + iterDef(iterDef) {} + + int typeId; + LangEl *langEl; + IterDef *iterDef; +}; + +struct CmpUniqueType +{ + static int compare( const UniqueType &ut1, const UniqueType &ut2 ); +}; + +typedef AvlBasic< UniqueType, CmpUniqueType > UniqueTypeMap; + +enum RepeatType { + RepeatNone = 1, + RepeatRepeat, + RepeatList, + RepeatOpt, +}; + +/* + * Repeat types. 
+ */ + +struct UniqueRepeat + : public AvlTreeEl<UniqueRepeat> +{ + UniqueRepeat( RepeatType repeatType, LangEl *langEl ) : + repeatType(repeatType), + langEl(langEl), declLangEl(0) {} + + RepeatType repeatType; + LangEl *langEl; + LangEl *declLangEl; /* null after construction */ +}; + +struct CmpUniqueRepeat +{ + static int compare( const UniqueRepeat &ut1, const UniqueRepeat &ut2 ); +}; + +typedef AvlBasic< UniqueRepeat, CmpUniqueRepeat > UniqueRepeatMap; + +/* + * Unique Map Types. Pairs a key type with a value type; generic starts out null. + */ + +struct UniqueMap + : public AvlTreeEl<UniqueMap> +{ + UniqueMap( UniqueType *key, UniqueType *value ) : + key(key), value(value), generic(0) {} + + UniqueType *key; + UniqueType *value; + + GenericType *generic; +}; + +struct CmpUniqueMap +{ + static int compare( const UniqueMap &ut1, const UniqueMap &ut2 ); +}; + +typedef AvlBasic< UniqueMap, CmpUniqueMap > UniqueMapMap; + +/* + * Unique List Types. Holds the value type; generic starts out null. + */ + +struct UniqueList + : public AvlTreeEl<UniqueList> +{ + UniqueList( UniqueType *value ) : + value(value), generic(0) {} + + UniqueType *value; + GenericType *generic; +}; + +struct CmpUniqueList +{ + static int compare( const UniqueList &ut1, const UniqueList &ut2 ); +}; + +typedef AvlBasic< UniqueList, CmpUniqueList > UniqueListMap; + +/* + * Unique Vector Types. Holds the value type; generic starts out null. + */ + +struct UniqueVector + : public AvlTreeEl<UniqueVector> +{ + UniqueVector( UniqueType *value ) : + value(value), generic(0) {} + + UniqueType *value; + GenericType *generic; +}; + +struct CmpUniqueVector +{ + static int compare( const UniqueVector &ut1, const UniqueVector &ut2 ); +}; + +typedef AvlBasic< UniqueVector, CmpUniqueVector > UniqueVectorMap; + +/* + * Unique Parser Types. Holds the type being parsed; generic starts out null. + */ + +struct UniqueParser + : public AvlTreeEl<UniqueParser> +{ + UniqueParser( UniqueType *parseType ) : + parseType(parseType), generic(0) {} + + UniqueType *parseType; + GenericType *generic; +}; + +struct CmpUniqueParser +{ + static int compare( const UniqueParser &ut1, const UniqueParser &ut2 ); +}; + +typedef AvlBasic< UniqueParser, CmpUniqueParser > 
UniqueParserMap; + +/* + * + */ + +typedef AvlMap< StringVect, int, CmpStrVect > VectorTypeIdMap; +typedef AvlMapEl< StringVect, int > VectorTypeIdMapEl; + +typedef Vector<TypeRef*> TypeRefVect; + +struct TypeRef +{ + enum Type + { + Unspecified, + Name, + Literal, + Iterator, + Map, + List, + Vector, + Parser, + Ref, + Ptr, + }; + + /* Qualification and a type name. These require lookup. */ + TypeRef( const InputLoc &loc, NamespaceQual *nspaceQual, String typeName ) : + type(Name), loc(loc), nspaceQual(nspaceQual), typeName(typeName), pdaLiteral(0), iterDef(0), + typeRef1(0), typeRef2(0), + repeatType(RepeatNone), + nspace(0), uniqueType(0), searchUniqueType(0), generic(0) {} + + /* Qualification and a literal. These require lookup. */ + TypeRef( const InputLoc &loc, NamespaceQual *nspaceQual, PdaLiteral *pdaLiteral ) : + type(Literal), loc(loc), nspaceQual(nspaceQual), pdaLiteral(pdaLiteral), iterDef(0), + typeRef1(0), typeRef2(0), + repeatType(RepeatNone), + nspace(0), uniqueType(0), searchUniqueType(0), generic(0) {} + + /* Generics. The caller supplies the Type tag along with two component type refs. */ + TypeRef( Type type, const InputLoc &loc, NamespaceQual *nspaceQual, TypeRef *typeRef1, TypeRef *typeRef2 ) : + type(type), loc(loc), nspaceQual(nspaceQual), pdaLiteral(0), iterDef(0), + typeRef1(typeRef1), typeRef2(typeRef2), + repeatType(RepeatNone), + nspace(0), uniqueType(0), searchUniqueType(0), generic(0) {} + + /* Pointers and Refs. Only the first component type ref is used. */ + TypeRef( Type type, const InputLoc &loc, TypeRef *typeRef1 ) : + type(type), loc(loc), nspaceQual(0), pdaLiteral(0), iterDef(0), + typeRef1(typeRef1), typeRef2(0), + repeatType(RepeatNone), + nspace(0), uniqueType(0), searchUniqueType(0), generic(0) {} + + /* Resolution not needed. */ + + /* Iterator definition. 
*/ + TypeRef( const InputLoc &loc, IterDef *iterDef, UniqueType *uniqueType, + UniqueType *searchUniqueType ) : + type(Iterator), loc(loc), nspaceQual(0), pdaLiteral(0), iterDef(iterDef), + typeRef1(0), typeRef2(0), + repeatType(RepeatNone), + nspace(0), uniqueType(uniqueType), searchUniqueType(searchUniqueType), generic(0) {} + + /* Unique type is given directly. The type tag stays Unspecified. */ + TypeRef( const InputLoc &loc, UniqueType *uniqueType ) : + type(Unspecified), loc(loc), nspaceQual(0), pdaLiteral(0), iterDef(0), + typeRef1(0), typeRef2(0), + repeatType(RepeatNone), + nspace(0), uniqueType(uniqueType), searchUniqueType(0), generic(0) {} + + void resolveRepeat( Compiler *pd ); + + UniqueType *lookupTypeName( Compiler *pd ); + UniqueType *lookupTypeLiteral( Compiler *pd ); + UniqueType *lookupTypeMap( Compiler *pd ); + UniqueType *lookupTypeList( Compiler *pd ); + UniqueType *lookupTypeVector( Compiler *pd ); + UniqueType *lookupTypeParser( Compiler *pd ); + UniqueType *lookupType( Compiler *pd ); + UniqueType *lookupTypePtr( Compiler *pd ); + UniqueType *lookupTypeRef( Compiler *pd ); + + Type type; + InputLoc loc; + NamespaceQual *nspaceQual; + String typeName; + PdaLiteral *pdaLiteral; + IterDef *iterDef; + TypeRef *typeRef1; + TypeRef *typeRef2; + RepeatType repeatType; /* RepeatNone after construction */ + + /* Resolved. 
*/ + Namespace *nspace; + UniqueType *uniqueType; + UniqueType *searchUniqueType; + GenericType *generic; +}; + +typedef DList<ObjField> ParameterList; + +struct ObjMethod +{ + ObjMethod( UniqueType *returnUT, String name, + int opcodeWV, int opcodeWC, int numParams, + UniqueType **types, ParameterList *paramList, bool isConst ) + : + returnUT(returnUT), + returnTypeId(0), + name(name), + opcodeWV(opcodeWV), + opcodeWC(opcodeWC), + numParams(numParams), + paramList(paramList), + isConst(isConst), + funcId(0), + useFuncId(false), + useCallObj(true), + isCustom(false), + func(0), + iterDef(0) + { + this->paramUTs = new UniqueType*[numParams]; /* private copy of the caller's types array */ + memcpy( this->paramUTs, types, sizeof(UniqueType*)*numParams ); + } + + UniqueType *returnUT; + long returnTypeId; + String name; + long opcodeWV; + long opcodeWC; + long numParams; + UniqueType **paramUTs; /* array of numParams entries allocated by the constructor */ + ParameterList *paramList; + bool isConst; + long funcId; + bool useFuncId; + bool useCallObj; + bool isCustom; + Function *func; + IterDef *iterDef; +}; + +typedef AvlMap<String, ObjMethod*, CmpStr> ObjMethodMap; +typedef AvlMapEl<String, ObjMethod*> ObjMethodMapEl; + +struct RhsVal { RhsVal( int prodNum, int childNum ) : prodNum(prodNum), childNum(childNum) { } int prodNum; int childNum; }; + +struct ObjField +{ + ObjField( const InputLoc &loc, TypeRef *typeRef, const String &name ) : + loc(loc), typeRef(typeRef), name(name), + context(0), + pos(0), offset(0), + beenReferenced(false), + beenInitialized(false), + useOffset(true), + isConst(false), + isLhsEl(false), isRhsEl(false), + refActive(false), + isArgv(false), + isCustom(false), + isParam(false), + isRhsGet(false), + isExport(false), + dirtyTree(false), + inGetR( IN_HALT ), /* all five access opcodes start out as IN_HALT */ + inGetWC( IN_HALT ), + inGetWV( IN_HALT ), + inSetWC( IN_HALT ), + inSetWV( IN_HALT ) + {} + + InputLoc loc; + TypeRef *typeRef; + String name; + Context *context; + long pos; + long offset; + bool beenReferenced; + bool beenInitialized; + bool useOffset; + bool isConst; + bool isLhsEl; + bool 
isRhsEl; + bool refActive; + bool isArgv; + bool isCustom; + bool isParam; + bool isRhsGet; + bool isExport; + + /* True if some aspect of the tree has possibly been written to. This does + * not include attributes. This is here so we can optimize the storage of + * old lhs vars. If only a lhs attribute changes we don't need to preserve + * the original for backtracking. */ + bool dirtyTree; + + Vector<RhsVal> rhsVal; + + Code inGetR; + Code inGetWC; + Code inGetWV; + Code inSetWC; + Code inSetWV; + + ObjField *prev, *next; +}; + +typedef AvlMap<String, ObjField*, CmpStr> ObjFieldMap; +typedef AvlMapEl<String, ObjField*> ObjFieldMapEl; + +typedef DListVal<ObjField*> ObjFieldList; + +typedef DList<ObjField> ParameterList; /* repeats the identical typedef that precedes ObjMethod */ + +struct TemplateType; + +/* Tree of name scopes for an object def. All of the object fields inside this + * tree live in one object def. This is used for scoping names in functions. */ +struct ObjNameScope +{ + ObjNameScope() + : parentScope(0), childIter(0) + {} + + ObjFieldMap *objFieldMap; /* not set by the constructor; ObjectDef's constructor allocates it for the scope it creates */ + + ObjNameScope *parentScope; + DList<ObjNameScope> children; + + /* For iteration after declaration. */ + ObjNameScope *childIter; + + ObjNameScope *prev, *next; +}; + +struct ObjectDef +{ + enum Type { + UserType, + FrameType, + IterType, + BuiltinType + }; + + ObjectDef( Type type, String name, int id ) + : + type(type), name(name), id(id), + nextOffset(0), firstNonTree(0) + { + scope = new ObjNameScope; /* initial scope, with its own field map */ + scope->objFieldMap = new ObjFieldMap; + + objFieldList = new ObjFieldList; + objMethodMap = new ObjMethodMap(); + } + + Type type; + String name; + ObjFieldList *objFieldList; + ObjMethodMap *objMethodMap; + + /* Head of stack of name scopes. 
*/ + ObjNameScope *scope; + + void pushScope(); + void popScope(); + void iterPushScope(); + void iterPopScope(); + + long id; + long nextOffset; + long firstNonTree; + + void referenceField( Compiler *pd, ObjField *field ); + void initField( Compiler *pd, ObjField *field ); + void createCode( Compiler *pd, CodeVect &code ); + ObjField *checkRedecl( const String &name ); + ObjMethod *findMethod( const String &name ); + ObjField *findFieldInScope( const String &name, ObjNameScope *inScope ); + ObjField *findField( const String &name ); + void insertField( const String &name, ObjField *value ); + void resolve( Compiler *pd ); + ObjField *findFieldNum( long offset ); + + long size() { return nextOffset; } /* both sizes are plain reads of the offset counters */ + long sizeTrees() { return firstNonTree; } +}; + +typedef Vector<LangExpr*> ExprVect; +typedef Vector<String> StringVect; + +struct FieldInit +{ + FieldInit( const InputLoc &loc, String name, LangExpr *expr ) + : loc(loc), name(name), expr(expr) {} + + InputLoc loc; + String name; + LangExpr *expr; + + UniqueType *exprUT; /* not set by the constructor */ +}; + +typedef Vector<FieldInit*> FieldInitVect; + +struct VarRefLookup +{ + VarRefLookup( int lastPtrInQual, int firstConstPart, ObjectDef *inObject ) : + lastPtrInQual(lastPtrInQual), + firstConstPart(firstConstPart), + inObject(inObject), + objField(0), /* the four result pointers start out null */ + objMethod(0), + uniqueType(0), + iterSearchUT(0) + {} + + int lastPtrInQual; + int firstConstPart; + ObjectDef *inObject; + ObjField *objField; + ObjMethod *objMethod; + UniqueType *uniqueType; + UniqueType *iterSearchUT; +}; + +struct QualItem +{ + enum Type { Dot, Arrow }; + + QualItem( const InputLoc &loc, const String &data, Type type ) + : loc(loc), data(data), type(type) {} + + InputLoc loc; + String data; + Type type; +}; + +typedef Vector<QualItem> QualItemVect; + +struct LangVarRef +{ + LangVarRef( const InputLoc &loc, QualItemVect *qual, String name ) + : loc(loc), qual(qual), name(name) {} + + void resolve( Compiler *pd ) const; + + UniqueType *loadFieldInstr( Compiler *pd, CodeVect 
&code, ObjectDef *inObject, + ObjField *el, bool forWriting, bool revert ) const; + void setFieldInstr( Compiler *pd, CodeVect &code, ObjectDef *inObject, + ObjField *el, UniqueType *exprUT, bool revert ) const; + + VarRefLookup lookupMethod( Compiler *pd ) ; /* non-const, unlike the other lookups */ + VarRefLookup lookupField( Compiler *pd ) const; + + VarRefLookup lookupQualification( Compiler *pd, ObjectDef *rootDef ) const; + VarRefLookup lookupObj( Compiler *pd ) const; + + bool isCustom( Compiler *pd ) const; + bool isLocalRef( Compiler *pd ) const; + bool isContextRef( Compiler *pd ) const; + void loadQualification( Compiler *pd, CodeVect &code, ObjectDef *rootObj, + int lastPtrInQual, bool forWriting, bool revert ) const; + void loadCustom( Compiler *pd, CodeVect &code, + int lastPtrInQual, bool forWriting ) const; + void loadLocalObj( Compiler *pd, CodeVect &code, + int lastPtrInQual, bool forWriting ) const; + void loadContextObj( Compiler *pd, CodeVect &code, int lastPtrInQual, bool forWriting ) const; + void loadGlobalObj( Compiler *pd, CodeVect &code, + int lastPtrInQual, bool forWriting ) const; + void loadObj( Compiler *pd, CodeVect &code, int lastPtrInQual, bool forWriting ) const; + void canTakeRef( Compiler *pd, VarRefLookup &lookup ) const; + + void setFieldIter( Compiler *pd, CodeVect &code, + ObjectDef *inObject, UniqueType *objUT, UniqueType *exprType, bool revert ) const; + void setFieldSearch( Compiler *pd, CodeVect &code, + ObjectDef *inObject, UniqueType *exprType ) const; + void setField( Compiler *pd, CodeVect &code, + ObjectDef *inObject, UniqueType *type, bool revert ) const; + + void assignValue( Compiler *pd, CodeVect &code, UniqueType *exprUT ) const; + ObjField **evaluateArgs( Compiler *pd, CodeVect &code, + VarRefLookup &lookup, ExprVect *args ) const; + void callOperation( Compiler *pd, CodeVect &code, VarRefLookup &lookup ) const; + UniqueType *evaluateCall( Compiler *pd, CodeVect &code, ExprVect *args ); /* non-const, unlike evaluate */ + UniqueType *evaluate( Compiler *pd, CodeVect &code, bool 
forWriting = false ) const; + ObjField *evaluateRef( Compiler *pd, CodeVect &code, long pushCount ) const; + ObjField *preEvaluateRef( Compiler *pd, CodeVect &code ) const; + void resetActiveRefs( Compiler *pd, VarRefLookup &lookup, ObjField **paramRefs ) const; + long loadQualificationRefs( Compiler *pd, CodeVect &code ) const; + void popRefQuals( Compiler *pd, CodeVect &code, + VarRefLookup &lookup, ExprVect *args ) const; + + InputLoc loc; + QualItemVect *qual; + String name; +}; + +struct LangTerm +{ + enum Type { + VarRefType, + MethodCallType, + NumberType, + StringType, + MatchType, + NewType, + ConstructType, + TypeIdType, + SearchType, + NilType, + TrueType, + FalseType, + ParseType, + ParseStopType, + MakeTreeType, + MakeTokenType, + EmbedStringType + }; + + LangTerm( Type type, LangVarRef *varRef ) + : type(type), varRef(varRef) {} /* like every LangTerm constructor, leaves the members not listed here uninitialized */ + + LangTerm( LangVarRef *varRef, ExprVect *args ) + : type(MethodCallType), varRef(varRef), args(args) {} + + LangTerm( const InputLoc &loc, Type type, ExprVect *args ) + : loc(loc), type(type), args(args) {} + + LangTerm( Type type, String data ) + : type(type), varRef(0), data(data) {} + + LangTerm( Type type, NamespaceQual *nspaceQual, const String &data ) + : type(type), varRef(0), nspaceQual(nspaceQual), data(data) {} + + LangTerm( const InputLoc &loc, Type type ) + : loc(loc), type(type), varRef(0), typeRef(0) {} + + LangTerm( const InputLoc &loc, Type type, TypeRef *typeRef ) + : loc(loc), type(type), varRef(0), typeRef(typeRef) {} + + LangTerm( const InputLoc &loc, Type type, LangVarRef *varRef ) + : loc(loc), type(type), varRef(varRef) {} + + LangTerm( Type type, LangVarRef *varRef, Pattern *pattern ) + : type(type), varRef(varRef), pattern(pattern) {} + + LangTerm( const InputLoc &loc, Type type, TypeRef *typeRef, LangVarRef *varRef ) + : loc(loc), type(type), varRef(varRef), typeRef(typeRef) {} + + LangTerm( const InputLoc &loc, Type type, TypeRef *typeRef, FieldInitVect *fieldInitArgs, + Replacement *replacement ) 
+ : loc(loc), type(type), typeRef(typeRef), fieldInitArgs(fieldInitArgs), + replacement(replacement) {} + + LangTerm( const InputLoc &loc, Type type, LangVarRef *varRef, ObjField *objField, + TypeRef *typeRef, FieldInitVect *fieldInitArgs, Replacement *replacement ) + : loc(loc), type(type), varRef(varRef), objField(objField), typeRef(typeRef), + fieldInitArgs(fieldInitArgs), replacement(replacement) {} + + LangTerm( Type type, LangExpr *expr ) + : type(type), expr(expr) {} + + LangTerm( ReplItemList *replItemList ) + : type(EmbedStringType), replItemList(replItemList) {} /* the type is fixed to EmbedStringType here */ + + LangTerm( const InputLoc &loc, Type type, LangVarRef *varRef, + ObjField *objField, TypeRef *typeRef, GenericType *generic, TypeRef *parserTypeRef, + Replacement *replacement ) + : loc(loc), type(type), varRef(varRef), objField(objField), + typeRef(typeRef), generic(generic), parserTypeRef(parserTypeRef), + replacement(replacement) {} + + void resolve( Compiler *pd ); + + UniqueType *evaluateParse( Compiler *pd, CodeVect &code, bool stop ) const; + UniqueType *evaluateNew( Compiler *pd, CodeVect &code ) const; + UniqueType *evaluateConstruct( Compiler *pd, CodeVect &code ) const; + UniqueType *evaluateMatch( Compiler *pd, CodeVect &code ) const; + UniqueType *evaluate( Compiler *pd, CodeVect &code ) const; + void assignFieldArgs( Compiler *pd, CodeVect &code, UniqueType *replUT ) const; + UniqueType *evaluateMakeToken( Compiler *pd, CodeVect &code ) const; + UniqueType *evaluateMakeTree( Compiler *pd, CodeVect &code ) const; + UniqueType *evaluateEmbedString( Compiler *pd, CodeVect &code ) const; + + InputLoc loc; + Type type; + LangVarRef *varRef; /* which of the members below are set depends on the constructor used */ + ExprVect *args; + NamespaceQual *nspaceQual; + String data; + ObjField *objField; + TypeRef *typeRef; + Pattern *pattern; + FieldInitVect *fieldInitArgs; + GenericType *generic; + TypeRef *parserTypeRef; + Replacement *replacement; + LangExpr *expr; + ReplItemList *replItemList; +}; + +struct LangExpr +{ + enum Type { + BinaryType, + UnaryType, + 
TermType + }; + + LangExpr( const InputLoc &loc, LangExpr *left, char op, LangExpr *right ) + : loc(loc), type(BinaryType), left(left), op(op), right(right) {} + + LangExpr( const InputLoc &loc, char op, LangExpr *right ) + : loc(loc), type(UnaryType), left(0), op(op), right(right) {} + + LangExpr( LangTerm *term ) + : type(TermType), term(term) {} /* left, op and right are not set for a term */ + + void resolve( Compiler *pd ) const; + + UniqueType *evaluate( Compiler *pd, CodeVect &code ) const; + + InputLoc loc; + Type type; + LangExpr *left; + char op; + LangExpr *right; + LangTerm *term; /* only set by the term constructor */ +}; + +struct LangStmt; +typedef DList<LangStmt> StmtList; + +struct LangStmt +{ + enum Type { + AssignType, + PrintType, + PrintXMLACType, + PrintXMLType, + PrintStreamType, + ExprType, + IfType, + ElseType, + RejectType, + WhileType, + ReturnType, + YieldType, + ForIterType, + BreakType, + ParserType + }; + + LangStmt( const InputLoc &loc, Type type, FieldInitVect *fieldInitVect ) : + loc(loc), type(type), varRef(0), expr(0), fieldInitVect(fieldInitVect), next(0) {} /* every constructor sets next to null; see the note on prev/next below */ + + LangStmt( const InputLoc &loc, Type type, ExprVect *exprPtrVect ) : + loc(loc), type(type), varRef(0), expr(0), exprPtrVect(exprPtrVect), next(0) {} + + LangStmt( const InputLoc &loc, Type type, LangExpr *expr ) : + loc(loc), type(type), varRef(0), expr(expr), exprPtrVect(0), next(0) {} + + LangStmt( Type type, LangVarRef *varRef ) : + type(type), varRef(varRef), expr(0), exprPtrVect(0), next(0) {} + + LangStmt( const InputLoc &loc, Type type, ObjField *objField ) : + loc(loc), type(type), varRef(0), objField(objField), expr(0), + exprPtrVect(0), next(0) {} + + LangStmt( const InputLoc &loc, Type type, LangVarRef *varRef, LangExpr *expr ) : + loc(loc), type(type), varRef(varRef), expr(expr), exprPtrVect(0), next(0) {} + + LangStmt( Type type, LangExpr *expr, StmtList *stmtList ) : + type(type), expr(expr), stmtList(stmtList), next(0) {} + + LangStmt( Type type, StmtList *stmtList ) : + type(type), stmtList(stmtList), next(0) {} + + LangStmt( Type type, 
LangExpr *expr, StmtList *stmtList, LangStmt *elsePart ) : + type(type), expr(expr), stmtList(stmtList), elsePart(elsePart), next(0) {} + + LangStmt( const InputLoc &loc, Type type ) : + loc(loc), type(type), next(0) {} + + LangStmt( Type type, LangVarRef *varRef, Replacement *replacement ) : + type(type), varRef(varRef), expr(0), replacement(replacement), + exprPtrVect(0), next(0) {} + + LangStmt( Type type, LangVarRef *varRef, ParserText *parserText ) : + type(type), varRef(varRef), expr(0), parserText(parserText), + exprPtrVect(0), next(0) {} + + /* ForIterType */ + LangStmt( const InputLoc &loc, Type type, ObjField *objField, + TypeRef *typeRef, LangTerm *langTerm, StmtList *stmtList ) : + loc(loc), type(type), langTerm(langTerm), objField(objField), typeRef(typeRef), + stmtList(stmtList), next(0) {} + + LangStmt( Type type ) : + type(type), next(0) {} + + void resolve( Compiler *pd ) const; + void resolveParserItems( Compiler *pd ) const; + + void evaluateParserItems( Compiler *pd, CodeVect &code ) const; + LangTerm *chooseDefaultIter( Compiler *pd, LangTerm *fromVarRef ) const; + void compileWhile( Compiler *pd, CodeVect &code ) const; + void compileForIterBody( Compiler *pd, CodeVect &code, UniqueType *iterUT ) const; + void compileForIter( Compiler *pd, CodeVect &code ) const; + void compile( Compiler *pd, CodeVect &code ) const; + + InputLoc loc; + Type type; + LangVarRef *varRef; + LangTerm *langTerm; + ObjField *objField; + TypeRef *typeRef; + LangExpr *expr; + Replacement *replacement; + ParserText *parserText; + ExprVect *exprPtrVect; + FieldInitVect *fieldInitVect; + StmtList *stmtList; + /* Either another if, or an else. Only set by the constructor that takes an elsePart. */ + LangStmt *elsePart; + String name; + + /* Normally you don't need to initialize double list pointers, however, we + * make use of the next pointer for returning a pair of statements using + * one pointer to a LangStmt, so we need to initialize it above. 
*/ + LangStmt *prev, *next; +}; + +struct CodeBlock +{ + CodeBlock( StmtList *stmtList ) + : + frameId(-1), + stmtList(stmtList), + localFrame(0), + context(0) {} + + void compile( Compiler *pd, CodeVect &code ) const; + void resolve( Compiler *pd ) const; + + long frameId; /* -1 after construction */ + StmtList *stmtList; + ObjectDef *localFrame; + CharSet trees; + Context *context; + + /* Each frame has two versions of + * the code: revert and commit. */ + CodeVect codeWV, codeWC; +}; + +struct Function +{ + Function( TypeRef *typeRef, const String &name, + ParameterList *paramList, CodeBlock *codeBlock, + int funcId, bool isUserIter ) + : + typeRef(typeRef), + name(name), + paramList(paramList), + codeBlock(codeBlock), + funcId(funcId), + isUserIter(isUserIter), + paramListSize(0), + paramUTs(0), + inContext(0) + {} + + TransBlock *transBlock; /* not set by the constructor */ + TypeRef *typeRef; + String name; + ParameterList *paramList; + CodeBlock *codeBlock; + ObjectDef *localFrame; /* not set by the constructor */ + long funcId; + bool isUserIter; + long paramListSize; + UniqueType **paramUTs; + Context *inContext; + + Function *prev, *next; +}; + +typedef DList<Function> FunctionList; + +#endif /* _PARSETREE_H */ diff --git a/src/pcheck.cc b/src/pcheck.cc new file mode 100644 index 00000000..d5401f7d --- /dev/null +++ b/src/pcheck.cc @@ -0,0 +1,154 @@ +/* + * Copyright 2006-2012 Adrian Thurston <thurston@complang.org> + */ + +/* This file is part of Colm. + * + * Colm is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Colm is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with Colm; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "pcheck.h" +#include <assert.h> + +/* Construct a new parameter checker for paramSpec. Scanning starts at argv[1]; parameter and parameterArg start out cleared. */ +ParamCheck::ParamCheck( const char *paramSpec, int argc, const char **argv ) +: + parameterArg(0), parameter(0), state(noparam), + argOffset(0), + curArg(0), + iCurArg(1), + paramSpec(paramSpec), + argc(argc), + argv(argv) +{ +} + +/* Check a single option. Returns false once the arg list is exhausted, true otherwise. Sets parameter to + * the arg character (0 when the arg is not an option), parameterArg to the option's argument if + * there is one, NULL otherwise, and state to match, invalid or noparam. */ +bool ParamCheck::check() +{ + bool requiresParam; + + if ( iCurArg >= argc ) { /* Off the end of the arg list. */ + state = noparam; + return false; + } + + if ( argOffset != 0 && *argOffset == 0 ) { + /* We are at the end of an arg string. */ + iCurArg += 1; + if ( iCurArg >= argc ) { + state = noparam; + return false; + } + argOffset = 0; + } + + if ( argOffset == 0 ) { + /* Set the current arg. */ + curArg = argv[iCurArg]; + + /* We are at the beginning of an arg string. */ + if ( argv[iCurArg] == 0 || /* Argv[iCurArg] is null. */ + argv[iCurArg][0] != '-' || /* Not a param. */ + argv[iCurArg][1] == 0 ) { /* Only a dash. */ + parameter = 0; + parameterArg = 0; + + iCurArg += 1; + state = noparam; + return true; + } + argOffset = argv[iCurArg] + 1; + } + + /* Get the arg char. */ + char argChar = *argOffset; + + /* Loop over all the parms and look for a match. */ + const char *pSpec = paramSpec; + while ( *pSpec != 0 ) { + char pSpecChar = *pSpec; + + /* If there is a ':' following the char then + * it requires a parm. If a parm is required + * then move ahead two in the parmspec. Otherwise + * move ahead one in the parm spec. 
*/ + if ( pSpec[1] == ':' ) { + requiresParam = true; + pSpec += 2; + } + else { + requiresParam = false; + pSpec += 1; + } + + /* Do we have a match. */ + if ( argChar == pSpecChar ) { + if ( requiresParam ) { + if ( argOffset[1] == 0 ) { + /* The param must follow. */ + if ( iCurArg + 1 == argc ) { + /* We are the last arg so there + * cannot be a parameter to it. */ + parameter = argChar; + parameterArg = 0; + iCurArg += 1; + argOffset = 0; + state = invalid; + return true; + } + else { + /* the parameter to the arg is the next arg. */ + parameter = pSpecChar; + parameterArg = argv[iCurArg + 1]; + iCurArg += 2; + argOffset = 0; + state = match; + return true; + } + } + else { + /* The param for the arg is built in. */ + parameter = pSpecChar; + parameterArg = argOffset + 1; + iCurArg += 1; + argOffset = 0; + state = match; + return true; + } + } + else { + /* Good, we matched the parm and no + * arg is required. */ + parameter = pSpecChar; + parameterArg = 0; + argOffset += 1; + state = match; + return true; + } + } + } + + /* We did not find a match. Bad Argument. */ + parameter = argChar; + parameterArg = 0; + argOffset += 1; + state = invalid; + return true; +} + + diff --git a/src/pcheck.h b/src/pcheck.h new file mode 100644 index 00000000..5be60426 --- /dev/null +++ b/src/pcheck.h @@ -0,0 +1,48 @@ +/* + * Copyright 2001, 2002 Adrian Thurston <thurston@complang.org> + */ + +/* This file is part of Colm. + * + * Colm is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Colm is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with Colm; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _PCHECK_H +#define _PCHECK_H + +/* Scans a command line for single-character options described by paramSpec. An option char followed by ':' in paramSpec takes an argument. */ class ParamCheck +{ +public: + ParamCheck( const char *paramSpec, int argc, const char **argv ); + + /* Examine the next option or arg. Returns false when the arg list is exhausted. */ bool check(); + + const char *parameterArg; /* The argument to the parameter. */ + char parameter; /* The parameter matched. */ + /* Outcome of the most recent check(). */ enum { match, invalid, noparam } state; + + const char *argOffset; /* If we are reading params inside an + * arg this points to the offset. */ + + const char *curArg; /* Pointer to the current arg. */ + int iCurArg; /* Index to the current arg. */ + +private: + const char *paramSpec; /* Parameter spec supplied by the coder. */ + int argc; /* Argument data from the command line. */ + const char **argv; +}; + +#endif /* _PCHECK_H */ diff --git a/src/pdabuild.cc b/src/pdabuild.cc new file mode 100644 index 00000000..4bce96ce --- /dev/null +++ b/src/pdabuild.cc @@ -0,0 +1,2091 @@ +/* + * Copyright 2006-2012 Adrian Thurston <thurston@complang.org> + */ + +/* This file is part of Colm. + * + * Colm is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Colm is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Colm; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <iostream> +#include <iomanip> +#include <errno.h> +#include <stdlib.h> + +/* Parsing. 
*/ +#include "global.h" +#include "parsedata.h" +#include "pdacodegen.h" +#include "pdarun.h" +#include "redfsm.h" +#include "fsmcodegen.h" +#include "redbuild.h" +#include "fsmrun.h" + +/* Dumping the fsm. */ +#include "mergesort.h" + +using namespace std; + +char startDefName[] = "start"; + +/* Count the transitions in the fsm by walking the state list. */ +int countTransitions( PdaGraph *fsm ) +{ + int numTrans = 0; + PdaState *state = fsm->stateList.head; + while ( state != 0 ) { + numTrans += state->transMap.length(); + state = state->next; + } + return numTrans; +} + +LangEl::LangEl( Namespace *nspace, const String &name, Type type ) +: + nspace(nspace), + name(name), + lit(name), + type(type), + id(-1), + isUserTerm(false), + isContext(false), + displayString(0), + numAppearances(0), + commit(false), + ignore(false), + reduceFirst(false), + isLiteral(false), + isRepeat(false), + isList(false), + isOpt(false), + parseStop(false), + isEOF(false), + repeatOf(0), + tokenDef(0), + rootDef(0), + termDup(0), + eofLel(0), + pdaGraph(0), + pdaTables(0), + transBlock(0), + objectDef(0), + thisSize(0), + ofiOffset(0), + generic(0), + parserId(-1), + predType(PredNone), + predValue(0), + contextDef(0), + contextIn(0), + noPreIgnore(false), + noPostIgnore(false), + isCI(false), + ciRegion(0) +{ +} + +PdaGraph *ProdElList::walk( Compiler *pd, Definition *prod ) +{ + PdaGraph *prodFsm = new PdaGraph(); + PdaState *last = prodFsm->addState(); + prodFsm->setStartState( last ); + + if ( prod->collectIgnoreRegion != 0 ) { +// cerr << "production " << prod->data << " has collect ignore region " << +// prod->collectIgnoreRegion->name << endl; + + /* Use the IGNORE TOKEN lang el for the region. 
*/ + long value = prod->collectIgnoreRegion->ciLel->id; + + PdaState *newState = prodFsm->addState(); + PdaTrans *newTrans = prodFsm->appendNewTrans( last, newState, value, value ); + + newTrans->isShift = true; + newTrans->shiftPrior = 0; // WAT + last = newState; + } + + int prodLength = 0; + for ( Iter prodEl = first(); prodEl.lte(); prodEl++, prodLength++ ) { + //PdaGraph *itemFsm = prodEl->walk( pd ); + long value = prodEl->langEl->id; + + PdaState *newState = prodFsm->addState(); + PdaTrans *newTrans = prodFsm->appendNewTrans( last, newState, value, value ); + + newTrans->isShift = true; + newTrans->shiftPrior = prodEl->priorVal; + //cerr << "PRIOR VAL: " << newTrans->shiftPrior << endl; + + if ( prodEl->commit ) { + //cout << "COMMIT: inserting commit of length: " << pd->prodLength << endl; + /* Insert the commit into transitions out of last. The value recorded + * is the number of elements walked before this one. */ + for ( TransMap::Iter trans = last->transMap; trans.lte(); trans++ ) + trans->value->commits.insert( prodLength ); + } + + last = newState; + } + + /* Make the last state the final state. */ + prodFsm->setFinState( last ); + return prodFsm; +} + + +/* Build a one-element production list whose single element refers to langEl. */ ProdElList *Compiler::makeProdElList( LangEl *langEl ) +{ + ProdElList *prodElList = new ProdElList(); + UniqueType *uniqueType = findUniqueType( TYPE_TREE, langEl ); + TypeRef *typeRef = new TypeRef( InputLoc(), uniqueType ); + prodElList->append( new ProdEl( InputLoc(), typeRef ) ); + prodElList->tail->langEl = langEl; + return prodElList; +} + +/* Name each definition NAME-N, where NAME is the language element's name and N counts that element's definitions from 1. */ void Compiler::makeDefinitionNames() +{ + for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) { + int prodNum = 1; + for ( LelDefList::Iter def = lel->defList; def.lte(); def++ ) { + def->data.setAs( lel->name.length() + 32, "%s-%i", + lel->name.data, prodNum++ ); + } + } +} + +/* Make sure there are no language elements whose type is unknown. This + * can happen when an id is used on the rhs of a definition but is not defined + * as anything. 
*/ +void Compiler::noUndefindLangEls() +{ + for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) { + if ( lel->type == LangEl::Unknown ) + error() << "'" << lel->name << "' was not defined as anything" << endp; + } +} + +/* Assign symbol ids: user terminals first, then the EOF terminals, the no-token element, the error element if there is one, and finally everything else. */ void Compiler::makeLangElIds() +{ + /* The first id 0 is reserved for the stack sentinel. A negative id means + * error to the parsing function, inducing backtracking. */ + nextSymbolId = 1; + + /* First pass assigns to the user terminals. */ + for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) { + /* Must be a term, and not any of the special reserved terminals. + * Remember that the terminal is a user terminal. */ + if ( lel->type == LangEl::Term && + !lel->isEOF && + lel != errorLangEl && + lel != noTokenLangEl ) + { + lel->isUserTerm = true; + lel->id = nextSymbolId++; + } + } + + //eofLangEl->id = nextSymbolId++; + for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) { + /* Second pass assigns to the EOF language elements, which the first + * pass skipped. */ + if ( lel->isEOF ) + lel->id = nextSymbolId++; + } + + /* Next assign to the no-token element, which we always create. */ + noTokenLangEl->id = nextSymbolId++; + + /* Possibly assign to the error language element. */ + if ( errorLangEl != 0 ) + errorLangEl->id = nextSymbolId++; + + /* Save this for the code generation. Ids from here on are handed out by + * the final pass below. */ + firstNonTermId = nextSymbolId; + + /* A final pass assigns to everything else. */ + for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) { + /* Anything else not yet assigned gets assigned now. 
*/ + if ( lel->id < 0 ) + lel->id = nextSymbolId++; + } + + assert( ptrLangEl->id == LEL_ID_PTR ); + assert( boolLangEl->id == LEL_ID_BOOL ); + assert( intLangEl->id == LEL_ID_INT ); + assert( strLangEl->id == LEL_ID_STR ); + assert( streamLangEl->id == LEL_ID_STREAM ); + assert( inputLangEl->id == LEL_ID_INPUT ); + assert( ignoreLangEl->id == LEL_ID_IGNORE ); +} + +void Compiler::refNameSpace( LangEl *lel, Namespace *nspace ) +{ + if ( nspace == defaultNamespace || nspace == rootNamespace ) { + lel->refName = "::" + lel->refName; + return; + } + + lel->refName = nspace->name + "::" + lel->refName; + lel->declName = nspace->name + "::" + lel->declName; + lel->xmlTag = nspace->name + "::" + lel->xmlTag; + refNameSpace( lel, nspace->parentNamespace ); +} + +void Compiler::makeLangElNames() +{ + for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) { + if ( lel->id == LEL_ID_INT ) { + lel->fullName = "_int"; + lel->fullLit = "_int"; + lel->refName = "_int"; + lel->declName = "_int"; + lel->xmlTag = "int"; + } + else if ( lel->id == LEL_ID_BOOL ) { + lel->fullName = "_bool"; + lel->fullLit = "_bool"; + lel->refName = "_bool"; + lel->declName = "_bool"; + lel->xmlTag = "bool"; + } + else { + lel->fullName = lel->name; + lel->fullLit = lel->lit; + lel->refName = lel->lit; + lel->declName = lel->lit; + lel->xmlTag = lel->name; + } + + /* If there is also a namespace next to the type, we add a prefix to + * the type. It's not convenient to name C++ classes the same as a + * namespace in the same scope. We don't want to restrict colm, so we + * add a workaround for the least-common case. The type gets t_ prefix. + * */ + Namespace *nspace = lel->nspace->findNamespace( lel->name ); + if ( nspace != 0 ) { + lel->refName = "t_" + lel->refName; + lel->fullName = "t_" + lel->fullName; + lel->declName = "t_" + lel->declName; + lel->xmlTag = "t_" + lel->xmlTag; + } + + refNameSpace( lel, lel->nspace ); + } +} + +/* Set up dot sets, shift info, and prod sets. 
*/ +void Compiler::makeProdFsms() +{ + /* There are two items in the index for each production (high and low). */ + int indexLen = prodList.length() * 2; + dotItemIndex.setAsNew( indexLen ); + int dsiLow = 0, indexPos = 0; + + /* Build FSMs for all production language elements. */ + for ( DefList::Iter prod = prodList; prod.lte(); prod++ ) + prod->fsm = prod->prodElList->walk( this, prod ); + + makeNonTermFirstSets(); + makeFirstSets(); + + /* Compute the length and the dot item index entries for each production. */ + for ( DefList::Iter prod = prodList; prod.lte(); prod++ ) { + if ( addUniqueEmptyProductions ) { + /* This must be re-implemented. */ + assert( false ); + //if ( !prod->isLeftRec && prod->uniqueEmptyLeader != 0 ) { + // PdaGraph *emptyLeader = prod->uniqueEmptyLeader->walk( this ); + // emptyLeader->concatOp( prod->fsm ); + // prod->fsm = emptyLeader; + //} + } + + /* Compute the machine's length. */ + prod->fsmLength = prod->fsm->fsmLength( ); + + /* Productions have a unique production id for each final state. + * This lets us use a production length specific to each final state. + * Start states are always isolated therefore if the start state is + * final then reductions from it will always have a fixed production + * length. This is a simple method for determining the length + * of zero-length derivations when reducing. */ + + /* Number of dot items needed for the production is elements + 1 + * because the dot can be before the first and after the last element. + * NOTE(review): the code reserves stateList.length() + 1 ids, one more + * than the number of states numbered below -- confirm this is intended. */ + int numForProd = prod->fsm->stateList.length() + 1; + + /* Set up the low and high values in the index for this production. */ + dotItemIndex.data[indexPos].key = dsiLow; + dotItemIndex.data[indexPos].value = prod; + dotItemIndex.data[indexPos+1].key = dsiLow + numForProd - 1; + dotItemIndex.data[indexPos+1].value = prod; + + int dsi = dsiLow; + for ( PdaStateList::Iter state = prod->fsm->stateList; state.lte(); state++, dsi++ ) { + /* All transitions are shifts. 
*/ + for ( TransMap::Iter out = state->transMap; out.lte(); out++ ) + assert( out->value->isShift ); + + state->dotSet.insert( dsi ); + } + + /* Move over the production. */ + dsiLow += numForProd; + indexPos += 2; + + if ( prod->prodCommit ) { + for ( PdaStateSet::Iter fin = prod->fsm->finStateSet; fin.lte(); fin++ ) { + int length = prod->fsmLength; + //cerr << "PENDING COMMIT IN FINAL STATE of " << prod->prodId << + // " with len: " << length << endl; + (*fin)->pendingCommits.insert( ProdIdPair( prod->prodId, length ) ); + } + } + } + + /* Make the final state specific prod id to prod id mapping. */ + prodIdIndex = new Definition*[prodList.length()]; + for ( DefList::Iter prod = prodList; prod.lte(); prod++ ) + prodIdIndex[prod->prodId] = prod; +} + +/* Want the first set of over src. If the first set contains epsilon, go over + * it and over tab. If overSrc is the end of the production, find the follow + * from the table, taking only the characters on which the parent is reduced. + * */ +void Compiler::findFollow( AlphSet &result, PdaState *overTab, + PdaState *overSrc, Definition *parentDef ) +{ + if ( overSrc->isFinState() ) { + assert( overSrc->transMap.length() == 0 ); + + /* At the end of the production. Turn to the table. */ + long redCode = makeReduceCode( parentDef->prodId, false ); + for ( TransMap::Iter tabTrans = overTab->transMap; tabTrans.lte(); tabTrans++ ) { + for ( ActDataList::Iter adl = tabTrans->value->actions; adl.lte(); adl++ ) { + if ( *adl == redCode ) + result.insert( tabTrans->key ); + } + } + } + else { + /* Get the first set of the item. If the first set contains epsilon + * then move over overSrc and overTab and recurse. 
*/ + assert( overSrc->transMap.length() == 1 ); + TransMap::Iter pastTrans = overSrc->transMap; + + LangEl *langEl = langElIndex[pastTrans->key]; + if ( langEl != 0 && langEl->type == LangEl::NonTerm ) { + bool hasEpsilon = false; + for ( LelDefList::Iter def = langEl->defList; def.lte(); def++ ) { + result.insert( def->firstSet ); + + /* -1 in a first set stands for epsilon. */ + if ( def->firstSet.find( -1 ) ) + hasEpsilon = true; + } + + /* Find the equivalent state in the parser. */ + if ( hasEpsilon ) { + PdaTrans *tabTrans = overTab->findTrans( pastTrans->key ); + findFollow( result, tabTrans->toState, + pastTrans->value->toState, parentDef ); + } + + /* Now possibly the dup. */ + if ( langEl->termDup != 0 ) + result.insert( langEl->termDup->id ); + } + else { + result.insert( pastTrans->key ); + } + } +} + +/* Advance tabState in step with prodState for as long as prodState has exactly one outgoing transition, and return the table state reached. */ PdaState *Compiler::followProd( PdaState *tabState, PdaState *prodState ) +{ + while ( prodState->transMap.length() == 1 ) { + TransMap::Iter prodTrans = prodState->transMap; + PdaTrans *tabTrans = tabState->findTrans( prodTrans->key ); + prodState = prodTrans->value->toState; + tabState = tabTrans->toState; + } + return tabState; +} + +/* Give the action equal to code on trans the next ordering time, unless it already has one. Does nothing if trans carries no such action. */ void Compiler::trySetTime( PdaTrans *trans, long code, long &time ) +{ + /* Find the item. */ + for ( ActDataList::Iter adl = trans->actions; adl.lte(); adl++ ) { + if ( *adl == code ) { + /* If the time of the action is not already set, set it. */ + if ( trans->actOrds[adl.pos()] == 0 ) { + //cerr << "setting time: state = " << tabState->stateNum + // << ", trans = " << tabTrans->lowKey + // << ", time = " << time << endl; + trans->actOrds[adl.pos()] = time++; + } + break; + } + } +} + +/* Go down a definition and then handle the follow actions. */ +void Compiler::pdaOrderFollow( LangEl *rootEl, PdaState *tabState, + PdaTrans *tabTrans, PdaTrans *srcTrans, Definition *parentDef, + Definition *definition, long &time ) +{ + /* We need the follow from tabState/srcState over the definition we are + * currently processing. 
*/ + PdaState *overTab = tabTrans->toState; + PdaState *overSrc = srcTrans->toState; + + AlphSet alphSet; + if ( parentDef == rootEl->rootDef ) + alphSet.insert( rootEl->eofLel->id ); + else + findFollow( alphSet, overTab, overSrc, parentDef ); + + /* Now follow the production to find out where it expands to. */ + PdaState *expandToState = followProd( tabState, definition->fsm->startState ); + + /* Find the reduce item. */ + long redCode = makeReduceCode( definition->prodId, false ); + + for ( TransMap::Iter tt = expandToState->transMap; tt.lte(); tt++ ) { + if ( alphSet.find( tt->key ) ) { + trySetTime( tt->value, redCode, time ); + + /* If the item's token region is not recorded in the state, do it now. */ + addRegion( expandToState, tt->value, tt->key, + tt->value->noPreIgnore, tt->value->noPostIgnore ); + } + } +} + +/* Linear search of regVect. Returns true if region is already present. */ +bool regionVectHas( RegionVect &regVect, TokenRegion *region ) +{ + for ( RegionVect::Iter trvi = regVect; trvi.lte(); trvi++ ) { + if ( *trvi == region ) + return true; + } + return false; +} + +void Compiler::addRegion( PdaState *tabState, PdaTrans *tabTrans, + long pdaKey, bool noPreIgnore, bool noPostIgnore ) +{ + LangEl *langEl = langElIndex[pdaKey]; + if ( langEl != 0 && langEl->type == LangEl::Term ) { + TokenRegion *region = 0; + + /* If it is not the eof, then use the region associated + * with the token definition. */ + if ( langEl->isCI ) { + //cerr << "isCI" << endl; + region = langEl->ciRegion->ciRegion; + } + else if ( !langEl->isEOF && langEl->tokenDef != 0 ) { + region = langEl->tokenDef->tokenRegion; + } + + if ( region != 0 ) { + /* region. 
*/ + TokenRegion *scanRegion = region; + + if ( langEl->noPreIgnore ) + scanRegion = region->tokenOnlyRegion; + + if ( !regionVectHas( tabState->regions, scanRegion ) ) { + tabState->regions.append( scanRegion ); + } + + /* Pre-region of to state */ + PdaState *toState = tabTrans->toState; + if ( !langEl->noPostIgnore && + region->ignoreOnlyRegion != 0 && + !regionVectHas( toState->preRegions, region->ignoreOnlyRegion ) ) + { + toState->preRegions.append( region->ignoreOnlyRegion ); + } + } + } +} + +#if 0 + orderState( tabState, prodState, time ): + if not tabState.dotSet.find( prodState.dotID ) + tabState.dotSet.insert( prodState.dotID ) + tabTrans = tabState.findMatchingTransition( prodState.getTransition() ) + + if tabTrans is NonTerminal: + for production in tabTrans.nonTerm.prodList: + orderState( tabState, production.startState, time ) + + for all expandToState in tabTrans.expandToStates: + for all followTrans in expandToState.transList + reduceAction = findAction( production.reduction ) + if reduceAction.time is unset: + reduceAction.time = time++ + end + end + end + end + end + + shiftAction = tabTrans.findAction( shift ) + if shiftAction.time is unset: + shiftAction.time = time++ + end + + orderState( tabTrans.toState, prodTrans.toState, time ) + end + end + + orderState( parseTable.startState, startProduction.startState, 1 ) +#endif + +void Compiler::pdaOrderProd( LangEl *rootEl, PdaState *tabState, + PdaState *srcState, Definition *parentDef, long &time ) +{ + assert( srcState->dotSet.length() == 1 ); + if ( tabState->dotSet2.find( srcState->dotSet[0] ) ) + return; + tabState->dotSet2.insert( srcState->dotSet[0] ); + + assert( srcState->transMap.length() == 0 || srcState->transMap.length() == 1 ); + + if ( srcState->transMap.length() == 1 ) { + TransMap::Iter srcTrans = srcState->transMap; + + /* Find the equivalent state in the parser. 
*/ + PdaTrans *tabTrans = tabState->findTrans( srcTrans->key ); + + /* Recurse into the transition if it is a non-terminal. */ + LangEl *langEl = langElIndex[srcTrans->key]; + if ( langEl != 0 ) { + if ( langEl->reduceFirst ) { + /* Use a shortest match ordering for the contents of this + * nonterminal. Does follows for all productions first, then + * goes down the productions. */ + for ( LelDefList::Iter expDef = langEl->defList; expDef.lte(); expDef++ ) { + pdaOrderFollow( rootEl, tabState, tabTrans, srcTrans->value, + parentDef, expDef, time ); + } + for ( LelDefList::Iter expDef = langEl->defList; expDef.lte(); expDef++ ) + pdaOrderProd( rootEl, tabState, expDef->fsm->startState, expDef, time ); + + } + else { + /* The default action ordering. For each prod, goes down the + * prod then sets the follow before going to the next prod. */ + for ( LelDefList::Iter expDef = langEl->defList; expDef.lte(); expDef++ ) { + pdaOrderProd( rootEl, tabState, expDef->fsm->startState, expDef, time ); + + pdaOrderFollow( rootEl, tabState, tabTrans, srcTrans->value, + parentDef, expDef, time ); + } + } + } + + trySetTime( tabTrans, SHIFT_CODE, time ); + + /* Now possibly for the dup. */ + if ( langEl != 0 && langEl->termDup != 0 ) { + PdaTrans *dupTrans = tabState->findTrans( langEl->termDup->id ); + trySetTime( dupTrans, SHIFT_CODE, time ); + } + + /* If the items token region is not recorded in the state, do it now. */ + addRegion( tabState, tabTrans, srcTrans->key, + srcTrans->value->noPreIgnore, srcTrans->value->noPostIgnore ); + + /* Go over one in the production. */ + pdaOrderProd( rootEl, tabTrans->toState, + srcTrans->value->toState, parentDef, time ); + } +} + +void Compiler::pdaActionOrder( PdaGraph *pdaGraph, LangElSet &parserEls ) +{ + for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) { + assert( (state->stateBits & SB_ISMARKED) == 0 ); + + /* Traverse the src state's transitions. 
*/ + long last = 0; + for ( TransMap::Iter trans = state->transMap; trans.lte(); trans++ ) { + if ( ! trans.first() ) + assert( last < trans->key ); + last = trans->key; + } + } + + /* Compute the action orderings, record the max value. */ + long time = 1; + for ( LangElSet::Iter pe = parserEls; pe.lte(); pe++ ) { + PdaState *startState = (*pe)->rootDef->fsm->startState; + pdaOrderProd( *pe, (*pe)->startState, startState, (*pe)->rootDef, time ); + + /* Walk over the start lang el and set the time for shift of + * the eof action that completes the parse. */ + PdaTrans *overStart = (*pe)->startState->findTrans( (*pe)->id ); + PdaTrans *eofTrans = overStart->toState->findTrans( (*pe)->eofLel->id ); + eofTrans->actOrds[0] = time++; + } + + for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) { + if ( state->regions.length() == 0 ) { + for ( TransMap::Iter tel = state->transMap; tel.lte(); tel++ ) { + /* There are no regions and EOF leaves the state. Add the eof + * token region. */ + PdaTrans *trans = tel->value; + LangEl *lel = langElIndex[trans->lowKey]; + if ( lel != 0 && lel->isEOF ) + state->regions.append( eofTokenRegion ); + } + } + } + + if ( colm_log_compile ) { + /* Warn about states with empty token region lists. */ + for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) { + if ( state->regions.length() == 0 ) { + warning() << "state has an empty token region, state: " << + state->stateNum << endl; + } + } + } + + /* Some actions may not have an ordering. I believe these to be actions + * that result in a parse error and they arise because the state tables + * are LALR(1) but the action ordering is LR(1). LALR(1) causes some + * reductions that lead nowhere. 
*/ + for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) { + assert( CmpDotSet::compare( state->dotSet, state->dotSet2 ) == 0 ); + for ( TransMap::Iter tel = state->transMap; tel.lte(); tel++ ) { + PdaTrans *trans = tel->value; + /* Check every action has an ordering. */ + for ( ActDataList::Iter adl = trans->actOrds; adl.lte(); adl++ ) { + if ( *adl == 0 ) + *adl = time++; + } + } + } +} + +void Compiler::advanceReductions( PdaGraph *pdaGraph ) +{ + /* Loop all states. */ + for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) { + if ( !state->advanceReductions ) + continue; + + bool outHasShift = false; + ReductionMap outReds; + LongSet outCommits; + for ( TransMap::Iter out = state->transMap; out.lte(); out++ ) { + /* Get the transition from the trans el. */ + if ( out->value->isShift ) + outHasShift = true; + outReds.insert( out->value->reductions ); + outCommits.insert( out->value->commits ); + } + + bool inHasShift = false; + ReductionMap inReds; + for ( PdaTransInList::Iter in = state->inRange; in.lte(); in++ ) { + /* Get the transition from the trans el. */ + if ( in->isShift ) + inHasShift = true; + inReds.insert( in->reductions ); + } + + if ( !outHasShift && outReds.length() == 1 && + inHasShift && inReds.length() == 0 ) + { + //cerr << "moving reduction to shift" << endl; + + /* Move the reduction to all in transitions. */ + for ( PdaTransInList::Iter in = state->inRange; in.lte(); in++ ) { + assert( in->actions.length() == 1 ); + assert( in->actions[0] == SHIFT_CODE ); + in->actions[0] = makeReduceCode( outReds[0].key, true ); + in->afterShiftCommits.insert( outCommits ); + } + + /* + * Remove all transitions out of the state. + */ + + /* Detach out range transitions. 
*/ + for ( TransMap::Iter trans = state->transMap; trans.lte(); trans++ ) { + pdaGraph->detachTrans( state, trans->value->toState, trans->value ); + delete trans->value; + } + state->transMap.empty(); + + /* Redirect all the in transitions to the actionDestState. */ + pdaGraph->inTransMove( actionDestState, state ); + } + } + + pdaGraph->removeUnreachableStates(); +} + +void Compiler::sortActions( PdaGraph *pdaGraph ) +{ + /* Sort the actions. */ + for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) { + assert( CmpDotSet::compare( state->dotSet, state->dotSet2 ) == 0 ); + for ( TransMap::Iter tel = state->transMap; tel.lte(); tel++ ) { + PdaTrans *trans = tel->value; + + /* Sort by the action ords. */ + ActDataList actions( trans->actions ); + ActDataList actOrds( trans->actOrds ); + ActDataList actPriors( trans->actPriors ); + trans->actions.empty(); + trans->actOrds.empty(); + trans->actPriors.empty(); + while ( actOrds.length() > 0 ) { + int min = 0; + for ( int i = 1; i < actOrds.length(); i++ ) { + if ( actPriors[i] > actPriors[min] || + (actPriors[i] == actPriors[min] && + actOrds[i] < actOrds[min] ) ) + { + min = i; + } + } + trans->actions.append( actions[min] ); + trans->actOrds.append( actOrds[min] ); + trans->actPriors.append( actPriors[min] ); + actions.remove(min); + actOrds.remove(min); + actPriors.remove(min); + } + + if ( branchPointInfo && trans->actions.length() > 1 ) { + cerr << "info: branch point" + << " state: " << state->stateNum + << " trans: "; + LangEl *lel = langElIndex[trans->lowKey]; + if ( lel == 0 ) + cerr << (char)trans->lowKey << endl; + else + cerr << lel->lit << endl; + + for ( ActDataList::Iter act = trans->actions; act.lte(); act++ ) { + switch ( *act & 0x3 ) { + case 1: + cerr << " shift" << endl; + break; + case 2: + cerr << " reduce " << + prodIdIndex[(*act >> 2)]->data << endl; + break; + case 3: + cerr << " shift-reduce" << endl; + break; + } + } + } + + /* Verify that shifts of nonterminals don't 
have any branch + * points or commits. */ + if ( trans->lowKey >= firstNonTermId ) { + if ( trans->actions.length() != 1 || + (trans->actions[0] & 0x3) != 1 ) + { + error() << "TRANS ON NONTERMINAL is something " + "other than a shift" << endl; + } + if ( trans->commits.length() > 0 ) + error() << "TRANS ON NONTERMINAL has a commit" << endl; + } + + /* TODO: Shift-reduces are optimizations. Verify that + * shift-reduces exist only if they don't entail a conflict. */ + } + } +} + +void Compiler::reduceActions( PdaGraph *pdaGraph ) +{ + /* Reduce the actions. */ + for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) { + for ( TransMap::Iter tel = state->transMap; tel.lte(); tel++ ) { + PdaTrans *trans = tel->value; + PdaActionSetEl *inSet; + + int commitLen = trans->commits.length() > 0 ? + trans->commits[trans->commits.length()-1] : 0; + + if ( trans->afterShiftCommits.length() > 0 ) { + int afterShiftCommit = trans->afterShiftCommits[ + trans->afterShiftCommits.length()-1]; + + if ( commitLen > 0 && commitLen+1 > afterShiftCommit ) + commitLen = ( commitLen + 1 ); + else + commitLen = afterShiftCommit; + } + else { + commitLen = commitLen * -1; + } + + //if ( commitLen != 0 ) { + // cerr << "FINAL ACTION COMMIT LEN: " << commitLen << endl; + //} + + pdaGraph->actionSet.insert( ActionData( trans->toState->stateNum, + trans->actions, commitLen ), &inSet ); + trans->actionSetEl = inSet; + } + } +} + +void Compiler::computeAdvanceReductions( LangEl *langEl, PdaGraph *pdaGraph ) +{ + /* Get the entry into the graph and traverse over the root. The resulting + * state can have eof, nothing else can. */ + PdaState *overStart = pdaGraph->followFsm( + langEl->startState, + langEl->rootDef->fsm ); + + /* The graph must reduce to root all on it's own. It cannot depend on + * require EOF. 
*/ + for ( PdaStateList::Iter st = pdaGraph->stateList; st.lte(); st++ ) { + if ( st == overStart ) + continue; + + for ( TransMap::Iter tr = st->transMap; tr.lte(); tr++ ) { + if ( tr->value->lowKey == langEl->eofLel->id ) + st->advanceReductions = true; + } + } +} + +/* Report an error if any state other than the one reached over the root production has a transition on langEl's EOF. */ void Compiler::verifyParseStopGrammar( LangEl *langEl, PdaGraph *pdaGraph ) +{ + /* Get the entry into the graph and traverse over the root. The resulting + * state can have eof, nothing else can. */ + PdaState *overStart = pdaGraph->followFsm( + langEl->startState, + langEl->rootDef->fsm ); + + /* The graph must reduce to root all on its own. It cannot depend on + * or require EOF. */ + for ( PdaStateList::Iter st = pdaGraph->stateList; st.lte(); st++ ) { + if ( st == overStart ) + continue; + + for ( TransMap::Iter tr = st->transMap; tr.lte(); tr++ ) { + if ( tr->value->lowKey == langEl->eofLel->id ) { + /* This needs a better error message. Appears to be voodoo. */ + error() << "grammar is not usable with parse_stop" << endp; + } + } + } +} + +/* Return the language element that supplies precedence for action: the transition's own element for a shift, otherwise the predOf element of the production whose id is action >> 2. */ LangEl *Compiler::predOf( PdaTrans *trans, long action ) +{ + LangEl *lel; + if ( action == SHIFT_CODE ) + lel = langElIndex[trans->lowKey]; + else + lel = prodIdIndex[action >> 2]->predOf; + return lel; +} + + +/* Returns true when l2 has the higher precedence value, or when the values are equal and either l1 is left associative with action1 the shift, or l1 is right associative with action2 the shift. */ bool Compiler::precedenceSwap( long action1, long action2, LangEl *l1, LangEl *l2 ) +{ + bool swap = false; + if ( l2->predValue > l1->predValue ) + swap = true; + else if ( l1->predValue == l2->predValue ) { + if ( l1->predType == PredLeft && action1 == SHIFT_CODE ) + swap = true; + else if ( l1->predType == PredRight && action2 == SHIFT_CODE ) + swap = true; + } + return swap; +} + +/* Returns true when the two elements have equal precedence values and l1 is non-associative. */ bool Compiler::precedenceRemoveBoth( LangEl *l1, LangEl *l2 ) +{ + if ( l1->predValue == l2->predValue && l1->predType == PredNonassoc ) + return true; + return false; +} + +void Compiler::resolvePrecedence( PdaGraph *pdaGraph ) +{ + for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) { + assert( CmpDotSet::compare( state->dotSet, state->dotSet2 ) 
== 0 ); + + /* t indexes the state's transitions. It only advances when the + * transition survives precedence resolution. */ + for ( long t = 0; t < state->transMap.length(); /* increment at end */ ) { + PdaTrans *trans = state->transMap[t].value; + +again: + /* Find action with precedence. */ + for ( int i = 0; i < trans->actions.length(); i++ ) { + LangEl *li = predOf( trans, trans->actions[i] ); + + if ( li != 0 && li->predType != PredNone ) { + /* Find another action with precedence. */ + for ( int j = i+1; j < trans->actions.length(); j++ ) { + LangEl *lj = predOf( trans, trans->actions[j] ); + + if ( lj != 0 && lj->predType != PredNone ) { + /* Conflict to check. */ + bool swap = precedenceSwap( trans->actions[i], + trans->actions[j], li, lj ); + + if ( swap ) { + /* Named so it does not shadow the transition + * index t of the enclosing loop. */ + long savedAction = trans->actions[i]; + trans->actions[i] = trans->actions[j]; + trans->actions[j] = savedAction; + } + + /* The action left at j lost the conflict. Restart the + * scan since the list has changed. */ + trans->actions.remove( j ); + if ( precedenceRemoveBoth( li, lj ) ) + trans->actions.remove( i ); + + goto again; + } + } + } + } + + /* If there are still actions then move to the next one. If not, + * (due to nonassoc) then remove the transition. 
*/ + if ( trans->actions.length() > 0 ) + t += 1; + else + state->transMap.vremove( t ); + } + } +} + +/* Fill in the action lists of every transition, order and sort them, resolve precedence, advance reductions, number the states and action sets, then sanity-check the result. */ void Compiler::analyzeMachine( PdaGraph *pdaGraph, LangElSet &parserEls ) +{ + pdaGraph->maxState = pdaGraph->stateList.length() - 1; + pdaGraph->maxLelId = nextSymbolId - 1; + pdaGraph->maxOffset = pdaGraph->stateList.length() * pdaGraph->maxLelId; + + /* Build the parallel actions/actPriors/actOrds lists: the shift first (if + * any), then one reduce per reduction. Orderings start out as 0 (unset). */ + for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) { + for ( TransMap::Iter trans = state->transMap; trans.lte(); trans++ ) { + if ( trans->value->isShift ) { + trans->value->actions.append( SHIFT_CODE ); + trans->value->actPriors.append( trans->value->shiftPrior ); + } + for ( ReductionMap::Iter red = trans->value->reductions; red.lte(); red++ ) { + trans->value->actions.append( makeReduceCode( red->key, false ) ); + trans->value->actPriors.append( red->value ); + } + trans->value->actOrds.appendDup( 0, trans->value->actions.length() ); + } + } + + pdaActionOrder( pdaGraph, parserEls ); + sortActions( pdaGraph ); + resolvePrecedence( pdaGraph ); + + /* Compute the advance-reduction marks for every type we parse_stop. */ + for ( LangElSet::Iter pe = parserEls; pe.lte(); pe++ ) { + LangEl *lel = *pe; + if ( lel->parseStop ) + computeAdvanceReductions(lel , pdaGraph); + } + + advanceReductions( pdaGraph ); + pdaGraph->setStateNumbers(); + reduceActions( pdaGraph ); + + /* Set the action ids. */ + int actionSetId = 0; + for ( PdaActionSet::Iter asi = pdaGraph->actionSet; asi.lte(); asi++ ) + asi->key.id = actionSetId++; + + /* Get the max index. */ + pdaGraph->maxIndex = actionSetId - 1; + + /* Compute the max prod length. */ + pdaGraph->maxProdLen = 0; + for ( DefList::Iter prod = prodList; prod.lte(); prod++ ) { + if ( (unsigned)prod->fsmLength > pdaGraph->maxProdLen ) + pdaGraph->maxProdLen = prod->fsmLength; + } + + /* Asserts that any transition with a nonterminal has a single action + * which is either a shift or a shift-reduce. 
*/ + for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) { + for ( TransMap::Iter trans = state->transMap; trans.lte(); trans++ ) { + LangEl *langEl = langElIndex[trans->value->lowKey]; + if ( langEl != 0 && langEl->type == LangEl::NonTerm ) { + assert( trans->value->actions.length() == 1 ); + assert( trans->value->actions[0] == SHIFT_CODE || + (trans->value->actions[0] & 0x3) == SHIFT_REDUCE_CODE ); + } + } + } + + /* Assert that shift reduces always appear on their own. */ + for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) { + for ( TransMap::Iter trans = state->transMap; trans.lte(); trans++ ) { + for ( ActDataList::Iter act = trans->value->actions; act.lte(); act++ ) { + if ( (*act & 0x3) == SHIFT_REDUCE_CODE ) + assert( trans->value->actions.length() == 1 ); + } + } + } + + /* Verify that any type we parse_stop can actually be parsed that way. */ + for ( LangElSet::Iter pe = parserEls; pe.lte(); pe++ ) { + LangEl *lel = *pe; + if ( lel->parseStop ) + verifyParseStopGrammar(lel , pdaGraph); + } +} + +void Compiler::wrapNonTerminals() +{ + /* Make a language element that will be used to make the root productions. + * These are used for making parsers rooted at any production (including + * the start symbol). */ + rootLangEl = declareLangEl( this, rootNamespace, "_root", LangEl::NonTerm ); + + for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) { + /* Make a single production used when the lel is a root. */ + ProdElList *prodElList = makeProdElList( lel ); + lel->rootDef = new Definition( InputLoc(), rootLangEl, + prodElList, false, 0, + prodList.length(), rootLangEl->defList.length(), + Definition::Production ); + prodList.append( lel->rootDef ); + rootLangEl->defList.append( lel->rootDef ); + + /* First resolve. 
*/ + for ( ProdElList::Iter fact = *prodElList; fact.lte(); fact++ ) + resolveFactor( fact ); + } +} + +bool Compiler::makeNonTermFirstSetProd( Definition *prod, PdaState *state ) +{ + bool modified = false; + for ( TransMap::Iter trans = state->transMap; trans.lte(); trans++ ) { + if ( trans->key >= firstNonTermId ) { + long *inserted = prod->nonTermFirstSet.insert( trans->key ); + if ( inserted != 0 ) + modified = true; + + bool hasEpsilon = false; + LangEl *lel = langElIndex[trans->key]; + for ( LelDefList::Iter ldef = lel->defList; ldef.lte(); ldef++ ) { + for ( ProdIdSet::Iter pid = ldef->nonTermFirstSet; + pid.lte(); pid++ ) + { + if ( *pid == -1 ) + hasEpsilon = true; + else { + long *inserted = prod->nonTermFirstSet.insert( *pid ); + if ( inserted != 0 ) + modified = true; + } + } + } + + if ( hasEpsilon ) { + if ( trans->value->toState->isFinState() ) { + long *inserted = prod->nonTermFirstSet.insert( -1 ); + if ( inserted != 0 ) + modified = true; + } + + bool lmod = makeNonTermFirstSetProd( prod, trans->value->toState ); + if ( lmod ) + modified = true; + } + } + } + return modified; +} + + +void Compiler::makeNonTermFirstSets() +{ + bool modified = true; + while ( modified ) { + modified = false; + for ( DefList::Iter prod = prodList; prod.lte(); prod++ ) { + if ( prod->fsm->startState->isFinState() ) { + long *inserted = prod->nonTermFirstSet.insert( -1 ); + if ( inserted != 0 ) + modified = true; + } + + bool lmod = makeNonTermFirstSetProd( prod, prod->fsm->startState ); + if ( lmod ) + modified = true; + } + } + + for ( DefList::Iter prod = prodList; prod.lte(); prod++ ) { + if ( prod->nonTermFirstSet.find( prod->prodName->id ) ) + prod->isLeftRec = true; + } +} + +void Compiler::printNonTermFirstSets() +{ + for ( DefList::Iter prod = prodList; prod.lte(); prod++ ) { + cerr << prod->data << ": "; + for ( ProdIdSet::Iter pid = prod->nonTermFirstSet; pid.lte(); pid++ ) + { + if ( *pid < 0 ) + cerr << " <EPSILON>"; + else { + LangEl *lel = 
langElIndex[*pid]; + cerr << " " << lel->name; + } + } + cerr << endl; + + if ( prod->isLeftRec ) + cerr << "PROD IS LEFT REC: " << prod->data << endl; + } +} + +bool Compiler::makeFirstSetProd( Definition *prod, PdaState *state ) +{ + bool modified = false; + for ( TransMap::Iter trans = state->transMap; trans.lte(); trans++ ) { + if ( trans->key < firstNonTermId ) { + long *inserted = prod->firstSet.insert( trans->key ); + if ( inserted != 0 ) + modified = true; + } + else { + long *inserted = prod->firstSet.insert( trans->key ); + if ( inserted != 0 ) + modified = true; + + LangEl *klangEl = langElIndex[trans->key]; + if ( klangEl != 0 && klangEl->termDup != 0 ) { + long *inserted2 = prod->firstSet.insert( klangEl->termDup->id ); + if ( inserted2 != 0 ) + modified = true; + } + + bool hasEpsilon = false; + LangEl *lel = langElIndex[trans->key]; + for ( LelDefList::Iter ldef = lel->defList; ldef.lte(); ldef++ ) { + for ( ProdIdSet::Iter pid = ldef->firstSet; + pid.lte(); pid++ ) + { + if ( *pid == -1 ) + hasEpsilon = true; + else { + long *inserted = prod->firstSet.insert( *pid ); + if ( inserted != 0 ) + modified = true; + } + } + } + + if ( hasEpsilon ) { + if ( trans->value->toState->isFinState() ) { + long *inserted = prod->firstSet.insert( -1 ); + if ( inserted != 0 ) + modified = true; + } + + bool lmod = makeFirstSetProd( prod, trans->value->toState ); + if ( lmod ) + modified = true; + } + } + } + return modified; +} + + +void Compiler::makeFirstSets() +{ + bool modified = true; + while ( modified ) { + modified = false; + for ( DefList::Iter prod = prodList; prod.lte(); prod++ ) { + if ( prod->fsm->startState->isFinState() ) { + long *inserted = prod->firstSet.insert( -1 ); + if ( inserted != 0 ) + modified = true; + } + + bool lmod = makeFirstSetProd( prod, prod->fsm->startState ); + if ( lmod ) + modified = true; + } + } +} + +void Compiler::printFirstSets() +{ + for ( DefList::Iter prod = prodList; prod.lte(); prod++ ) { + cerr << prod->data << ": "; 
+ for ( ProdIdSet::Iter pid = prod->firstSet; pid.lte(); pid++ ) + { + if ( *pid < 0 ) + cerr << " <EPSILON>"; + else { + LangEl *lel = langElIndex[*pid]; + if ( lel != 0 ) + cerr << endl << " " << lel->name; + else + cerr << endl << " " << *pid; + } + } + cerr << endl; + } +} + +void Compiler::insertUniqueEmptyProductions() +{ + int limit = prodList.length(); + for ( DefList::Iter prod = prodList; prod.lte(); prod++ ) { + if ( prod->prodId == limit ) + break; + + /* Get a language element. */ + char name[20]; + sprintf(name, "U%li", prodList.length()); + LangEl *prodName = addLangEl( this, rootNamespace, name, LangEl::NonTerm ); + Definition *newDef = new Definition( InputLoc(), prodName, + 0 /* FIXME new VarDef( name, 0 )*/, + false, 0, prodList.length(), prodName->defList.length(), + Definition::Production ); + prodName->defList.append( newDef ); + prodList.append( newDef ); + + prod->uniqueEmptyLeader = prodName; + } +} + +void Compiler::makeRuntimeData() +{ + long count = 0; + + /* + * ProdLengths + * ProdLhsIs + * ProdNames + * ProdCodeBlocks + * ProdCodeBlockLens + */ + + runtimeData->frameInfo = new FrameInfo[nextFrameId]; + runtimeData->numFrames = nextFrameId; + memset( runtimeData->frameInfo, 0, sizeof(FrameInfo) * nextFrameId ); + + /* + * Init code block. 
+ */ + if ( rootCodeBlock == 0 ) { + runtimeData->rootCode = 0; + runtimeData->rootCodeLen = 0; + runtimeData->rootFrameId = 0; + } + else { + runtimeData->rootCode = rootCodeBlock->codeWC.data; + runtimeData->rootCodeLen = rootCodeBlock->codeWC.length(); + runtimeData->rootFrameId = rootCodeBlock->frameId; + } + + runtimeData->frameInfo[rootCodeBlock->frameId].codeWV = 0; + runtimeData->frameInfo[rootCodeBlock->frameId].codeLenWV = 0; + runtimeData->frameInfo[rootCodeBlock->frameId].trees = rootCodeBlock->trees.data; + runtimeData->frameInfo[rootCodeBlock->frameId].treesLen = rootCodeBlock->trees.length(); + runtimeData->frameInfo[rootCodeBlock->frameId].frameSize = rootLocalFrame->size(); + runtimeData->frameInfo[rootCodeBlock->frameId].argSize = 0; + + /* + * prodInfo + */ + count = prodList.length(); + runtimeData->prodInfo = new ProdInfo[count]; + runtimeData->numProds = count; + + count = 0; + for ( DefList::Iter prod = prodList; prod.lte(); prod++ ) { + runtimeData->prodInfo[count].lhsId = prod->prodName->id; + runtimeData->prodInfo[count].prodNum = prod->prodNum; + runtimeData->prodInfo[count].length = prod->fsmLength; + runtimeData->prodInfo[count].name = prod->data; + runtimeData->prodInfo[count].frameId = -1; + + CodeBlock *block = prod->redBlock; + if ( block != 0 ) { + runtimeData->prodInfo[count].frameId = block->frameId; + runtimeData->frameInfo[block->frameId].codeWV = block->codeWV.data; + runtimeData->frameInfo[block->frameId].codeLenWV = block->codeWV.length(); + + runtimeData->frameInfo[block->frameId].trees = block->trees.data; + runtimeData->frameInfo[block->frameId].treesLen = block->trees.length(); + + runtimeData->frameInfo[block->frameId].frameSize = block->localFrame->size(); + runtimeData->frameInfo[block->frameId].argSize = 0; + } + + runtimeData->prodInfo[count].lhsUpref = true; + runtimeData->prodInfo[count].copy = prod->copy.data; + runtimeData->prodInfo[count].copyLen = prod->copy.length() / 2; + count += 1; + } + + /* + * 
regionInfo + */ + runtimeData->numRegions = regionList.length()+1; + runtimeData->regionInfo = new RegionInfo[runtimeData->numRegions]; + memset( runtimeData->regionInfo, 0, sizeof(RegionInfo) * runtimeData->numRegions ); + + runtimeData->regionInfo[0].name = "___EMPTY"; + runtimeData->regionInfo[0].defaultToken = -1; + for ( RegionList::Iter reg = regionList; reg.lte(); reg++ ) { + long regId = reg->id+1; + runtimeData->regionInfo[regId].name = reg->name; + runtimeData->regionInfo[regId].defaultToken = + reg->defaultTokenDef == 0 ? -1 : reg->defaultTokenDef->tdLangEl->id; + runtimeData->regionInfo[regId].eofFrameId = -1; + runtimeData->regionInfo[regId].isIgnoreOnly = reg->isIgnoreOnly; + runtimeData->regionInfo[regId].isCiOnly = reg->isCiOnly; + runtimeData->regionInfo[regId].ciLelId = reg->isCiOnly ? reg->derivedFrom->ciLel->id : 0; + + CodeBlock *block = reg->preEofBlock; + if ( block != 0 ) { + runtimeData->regionInfo[regId].eofFrameId = block->frameId; + runtimeData->frameInfo[block->frameId].codeWV = block->codeWV.data; + runtimeData->frameInfo[block->frameId].codeLenWV = block->codeWV.length(); + + runtimeData->frameInfo[block->frameId].trees = block->trees.data; + runtimeData->frameInfo[block->frameId].treesLen = block->trees.length(); + + runtimeData->frameInfo[block->frameId].frameSize = block->localFrame->size(); + runtimeData->frameInfo[block->frameId].argSize = 0; + } + } + + /* + * lelInfo + */ + + count = nextSymbolId; + runtimeData->lelInfo = new LangElInfo[count]; + runtimeData->numLangEls = count; + memset( runtimeData->lelInfo, 0, sizeof(LangElInfo)*count ); + + for ( int i = 0; i < nextSymbolId; i++ ) { + LangEl *lel = langElIndex[i]; + if ( lel != 0 ) { + runtimeData->lelInfo[i].name = lel->fullLit; + runtimeData->lelInfo[i].xmlTag = lel->xmlTag; + runtimeData->lelInfo[i].repeat = lel->isRepeat; + runtimeData->lelInfo[i].list = lel->isList; + runtimeData->lelInfo[i].literal = lel->isLiteral; + runtimeData->lelInfo[i].ignore = lel->ignore; + 
runtimeData->lelInfo[i].frameId = -1; + + CodeBlock *block = lel->transBlock; + if ( block != 0 ) { + runtimeData->lelInfo[i].frameId = block->frameId; + runtimeData->frameInfo[block->frameId].codeWV = block->codeWV.data; + runtimeData->frameInfo[block->frameId].codeLenWV = block->codeWV.length(); + + runtimeData->frameInfo[block->frameId].trees = block->trees.data; + runtimeData->frameInfo[block->frameId].treesLen = block->trees.length(); + + runtimeData->frameInfo[block->frameId].frameSize = block->localFrame->size(); + runtimeData->frameInfo[block->frameId].argSize = 0; + } + + + runtimeData->lelInfo[i].objectTypeId = + lel->objectDef == 0 ? 0 : lel->objectDef->id; + runtimeData->lelInfo[i].ofiOffset = lel->ofiOffset; + runtimeData->lelInfo[i].objectLength = + ( lel->objectDef == 0 || lel->objectDef == tokenObj ) ? 0 : + lel->objectDef->size(); + +// runtimeData->lelInfo[i].contextTypeId = 0; +// lel->context == 0 ? 0 : lel->context->contextObjDef->id; +// runtimeData->lelInfo[i].contextLength = 0; //lel->context == 0 ? 0 : +// lel->context->contextObjDef->size(); +// if ( lel->context != 0 ) { +// cout << "type: " << runtimeData->lelInfo[i].contextTypeId << " length: " << +// runtimeData->lelInfo[i].contextLength << endl; +// } + + runtimeData->lelInfo[i].termDupId = lel->termDup == 0 ? 0 : lel->termDup->id; + runtimeData->lelInfo[i].genericId = lel->generic == 0 ? 
0 : lel->generic->id; + + if ( lel->tokenDef != 0 && lel->tokenDef->join != 0 && + lel->tokenDef->join->context != 0 ) + runtimeData->lelInfo[i].markId = lel->tokenDef->join->mark->markId; + else + runtimeData->lelInfo[i].markId = -1; + + runtimeData->lelInfo[i].numCaptureAttr = 0; + } + else { + memset(&runtimeData->lelInfo[i], 0, sizeof(LangElInfo) ); + runtimeData->lelInfo[i].name = "__UNUSED"; + runtimeData->lelInfo[i].xmlTag = "__UNUSED"; + runtimeData->lelInfo[i].frameId = -1; + } + } + + /* + * FunctionInfo + */ + count = functionList.length(); + + runtimeData->functionInfo = new FunctionInfo[count]; + runtimeData->numFunctions = count; + memset( runtimeData->functionInfo, 0, sizeof(FunctionInfo)*count ); + for ( FunctionList::Iter func = functionList; func.lte(); func++ ) { + runtimeData->functionInfo[func->funcId].name = func->name; + runtimeData->functionInfo[func->funcId].frameId = -1; + + CodeBlock *block = func->codeBlock; + if ( block != 0 ) { + runtimeData->functionInfo[func->funcId].frameId = block->frameId; + + runtimeData->frameInfo[block->frameId].codeWV = block->codeWV.data; + runtimeData->frameInfo[block->frameId].codeLenWV = block->codeWV.length(); + + runtimeData->frameInfo[block->frameId].codeWC = block->codeWC.data; + runtimeData->frameInfo[block->frameId].codeLenWC = block->codeWC.length(); + + runtimeData->frameInfo[block->frameId].trees = block->trees.data; + runtimeData->frameInfo[block->frameId].treesLen = block->trees.length(); + + runtimeData->frameInfo[block->frameId].frameSize = func->localFrame->size(); + runtimeData->frameInfo[block->frameId].argSize = func->paramListSize; + } + + runtimeData->functionInfo[func->funcId].frameSize = func->localFrame->size(); + runtimeData->functionInfo[func->funcId].argSize = func->paramListSize; + } + + /* + * PatReplInfo + */ + + /* Filled in later after patterns are parsed. 
*/ + runtimeData->patReplInfo = new PatReplInfo[nextPatReplId]; + memset( runtimeData->patReplInfo, 0, sizeof(PatReplInfo) * nextPatReplId ); + runtimeData->numPatterns = nextPatReplId; + runtimeData->patReplNodes = 0; + runtimeData->numPatternNodes = 0; + + + /* + * GenericInfo + */ + count = 1; + for ( NamespaceList::Iter nspace = namespaceList; nspace.lte(); nspace++ ) + count += nspace->genericList.length(); + assert( count == nextGenericId ); + + runtimeData->genericInfo = new GenericInfo[count]; + runtimeData->numGenerics = count; + memset( &runtimeData->genericInfo[0], 0, sizeof(GenericInfo) ); + for ( NamespaceList::Iter nspace = namespaceList; nspace.lte(); nspace++ ) { + for ( GenericList::Iter gen = nspace->genericList; gen.lte(); gen++ ) { + runtimeData->genericInfo[gen->id].type = gen->typeId; + runtimeData->genericInfo[gen->id].typeArg = gen->utArg->typeId; + runtimeData->genericInfo[gen->id].keyType = gen->keyUT != 0 ? + gen->keyUT->typeId : 0; + runtimeData->genericInfo[gen->id].keyOffset = 0; + runtimeData->genericInfo[gen->id].langElId = gen->langEl->id; + runtimeData->genericInfo[gen->id].parserId = gen->utArg->langEl->parserId; + } + } + + runtimeData->argvGenericId = argvTypeRef->generic->id; + + /* + * Literals + */ + runtimeData->numLiterals = literalStrings.length(); + runtimeData->litdata = new const char *[literalStrings.length()]; + runtimeData->litlen = new long [literalStrings.length()]; + runtimeData->literals = 0; + for ( StringMap::Iter el = literalStrings; el.lte(); el++ ) { + /* Data. */ + char *data = new char[el->key.length()+1]; + memcpy( data, el->key.data, el->key.length() ); + data[el->key.length()] = 0; + runtimeData->litdata[el->value] = data; + + /* Length. */ + runtimeData->litlen[el->value] = el->key.length(); + } + + /* Captured attributes. Loop over tokens and count first. 
*/ + long numCapturedAttr = 0; +// for ( RegionList::Iter reg = regionList; reg.lte(); reg++ ) { +// for ( TokenDefListReg::Iter td = reg->tokenDefList; td.lte(); td++ ) +// numCapturedAttr += td->reCaptureVect.length(); +// } + runtimeData->captureAttr = new CaptureAttr[numCapturedAttr]; + runtimeData->numCapturedAttr = numCapturedAttr; + memset( runtimeData->captureAttr, 0, sizeof( CaptureAttr ) * numCapturedAttr ); + + count = 0; +// for ( RegionList::Iter reg = regionList; reg.lte(); reg++ ) { +// for ( TokenDefListReg::Iter td = reg->tokenDefList; td.lte(); td++ ) { +// runtimeData->lelInfo[td->token->id].captureAttr = count; +// runtimeData->lelInfo[td->token->id].numCaptureAttr = td->reCaptureVect.length(); +// for ( ReCaptureVect::Iter c = td->reCaptureVect; c.lte(); c++ ) { +// runtimeData->captureAttr[count].mark_enter = c->markEnter->markId; +// runtimeData->captureAttr[count].mark_leave = c->markLeave->markId; +// runtimeData->captureAttr[count].offset = c->objField->offset; +// +// count += 1; +// } +// } +// } + + runtimeData->fsmTables = fsmTables; + runtimeData->pdaTables = pdaTables; + + /* FIXME: need a parser descriptor. */ + runtimeData->startStates = new int[nextParserId]; + runtimeData->eofLelIds = new int[nextParserId]; + runtimeData->parserLelIds = new int[nextParserId]; + runtimeData->numParsers = nextParserId; + for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) { + if ( lel->parserId >= 0 ) { + runtimeData->startStates[lel->parserId] = lel->startState->stateNum; + runtimeData->eofLelIds[lel->parserId] = lel->eofLel->id; + runtimeData->parserLelIds[lel->parserId] = lel->id; + } + } + + runtimeData->globalSize = globalObjectDef->size(); + + /* + * firstNonTermId + */ + runtimeData->firstNonTermId = firstNonTermId; + + /* Special trees. 
*/ + runtimeData->integerId = intLangEl->id; + runtimeData->stringId = strLangEl->id; + runtimeData->anyId = anyLangEl->id; + runtimeData->eofId = 0; //eofLangEl->id; + runtimeData->noTokenId = noTokenLangEl->id; +} + +/* Borrow alg->state for mapsTo. */ +void countNodes( Program *prg, int &count, ParseTree *parseTree, Kid *kid ) +{ + if ( kid != 0 ) { + count += 1; + + /* Should't have to recurse here. */ + Tree *ignoreList = treeLeftIgnore( prg, kid->tree ); + if ( ignoreList != 0 ) { + Kid *ignore = ignoreList->child; + while ( ignore != 0 ) { + count += 1; + ignore = ignore->next; + } + } + + ignoreList = treeRightIgnore( prg, kid->tree ); + if ( ignoreList != 0 ) { + Kid *ignore = ignoreList->child; + while ( ignore != 0 ) { + count += 1; + ignore = ignore->next; + } + } + + //count += prg->rtd->lelInfo[kid->tree->id].numCaptureAttr; + + if ( !( parseTree->flags & PF_NAMED ) && + !( parseTree->flags & PF_ARTIFICIAL ) && + treeChild( prg, kid->tree ) != 0 ) + { + countNodes( prg, count, parseTree->child, treeChild( prg, kid->tree ) ); + } + countNodes( prg, count, parseTree->next, kid->next ); + } +} + +void fillNodes( Program *prg, int &nextAvail, Bindings *bindings, long &bindId, + PatReplNode *nodes, ParseTree *parseTree, Kid *kid, int ind ) +{ + if ( kid != 0 ) { + PatReplNode &node = nodes[ind]; + + Kid *child = + !( parseTree->flags & PF_NAMED ) && + !( parseTree->flags & PF_ARTIFICIAL ) && + treeChild( prg, kid->tree ) != 0 + ? + treeChild( prg, kid->tree ) : 0; + + ParseTree *ptChild = + !( parseTree->flags & PF_NAMED ) && + !( parseTree->flags & PF_ARTIFICIAL ) && + treeChild( prg, kid->tree ) != 0 + ? + parseTree->child : 0; + + /* Set up the fields. */ + node.id = kid->tree->id; + node.prodNum = kid->tree->prodNum; + node.length = stringLength( kid->tree->tokdata ); + node.data = stringData( kid->tree->tokdata ); + + /* Ignore items. */ + Tree *ignoreList = treeLeftIgnore( prg, kid->tree ); + Kid *ignore = ignoreList == 0 ? 
0 : ignoreList->child; + node.leftIgnore = ignore == 0 ? -1 : nextAvail; + + while ( ignore != 0 ) { + PatReplNode &node = nodes[nextAvail++]; + + memset( &node, 0, sizeof(PatReplNode) ); + node.id = ignore->tree->id; + node.prodNum = ignore->tree->prodNum; + node.next = ignore->next == 0 ? -1 : nextAvail; + + node.length = stringLength( ignore->tree->tokdata ); + node.data = stringData( ignore->tree->tokdata ); + + ignore = ignore->next; + } + + /* Ignore items. */ + ignoreList = treeRightIgnore( prg, kid->tree ); + ignore = ignoreList == 0 ? 0 : ignoreList->child; + node.rightIgnore = ignore == 0 ? -1 : nextAvail; + + while ( ignore != 0 ) { + PatReplNode &node = nodes[nextAvail++]; + + memset( &node, 0, sizeof(PatReplNode) ); + node.id = ignore->tree->id; + node.prodNum = ignore->tree->prodNum; + node.next = ignore->next == 0 ? -1 : nextAvail; + + node.length = stringLength( ignore->tree->tokdata ); + node.data = stringData( ignore->tree->tokdata ); + + ignore = ignore->next; + } + + ///* The captured attributes. */ + //for ( int i = 0; i < prg->rtd->lelInfo[kid->tree->id].numCaptureAttr; i++ ) { + // CaptureAttr *cap = prg->rtd->captureAttr + + // prg->rtd->lelInfo[kid->tree->id].captureAttr + i; + // + // Tree *attr = getAttr( kid->tree, cap->offset ); + // + // PatReplNode &node = nodes[nextAvail++]; + // memset( &node, 0, sizeof(PatReplNode) ); + // + // node.id = attr->id; + // node.prodNum = attr->prodNum; + // node.length = stringLength( attr->tokdata ); + // node.data = stringData( attr->tokdata ); + //} + + node.stop = parseTree->flags & PF_TERM_DUP; + + node.child = child == 0 ? -1 : nextAvail++; + + /* Recurse. */ + fillNodes( prg, nextAvail, bindings, bindId, nodes, ptChild, child, node.child ); + + /* Since the parser is bottom up the bindings are in a bottom up + * traversal order. Check after recursing. 
*/ + node.bindId = 0; + if ( bindId < bindings->length() && bindings->data[bindId] == parseTree ) { + /* Remember that binding ids are indexed from one. */ + node.bindId = bindId++; + + //cout << "binding match in " << __PRETTY_FUNCTION__ << endl; + //cout << "bindId: " << node.bindId << endl; + } + + node.next = kid->next == 0 ? -1 : nextAvail++; + + /* Move to the next child. */ + fillNodes( prg, nextAvail, bindings, bindId, nodes, parseTree->next, kid->next, node.next ); + } +} + +void Compiler::fillInPatterns( Program *prg ) +{ + /* + * patReplNodes + */ + + /* Count is referenced and computed by mapNode. */ + int count = 0; + for ( PatternList::Iter pat = patternList; pat.lte(); pat++ ) { + countNodes( prg, count, + pat->pdaRun->stackTop->next, + pat->pdaRun->stackTop->next->shadow ); + } + + for ( ReplList::Iter repl = replList; repl.lte(); repl++ ) { + countNodes( prg, count, + repl->pdaRun->stackTop->next, + repl->pdaRun->stackTop->next->shadow ); + } + + runtimeData->patReplNodes = new PatReplNode[count]; + runtimeData->numPatternNodes = count; + + int nextAvail = 0; + + for ( PatternList::Iter pat = patternList; pat.lte(); pat++ ) { + int ind = nextAvail++; + runtimeData->patReplInfo[pat->patRepId].offset = ind; + + /* BindIds are indexed base one. */ + runtimeData->patReplInfo[pat->patRepId].numBindings = + pat->pdaRun->bindings->length() - 1; + + /* Init the bind */ + long bindId = 1; + fillNodes( prg, nextAvail, pat->pdaRun->bindings, bindId, + runtimeData->patReplNodes, + pat->pdaRun->stackTop->next, + pat->pdaRun->stackTop->next->shadow, + ind ); + } + + for ( ReplList::Iter repl = replList; repl.lte(); repl++ ) { + int ind = nextAvail++; + runtimeData->patReplInfo[repl->patRepId].offset = ind; + + /* BindIds are indexed base one. 
*/ + runtimeData->patReplInfo[repl->patRepId].numBindings = + repl->pdaRun->bindings->length() - 1; + + long bindId = 1; + fillNodes( prg, nextAvail, repl->pdaRun->bindings, bindId, + runtimeData->patReplNodes, + repl->pdaRun->stackTop->next, + repl->pdaRun->stackTop->next->shadow, + ind ); + } + + assert( nextAvail == count ); +} + + +int Compiler::findIndexOff( PdaTables *pdaTables, PdaGraph *pdaGraph, PdaState *state, int &curLen ) +{ + for ( int start = 0; start < curLen; ) { + int offset = start; + for ( TransMap::Iter trans = state->transMap; trans.lte(); trans++ ) { + if ( pdaTables->owners[offset] != -1 ) + goto next_start; + + offset++; + if ( ! trans.last() ) { + TransMap::Iter next = trans.next(); + offset += next->key - trans->key - 1; + } + } + + /* Got though the whole list without a conflict. */ + return start; + +next_start: + start++; + } + + return curLen; +} + +struct CmpSpan +{ + static int compare( PdaState *state1, PdaState *state2 ) + { + int dist1 = 0, dist2 = 0; + + if ( state1->transMap.length() > 0 ) { + TransMap::Iter first1 = state1->transMap.first(); + TransMap::Iter last1 = state1->transMap.last(); + dist1 = last1->key - first1->key; + } + + if ( state2->transMap.length() > 0 ) { + TransMap::Iter first2 = state2->transMap.first(); + TransMap::Iter last2 = state2->transMap.last(); + dist2 = last2->key - first2->key; + } + + if ( dist1 < dist2 ) + return 1; + else if ( dist2 < dist1 ) + return -1; + return 0; + } +}; + +PdaGraph *Compiler::makePdaGraph( LangElSet &parserEls ) +{ + //for ( DefList::Iter prod = prodList; prod.lte(); prod++ ) + // cerr << prod->prodId << " " << prod->data << endl; + + PdaGraph *pdaGraph = new PdaGraph(); + lalr1GenerateParser( pdaGraph, parserEls ); + pdaGraph->setStateNumbers(); + analyzeMachine( pdaGraph, parserEls ); + + //cerr << "NUMBER OF STATES: " << pdaGraph->stateList.length() << endl; + + return pdaGraph; +} + +PdaTables *Compiler::makePdaTables( PdaGraph *pdaGraph ) +{ + int count, pos; + 
PdaTables *pdaTables = new PdaTables; + + /* + * Counting max indices. + */ + count = 0; + for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) { + for ( TransMap::Iter trans = state->transMap; trans.lte(); trans++ ) { + count++; + if ( ! trans.last() ) { + TransMap::Iter next = trans.next(); + count += next->key - trans->key - 1; + } + } + } + + + /* Allocate indicies and owners. */ + pdaTables->numIndicies = count; + pdaTables->indicies = new int[count]; + pdaTables->owners = new int[count]; + for ( long i = 0; i < count; i++ ) { + pdaTables->indicies[i] = -1; + pdaTables->owners[i] = -1; + } + + /* Allocate offsets. */ + int numStates = pdaGraph->stateList.length(); + pdaTables->offsets = new unsigned int[numStates]; + pdaTables->numStates = numStates; + + /* Place transitions into indicies/owners */ + PdaState **states = new PdaState*[numStates]; + long ds = 0; + for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) + states[ds++] = state; + + /* Sorting baseded on span length. Gives an improvement, but incures a + * cost. Off for now. */ + //MergeSort< PdaState*, CmpSpan > mergeSort; + //mergeSort.sort( states, numStates ); + + int indLen = 0; + for ( int s = 0; s < numStates; s++ ) { + PdaState *state = states[s]; + + int indOff = findIndexOff( pdaTables, pdaGraph, state, indLen ); + pdaTables->offsets[state->stateNum] = indOff; + + for ( TransMap::Iter trans = state->transMap; trans.lte(); trans++ ) { + pdaTables->indicies[indOff] = trans->value->actionSetEl->key.id; + pdaTables->owners[indOff] = state->stateNum; + indOff++; + + if ( ! trans.last() ) { + TransMap::Iter next = trans.next(); + indOff += next->key - trans->key - 1; + } + } + + if ( indOff > indLen ) + indLen = indOff; + } + + /* We allocated the max, but cmpression gives us less. 
*/ + pdaTables->numIndicies = indLen; + delete[] states; + + + /* + * Keys + */ + count = pdaGraph->stateList.length() * 2;; + pdaTables->keys = new int[count]; + pdaTables->numKeys = count; + + count = 0; + for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) { + if ( state->transMap.length() == 0 ) { + pdaTables->keys[count+0] = 0; + pdaTables->keys[count+1] = 0; + } + else { + TransMap::Iter first = state->transMap.first(); + TransMap::Iter last = state->transMap.last(); + pdaTables->keys[count+0] = first->key; + pdaTables->keys[count+1] = last->key; + } + count += 2; + } + + /* + * Targs + */ + count = pdaGraph->actionSet.length(); + pdaTables->targs = new unsigned int[count]; + pdaTables->numTargs = count; + + count = 0; + for ( PdaActionSet::Iter asi = pdaGraph->actionSet; asi.lte(); asi++ ) + pdaTables->targs[count++] = asi->key.targ; + + /* + * ActInds + */ + count = pdaGraph->actionSet.length(); + pdaTables->actInds = new unsigned int[count]; + pdaTables->numActInds = count; + + count = pos = 0; + for ( PdaActionSet::Iter asi = pdaGraph->actionSet; asi.lte(); asi++ ) { + pdaTables->actInds[count++] = pos; + pos += asi->key.actions.length() + 1; + } + + /* + * Actions + */ + count = 0; + for ( PdaActionSet::Iter asi = pdaGraph->actionSet; asi.lte(); asi++ ) + count += asi->key.actions.length() + 1; + + pdaTables->actions = new unsigned int[count]; + pdaTables->numActions = count; + + count = 0; + for ( PdaActionSet::Iter asi = pdaGraph->actionSet; asi.lte(); asi++ ) { + for ( ActDataList::Iter ali = asi->key.actions; ali.lte(); ali++ ) + pdaTables->actions[count++] = *ali; + + pdaTables->actions[count++] = 0; + } + + /* + * CommitLen + */ + count = pdaGraph->actionSet.length(); + pdaTables->commitLen = new int[count]; + pdaTables->numCommitLen = count; + + count = 0; + for ( PdaActionSet::Iter asi = pdaGraph->actionSet; asi.lte(); asi++ ) + pdaTables->commitLen[count++] = asi->key.commitLen; + + /* + * tokenRegionInds. 
Start at one so region index 0 is null (unset). + */ + count = 0; + pos = 1; + pdaTables->tokenRegionInds = new int[pdaTables->numStates]; + for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) { + pdaTables->tokenRegionInds[count++] = pos; + pos += state->regions.length() + 1; + } + + + /* + * tokenRegions. Build in a null at the beginning. + */ + + count = 1; + for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) + count += state->regions.length() + 1; + + pdaTables->numRegionItems = count; + pdaTables->tokenRegions = new int[pdaTables->numRegionItems]; + + count = 0; + pdaTables->tokenRegions[count++] = 0; + for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) { + for ( RegionVect::Iter reg = state->regions; reg.lte(); reg++ ) + pdaTables->tokenRegions[count++] = (*reg)->id + 1; + + pdaTables->tokenRegions[count++] = 0; + } + + /* + * tokenPreRegions. Build in a null at the beginning. + */ + + count = 1; + for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) + count += state->regions.length() + 1; + + pdaTables->numPreRegionItems = count; + pdaTables->tokenPreRegions = new int[pdaTables->numPreRegionItems]; + + count = 0; + pdaTables->tokenPreRegions[count++] = 0; + for ( PdaStateList::Iter state = pdaGraph->stateList; state.lte(); state++ ) { + for ( RegionVect::Iter reg = state->regions; reg.lte(); reg++ ) { + assert( state->preRegions.length() <= 1 ); + if ( state->preRegions.length() == 0 || state->preRegions[0]->wasEmpty ) + pdaTables->tokenPreRegions[count++] = -1; + else + pdaTables->tokenPreRegions[count++] = state->preRegions[0]->id + 1; + } + + pdaTables->tokenPreRegions[count++] = 0; + } + + + return pdaTables; +} + +void Compiler::makeParser( LangElSet &parserEls ) +{ + pdaGraph = makePdaGraph( parserEls ); + pdaTables = makePdaTables( pdaGraph ); +} + diff --git a/src/pdacodegen.cc b/src/pdacodegen.cc new file mode 100644 index 00000000..9e3dca47 --- 
/dev/null +++ b/src/pdacodegen.cc @@ -0,0 +1,653 @@ +/* + * Copyright 2006-2012 Adrian Thurston <thurston@complang.org> + */ + +/* This file is part of Colm. + * + * Colm is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Colm is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Colm; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <iostream> +#include <stdlib.h> +#include <ctype.h> +#include <limits.h> +#include "global.h" +#include "parsedata.h" +#include "avlmap.h" +#include "avlbasic.h" +#include "avlset.h" +#include "mergesort.h" +#include "pdacodegen.h" + +using std::cerr; +using std::endl; + +#define FRESH_BLOCK 8128 +#define act_sb "0x1" +#define act_rb "0x2" +#define lower "0x0000ffff" +#define upper "0xffff0000" + +void escapeLiteralString( std::ostream &out, const char *path, int length ) +{ + for ( const char *pc = path, *end = path+length; pc != end; pc++ ) { + switch ( *pc ) { + case '\\': out << "\\\\"; break; + case '"': out << "\\\""; break; + case '\a': out << "\\a"; break; + case '\b': out << "\\b"; break; + case '\t': out << "\\t"; break; + case '\n': out << "\\n"; break; + case '\v': out << "\\v"; break; + case '\f': out << "\\f"; break; + case '\r': out << "\\r"; break; + default: out << *pc; break; + } + } +} + +void escapeLiteralString( std::ostream &out, const char *path ) +{ + escapeLiteralString( out, path, strlen(path) ); +} + +void PdaCodeGen::writeTokenIds() +{ + out << "/*\n"; + for ( LelList::Iter 
lel = pd->langEls; lel.lte(); lel++ ) { + if ( lel->name != 0 ) + out << " " << lel->name << " " << lel->id << endl; + else + out << " " << lel->id << endl; + } + out << "*/\n\n"; +} + +void PdaCodeGen::defineRuntime() +{ + out << + "extern RuntimeData main_runtimeData;\n" + "\n"; +} + +void PdaCodeGen::writeRuntimeData( RuntimeData *runtimeData, PdaTables *pdaTables ) +{ + /* + * Blocks of code in frames. + */ + for ( int i = 0; i < runtimeData->numFrames; i++ ) { + /* FIXME: horrible code cloning going on here. */ + if ( runtimeData->frameInfo[i].codeLenWV > 0 ) { + out << "Code code_" << i << "_wv[] = {\n\t"; + + Code *block = runtimeData->frameInfo[i].codeWV; + for ( int j = 0; j < runtimeData->frameInfo[i].codeLenWV; j++ ) { + out << (unsigned long) block[j]; + + if ( j < runtimeData->frameInfo[i].codeLenWV-1 ) { + out << ", "; + if ( (j+1) % 8 == 0 ) + out << "\n\t"; + } + } + out << "\n};\n\n"; + } + + if ( runtimeData->frameInfo[i].codeLenWC > 0 ) { + out << "Code code_" << i << "_wc[] = {\n\t"; + + Code *block = runtimeData->frameInfo[i].codeWC; + for ( int j = 0; j < runtimeData->frameInfo[i].codeLenWC; j++ ) { + out << (unsigned long) block[j]; + + if ( j < runtimeData->frameInfo[i].codeLenWC-1 ) { + out << ", "; + if ( (j+1) % 8 == 0 ) + out << "\n\t"; + } + } + out << "\n};\n\n"; + } + + if ( runtimeData->frameInfo[i].treesLen > 0 ) { + out << "char trees_" << i << "[] = {\n\t"; + + char *block = runtimeData->frameInfo[i].trees; + for ( int j = 0; j < runtimeData->frameInfo[i].treesLen; j++ ) { + out << (long) block[j]; + + if ( j < runtimeData->frameInfo[i].treesLen-1 ) { + out << ", "; + if ( (j+1) % 8 == 0 ) + out << "\n\t"; + } + } + out << "\n};\n\n"; + } + } + + /* + * Blocks in production info. 
+ */ + for ( int i = 0; i < runtimeData->numProds; i++ ) { + if ( runtimeData->prodInfo[i].copyLen > 0 ) { + out << "unsigned char copy_" << i << "[] = {\n\t"; + + unsigned char *block = runtimeData->prodInfo[i].copy; + for ( int j = 0; j < runtimeData->prodInfo[i].copyLen; j++ ) { + out << (long) block[j*2] << ", " << (long) block[j*2+1]; + + if ( j < runtimeData->prodInfo[i].copyLen-1 ) { + out << ", "; + if ( (j+1) % 8 == 0 ) + out << "\n\t"; + } + } + out << "\n};\n\n"; + } + } + + /* + * Init code. + */ + out << "Code " << rootCode() << "[] = {\n\t"; + Code *block = runtimeData->rootCode ; + for ( int j = 0; j < runtimeData->rootCodeLen; j++ ) { + out << (unsigned int) block[j]; + + if ( j < runtimeData->rootCodeLen-1 ) { + out << ", "; + if ( (j+1) % 8 == 0 ) + out << "\n\t"; + } + } + out << "\n};\n\n"; + + /* + * lelInfo + */ + out << "LangElInfo " << lelInfo() << "[] = {\n"; + for ( int i = 0; i < runtimeData->numLangEls; i++ ) { + out << "\t{"; + + /* Name. */ + out << " \""; + escapeLiteralString( out, runtimeData->lelInfo[i].name ); + out << "\", "; + + /* Name. */ + out << " \""; + escapeLiteralString( out, runtimeData->lelInfo[i].xmlTag ); + out << "\", "; + + /* Repeat, literal, ignore flags. 
*/ + out << (int)runtimeData->lelInfo[i].repeat << ", " << + (int)runtimeData->lelInfo[i].list << ", " << + (int)runtimeData->lelInfo[i].literal << ", " << + (int)runtimeData->lelInfo[i].ignore << ", "; + + out << runtimeData->lelInfo[i].frameId << ", "; + + out << runtimeData->lelInfo[i].objectTypeId << ", "; + + out << runtimeData->lelInfo[i].ofiOffset << ", "; + + out << runtimeData->lelInfo[i].objectLength << ", "; + +// out << runtimeData->lelInfo[i].contextTypeId << ", "; +// out << runtimeData->lelInfo[i].contextLength << ", "; + + out << runtimeData->lelInfo[i].termDupId << ", "; + + out << runtimeData->lelInfo[i].genericId << ", "; + + out << runtimeData->lelInfo[i].markId << ", "; + + out << runtimeData->lelInfo[i].captureAttr << ", "; + + out << runtimeData->lelInfo[i].numCaptureAttr; + + out << " }"; + + if ( i < runtimeData->numLangEls-1 ) + out << ",\n"; + } + out << "\n};\n\n"; + + /* + * frameInfo + */ + out << "FrameInfo " << frameInfo() << "[] = {\n"; + for ( int i = 0; i < runtimeData->numFrames; i++ ) { + out << "\t{ "; + + if ( runtimeData->frameInfo[i].codeLenWV > 0 ) + out << "code_" << i << "_wv, "; + else + out << "0, "; + out << runtimeData->frameInfo[i].codeLenWV << ", "; + + if ( runtimeData->frameInfo[i].codeLenWC > 0 ) + out << "code_" << i << "_wc, "; + else + out << "0, "; + out << runtimeData->frameInfo[i].codeLenWC << ", "; + + if ( runtimeData->frameInfo[i].treesLen > 0 ) + out << "trees_" << i << ", "; + else + out << "0, "; + + out << + runtimeData->frameInfo[i].treesLen << ", " << + runtimeData->frameInfo[i].argSize << ", " << + runtimeData->frameInfo[i].frameSize; + + out << " }"; + + if ( i < runtimeData->numFrames-1 ) + out << ",\n"; + } + out << "\n};\n\n"; + + + /* + * prodInfo + */ + out << "ProdInfo " << prodInfo() << "[] = {\n"; + for ( int i = 0; i < runtimeData->numProds; i++ ) { + out << "\t{ "; + + out << runtimeData->prodInfo[i].lhsId << ", "; + out << runtimeData->prodInfo[i].prodNum << ", "; + out << 
runtimeData->prodInfo[i].length << ", "; + + out << + '"' << runtimeData->prodInfo[i].name << "\", " << + runtimeData->prodInfo[i].frameId << ", " << + (int)runtimeData->prodInfo[i].lhsUpref << ", "; + + if ( runtimeData->prodInfo[i].copyLen > 0 ) + out << "copy_" << i << ", "; + else + out << "0, "; + + out << runtimeData->prodInfo[i].copyLen << ", "; + + + out << " }"; + + if ( i < runtimeData->numProds-1 ) + out << ",\n"; + } + out << "\n};\n\n"; + + /* + * patReplInfo + */ + out << "PatReplInfo " << patReplInfo() << "[] = {\n"; + for ( int i = 0; i < runtimeData->numPatterns; i++ ) { + out << " { " << runtimeData->patReplInfo[i].offset << ", " << + runtimeData->patReplInfo[i].numBindings << " },\n"; + } + out << "};\n\n"; + + /* + * patReplNodes + */ + out << "PatReplNode " << patReplNodes() << "[] = {\n"; + for ( int i = 0; i < runtimeData->numPatternNodes; i++ ) { + PatReplNode &node = runtimeData->patReplNodes[i]; + out << " { " << node.id << ", " << + node.prodNum << ", " << node.next << ", " << + node.child << ", " << node.bindId << ", "; + if ( node.data == 0 ) + out << "0"; + else { + out << '\"'; + escapeLiteralString( out, node.data, node.length ); + out << '\"'; + } + out << ", " << node.length << ", "; + + out << node.leftIgnore << ", "; + out << node.rightIgnore << ", "; + + out << (int)node.stop << " },\n"; + } + out << "};\n\n"; + + /* + * functionInfo + */ + out << "FunctionInfo " << functionInfo() << "[] = {\n"; + for ( int i = 0; i < runtimeData->numFunctions; i++ ) { + out << "\t{ " << + "\"" << runtimeData->functionInfo[i].name << "\", " << + runtimeData->functionInfo[i].frameId << ", " << + runtimeData->functionInfo[i].argSize << ", " << + runtimeData->functionInfo[i].frameSize; + out << " }"; + + if ( i < runtimeData->numFunctions-1 ) + out << ",\n"; + } + out << "\n};\n\n"; + + /* + * regionInfo + */ + out << "RegionInfo " << regionInfo() << "[] = {\n"; + for ( int i = 0; i < runtimeData->numRegions; i++ ) { + out << "\t{ \""; + /* Name. 
*/ + escapeLiteralString( out, runtimeData->regionInfo[i].name ); + out << "\", " << runtimeData->regionInfo[i].defaultToken << + ", " << runtimeData->regionInfo[i].eofFrameId << + ", " << runtimeData->regionInfo[i].isIgnoreOnly << + ", " << runtimeData->regionInfo[i].isCiOnly << + ", " << runtimeData->regionInfo[i].ciLelId << + " }"; + + if ( i < runtimeData->numRegions-1 ) + out << ",\n"; + } + out << "\n};\n\n"; + + /* + * genericInfo + */ + out << "GenericInfo " << genericInfo() << "[] = {\n"; + for ( int i = 0; i < runtimeData->numGenerics; i++ ) { + out << "\t{ " << + runtimeData->genericInfo[i].type << ", " << + runtimeData->genericInfo[i].typeArg << ", " << + runtimeData->genericInfo[i].keyOffset << ", " << + runtimeData->genericInfo[i].keyType << ", " << + runtimeData->genericInfo[i].langElId << ", " << + runtimeData->genericInfo[i].parserId << " },\n"; + } + out << "};\n\n"; + + /* + * literals + */ + out << "const char *" << litdata() << "[] = {\n"; + for ( int i = 0; i < runtimeData->numLiterals; i++ ) { + out << "\t\""; + escapeLiteralString( out, runtimeData->litdata[i] ); + out << "\",\n"; + } + out << "};\n\n"; + + out << "long " << litlen() << "[] = {\n\t"; + for ( int i = 0; i < runtimeData->numLiterals; i++ ) + out << runtimeData->litlen[i] << ", "; + out << "};\n\n"; + + out << "Head *" << literals() << "[] = {\n\t"; + for ( int i = 0; i < runtimeData->numLiterals; i++ ) + out << "0, "; + out << "};\n\n"; + + out << "int startStates[] = {\n\t"; + for ( long i = 0; i < runtimeData->numParsers; i++ ) { + out << runtimeData->startStates[i] << ", "; + } + out << "};\n\n"; + + out << "int eofLelIds[] = {\n\t"; + for ( long i = 0; i < runtimeData->numParsers; i++ ) { + out << runtimeData->eofLelIds[i] << ", "; + } + out << "};\n\n"; + + out << "int parserLelIds[] = {\n\t"; + for ( long i = 0; i < runtimeData->numParsers; i++ ) { + out << runtimeData->parserLelIds[i] << ", "; + } + out << "};\n\n"; + + out << "CaptureAttr captureAttr[] = {\n"; + for ( 
long i = 0; i < runtimeData->numCapturedAttr; i++ ) { + out << "\t{ " << + runtimeData->captureAttr[i].mark_enter << ", " << + runtimeData->captureAttr[i].mark_leave << ", " << + runtimeData->captureAttr[i].offset << " },\n"; + } + + out << "};\n\n"; + + out << + "RuntimeData main_runtimeData = \n" + "{\n" + " " << lelInfo() << ",\n" + " " << runtimeData->numLangEls << ",\n" + "\n" + " " << prodInfo() << ",\n" + " " << runtimeData->numProds << ",\n" + "\n" + " " << regionInfo() << ",\n" + " " << runtimeData->numRegions << ",\n" + "\n" + " " << rootCode() << ",\n" + " " << runtimeData->rootCodeLen << ",\n" + " " << runtimeData->rootFrameId << ",\n" + "\n" + " " << frameInfo() << ",\n" + " " << runtimeData->numFrames << ",\n" + "\n" + " " << functionInfo() << ",\n" + " " << runtimeData->numFunctions << ",\n" + "\n" + " " << patReplInfo() << ",\n" + " " << runtimeData->numPatterns << ",\n" + "\n" + " " << patReplNodes() << ",\n" + " " << runtimeData->numPatternNodes << ",\n" + "\n" + " " << genericInfo() << ",\n" + " " << runtimeData->numGenerics << ",\n" + " " << runtimeData->argvGenericId << ",\n" + "\n" + " " << litdata() << ",\n" + " " << litlen() << ",\n" + " " << literals() << ",\n" + " " << runtimeData->numLiterals << ",\n" + "\n" + " captureAttr,\n" + " " << runtimeData->numCapturedAttr << ",\n" + "\n" + " &fsmTables_start,\n" + " &pid_0_pdaTables,\n" + " startStates, eofLelIds, parserLelIds, " << runtimeData->numParsers << ",\n" + "\n" + " " << runtimeData->globalSize << ",\n" + "\n" + " " << runtimeData->firstNonTermId << ",\n" + " " << runtimeData->integerId << ",\n" + " " << runtimeData->stringId << ",\n" + " " << runtimeData->anyId << ",\n" + " " << runtimeData->eofId << ",\n" + " " << runtimeData->noTokenId << "\n" + "};\n" + "\n"; +} + +void PdaCodeGen::writeParserData( long id, PdaTables *tables ) +{ + String prefix = "pid_" + String(0, "%ld", id) + "_"; + + out << "int " << prefix << indicies() << "[] = {\n\t"; + for ( int i = 0; i < 
tables->numIndicies; i++ ) { + out << tables->indicies[i]; + + if ( i < tables->numIndicies-1 ) { + out << ", "; + if ( (i+1) % 8 == 0 ) + out << "\n\t"; + } + } + out << "\n};\n\n"; + + out << "int " << prefix << owners() << "[] = {\n\t"; + for ( int i = 0; i < tables->numIndicies; i++ ) { + out << tables->owners[i]; + + if ( i < tables->numIndicies-1 ) { + out << ", "; + if ( (i+1) % 8 == 0 ) + out << "\n\t"; + } + } + out << "\n};\n\n"; + + out << "int " << prefix << keys() << "[] = {\n\t"; + for ( int i = 0; i < tables->numKeys; i++ ) { + out << tables->keys[i]; + + if ( i < tables->numKeys-1 ) { + out << ", "; + if ( (i+1) % 8 == 0 ) + out << "\n\t"; + } + } + out << "\n};\n\n"; + + out << "unsigned int " << prefix << offsets() << "[] = {\n\t"; + for ( int i = 0; i < tables->numStates; i++ ) { + out << tables->offsets[i]; + + if ( i < tables->numStates-1 ) { + out << ", "; + if ( (i+1) % 8 == 0 ) + out << "\n\t"; + } + } + out << "\n};\n\n"; + + out << "unsigned int " << prefix << targs() << "[] = {\n\t"; + for ( int i = 0; i < tables->numTargs; i++ ) { + out << tables->targs[i]; + + if ( i < tables->numTargs-1 ) { + out << ", "; + if ( (i+1) % 8 == 0 ) + out << "\n\t"; + } + } + out << "\n};\n\n"; + + out << "unsigned int " << prefix << actInds() << "[] = {\n\t"; + for ( int i = 0; i < tables->numActInds; i++ ) { + out << tables->actInds[i]; + + if ( i < tables->numActInds-1 ) { + out << ", "; + if ( (i+1) % 8 == 0 ) + out << "\n\t"; + } + } + out << "\n};\n\n"; + + out << "unsigned int " << prefix << actions() << "[] = {\n\t"; + for ( int i = 0; i < tables->numActions; i++ ) { + out << tables->actions[i]; + + if ( i < tables->numActions-1 ) { + out << ", "; + if ( (i+1) % 8 == 0 ) + out << "\n\t"; + } + } + out << "\n};\n\n"; + + out << "int " << prefix << commitLen() << "[] = {\n\t"; + for ( int i = 0; i < tables->numCommitLen; i++ ) { + out << tables->commitLen[i]; + + if ( i < tables->numCommitLen-1 ) { + out << ", "; + if ( (i+1) % 8 == 0 ) + out << 
"\n\t"; + } + } + out << "\n};\n\n"; + + out << "int " << prefix << tokenRegionInds() << "[] = {\n\t"; + for ( int i = 0; i < tables->numStates; i++ ) { + out << tables->tokenRegionInds[i]; + + if ( i < tables->numStates-1 ) { + out << ", "; + if ( (i+1) % 8 == 0 ) + out << "\n\t"; + } + } + out << "\n};\n\n"; + + out << "int " << prefix << tokenRegions() << "[] = {\n\t"; + for ( int i = 0; i < tables->numRegionItems; i++ ) { + out << tables->tokenRegions[i]; + + if ( i < tables->numRegionItems-1 ) { + out << ", "; + if ( (i+1) % 8 == 0 ) + out << "\n\t"; + } + } + out << "\n};\n\n"; + + out << "int " << prefix << tokenPreRegions() << "[] = {\n\t"; + for ( int i = 0; i < tables->numPreRegionItems; i++ ) { + out << tables->tokenPreRegions[i]; + + if ( i < tables->numPreRegionItems-1 ) { + out << ", "; + if ( (i+1) % 8 == 0 ) + out << "\n\t"; + } + } + out << "\n};\n\n"; + + out << + "PdaTables " << prefix << "pdaTables =\n" + "{\n" + " " << prefix << indicies() << ",\n" + " " << prefix << owners() << ",\n" + " " << prefix << keys() << ",\n" + " " << prefix << offsets() << ",\n" + " " << prefix << targs() << ",\n" + " " << prefix << actInds() << ",\n" + " " << prefix << actions() << ",\n" + " " << prefix << commitLen() << ",\n" + + " " << prefix << tokenRegionInds() << ",\n" + " " << prefix << tokenRegions() << ",\n" + " " << prefix << tokenPreRegions() << ",\n" + "\n" + " " << tables->numIndicies << ",\n" + " " << tables->numKeys << ",\n" + " " << tables->numStates << ",\n" + " " << tables->numTargs << ",\n" + " " << tables->numActInds << ",\n" + " " << tables->numActions << ",\n" + " " << tables->numCommitLen << ",\n" + " " << tables->numRegionItems << ",\n" + " " << tables->numPreRegionItems << "\n" + "};\n" + "\n"; +} + diff --git a/src/pdacodegen.h b/src/pdacodegen.h new file mode 100644 index 00000000..8e5e7a3a --- /dev/null +++ b/src/pdacodegen.h @@ -0,0 +1,106 @@ +/* + * Copyright 2007-2012 Adrian Thurston <thurston@complang.org> + */ + +/* This file is part 
of Colm. + * + * Colm is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Colm is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Colm; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + + +#ifndef _PDACODEGEN_H +#define _PDACODEGEN_H + +struct Compiler; + +struct PdaCodeGen +{ + PdaCodeGen( const char *fileName, const char *parserName, Compiler *pd, ostream &out ) + : + fileName(fileName), + parserName(parserName), + pd(pd), + out(out) + {} + + /* + * Code Generation. 
+ */ + void startCodeGen(); + void endCodeGen( int endLine ); + + void writeTokenIds(); + void writeLangEls(); + + void writeReference( Definition *prod, char *data ); + void writeUndoReference( Definition *prod, char *data ); + void writeFinalReference( Definition *prod, char *data ); + void writeFirstLocate( Definition *prod ); + void writeRhsLocate( Definition *prod ); + + void defineRuntime(); + void writeRuntimeData( RuntimeData *runtimeData, PdaTables *pdaTables ); + void writeParserData( long id, PdaTables *tables ); + + String PARSER() { return "parser_"; } + + String startState() { return PARSER() + "startState"; } + String indicies() { return PARSER() + "indicies"; } + String owners() { return PARSER() + "owners"; } + String keys() { return PARSER() + "keys"; } + String offsets() { return PARSER() + "offsets"; } + String targs() { return PARSER() + "targs"; } + String actInds() { return PARSER() + "actInds"; } + String actions() { return PARSER() + "actions"; } + String commitLen() { return PARSER() + "commitLen"; } + String fssProdIdIndex() { return PARSER() + "fssProdIdIndex"; } + String prodLengths() { return PARSER() + "prodLengths"; } + String prodLhsIds() { return PARSER() + "prodLhsIds"; } + String prodNames() { return PARSER() + "prodNames"; } + String lelInfo() { return PARSER() + "lelInfo"; } + String prodInfo() { return PARSER() + "prodInfo"; } + String tokenRegionInds() { return PARSER() + "tokenRegionInds"; } + String tokenRegions() { return PARSER() + "tokenRegions"; } + String tokenPreRegions() { return PARSER() + "tokenPreRegions"; } + String prodCodeBlocks() { return PARSER() + "prodCodeBlocks"; } + String prodCodeBlockLens() { return PARSER() + "prodCodeBlockLens"; } + String rootCode() { return PARSER() + "rootCode"; } + String frameInfo() { return PARSER() + "frameInfo"; } + String functionInfo() { return PARSER() + "functionInfo"; } + String objFieldInfo() { return PARSER() + "objFieldInfo"; } + String patReplInfo() { return PARSER() 
+ "patReplInfo"; } + String patReplNodes() { return PARSER() + "patReplNodes"; } + String regionInfo() { return PARSER() + "regionInfo"; } + String genericInfo() { return PARSER() + "genericInfo"; } + String litdata() { return PARSER() + "litdata"; } + String litlen() { return PARSER() + "litlen"; } + String literals() { return PARSER() + "literals"; } + String fsmTables() { return PARSER() + "fsmTables"; } + + /* + * Graphviz Generation + */ + void writeTransList( PdaState *state ); + void writeDotFile( PdaGraph *graph ); + void writeDotFile( ); + + + const char *fileName; + const char *parserName; + Compiler *pd; + ostream &out; +}; + +#endif diff --git a/src/pdagraph.cc b/src/pdagraph.cc new file mode 100644 index 00000000..8f17b7a5 --- /dev/null +++ b/src/pdagraph.cc @@ -0,0 +1,533 @@ +/* + * Copyright 2006-2012 Adrian Thurston <thurston@complang.org> + */ + +/* This file is part of Colm. + * + * Colm is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Colm is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Colm; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <string.h> +#include <iostream> +#include <string.h> +#include <assert.h> +#include "global.h" +#include "pdagraph.h" +#include "mergesort.h" + +using std::cerr; +using std::endl; + +/* Create a new fsm state. State has not out transitions or in transitions, not + * out out transition data and not number. */ +PdaState::PdaState() +: + /* No in transitions. 
*/ + inRange(), + + /* No entry points, or epsilon trans. */ + pendingCommits(), + + stateSet(0), + + /* Only used during merging. Normally null. */ + stateDictEl(0), + + /* No state identification bits. */ + stateBits(0), + + onClosureQueue(false), + inClosedMap(false), + followMarked(false), + + advanceReductions(false) +{ +} + +/* Copy everything except the action transitions. That is left up to the + * PdaGraph copy constructor. */ +PdaState::PdaState(const PdaState &other) +: + inRange(), + + /* Duplicate the entry id set, epsilon transitions and context sets. These + * are sets of integers and as such need no fixing. */ + pendingCommits(other.pendingCommits), + + stateSet(0), + + /* This is only used during merging. Normally null. */ + stateDictEl(0), + + /* Fsm state data. */ + stateBits(other.stateBits), + + dotSet(other.dotSet), + onClosureQueue(false), + inClosedMap(false), + followMarked(false), + + transMap() +{ + /* Duplicate all the transitions. */ + for ( TransMap::Iter trans = other.transMap; trans.lte(); trans++ ) { + /* Dupicate and store the orginal target in the transition. This will + * be corrected once all the states have been created. */ + PdaTrans *newTrans = new PdaTrans(*trans->value); + newTrans->toState = trans->value->toState; + transMap.append( TransMapEl( newTrans->lowKey, newTrans ) ); + } +} + +/* If there is a state dict element, then delete it. Everything else is left + * up to the FsmGraph destructor. */ +PdaState::~PdaState() +{ + if ( stateDictEl != 0 ) + delete stateDictEl; +} + +/* Graph constructor. */ +PdaGraph::PdaGraph() +: + /* No start state. */ + startState(0) +{ +} + +/* Copy all graph data including transitions. */ +PdaGraph::PdaGraph( const PdaGraph &graph ) +: + /* Lists start empty. Will be filled by copy. */ + stateList(), + misfitList(), + + /* Copy in the entry points, + * pointers will be resolved later. */ + startState(graph.startState), + + /* Will be filled by copy. 
*/ + finStateSet() +{ + /* Create the states and record their map in the original state. */ + PdaStateList::Iter origState = graph.stateList; + for ( ; origState.lte(); origState++ ) { + /* Make the new state. */ + PdaState *newState = new PdaState( *origState ); + + /* Add the state to the list. */ + stateList.append( newState ); + + /* Set the mapsTo item of the old state. */ + origState->stateMap = newState; + } + + /* Derefernce all the state maps. */ + for ( PdaStateList::Iter state = stateList; state.lte(); state++ ) { + for ( TransMap::Iter trans = state->transMap; trans.lte(); trans++ ) { + /* The points to the original in the src machine. The taget's duplicate + * is in the statemap. */ + PdaState *toState = trans->value->toState != 0 ? + trans->value->toState->stateMap : 0; + + /* Attach The transition to the duplicate. */ + trans->value->toState = 0; + attachTrans( state, toState, trans->value ); + } + } + + /* Fix the start state pointer and the new start state's count of in + * transiions. */ + startState = startState->stateMap; + + /* Build the final state set. */ + PdaStateSet::Iter st = graph.finStateSet; + for ( ; st.lte(); st++ ) + finStateSet.insert((*st)->stateMap); +} + +/* Deletes all transition data then deletes each state. */ +PdaGraph::~PdaGraph() +{ + /* Delete all the transitions. */ + PdaStateList::Iter state = stateList; + for ( ; state.lte(); state++ ) { + for ( TransMap::Iter trans = state->transMap; trans.lte(); trans++ ) + delete trans->value; + } + + /* Delete all the states. */ + stateList.empty(); +} + +/* Set a state final. The state has its isFinState set to true and the state + * is added to the finStateSet. */ +void PdaGraph::setFinState( PdaState *state ) +{ + /* Is it already a fin state. 
*/ + if ( state->stateBits & SB_ISFINAL ) + return; + + state->stateBits |= SB_ISFINAL; + finStateSet.insert( state ); +} + +void PdaGraph::unsetAllFinStates( ) +{ + for ( PdaStateSet::Iter st = finStateSet; st.lte(); st++ ) { + PdaState *state = *st; + state->stateBits &= ~ SB_ISFINAL; + } + finStateSet.empty(); +} + +/* Set and unset a state as the start state. */ +void PdaGraph::setStartState( PdaState *state ) +{ + /* Sould change from unset to set. */ + assert( startState == 0 ); + startState = state; +} + +/* Mark all states reachable from state. Traverses transitions forward. Used + * for removing states that have no path into them. */ +void PdaGraph::markReachableFromHere( PdaState *state ) +{ + /* Base case: return; */ + if ( state->stateBits & SB_ISMARKED ) + return; + + /* Set this state as processed. We are going to visit all states that this + * state has a transition to. */ + state->stateBits |= SB_ISMARKED; + + /* Recurse on all out transitions. */ + for ( TransMap::Iter trans = state->transMap; trans.lte(); trans++ ) { + if ( trans->value->toState != 0 ) + markReachableFromHere( trans->value->toState ); + } +} + +void PdaGraph::setStateNumbers() +{ + int curNum = 0; + PdaStateList::Iter state = stateList; + for ( ; state.lte(); state++ ) + state->stateNum = curNum++; +} + +/* Insert a transition into an inlist. The head must be supplied. */ +void PdaGraph::attachToInList( PdaState *from, PdaState *to, + PdaTrans *&head, PdaTrans *trans ) +{ + trans->ilnext = head; + trans->ilprev = 0; + + /* If in trans list is not empty, set the head->prev to trans. */ + if ( head != 0 ) + head->ilprev = trans; + + /* Now insert ourselves at the front of the list. */ + head = trans; +}; + +/* Detach a transition from an inlist. The head of the inlist must be supplied. */ +void PdaGraph::detachFromInList( PdaState *from, PdaState *to, + PdaTrans *&head, PdaTrans *trans ) +{ + /* Detach in the inTransList. 
*/ + if ( trans->ilprev == 0 ) + head = trans->ilnext; + else + trans->ilprev->ilnext = trans->ilnext; + + if ( trans->ilnext != 0 ) + trans->ilnext->ilprev = trans->ilprev; +} + +/* Attach states on the default transition, range list or on out/in list key. + * Type of attaching and is controlled by keyType. First makes a new + * transition. If there is already a transition out from fromState on the + * default, then will assertion fail. */ +PdaTrans *PdaGraph::appendNewTrans( PdaState *from, PdaState *to, long lowKey, long ) +{ + /* Make the new transition. */ + PdaTrans *retVal = new PdaTrans(); + + /* The transition is now attached. Remember the parties involved. */ + retVal->fromState = from; + retVal->toState = to; + + /* Make the entry in the out list for the transitions. */ + from->transMap.append( TransMapEl( lowKey, retVal ) ); + + /* Set the the keys of the new trans. */ + retVal->lowKey = lowKey; + + /* Attach using inRange as the head pointer. */ + attachToInList( from, to, to->inRange.head, retVal ); + + return retVal; +} + +PdaTrans *PdaGraph::insertNewTrans( PdaState *from, PdaState *to, long lowKey, long ) +{ + /* Make the new transition. */ + PdaTrans *retVal = new PdaTrans(); + + /* The transition is now attached. Remember the parties involved. */ + retVal->fromState = from; + retVal->toState = to; + + /* Make the entry in the out list for the transitions. */ + from->transMap.insert( lowKey, retVal ); + + /* Set the the keys of the new trans. */ + retVal->lowKey = lowKey; + + /* Attach using inRange as the head pointer. */ + attachToInList( from, to, to->inRange.head, retVal ); + + return retVal; +} + +/* Attach for range lists or for the default transition. Type of attaching is + * controlled by the keyType parameter. This attach should be used when a + * transition already is allocated and must be attached to a target state. + * Does not handle adding the transition into the out list. 
*/ +void PdaGraph::attachTrans( PdaState *from, PdaState *to, PdaTrans *trans ) +{ + assert( trans->fromState == 0 && trans->toState == 0 ); + trans->fromState = from; + trans->toState = to; + + /* Attach using the inRange pointer as the head pointer. */ + attachToInList( from, to, to->inRange.head, trans ); +} + +/* Detach for out/in lists or for default transition. The type of detaching is + * controlled by the keyType parameter. */ +void PdaGraph::detachTrans( PdaState *from, PdaState *to, PdaTrans *trans ) +{ + assert( trans->fromState == from && trans->toState == to ); + trans->fromState = 0; + trans->toState = 0; + + /* Detach using to's inRange pointer as the head. */ + detachFromInList( from, to, to->inRange.head, trans ); +} + + +/* Detach a state from the graph. Detaches and deletes transitions in and out + * of the state. Empties inList and outList. Removes the state from the final + * state set. A detached state becomes useless and should be deleted. */ +void PdaGraph::detachState( PdaState *state ) +{ + /* Detach the in transitions from the inRange list of transitions. */ + while ( state->inRange.head != 0 ) { + /* Get pointers to the trans and the state. */ + PdaTrans *trans = state->inRange.head; + PdaState *fromState = trans->fromState; + + /* Detach the transitions from the source state. */ + detachTrans( fromState, state, trans ); + + /* Ok to delete the transition. */ + fromState->transMap.remove( trans->lowKey ); + delete trans; + } + + /* Detach out range transitions. */ + for ( TransMap::Iter trans = state->transMap; trans.lte(); trans++ ) { + detachTrans( state, trans->value->toState, trans->value ); + delete trans->value; + } + + /* Delete all of the out range pointers. */ + state->transMap.empty(); + + /* Unset final stateness before detaching from graph. */ + if ( state->stateBits & SB_ISFINAL ) + finStateSet.remove( state ); +} + +/* Move all the transitions that go into src so that they go into dest. 
*/ +void PdaGraph::inTransMove( PdaState *dest, PdaState *src ) +{ + /* Do not try to move in trans to and from the same state. */ + assert( dest != src ); + + /* If src is the start state, dest becomes the start state. */ + assert( src != startState ); + + /* Move the transitions in inRange. */ + while ( src->inRange.head != 0 ) { + /* Get trans and from state. */ + PdaTrans *trans = src->inRange.head; + PdaState *fromState = trans->fromState; + + /* Detach from src, reattach to dest. */ + detachTrans( fromState, src, trans ); + attachTrans( fromState, dest, trans ); + } +} + +void PdaGraph::addInReduction( PdaTrans *dest, long prodId, long prior ) +{ + /* Look for the reduction. If not there insert it, otherwise take + * the max of the priorities. */ + ReductionMapEl *redMapEl = dest->reductions.find( prodId ); + if ( redMapEl == 0 ) + dest->reductions.insert( prodId, prior ); + else if ( prior > redMapEl->value ) + redMapEl->value = prior; +} + +/* Callback invoked when another trans (or possibly this) is added into this + * transition during the merging process. Draw in any properties of srcTrans + * into this transition. AddInTrans is called when a new transitions is made + * that will be a duplicate of another transition or a combination of several + * other transitions. AddInTrans will be called for each transition that the + * new transition is to represent. */ +void PdaGraph::addInTrans( PdaTrans *destTrans, PdaTrans *srcTrans ) +{ + /* Protect against adding in from ourselves. */ + if ( srcTrans != destTrans ) { + + /* Add in the shift priority. */ + if ( destTrans->isShift && srcTrans->isShift ) { + /* Both shifts are set. We want the max of the two. */ + if ( srcTrans->shiftPrior > destTrans->shiftPrior ) + destTrans->shiftPrior = srcTrans->shiftPrior; + } + else if ( srcTrans->isShift ) { + /* Just the source is set, copy the source prior over. */ + destTrans->shiftPrior = srcTrans->shiftPrior; + } + + /* If either is a shift, dest is a shift. 
*/ + destTrans->isShift = destTrans->isShift || srcTrans->isShift; + + /* Add in the reductions. */ + for ( ReductionMap::Iter red = srcTrans->reductions; red.lte(); red++ ) + addInReduction( destTrans, red->key, red->value ); + + /* Add in the commit points. */ + destTrans->commits.insert( srcTrans->commits ); + + if ( srcTrans->toState->advanceReductions ) + destTrans->toState->advanceReductions = true; + + if ( srcTrans->noPreIgnore ) + destTrans->noPreIgnore = true; + if ( srcTrans->noPostIgnore ) + destTrans->noPostIgnore = true; + } +} + +/* NO LONGER USED. */ +void PdaGraph::addInState( PdaState *destState, PdaState *srcState ) +{ + /* Draw in any properties of srcState into destState. */ + if ( srcState != destState ) { + /* Get the epsilons, context, out priorities. */ + destState->pendingCommits.insert( srcState->pendingCommits ); + if ( srcState->pendingCommits.length() > 0 ) + cerr << "THERE ARE PENDING COMMITS DRAWN IN" << endl; + + /* Parser generation data. */ + destState->dotSet.insert( srcState->dotSet ); + + if ( srcState->onClosureQueue && !destState->onClosureQueue ) { + stateClosureQueue.append( destState ); + destState->onClosureQueue = true; + } + } +} + +/* Make a new state. The new state will be put on the graph's + * list of state. The new state can be created final or non final. */ +PdaState *PdaGraph::addState() +{ + /* Make the new state to return. */ + PdaState *state = new PdaState(); + + /* Create the new state. */ + stateList.append( state ); + + return state; +} + + +/* Follow from to the final state of srcFsm. */ +PdaState *PdaGraph::followFsm( PdaState *from, PdaGraph *srcFsm ) +{ + PdaState *followSrc = srcFsm->startState; + + while ( ! 
followSrc->isFinState() ) { + assert( followSrc->transMap.length() == 1 ); + PdaTrans *followTrans = followSrc->transMap[0].value; + + PdaTrans *inTrans = from->findTrans( followTrans->lowKey ); + assert( inTrans != 0 ); + + from = inTrans->toState; + followSrc = followTrans->toState; + } + + return from; +} + +int PdaGraph::fsmLength( ) +{ + int length = 0; + PdaState *state = startState; + while ( ! state->isFinState() ) { + length += 1; + state = state->transMap[0].value->toState; + } + return length; +} + +/* Remove states that have no path to them from the start state. Recursively + * traverses the graph marking states that have paths into them. Then removes + * all states that did not get marked. */ +void PdaGraph::removeUnreachableStates() +{ + /* Mark all the states that can be reached + * through the existing set of entry points. */ + if ( startState != 0 ) + markReachableFromHere( startState ); + + for ( PdaStateSet::Iter si = entryStateSet; si.lte(); si++ ) + markReachableFromHere( *si ); + + /* Delete all states that are not marked + * and unmark the ones that are marked. */ + PdaState *state = stateList.head; + while ( state ) { + PdaState *next = state->next; + + if ( state->stateBits & SB_ISMARKED ) + state->stateBits &= ~ SB_ISMARKED; + else { + detachState( state ); + stateList.detach( state ); + delete state; + } + + state = next; + } +} diff --git a/src/pdagraph.h b/src/pdagraph.h new file mode 100644 index 00000000..dc11b3e1 --- /dev/null +++ b/src/pdagraph.h @@ -0,0 +1,515 @@ +/* + * Copyright 2006-2012 Adrian Thurston <thurston@complang.org> + */ + +/* This file is part of Colm. + * + * Colm is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * Colm is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Colm; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _PDAGRAPH_H +#define _PDAGRAPH_H + +#include <assert.h> +#include "vector.h" +#include "bstset.h" +#include "compare.h" +#include "avltree.h" +#include "dlist.h" +#include "bstmap.h" +#include "sbstmap.h" +#include "sbstset.h" +#include "sbsttable.h" +#include "avlset.h" +#include "dlistmel.h" +#include "avltree.h" + +/* Flags for states. */ +#define SB_ISFINAL 0x04 +#define SB_ISMARKED 0x08 +#define SB_ISSTART 0x10 + +/* Flags for transitions. */ +#define TB_ISMARKED 0x01 + +struct PdaTrans; +struct PdaState; +struct PdaGraph; +struct TokenDef; +struct Definition; +struct LangEl; +struct TokenRegion; + +typedef Vector<TokenRegion*> RegionVect; + +typedef Vector<long> ActDataList; + +struct ActionData +{ + ActionData( int targ, ActDataList &actions, int commitLen ) + : targ(targ), commitLen(commitLen), id(0), actions(actions) { } + + int targ; + int commitLen; + int id; + + ActDataList actions; +}; + + +struct CmpActionData +{ + static int compare( const ActionData &ap1, const ActionData &ap2 ) + { + if ( ap1.targ < ap2.targ ) + return -1; + else if ( ap1.targ > ap2.targ ) + return 1; + else if ( ap1.commitLen < ap2.commitLen ) + return -1; + else if ( ap1.commitLen > ap2.commitLen ) + return 1; + else if ( ap1.id < ap2.id ) + return -1; + else if ( ap1.id > ap2.id ) + return 1; + + return CmpTable< long, CmpOrd<long> >:: + compare( ap1.actions, ap2.actions ); + } +}; + +typedef AvlSet<ActionData, CmpActionData> PdaActionSet; +typedef AvlSetEl<ActionData> PdaActionSetEl; + +/* List pointers for the closure 
queue. Goes into state. */ +struct ClosureQueueListEl { PdaState *prev, *next; }; + +/* Queue of states, transitions to be closed. */ +typedef DListMel< PdaState, ClosureQueueListEl > StateClosureQueue; +typedef DList<PdaTrans> TransClosureQueue; + +typedef BstSet< Definition*, CmpOrd<Definition*> > DefSet; +typedef CmpTable< Definition*, CmpOrd<Definition*> > CmpDefSet; +typedef BstSet< DefSet, CmpDefSet > DefSetSet; + +typedef Vector< Definition* > DefVect; +typedef BstSet< long, CmpOrd<long> > AlphSet; + +struct ExpandToEl +{ + ExpandToEl( PdaState *state, int prodId ) + : state(state), prodId(prodId) { } + + PdaState *state; + int prodId; +}; + +struct CmpExpandToEl +{ + static inline int compare( const ExpandToEl &etel1, const ExpandToEl &etel2 ) + { + if ( etel1.state < etel2.state ) + return -1; + else if ( etel1.state > etel2.state ) + return 1; + else if ( etel1.prodId < etel2.prodId ) + return -1; + else if ( etel1.prodId > etel2.prodId ) + return 1; + else + return 0; + } +}; + +typedef BstSet<ExpandToEl, CmpExpandToEl> ExpandToSet; +typedef BstSet< int, CmpOrd<int> > IntSet; +typedef CmpTable< int, CmpOrd<int> > CmpIntSet; + +typedef BstSet< long, CmpOrd<long> > LongSet; +typedef CmpTable< long, CmpOrd<long> > CmpLongSet; + +typedef BstMap< long, long, CmpOrd<long> > LongMap; +typedef BstMapEl< long, long > LongMapEl; + +typedef LongSet ProdIdSet; +typedef CmpLongSet CmpProdIdSet; + +/* Set of states, list of states. 
*/ +typedef BstSet<PdaState*> PdaStateSet; +typedef Vector<PdaState*> StateVect; +typedef DList<PdaState> PdaStateList; + +typedef LongMap FollowToAdd; +typedef LongMap ReductionMap; +typedef LongMapEl ReductionMapEl; + +struct ProdIdPair +{ + ProdIdPair( int onReduce, int length ) + : onReduce(onReduce), length(length) {} + + int onReduce; + int length; +}; + +struct CmpProdIdPair +{ + static inline int compare( const ProdIdPair &pair1, const ProdIdPair &pair2 ) + { + if ( pair1.onReduce < pair2.onReduce ) + return -1; + else if ( pair1.onReduce > pair2.onReduce ) + return 1; + else if ( pair1.length < pair2.length ) + return -1; + else if ( pair1.length > pair2.length ) + return 1; + else + return 0; + } +}; + +typedef BstSet< ProdIdPair, CmpProdIdPair > ProdIdPairSet; + +/* Transition class that implements actions and priorities. */ +struct PdaTrans +{ + PdaTrans() : + fromState(0), + toState(0), + isShift(false), + isShiftReduce(false), + shiftPrior(0), + noPreIgnore(false), + noPostIgnore(false) + { } + + PdaTrans( const PdaTrans &other ) : + lowKey(other.lowKey), + fromState(0), toState(0), + isShift(other.isShift), + isShiftReduce(other.isShiftReduce), + shiftPrior(other.shiftPrior), + reductions(other.reductions), + commits(other.commits), + noPreIgnore(false), + noPostIgnore(false) + { } + + long lowKey; + PdaState *fromState; + PdaState *toState; + + /* Pointers for outlist. */ + PdaTrans *prev, *next; + + /* Pointers for in-list. */ + PdaTrans *ilprev, *ilnext; + + long maxPrior(); + + /* Parse Table construction data. */ + bool isShift, isShiftReduce; + int shiftPrior; + ReductionMap reductions; + ActDataList actions; + ActDataList actOrds; + ActDataList actPriors; + + ExpandToSet expandTo; + + PdaActionSetEl *actionSetEl; + + LongSet commits; + LongSet afterShiftCommits; + + bool noPreIgnore; + bool noPostIgnore; +}; + +/* In transition list. Like DList except only has head pointers, which is all + * that is required. 
Insertion and deletion is handled by the graph. This + * class provides the iterator of a single list. */ +struct PdaTransInList +{ + PdaTransInList() : head(0) { } + + PdaTrans *head; + + struct Iter + { + /* Default construct. */ + Iter() : ptr(0) { } + + /* Construct, assign from a list. */ + Iter( const PdaTransInList &il ) : ptr(il.head) { } + Iter &operator=( const PdaTransInList &dl ) { ptr = dl.head; return *this; } + + /* At the end */ + bool lte() const { return ptr != 0; } + bool end() const { return ptr == 0; } + + /* At the first, last element. */ + bool first() const { return ptr && ptr->ilprev == 0; } + bool last() const { return ptr && ptr->ilnext == 0; } + + /* Cast, dereference, arrow ops. */ + operator PdaTrans*() const { return ptr; } + PdaTrans &operator *() const { return *ptr; } + PdaTrans *operator->() const { return ptr; } + + /* Increment, decrement. */ + inline void operator++(int) { ptr = ptr->ilnext; } + inline void operator--(int) { ptr = ptr->ilprev; } + + /* The iterator is simply a pointer. */ + PdaTrans *ptr; + }; +}; + +typedef DList<PdaTrans> PdaTransList; + +/* A element in a state dict. */ +struct PdaStateDictEl +: + public AvlTreeEl<PdaStateDictEl> +{ + PdaStateDictEl(const PdaStateSet &stateSet) + : stateSet(stateSet) { } + + const PdaStateSet &getKey() { return stateSet; } + PdaStateSet stateSet; + PdaState *targState; +}; + +/* Dictionary mapping a set of states to a target state. */ +typedef AvlTree< PdaStateDictEl, PdaStateSet, CmpTable<PdaState*> > PdaStateDict; + +/* What items does a particular state encompass. */ +typedef BstSet< long, CmpOrd<long> > DotSet; +typedef CmpTable< long, CmpOrd<long> > CmpDotSet; + +/* Map of dot sets to states. */ +typedef AvlTree< PdaState, DotSet, CmpDotSet > DotSetMap; +typedef PdaState DotSetMapEl; + +typedef BstMap< long, PdaTrans* > TransMap; +typedef BstMapEl< long, PdaTrans* > TransMapEl; + +/* State class that implements actions and priorities. 
*/ +struct PdaState +: + public ClosureQueueListEl, + public AvlTreeEl< PdaState > +{ + PdaState(); + PdaState(const PdaState &other); + ~PdaState(); + + /* Is the state final? */ + bool isFinState() { return stateBits & SB_ISFINAL; } + + PdaTrans *findTrans( long key ) + { + TransMapEl *transMapEl = transMap.find( key ); + if ( transMapEl == 0 ) + return 0; + return transMapEl->value; + } + + /* In transition list. */ + PdaTransInList inRange; + + ProdIdPairSet pendingCommits; + + /* When duplicating the fsm we need to map each + * state to the new state representing it. */ + PdaState *stateMap; + + /* When merging states (state machine operations) this next pointer is + * used for the list of states that need to be filled in. */ + PdaState *alg_next; + + PdaStateSet *stateSet; + + /* Identification for printing and stable minimization. */ + int stateNum; + + /* A pointer to a dict element that contains the set of states this state + * represents. This cannot go into alg, because alg.next is used during + * the merging process. */ + PdaStateDictEl *stateDictEl; + + /* Bits controlling the behaviour of the state during collapsing to dfa. */ + int stateBits; + + /* State list elements. */ + PdaState *next, *prev; + + /* For dotset map. */ + DotSet &getKey() { return dotSet; } + + /* Closure management. */ + DotSet dotSet; + DotSet dotSet2; + bool onClosureQueue; + bool inClosedMap; + bool followMarked; + bool onStateList; + + TransMap transMap; + + RegionVect regions; + RegionVect preRegions; + + bool advanceReductions; +}; + +/* Compare lists of epsilon transitions. Entries are name ids of targets. */ +typedef CmpTable< int, CmpOrd<int> > CmpEpsilonTrans; + +/* Compare sets of context values. */ +typedef CmpTable< int, CmpOrd<int> > CmpContextSets; + +/* Graph class that implements actions and priorities. */ +struct PdaGraph +{ + /* Constructors/Destructors. */ + PdaGraph(); + PdaGraph( const PdaGraph &graph ); + ~PdaGraph(); + + /* The list of states. 
*/ + PdaStateList stateList; + PdaStateList misfitList; + + /* The start state. */ + PdaState *startState; + PdaStateSet entryStateSet; + + /* The set of final states. */ + PdaStateSet finStateSet; + + /* Closure queues and maps. */ + DotSetMap closedMap; + StateClosureQueue stateClosureQueue; + StateClosureQueue stateClosedList; + + TransClosureQueue transClosureQueue; + PdaState *stateClosureHead; + + LangEl **langElIndex; + + void setStartState( PdaState *state ); + void unsetStartState( ); + + /* + * Basic attaching and detaching. + */ + + /* Common to attaching/detaching list and default. */ + void attachToInList( PdaState *from, PdaState *to, PdaTrans *&head, PdaTrans *trans ); + void detachFromInList( PdaState *from, PdaState *to, PdaTrans *&head, PdaTrans *trans ); + + /* Attach with a new transition. */ + PdaTrans *appendNewTrans( PdaState *from, PdaState *to, long onChar1, long ); + PdaTrans *insertNewTrans( PdaState *from, PdaState *to, long lowKey, long ); + + /* Attach with an existing transition that already in an out list. */ + void attachTrans( PdaState *from, PdaState *to, PdaTrans *trans ); + + /* Detach a transition from a target state. */ + void detachTrans( PdaState *from, PdaState *to, PdaTrans *trans ); + + /* Detach a state from the graph. */ + void detachState( PdaState *state ); + + /* + * Callbacks. + */ + + /* Add in the properties of srcTrans into this. */ + void addInReduction( PdaTrans *dest, long prodId, long prior ); + void addInTrans( PdaTrans *destTrans, PdaTrans *srcTrans ); + void addInState( PdaState *destState, PdaState *srcState ); + + /* + * Allocation. + */ + + /* New up a state and add it to the graph. */ + PdaState *addState(); + + /* + * Fsm operators. + */ + + /* Follow to the fin state of src fsm. */ + PdaState *followFsm( PdaState *from, PdaGraph *srcFsm ); + + /* + * Final states + */ + + /* Set and Unset a state as final. 
*/ + void setFinState( PdaState *state ); + void unsetFinState( PdaState *state ); + void unsetAllFinStates( ); + + /* Set State numbers starting at 0. */ + void setStateNumbers(); + + /* + * Path pruning + */ + + /* Mark all states reachable from state. */ + void markReachableFromHere( PdaState *state ); + + /* Removes states that cannot be reached by any path in the fsm and are + * thus wasted silicon. */ + void removeUnreachableStates(); + + /* Remove error actions from states on which the error transition will + * never be taken. */ + bool outListCovers( PdaState *state ); + + /* Remove states that are on the misfit list. */ + void removeMisfits(); + + + /* + * Other + */ + + /* Move the in trans into src into dest. */ + void inTransMove(PdaState *dest, PdaState *src); + + int fsmLength( ); + + /* Collected machine information. */ + unsigned long long maxState; + unsigned long long maxAction; + unsigned long long maxLelId; + unsigned long long maxOffset; + unsigned long long maxIndex; + unsigned long long maxProdLen; + + PdaActionSet actionSet; +}; + + +#endif /* _FSMGRAPH_H */ diff --git a/src/pdarun.c b/src/pdarun.c new file mode 100644 index 00000000..62ab107e --- /dev/null +++ b/src/pdarun.c @@ -0,0 +1,2272 @@ +/* + * Copyright 2007-2012 Adrian Thurston <thurston@complang.org> + */ + +/* This file is part of Colm. + * + * Colm is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Colm is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with Colm; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "config.h" +#include "debug.h" +#include "pdarun.h" +#include "fsmrun.h" +#include "bytecode.h" +#include "tree.h" +#include "pool.h" + +#include <errno.h> +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <assert.h> + +#define true 1 +#define false 0 + +#define act_sb 0x1 +#define act_rb 0x2 +#define lower 0x0000ffff +#define upper 0xffff0000 + +#define read_word_p( i, p ) do { \ + i = ((Word) p[0]); \ + i |= ((Word) p[1]) << 8; \ + i |= ((Word) p[2]) << 16; \ + i |= ((Word) p[3]) << 24; \ +} while(0) + +#define read_tree_p( i, p ) do { \ + Word w; \ + w = ((Word) p[0]); \ + w |= ((Word) p[1]) << 8; \ + w |= ((Word) p[2]) << 16; \ + w |= ((Word) p[3]) << 24; \ + i = (Tree*)w; \ +} while(0) + +void initFsmRun( FsmRun *fsmRun, Program *prg ) +{ + fsmRun->tables = prg->rtd->fsmTables; + fsmRun->runBuf = 0; + + /* Run buffers need to stick around because + * token strings point into them. */ + fsmRun->runBuf = newRunBuf(); + fsmRun->runBuf->next = 0; + + fsmRun->p = fsmRun->pe = fsmRun->runBuf->data; + fsmRun->peof = 0; + + fsmRun->attachedInput = 0; + fsmRun->attachedSource = 0; + fsmRun->preRegion = -1; +} + +void clearFsmRun( Program *prg, FsmRun *fsmRun ) +{ + if ( fsmRun->runBuf != 0 ) { + /* Transfer the run buf list to the program */ + RunBuf *head = fsmRun->runBuf; + RunBuf *tail = head; + while ( tail->next != 0 ) + tail = tail->next; + + tail->next = prg->allocRunBuf; + prg->allocRunBuf = head; + } +} + +/* Keep the position up to date after consuming text. 
*/ +void updatePosition( InputStream *inputStream, const char *data, long length ) +{ + if ( !inputStream->handlesLine ) { + int i; + for ( i = 0; i < length; i++ ) { + if ( data[i] != '\n' ) + inputStream->column += 1; + else { + inputStream->line += 1; + inputStream->column = 1; + } + } + } + + inputStream->byte += length; +} + +/* Keep the position up to date after sending back text. */ +void undoPosition( InputStream *inputStream, const char *data, long length ) +{ + /* FIXME: this needs to fetch the position information from the parsed + * token and restore based on that.. */ + int i; + if ( !inputStream->handlesLine ) { + for ( i = 0; i < length; i++ ) { + if ( data[i] == '\n' ) + inputStream->line -= 1; + } + } + + inputStream->byte -= length; +} + +void incrementSteps( PdaRun *pdaRun ) +{ + pdaRun->steps += 1; + debug( REALM_PARSE, "steps up to %ld\n", pdaRun->steps ); +} + +void decrementSteps( PdaRun *pdaRun ) +{ + pdaRun->steps -= 1; + debug( REALM_PARSE, "steps down to %ld\n", pdaRun->steps ); +} + +/* Load up a token, starting from tokstart if it is set. If not set then + * start it at data. */ +Head *streamPull( Program *prg, FsmRun *fsmRun, InputStream *inputStream, long length ) +{ + /* We should not be in the midst of getting a token. 
*/ + assert( fsmRun->tokstart == 0 ); + + RunBuf *runBuf = newRunBuf(); + runBuf->next = fsmRun->runBuf; + fsmRun->runBuf = runBuf; + + int len = 0; + getData( fsmRun, inputStream, 0, runBuf->data, length, &len ); + consumeData( inputStream, length ); + fsmRun->p = fsmRun->pe = runBuf->data + length; + + Head *tokdata = stringAllocPointer( prg, runBuf->data, length ); + updatePosition( inputStream, runBuf->data, length ); + + return tokdata; +} + +void undoStreamPull( FsmRun *fsmRun, InputStream *inputStream, const char *data, long length ) +{ + debug( REALM_PARSE, "undoing stream pull\n" ); + + prependData( inputStream, data, length ); +} + +void streamPushText( FsmRun *fsmRun, InputStream *inputStream, const char *data, long length ) +{ + prependData( inputStream, data, length ); +} + +void streamPushTree( FsmRun *fsmRun, InputStream *inputStream, Tree *tree, int ignore ) +{ + prependTree( inputStream, tree, ignore ); +} + +void undoStreamPush( Program *prg, Tree **sp, FsmRun *fsmRun, InputStream *inputStream, long length ) +{ + if ( length < 0 ) { + Tree *tree = undoPrependTree( inputStream ); + treeDownref( prg, sp, tree ); + } + else { + undoPrependData( inputStream, length ); + } +} + +void undoStreamAppend( Program *prg, Tree **sp, FsmRun *fsmRun, InputStream *inputStream, Tree *input, long length ) +{ + if ( input->id == LEL_ID_STR ) + undoAppendData( inputStream, length ); + else if ( input->id == LEL_ID_STREAM ) + undoAppendStream( inputStream ); + else { + Tree *tree = undoAppendTree( inputStream ); + treeDownref( prg, sp, tree ); + } +} + +/* Should only be sending back whole tokens/ignores, therefore the send back + * should never cross a buffer boundary. Either we slide back data, or we move to + * a previous buffer and slide back data. 
*/ +static void sendBackText( FsmRun *fsmRun, InputStream *inputStream, const char *data, long length ) +{ + debug( REALM_PARSE, "push back of %ld characters\n", length ); + + if ( length == 0 ) + return; + + debug( REALM_PARSE, "sending back text: %.*s\n", + (int)length, data ); + + undoConsumeData( fsmRun, inputStream, data, length ); + undoPosition( inputStream, data, length ); +} + +void sendBackTree( InputStream *inputStream, Tree *tree ) +{ + undoConsumeTree( inputStream, tree, false ); +} + +/* + * Stops on: + * PcrRevIgnore + */ +static void sendBackIgnore( Program *prg, Tree **sp, PdaRun *pdaRun, FsmRun *fsmRun, + InputStream *inputStream, ParseTree *parseTree ) +{ + #ifdef DEBUG + LangElInfo *lelInfo = prg->rtd->lelInfo; + debug( REALM_PARSE, "sending back: %s%s\n", + lelInfo[parseTree->shadow->tree->id].name, + parseTree->flags & PF_ARTIFICIAL ? " (artificial)" : "" ); + #endif + + Head *head = parseTree->shadow->tree->tokdata; + int artificial = parseTree->flags & PF_ARTIFICIAL; + + if ( head != 0 && !artificial ) + sendBackText( fsmRun, inputStream, stringData( head ), head->length ); + + decrementSteps( pdaRun ); + + /* Check for reverse code. 
*/ + if ( parseTree->flags & PF_HAS_RCODE ) { + pdaRun->onDeck = true; + parseTree->flags &= ~PF_HAS_RCODE; + } + + if ( pdaRun->steps == pdaRun->targetSteps ) { + debug( REALM_PARSE, "trigger parse stop, steps = target = %d\n", pdaRun->targetSteps ); + pdaRun->stop = true; + } + +} + +void attachInput( FsmRun *fsmRun, InputStream *is ) +{ + if ( is->attached != 0 && is->attached != fsmRun ) + detachInput( is->attached, is ); + + if ( is->attached != fsmRun ) { + debug( REALM_INPUT, "attaching fsm run to input stream: %p %p\n", fsmRun, is ); + fsmRun->attachedInput = is; + is->attached = fsmRun; + } +} + +void attachSource( FsmRun *fsmRun, SourceStream *ss ) +{ + if ( ss->attached != 0 && ss->attached != fsmRun ) + detachSource( ss->attached, ss ); + + if ( ss->attached != fsmRun ) { + debug( REALM_INPUT, "attaching fsm run to source stream: %p %p\n", fsmRun, ss ); + fsmRun->attachedSource = ss; + ss->attached = fsmRun; + } +} + +void detachInput( FsmRun *fsmRun, InputStream *is ) +{ + debug( REALM_INPUT, "detaching fsm run from input stream: %p %p\n", fsmRun, is ); + + fsmRun->attachedInput = 0; + is->attached = 0; + + clearBuffered( fsmRun ); + + if ( fsmRun->attachedSource != 0 ) { + fsmRun->attachedSource->attached = 0; + fsmRun->attachedSource = 0; + } +} + +void detachSource( FsmRun *fsmRun, SourceStream *is ) +{ + debug( REALM_INPUT, "detaching fsm run from source stream: %p %p\n", fsmRun, is ); + + fsmRun->attachedSource = 0; + is->attached = 0; + + clearBuffered( fsmRun ); + + if ( fsmRun->attachedInput != 0 ) { + fsmRun->attachedInput->attached = 0; + fsmRun->attachedInput = 0; + } +} + +void clearBuffered( FsmRun *fsmRun ) +{ + /* If there is data in the current buffer then send the whole send back + * should be in this buffer. 
*/ + if ( fsmRun->tokstart != 0 ) { + fsmRun->p = fsmRun->pe = fsmRun->tokstart; + fsmRun->tokstart = 0; + } + else { + fsmRun->pe = fsmRun->p; + } +} + +void resetToken( FsmRun *fsmRun ) +{ + /* If there is a token started, but never finished for a lack of data, we + * must first backup over it. */ + if ( fsmRun->tokstart != 0 ) { + fsmRun->p = fsmRun->tokstart; + fsmRun->tokstart = 0; + } +} + +/* Stops on: + * PcrRevToken + */ + +static void sendBack( Program *prg, Tree **sp, PdaRun *pdaRun, FsmRun *fsmRun, + InputStream *inputStream, ParseTree *parseTree ) +{ + debug( REALM_PARSE, "sending back: %s\n", prg->rtd->lelInfo[parseTree->id].name ); + + if ( parseTree->flags & PF_NAMED ) { + ///* Send back anything in the buffer that has not been parsed. */ + //if ( fsmRun->p == fsmRun->runBuf->data ) + // sendBackRunBufHead( fsmRun, inputStream ); + + /* Send the named lang el back first, then send back any leading + * whitespace. */ + undoConsumeLangEl( inputStream ); + } + + decrementSteps( pdaRun ); + + /* Artifical were not parsed, instead sent in as items. */ + if ( parseTree->flags & PF_ARTIFICIAL ) { + /* Check for reverse code. */ + if ( parseTree->flags & PF_HAS_RCODE ) { + debug( REALM_PARSE, "tree has rcode, setting on deck\n" ); + pdaRun->onDeck = true; + parseTree->flags &= ~PF_HAS_RCODE; + } + + treeUpref( parseTree->shadow->tree ); + + sendBackTree( inputStream, parseTree->shadow->tree ); + } + else { + /* Check for reverse code. */ + if ( parseTree->flags & PF_HAS_RCODE ) { + debug( REALM_PARSE, "tree has rcode, setting on deck\n" ); + pdaRun->onDeck = true; + parseTree->flags &= ~PF_HAS_RCODE; + } + + /* Push back the token data. */ + sendBackText( fsmRun, inputStream, stringData( parseTree->shadow->tree->tokdata ), + stringLength( parseTree->shadow->tree->tokdata ) ); + + /* If eof was just sent back remember that it needs to be sent again. 
*/ + if ( parseTree->id == prg->rtd->eofLelIds[pdaRun->parserId] ) + inputStream->eofSent = false; + + /* If the item is bound then store remove it from the bindings array. */ + popBinding( pdaRun, parseTree ); + } + + if ( pdaRun->steps == pdaRun->targetSteps ) { + debug( REALM_PARSE, "trigger parse stop, steps = target = %d\n", pdaRun->targetSteps ); + pdaRun->stop = true; + } + + /* Downref the tree that was sent back and free the kid. */ + treeDownref( prg, sp, parseTree->shadow->tree ); + kidFree( prg, parseTree->shadow ); + parseTreeFree( prg, parseTree ); +} + +void setRegion( PdaRun *pdaRun, int emptyIgnore, ParseTree *tree ) +{ + if ( emptyIgnore ) { + /* Recording the next region. */ + tree->region = pdaRun->nextRegionInd; + if ( pdaRun->tables->tokenRegions[tree->region+1] != 0 ) + pdaRun->numRetry += 1; + } +} + +void ignoreTree( Program *prg, FsmRun *fsmRun, PdaRun *pdaRun, Tree *tree ) +{ + int emptyIgnore = pdaRun->accumIgnore == 0; + + incrementSteps( pdaRun ); + + ParseTree *parseTree = parseTreeAllocate( prg ); + parseTree->shadow = kidAllocate( prg ); + parseTree->shadow->tree = tree; + + parseTree->next = pdaRun->accumIgnore; + pdaRun->accumIgnore = parseTree; + + transferReverseCode( pdaRun, parseTree ); + + if ( fsmRun->preRegion >= 0 ) + parseTree->flags |= PF_RIGHT_IGNORE; + + setRegion( pdaRun, emptyIgnore, pdaRun->accumIgnore ); +} + +void ignoreTree2( Program *prg, PdaRun *pdaRun, Tree *tree ) +{ + int emptyIgnore = pdaRun->accumIgnore == 0; + + incrementSteps( pdaRun ); + + ParseTree *parseTree = parseTreeAllocate( prg ); + parseTree->flags |= PF_ARTIFICIAL; + parseTree->shadow = kidAllocate( prg ); + parseTree->shadow->tree = tree; + + parseTree->next = pdaRun->accumIgnore; + pdaRun->accumIgnore = parseTree; + + transferReverseCode( pdaRun, parseTree ); + + setRegion( pdaRun, emptyIgnore, pdaRun->accumIgnore ); +} + +Kid *makeTokenWithData( Program *prg, PdaRun *pdaRun, FsmRun *fsmRun, + InputStream *inputStream, int id, Head *tokdata ) 
+{ + /* Make the token object. */ + long objectLength = prg->rtd->lelInfo[id].objectLength; + Kid *attrs = allocAttrs( prg, objectLength ); + + Kid *input = 0; + input = kidAllocate( prg ); + input->tree = treeAllocate( prg ); + + debug( REALM_PARSE, "made token %p\n", input->tree ); + + input->tree->refs = 1; + input->tree->id = id; + input->tree->tokdata = tokdata; + + /* No children and ignores get added later. */ + input->tree->child = attrs; + + LangElInfo *lelInfo = prg->rtd->lelInfo; + if ( lelInfo[id].numCaptureAttr > 0 ) { + int i; + for ( i = 0; i < lelInfo[id].numCaptureAttr; i++ ) { + CaptureAttr *ca = &prg->rtd->captureAttr[lelInfo[id].captureAttr + i]; + Head *data = stringAllocFull( prg, + fsmRun->mark[ca->mark_enter], fsmRun->mark[ca->mark_leave] + - fsmRun->mark[ca->mark_enter] ); + Tree *string = constructString( prg, data ); + treeUpref( string ); + setAttr( input->tree, ca->offset, string ); + } + } + + return input; +} + +void clearIgnoreList( Program *prg, Tree **sp, Kid *kid ) +{ + while ( kid != 0 ) { + Kid *next = kid->next; + treeDownref( prg, sp, kid->tree ); + kidFree( prg, kid ); + kid = next; + } +} + +static void reportParseError( Program *prg, Tree **sp, PdaRun *pdaRun ) +{ + Kid *kid = pdaRun->btPoint; + Head *deepest = 0; + while ( kid != 0 ) { + Head *head = kid->tree->tokdata; + if ( head != 0 && head->location != 0 ) { + if ( deepest == 0 || head->location->byte > deepest->location->byte ) + deepest = head; + } + kid = kid->next; + } + + Head *errorHead = 0; + + /* If there are no error points on record assume the error occurred at the beginning of the stream. 
*/ + if ( deepest == 0 ) + errorHead = stringAllocFull( prg, "PARSE ERROR at 1:1", 18 ); + else { + debug( REALM_PARSE, "deepest location byte: %d\n", deepest->location->byte ); + + long line = deepest->location->line; + long i, column = deepest->location->column; + + for ( i = 0; i < deepest->length; i++ ) { + if ( deepest->data[i] != '\n' ) + column += 1; + else { + line += 1; + column = 1; + } + } + + char formatted[128]; + sprintf( formatted, "PARSE ERROR at %ld:%ld", line, column ); + errorHead = stringAllocFull( prg, formatted, strlen(formatted) ); + } + + Tree *tree = constructString( prg, errorHead ); + treeDownref( prg, sp, prg->lastParseError ); + prg->lastParseError = tree; + treeUpref( prg->lastParseError ); +} + +static void attachRightIgnore( Program *prg, Tree **sp, PdaRun *pdaRun, ParseTree *parseTree ) +{ + if ( pdaRun->accumIgnore == 0 ) + return; + + if ( pdaRun->stackTop->id > 0 && pdaRun->stackTop->id < prg->rtd->firstNonTermId ) { + /* OK, do it */ + debug( REALM_PARSE, "attaching right ignore\n" ); + + /* Reset. */ + assert( ! ( parseTree->flags & PF_RIGHT_IL_ATTACHED ) ); + + ParseTree *accum = pdaRun->accumIgnore; + + ParseTree *stopAt = 0, *use = accum; + while ( use != 0 ) { + if ( ! (use->flags & PF_RIGHT_IGNORE) ) + stopAt = use; + use = use->next; + } + + if ( stopAt != 0 ) { + /* Stop at was set. Make it the last item in the igore list. Take + * the rest. */ + accum = stopAt->next; + stopAt->next = 0; + } + else { + /* Stop at was never set. All right ignore. Use it all. */ + pdaRun->accumIgnore = 0; + } + + /* The data list needs to be extracted and reversed. The parse tree list + * can remain in stack order. */ + ParseTree *child = accum, *last = 0; + Kid *dataChild = 0, *dataLast = 0; + + while ( child ) { + dataChild = child->shadow; + ParseTree *next = child->next; + + /* Reverse the lists. */ + dataChild->next = dataLast; + child->next = last; + + /* Detach the parse tree from the data tree. 
*/ + child->shadow = 0; + + /* Keep the last for reversal. */ + dataLast = dataChild; + last = child; + + child = next; + } + + /* Last is now the first. */ + parseTree->rightIgnore = last; + + if ( dataChild != 0 ) { + debug( REALM_PARSE, "attaching ignore right\n" ); + + Kid *ignoreKid = dataLast; + + /* Copy the ignore list first if we need to attach it as a right + * ignore. */ + Tree *rightIgnore = 0; + + rightIgnore = treeAllocate( prg ); + rightIgnore->id = LEL_ID_IGNORE; + rightIgnore->child = ignoreKid; + + Tree *pushTo = parseTree->shadow->tree; + + pushTo = pushRightIgnore( prg, pushTo, rightIgnore ); + + parseTree->shadow->tree = pushTo; + + parseTree->flags |= PF_RIGHT_IL_ATTACHED; + } + } +} + +static void attachLeftIgnore( Program *prg, Tree **sp, PdaRun *pdaRun, ParseTree *parseTree ) +{ + /* Reset. */ + assert( ! ( parseTree->flags & PF_LEFT_IL_ATTACHED ) ); + + ParseTree *accum = pdaRun->accumIgnore; + pdaRun->accumIgnore = 0; + + /* The data list needs to be extracted and reversed. The parse tree list + * can remain in stack order. */ + ParseTree *child = accum, *last = 0; + Kid *dataChild = 0, *dataLast = 0; + + while ( child ) { + dataChild = child->shadow; + ParseTree *next = child->next; + + /* Reverse the lists. */ + dataChild->next = dataLast; + child->next = last; + + /* Detach the parse tree from the data tree. */ + child->shadow = 0; + + /* Keep the last for reversal. */ + dataLast = dataChild; + last = child; + + child = next; + } + + /* Last is now the first. */ + parseTree->leftIgnore = last; + + if ( dataChild != 0 ) { + debug( REALM_PARSE, "attaching left ignore\n" ); + + Kid *ignoreKid = dataChild; + + /* Make the ignore list for the left-ignore. 
*/ + Tree *leftIgnore = treeAllocate( prg ); + leftIgnore->id = LEL_ID_IGNORE; + leftIgnore->child = ignoreKid; + + Tree *pushTo = parseTree->shadow->tree; + + pushTo = pushLeftIgnore( prg, pushTo, leftIgnore ); + + parseTree->shadow->tree = pushTo; + + parseTree->flags |= PF_LEFT_IL_ATTACHED; + } +} + +/* Not currently used. Need to revive this. WARNING: untested changes here */ +static void detachRightIgnore( Program *prg, Tree **sp, PdaRun *pdaRun, ParseTree *parseTree ) +{ + /* Right ignore are immediately discarded since they are copies of + * left-ignores. */ + Tree *rightIgnore = 0; + if ( parseTree->flags & PF_RIGHT_IL_ATTACHED ) { + Tree *popFrom = parseTree->shadow->tree; + + popFrom = popRightIgnore( prg, sp, popFrom, &rightIgnore ); + + parseTree->shadow->tree = popFrom; + + parseTree->flags &= ~PF_RIGHT_IL_ATTACHED; + } + + if ( parseTree->rightIgnore != 0 ) { + assert( rightIgnore != 0 ); + + /* Transfer the trees to accumIgnore. */ + ParseTree *ignore = parseTree->rightIgnore; + parseTree->rightIgnore = 0; + + Kid *dataIgnore = rightIgnore->child; + rightIgnore->child = 0; + + ParseTree *last = 0; + Kid *dataLast = 0; + while ( ignore != 0 ) { + ParseTree *next = ignore->next; + Kid *dataNext = dataIgnore->next; + + /* Put the data trees underneath the parse trees. */ + ignore->shadow = dataIgnore; + + /* Reverse. */ + ignore->next = last; + dataIgnore->next = dataLast; + + /* Keep last for reversal. */ + last = ignore; + dataLast = dataIgnore; + + ignore = next; + dataIgnore = dataNext; + } + + pdaRun->accumIgnore = last; + + treeDownref( prg, sp, rightIgnore ); + } +} + +static void detachLeftIgnore( Program *prg, Tree **sp, PdaRun *pdaRun, FsmRun *fsmRun, ParseTree *parseTree ) +{ + /* Detach left. 
*/ + Tree *leftIgnore = 0; + if ( parseTree->flags & PF_LEFT_IL_ATTACHED ) { + Tree *popFrom = parseTree->shadow->tree; + + popFrom = popLeftIgnore( prg, sp, popFrom, &leftIgnore ); + + parseTree->shadow->tree = popFrom; + + parseTree->flags &= ~PF_LEFT_IL_ATTACHED; + } + + if ( parseTree->leftIgnore != 0 ) { + assert( leftIgnore != 0 ); + + /* Transfer the trees to accumIgnore. */ + ParseTree *ignore = parseTree->leftIgnore; + parseTree->leftIgnore = 0; + + Kid *dataIgnore = leftIgnore->child; + leftIgnore->child = 0; + + ParseTree *last = 0; + Kid *dataLast = 0; + while ( ignore != 0 ) { + ParseTree *next = ignore->next; + Kid *dataNext = dataIgnore->next; /* dereferenced unchecked: needs one data kid per parse tree */ + + /* Put the data trees underneath the parse trees. */ + ignore->shadow = dataIgnore; + + /* Reverse. */ + ignore->next = last; + dataIgnore->next = dataLast; + + /* Keep last for reversal. */ + last = ignore; + dataLast = dataIgnore; + + ignore = next; + dataIgnore = dataNext; + } + + pdaRun->accumIgnore = last; + } + /* NOTE(review): leftIgnore is still 0 here when PF_LEFT_IL_ATTACHED was + * not set; this presumably relies on treeDownref accepting a null tree + * (detachRightIgnore only downrefs inside the non-null branch) -- confirm. */ + treeDownref( prg, sp, leftIgnore ); +} +/* Called by parseLoop once parseToken reports PcrDone: reports the parse + * error when pdaRun->parseError is set, otherwise sets pdaRun->stopParsing + * as soon as isParserStopFinished() holds. */ +void handleError( Program *prg, Tree **sp, PdaRun *pdaRun ) +{ + /* Check the result. */ + if ( pdaRun->parseError ) { + /* Error occurred in the top-level parser. */ + reportParseError( prg, sp, pdaRun ); + } + else { + if ( isParserStopFinished( pdaRun ) ) { + debug( REALM_PARSE, "stopping the parse\n" ); + pdaRun->stopParsing = true; + } + } +} +/* Turn the text just matched by the scanner into an ignore tree with + * language element id `id` and pass it to ignoreTree(). The match is taken + * out of inputStream by extractMatch(). sp is not referenced in the body. */ +void sendIgnore( Program *prg, Tree **sp, InputStream *inputStream, FsmRun *fsmRun, PdaRun *pdaRun, long id ) +{ + debug( REALM_PARSE, "ignoring: %s\n", prg->rtd->lelInfo[id].name ); + + /* Make the ignore string. */ + Head *ignoreStr = extractMatch( prg, fsmRun, inputStream ); + updatePosition( inputStream, fsmRun->tokstart, ignoreStr->length ); + + debug( REALM_PARSE, "ignoring: %.*s\n", ignoreStr->length, ignoreStr->data ); + /* The new tree starts with one reference and owns the match text. */ + Tree *tree = treeAllocate( prg ); + tree->refs = 1; + tree->id = id; + tree->tokdata = ignoreStr; + + /* Send it to the pdaRun. 
*/ + ignoreTree( prg, fsmRun, pdaRun, tree ); +} + + +/* Build a Head over the current match, the bytes from fsmRun->tokstart up to + * fsmRun->p, and stamp it with the stream's current line, column and byte. + * Doesn't consume. */ +Head *peekMatch( Program *prg, FsmRun *fsmRun, InputStream *inputStream ) +{ + long length = fsmRun->p - fsmRun->tokstart; + Head *head = stringAllocPointer( prg, fsmRun->tokstart, length ); + head->location = locationAllocate( prg ); + head->location->line = inputStream->line; + head->location->column = inputStream->column; + head->location->byte = inputStream->byte; + + debug( REALM_PARSE, "location byte: %d\n", inputStream->byte ); + + return head; +} + +/* Same Head as peekMatch, after which the matched bytes are removed from the + * stream with consumeData. Consumes. */ +Head *extractMatch( Program *prg, FsmRun *fsmRun, InputStream *inputStream ) +{ + /* Measure the match up front so the count handed to consumeData is the + * same span peekMatch wraps. */ + long length = fsmRun->p - fsmRun->tokstart; + Head *head = peekMatch( prg, fsmRun, inputStream ); + + consumeData( inputStream, length ); + + return head; +} + +/* Consume the current match and queue it as the parser's next input: the + * token Kid comes from makeTokenWithData, is wrapped in a new ParseTree and + * stored in pdaRun->parseInput; setRegion is then called when pdaRun->cs is + * not negative. sp is not referenced in the body. */ +static void sendToken( Program *prg, Tree **sp, InputStream *inputStream, FsmRun *fsmRun, PdaRun *pdaRun, long id ) +{ + /* Sampled before anything else runs; passed on to setRegion. */ + int emptyIgnore = pdaRun->accumIgnore == 0; + + /* Make the token data. */ + Head *tokdata = extractMatch( prg, fsmRun, inputStream ); + + debug( REALM_PARSE, "token: %s text: %.*s\n", + prg->rtd->lelInfo[id].name, + stringLength(tokdata), stringData(tokdata) ); + + updatePosition( inputStream, fsmRun->tokstart, tokdata->length ); + + Kid *input = makeTokenWithData( prg, pdaRun, fsmRun, inputStream, id, tokdata ); + + incrementSteps( pdaRun ); + + ParseTree *parseTree = parseTreeAllocate( prg ); + parseTree->id = input->tree->id; + parseTree->shadow = input; + + pdaRun->parseInput = parseTree; + + /* Store any alternate scanning region. 
*/ + if ( input != 0 && pdaRun->cs >= 0 ) /* NOTE(review): input was already dereferenced for parseTree->id, so the null test cannot be what guards this call */ + setRegion( pdaRun, emptyIgnore, parseTree ); +} +/* Take an already built tree from the input stream (consumeTree) and queue + * it as the parser's next input, flagged PF_ARTIFICIAL. sp and fsmRun are + * not referenced in the body. */ +static void sendTree( Program *prg, Tree **sp, PdaRun *pdaRun, FsmRun *fsmRun, InputStream *inputStream ) +{ + Kid *input = kidAllocate( prg ); + input->tree = consumeTree( inputStream ); + + incrementSteps( pdaRun ); + + ParseTree *parseTree = parseTreeAllocate( prg ); + parseTree->id = input->tree->id; + parseTree->flags |= PF_ARTIFICIAL; + parseTree->shadow = input; + + pdaRun->parseInput = parseTree; +} +/* Take a tree from the input stream and hand it to ignoreTree2. */ +static void sendIgnoreTree( Program *prg, Tree **sp, PdaRun *pdaRun, FsmRun *fsmRun, InputStream *inputStream ) +{ + Tree *tree = consumeTree( inputStream ); + ignoreTree2( prg, pdaRun, tree ); +} +/* Send the "collect ignore" token `id`. Follows the same steps as sendToken, + * except that the token data is a freshly allocated Head carrying only the + * current stream location: no match is extracted or consumed here. */ +static void sendCi( Program *prg, Tree **sp, InputStream *inputStream, FsmRun *fsmRun, PdaRun *pdaRun, int id ) +{ + debug( REALM_PARSE, "token: CI\n" ); + +/**/ + + int emptyIgnore = pdaRun->accumIgnore == 0; + + /* Make the token data. */ + Head *tokdata = headAllocate( prg ); + tokdata->location = locationAllocate( prg ); + tokdata->location->line = inputStream->line; + tokdata->location->column = inputStream->column; + tokdata->location->byte = inputStream->byte; + + debug( REALM_PARSE, "token: %s text: %.*s\n", + prg->rtd->lelInfo[id].name, + stringLength(tokdata), stringData(tokdata) ); + + updatePosition( inputStream, fsmRun->tokstart, tokdata->length ); + + Kid *input = makeTokenWithData( prg, pdaRun, fsmRun, inputStream, id, tokdata ); + + incrementSteps( pdaRun ); + + ParseTree *parseTree = parseTreeAllocate( prg ); + parseTree->id = input->tree->id; + parseTree->shadow = input; + + pdaRun->parseInput = parseTree; + + /* Store any alternate scanning region. 
*/ + if ( input != 0 && pdaRun->cs >= 0 ) /* NOTE(review): input was already dereferenced for parseTree->id, so the null test cannot be what guards this call */ + setRegion( pdaRun, emptyIgnore, parseTree ); +} + +/* Queue the EOF token of this parser (id taken from + * prg->rtd->eofLelIds[pdaRun->parserId]) as pdaRun->parseInput. Its Head + * carries only the current stream location. Also resets the scanner's + * region, pre-region and state from the parser. sp is not referenced. */ +static void sendEof( Program *prg, Tree **sp, InputStream *inputStream, FsmRun *fsmRun, PdaRun *pdaRun ) +{ + debug( REALM_PARSE, "token: _EOF\n" ); + + incrementSteps( pdaRun ); + + Head *head = headAllocate( prg ); + head->location = locationAllocate( prg ); + head->location->line = inputStream->line; + head->location->column = inputStream->column; + head->location->byte = inputStream->byte; + + Kid *input = kidAllocate( prg ); + input->tree = treeAllocate( prg ); + + input->tree->refs = 1; + input->tree->id = prg->rtd->eofLelIds[pdaRun->parserId]; + input->tree->tokdata = head; + + /* Set the state using the state of the parser. */ + fsmRun->region = pdaRunGetNextRegion( pdaRun, 0 ); + fsmRun->preRegion = pdaRunGetNextPreRegion( pdaRun ); + fsmRun->cs = fsmRun->tables->entryByRegion[fsmRun->region]; + + ParseTree *parseTree = parseTreeAllocate( prg ); + parseTree->id = input->tree->id; + parseTree->shadow = input; + + pdaRun->parseInput = parseTree; +} +/* Reset the scanner for the next token: clears act, tokstart, tokend and + * matchedToken, picks the region (and pre-region, if any) from the parser, + * sets the entry state accordingly and clears the mark array. */ +void newToken( Program *prg, PdaRun *pdaRun, FsmRun *fsmRun ) +{ + /* Init the scanner vars. */ + fsmRun->act = 0; + fsmRun->tokstart = 0; + fsmRun->tokend = 0; + fsmRun->matchedToken = 0; + + /* Set the state using the state of the parser. + * NOTE(review): the test below is preRegion > 0 while parseLoop tests + * preRegion >= 0 before switching to ncs -- confirm 0 is never a valid + * pre-region, otherwise ncs is read there without being set here. */ + fsmRun->region = pdaRunGetNextRegion( pdaRun, 0 ); + fsmRun->preRegion = pdaRunGetNextPreRegion( pdaRun ); + if ( fsmRun->preRegion > 0 ) { + debug( REALM_PARSE, "pre region for next token: %s\n", + prg->rtd->regionInfo[fsmRun->preRegion].name ); + fsmRun->cs = fsmRun->tables->entryByRegion[fsmRun->preRegion]; + fsmRun->ncs = fsmRun->tables->entryByRegion[fsmRun->region]; /* main-region entry, used after the pre-region */ + } + else { + debug( REALM_PARSE, "scanning using token region: %s\n", + prg->rtd->regionInfo[fsmRun->region].name ); + + fsmRun->cs = fsmRun->tables->entryByRegion[fsmRun->region]; + } + + + /* Clear the mark array. 
*/ + memset( fsmRun->mark, 0, sizeof(fsmRun->mark) ); +} +/* Push a backtrack point onto pdaRun->btPoint. The tree recorded is the one + * shadowed by the head of accumIgnore when there is one, otherwise the tree + * of the head of tokenList. It is upreffed and linked at the front of the + * list. Nothing is pushed when neither list has an entry. */ +static void pushBtPoint( Program *prg, PdaRun *pdaRun ) +{ + Tree *tree = 0; + if ( pdaRun->accumIgnore != 0 ) + tree = pdaRun->accumIgnore->shadow->tree; + else if ( pdaRun->tokenList != 0 ) + tree = pdaRun->tokenList->kid->tree; + + if ( tree != 0 ) { + debug( REALM_PARSE, "pushing bt point with location byte %d\n", + ( tree != 0 && tree->tokdata != 0 && tree->tokdata->location != 0 ) ? + tree->tokdata->location->byte : 0 ); + + Kid *kid = kidAllocate( prg ); + kid->tree = tree; + treeUpref( tree ); /* the bt point holds its own reference; clearPdaRun downrefs it */ + kid->next = pdaRun->btPoint; + pdaRun->btPoint = kid; + } +} + +/* Non-token results of scanToken. Values >= 0 returned by scanToken are + * language element ids. */ +#define SCAN_UNDO -7 +#define SCAN_IGNORE -6 +#define SCAN_TREE -5 +#define SCAN_TRY_AGAIN_LATER -4 +#define SCAN_ERROR -3 +#define SCAN_LANG_EL -2 +#define SCAN_EOF -1 +/* Run the scanner until it has something for the parser. Returns the id of + * the matched (or default) token, or one of the SCAN_* codes above. + * Returns SCAN_UNDO immediately when pdaRun->triggerUndo is set. */ +long scanToken( Program *prg, PdaRun *pdaRun, FsmRun *fsmRun, InputStream *inputStream ) +{ + if ( pdaRun->triggerUndo ) + return SCAN_UNDO; + + while ( true ) { + fsmExecute( fsmRun, inputStream ); + + /* First check if scanning stopped because we have a token. */ + if ( fsmRun->matchedToken > 0 ) { + /* If the token has a marker indicating the end (due to trailing + * context) then adjust data now. */ + LangElInfo *lelInfo = prg->rtd->lelInfo; + if ( lelInfo[fsmRun->matchedToken].markId >= 0 ) + fsmRun->p = fsmRun->mark[lelInfo[fsmRun->matchedToken].markId]; + + return fsmRun->matchedToken; + } + + /* Check for error. */ + if ( fsmRun->cs == fsmRun->tables->errorState ) { + /* If a token was started, but not finished (tokstart != 0) then + * restore data to the beginning of that token. */ + if ( fsmRun->tokstart != 0 ) + fsmRun->p = fsmRun->tokstart; + + /* Check for a default token in the region. If one is there + * then send it and continue with the processing loop. 
*/ + if ( prg->rtd->regionInfo[fsmRun->region].defaultToken >= 0 ) { + fsmRun->tokstart = fsmRun->tokend = fsmRun->p; /* the default token is an empty match at p */ + return prg->rtd->regionInfo[fsmRun->region].defaultToken; + } + + return SCAN_ERROR; + } + + /* Got here because the state machine didn't match a token or + * encounter an error. Must be because we got to the end of the buffer + * data. */ + assert( fsmRun->p == fsmRun->pe ); + + /* There may be space left in the current buffer. If not then we need + * to make some. */ + long space = fsmRun->runBuf->data + FSM_BUFSIZE - fsmRun->pe; + if ( space == 0 ) { + /* Create a new run buf. */ + RunBuf *newBuf = newRunBuf(); + + /* If partway through a token then preserve the prefix. */ + long have = 0; + + if ( fsmRun->tokstart == 0 ) { + /* No prefix. We filled the previous buffer. */ + fsmRun->runBuf->length = FSM_BUFSIZE; + } + else { + int i; + + debug( REALM_SCAN, "copying data over to new buffer\n" ); + assert( fsmRun->runBuf->offset == 0 ); + + if ( fsmRun->tokstart == fsmRun->runBuf->data ) { + /* A token is started and it is already at the beginning + * of the current buffer. This means buffer is full and it + * must be grown. Probably need to do this sooner. */ + fatal( "OUT OF BUFFER SPACE\n" ); /* NOTE(review): the code below relies on fatal() not returning -- confirm */ + } + + /* There is data that needs to be shifted over. */ + have = fsmRun->pe - fsmRun->tokstart; + memcpy( newBuf->data, fsmRun->tokstart, have ); + + /* Compute the length of the previous buffer. */ + fsmRun->runBuf->length = FSM_BUFSIZE - have; + + /* Compute tokstart and tokend. dist is the byte offset from the + * start of the new buffer to the old token start; tokend and the + * marks are rebased by it. NOTE(review): the two pointers + * presumably belong to different allocations, for which ISO C + * leaves pointer subtraction undefined -- confirm this is + * acceptable on the supported targets. */ + long dist = fsmRun->tokstart - newBuf->data; + + fsmRun->tokend -= dist; + fsmRun->tokstart = newBuf->data; + + /* Shift any markers. */ + for ( i = 0; i < MARK_SLOTS; i++ ) { + if ( fsmRun->mark[i] != 0 ) + fsmRun->mark[i] -= dist; + } + } + /* Scanning resumes right after the preserved prefix, if any. */ + fsmRun->p = fsmRun->pe = newBuf->data + have; + fsmRun->peof = 0; + /* The new buffer goes at the head of the run buffer list. */ + newBuf->next = fsmRun->runBuf; + fsmRun->runBuf = newBuf; + } + + /* We don't have any data. What is next in the input inputStream? 
*/ + space = fsmRun->runBuf->data + FSM_BUFSIZE - fsmRun->pe; + assert( space > 0 ); + + /* Get more data. have is the length of the partial token already + * scanned, zero when no token has been started. */ + int have = fsmRun->tokstart != 0 ? fsmRun->p - fsmRun->tokstart : 0; + int len = 0; + /* space is a long, so it needs the l length modifier. */ + debug( REALM_SCAN, "fetching data: have: %d space: %ld\n", have, space ); + int type = getData( fsmRun, inputStream, have, fsmRun->p, space, &len ); + + /* For every non-data result: if a token is in progress, mark the end of + * input (peof) and loop so the scanner can finish it; otherwise report + * the result to the caller. */ + switch ( type ) { + case INPUT_DATA: + fsmRun->pe = fsmRun->p + len; + break; + + case INPUT_EOF: + if ( fsmRun->tokstart != 0 ) + fsmRun->peof = fsmRun->pe; + else + return SCAN_EOF; + break; + + case INPUT_EOD: + return SCAN_TRY_AGAIN_LATER; + + case INPUT_LANG_EL: + if ( fsmRun->tokstart != 0 ) + fsmRun->peof = fsmRun->pe; + else + return SCAN_LANG_EL; + break; + + case INPUT_TREE: + if ( fsmRun->tokstart != 0 ) + fsmRun->peof = fsmRun->pe; + else + return SCAN_TREE; + break; + case INPUT_IGNORE: + if ( fsmRun->tokstart != 0 ) + fsmRun->peof = fsmRun->pe; + else + return SCAN_IGNORE; + break; + } + } + + /* Should not be reached. */ + return SCAN_ERROR; +} + +/* + * Scan tokens and feed them to parseToken until a stop condition holds. + * The function is resumable: entry selects the case label to continue + * from, and the labels sit inside the loop body. A return value other than + * PcrDone is to be passed back as entry on the next call; presumably the + * caller runs pdaRun->code in between -- confirm against the caller. + * + * Stops on: + * PcrPreEof + * PcrGeneration + * PcrReduction + * PcrRevReduction + * PcrRevIgnore + * PcrRevToken + * + * NOTE(review): the values returned below are PcrPreEof, PcrGeneration, + * whatever parseToken hands back, and PcrDone; the PcrRev* names do not + * appear in this function -- confirm and refresh the list above. + */ + +long parseLoop( Program *prg, Tree **sp, PdaRun *pdaRun, + FsmRun *fsmRun, InputStream *inputStream, long entry ) +{ + LangElInfo *lelInfo = prg->rtd->lelInfo; + +switch ( entry ) { +case PcrStart: + + pdaRun->stop = false; + + while ( true ) { + debug( REALM_PARSE, "parse loop start %d:%d\n", inputStream->line, inputStream->column ); + + /* Pull the current scanner from the parser. This can change during + * parsing due to inputStream pushes, usually for the purpose of includes. 
+ * */ + pdaRun->tokenId = scanToken( prg, pdaRun, fsmRun, inputStream ); + /* A scan error while in a pre-region is not an error yet: switch to + * the main region's entry state (ncs) and scan again. */ + if ( pdaRun->tokenId == SCAN_ERROR ) { + if ( fsmRun->preRegion >= 0 ) { + fsmRun->preRegion = -1; + fsmRun->cs = fsmRun->ncs; + debug( REALM_PARSE, "moving from pre region to main region: %s\n", + prg->rtd->regionInfo[fsmRun->region].name ); + continue; + } + } + + if ( pdaRun->tokenId == SCAN_ERROR && + ( prg->rtd->regionInfo[fsmRun->region].ciLelId > 0 ) ) + { + debug( REALM_PARSE, "sending a collect ignore\n" ); + sendCi( prg, sp, inputStream, fsmRun, pdaRun, prg->rtd->regionInfo[fsmRun->region].ciLelId ); + goto yes; /* skips the parseInput assert and the token dispatch below */ + } + + if ( pdaRun->tokenId == SCAN_TRY_AGAIN_LATER ) { + debug( REALM_PARSE, "scanner says try again later\n" ); + break; + } + + assert( pdaRun->parseInput == 0 ); + pdaRun->parseInput = 0; + + /* Check for EOF. */ + if ( pdaRun->tokenId == SCAN_EOF ) { + inputStream->eofSent = true; + sendEof( prg, sp, inputStream, fsmRun, pdaRun ); + + pdaRun->frameId = prg->rtd->regionInfo[fsmRun->region].eofFrameId; + + if ( prg->ctxDepParsing && pdaRun->frameId >= 0 ) { + debug( REALM_PARSE, "HAVE PRE_EOF BLOCK\n" ); + + pdaRun->fi = &prg->rtd->frameInfo[pdaRun->frameId]; + pdaRun->code = pdaRun->fi->codeWV; + +return PcrPreEof; +case PcrPreEof: + makeReverseCode( pdaRun ); + } + } + else if ( pdaRun->tokenId == SCAN_UNDO ) { + /* Fall through with parseInput = 0. FIXME: Do we need to send back ignore? */ + debug( REALM_PARSE, "invoking undo from the scanner\n" ); + } + else if ( pdaRun->tokenId == SCAN_ERROR ) { + /* Scanner error, maybe retry. */ + if ( pdaRun->accumIgnore == 0 && pdaRunGetNextRegion( pdaRun, 1 ) != 0 ) { + debug( REALM_PARSE, "scanner failed, trying next region\n" ); + + pdaRun->nextRegionInd += 1; + goto skipSend; + } + else if ( pdaRun->numRetry > 0 ) { + debug( REALM_PARSE, "invoking parse error from the scanner\n" ); + + /* Fall through to send null (error). 
*/ + pushBtPoint( prg, pdaRun ); + } + else { + debug( REALM_PARSE, "no alternate scanning regions\n" ); + + /* There are no alternative scanning regions to try, nor are + * there any alternatives stored in the current parse tree. No + * choice but to end the parse. */ + pushBtPoint( prg, pdaRun ); + + reportParseError( prg, sp, pdaRun ); + pdaRun->parseError = 1; + goto skipSend; + } + } + else if ( pdaRun->tokenId == SCAN_LANG_EL ) { + debug( REALM_PARSE, "sending an named lang el\n" ); + + /* A named language element (parsing colm program). */ + sendNamedLangEl( prg, sp, pdaRun, fsmRun, inputStream ); + } + else if ( pdaRun->tokenId == SCAN_TREE ) { + debug( REALM_PARSE, "sending a tree\n" ); + + /* A tree already built. */ + sendTree( prg, sp, pdaRun, fsmRun, inputStream ); + } + else if ( pdaRun->tokenId == SCAN_IGNORE ) { + debug( REALM_PARSE, "sending an ignore token\n" ); + + /* A tree to ignore. */ + sendIgnoreTree( prg, sp, pdaRun, fsmRun, inputStream ); + goto skipSend; + } + else if ( prg->ctxDepParsing && lelInfo[pdaRun->tokenId].frameId >= 0 ) { + /* Has a generation action. */ + debug( REALM_PARSE, "token gen action: %s\n", + prg->rtd->lelInfo[pdaRun->tokenId].name ); + + /* Make the token data. */ + pdaRun->tokdata = peekMatch( prg, fsmRun, inputStream ); + + /* Note that we don't update the position now. It is done when the token + * data is pulled from the inputStream. */ + /* Rewind the scanner to the start of the match; nothing was consumed. */ + fsmRun->p = fsmRun->tokstart; + fsmRun->tokstart = 0; + + pdaRun->fi = &prg->rtd->frameInfo[prg->rtd->lelInfo[pdaRun->tokenId].frameId]; + pdaRun->frameId = prg->rtd->lelInfo[pdaRun->tokenId].frameId; + pdaRun->code = pdaRun->fi->codeWV; + +return PcrGeneration; +case PcrGeneration: + + makeReverseCode( pdaRun ); + + /* Finished with the match text. 
*/ + stringFree( prg, pdaRun->tokdata ); + + goto skipSend; + } + else if ( lelInfo[pdaRun->tokenId].ignore ) { + debug( REALM_PARSE, "sending an ignore token: %s\n", + prg->rtd->lelInfo[pdaRun->tokenId].name ); + + /* Is an ignore token. */ + sendIgnore( prg, sp, inputStream, fsmRun, pdaRun, pdaRun->tokenId ); + goto skipSend; + } + else { + debug( REALM_PARSE, "sending an a plain old token: %s\n", + prg->rtd->lelInfo[pdaRun->tokenId].name ); + + /* Is a plain token. */ + sendToken( prg, sp, inputStream, fsmRun, pdaRun, pdaRun->tokenId ); + } +yes: + /* parseInput is still 0 on the undo and scanner-error paths. */ + if ( pdaRun->parseInput != 0 ) + transferReverseCode( pdaRun, pdaRun->parseInput ); + + if ( pdaRun->parseInput != 0 ) { + /* If it's a nonterminal with a termdup then flip the parse tree to the terminal. */ + if ( pdaRun->parseInput->id >= prg->rtd->firstNonTermId ) { + pdaRun->parseInput->id = prg->rtd->lelInfo[pdaRun->parseInput->id].termDupId; + pdaRun->parseInput->flags |= PF_TERM_DUP; + } + } + + long pcr = parseToken( prg, sp, pdaRun, fsmRun, inputStream, PcrStart ); + /* Each non-PcrDone result is returned to the caller; on re-entry + * parseToken is resumed with the entry value passed in. */ + while ( pcr != PcrDone ) { + +return pcr; +case PcrReduction: +case PcrReverse: + + pcr = parseToken( prg, sp, pdaRun, fsmRun, inputStream, entry ); + } + + assert( pcr == PcrDone ); + + handleError( prg, sp, pdaRun ); + +skipSend: + newToken( prg, pdaRun, fsmRun ); + + /* Various stop conditions. This should all be coverned by one test + * eventually. 
*/ + + if ( pdaRun->triggerUndo ) { + debug( REALM_PARSE, "parsing stopped by triggerUndo\n" ); + break; + } + + if ( inputStream->eofSent ) { + debug( REALM_PARSE, "parsing stopped by EOF\n" ); + break; + } + + if ( pdaRun->stopParsing ) { + debug( REALM_PARSE, "scanner has been stopped\n" ); + break; + } + + if ( pdaRun->stop ) { + debug( REALM_PARSE, "parsing has been stopped by consumedCount\n" ); + break; + } + + if ( prg->induceExit ) { + debug( REALM_PARSE, "parsing has been stopped by a call to exit\n" ); + break; + } + + if ( pdaRun->parseError ) { + debug( REALM_PARSE, "parsing stopped by a parse error\n" ); + break; + } + } + +case PcrDone: +break; } + + return PcrDone; +} + +/* Region id stored at tokenRegions[nextRegionInd + offset]. Offset can be + * used to look at the next nextRegionInd. No bounds check is made. */ +int pdaRunGetNextRegion( PdaRun *pdaRun, int offset ) +{ + return pdaRun->tables->tokenRegions[pdaRun->nextRegionInd+offset]; +} +/* Pre-region id stored at tokenPreRegions[nextRegionInd]. */ +int pdaRunGetNextPreRegion( PdaRun *pdaRun ) +{ + return pdaRun->tables->tokenPreRegions[pdaRun->nextRegionInd]; +} +/* Returns 0 after a parse error. Otherwise returns the data tree shadowed by + * the stack top (stop non-zero) or by the element beneath it (stop zero), or + * 0 when that element has no shadow. stackTop->next is dereferenced without + * a null check in the stop == 0 case. */ +Tree *getParsedRoot( PdaRun *pdaRun, int stop ) +{ + if ( pdaRun->parseError ) + return 0; + else if ( stop ) { + if ( pdaRun->stackTop->shadow != 0 ) + return pdaRun->stackTop->shadow->tree; + } + else { + if ( pdaRun->stackTop->next->shadow != 0 ) + return pdaRun->stackTop->next->shadow->tree; + } + return 0; +} +/* Free the list of parse trees starting at parseTree (following next), + * recursing into child, leftIgnore and rightIgnore. A non-null shadow has + * its tree downreffed and its kid freed. Accepts a null list. */ +void clearParseTree( Program *prg, Tree **sp, ParseTree *parseTree ) +{ + /* Traverse the stack downreffing. 
*/ + ParseTree *pt = parseTree; + while ( pt != 0 ) { + ParseTree *next = pt->next; /* saved first: pt is freed at the end of the iteration */ + if ( pt->shadow != 0 ) { + treeDownref( prg, sp, pt->shadow->tree ); + kidFree( prg, pt->shadow ); + } + if ( pt->child != 0 ) + clearParseTree( prg, sp, pt->child ); + if ( pt->leftIgnore != 0 ) + clearParseTree( prg, sp, pt->leftIgnore ); + if ( pt->rightIgnore != 0 ) + clearParseTree( prg, sp, pt->rightIgnore ); + parseTreeFree( prg, pt ); + pt = next; + } +} +/* Release what the PdaRun holds: the parse stack, the token list cells, the + * backtrack points, any accumulated ignores, the context tree and the + * reverse code vectors. The PdaRun structure itself is not freed here. */ +void clearPdaRun( Program *prg, Tree **sp, PdaRun *pdaRun ) +{ + /* Remaining stack and parse trees underneath. */ + clearParseTree( prg, sp, pdaRun->stackTop ); + pdaRun->stackTop = 0; + + /* Free the token list cells. Only the Ref cells are released (through + * kidFree); the kids they point at are not touched here. */ + Ref *ref = pdaRun->tokenList; + while ( ref != 0 ) { + Ref *next = ref->next; + kidFree( prg, (Kid*)ref ); + ref = next; + } + pdaRun->tokenList = 0; + + /* Traverse the btPoint list, downreffing each tree and freeing its kid. */ + Kid *btp = pdaRun->btPoint; + while ( btp != 0 ) { + Kid *next = btp->next; + treeDownref( prg, sp, btp->tree ); + kidFree( prg, (Kid*)btp ); + btp = next; + } + pdaRun->btPoint = 0; + + /* Clear out any remaining ignores. 
*/ + clearParseTree( prg, sp, pdaRun->accumIgnore ); + pdaRun->accumIgnore = 0; + + if ( pdaRun->context != 0 ) + treeDownref( prg, sp, pdaRun->context ); + /* Downref what the reverse code references, then empty both vectors. */ + rcodeDownrefAll( prg, sp, &pdaRun->reverseCode ); + rtCodeVectEmpty( &pdaRun->reverseCode ); + rtCodeVectEmpty( &pdaRun->rcodeCollect ); +} +/* Non-zero when the parse stack holds exactly two elements (stackTop and + * one beneath it) and the top element's id equals pdaRun->stopTarget. */ +int isParserStopFinished( PdaRun *pdaRun ) +{ + int done = + pdaRun->stackTop->next != 0 && + pdaRun->stackTop->next->next == 0 && + pdaRun->stackTop->id == pdaRun->stopTarget; + return done; +} +/* Zero the PdaRun and set it up for parser parserId using tables: the start + * state comes from prg->rtd->startStates, the stack gets a single sentinel + * element with state -1, and context is stored after passing through + * splitTree. fsmRun is not referenced in the body. */ +void initPdaRun( PdaRun *pdaRun, Program *prg, PdaTables *tables, + FsmRun *fsmRun, int parserId, long stopTarget, int revertOn, Tree *context ) +{ + memset( pdaRun, 0, sizeof(PdaRun) ); + pdaRun->tables = tables; + pdaRun->parserId = parserId; + pdaRun->stopTarget = stopTarget; + pdaRun->revertOn = revertOn; + pdaRun->targetSteps = -1; + + debug( REALM_PARSE, "initializing PdaRun\n" ); + + /* FIXME: need the right one here. */ + pdaRun->cs = prg->rtd->startStates[pdaRun->parserId]; + /* Data tree for the bottom-of-stack sentinel; it starts with one reference. */ + Kid *sentinal = kidAllocate( prg ); + sentinal->tree = treeAllocate( prg ); + sentinal->tree->refs = 1; + + /* Init the element allocation variables. 
*/ + pdaRun->stackTop = parseTreeAllocate( prg ); + pdaRun->stackTop->state = -1; + pdaRun->stackTop->shadow = sentinal; + /* The assignments of 0/false below repeat what the memset at the top of + * the function already did. */ + pdaRun->numRetry = 0; + pdaRun->nextRegionInd = pdaRun->tables->tokenRegionInds[pdaRun->cs]; + pdaRun->stopParsing = false; + pdaRun->accumIgnore = 0; + pdaRun->btPoint = 0; + pdaRun->checkNext = false; + pdaRun->checkStop = false; + + initBindings( pdaRun ); + + initRtCodeVect( &pdaRun->reverseCode ); + initRtCodeVect( &pdaRun->rcodeCollect ); + + pdaRun->context = splitTree( prg, context ); + pdaRun->parseError = 0; + pdaRun->parseInput = 0; + pdaRun->triggerUndo = 0; + + pdaRun->tokenId = 0; + + pdaRun->onDeck = false; + pdaRun->parsed = 0; + pdaRun->reject = false; + + pdaRun->rcBlockCount = 0; +} +/* Parser state implied by the element on top of the stack: the parser's + * start state when the top has a negative state (as the sentinel set up by + * initPdaRun does), otherwise the transition target looked up in the tables + * for the top's recorded state and id. No key-range or owner check is made + * on the lookup. */ +long stackTopTarget( Program *prg, PdaRun *pdaRun ) +{ + long state; + if ( pdaRun->stackTop->state < 0 ) + state = prg->rtd->startStates[pdaRun->parserId]; + else { + state = pdaRun->tables->targs[(int)pdaRun->tables->indicies[pdaRun->tables->offsets[ + pdaRun->stackTop->state] + + (pdaRun->stackTop->id - pdaRun->tables->keys[pdaRun->stackTop->state<<1])]]; + } + return state; +} + +/* + * Local commit: + * -clears reparse flags underneath + * -must be possible to backtrack after + * Global commit (revertOn) + * -clears all reparse flags + * -must be possible to backtrack after + * Global commit (!revertOn) + * -clears all reparse flags + * -clears all 'parsed' reverse code + * -clears all reverse code + * -clears all alg structures + */ +/* Non-zero when PF_COMMITTED is set. The masked flag bits are returned as + * they are, not normalized to 0/1. */ +int beenCommitted( ParseTree *parseTree ) +{ + return parseTree->flags & PF_COMMITTED; +} +/* Step backwards over one reverse code block: the length word stored just + * before rcode is read, and the returned pointer is moved back by + * SIZEOF_WORD plus that length. Assumes read_word_p does not advance its + * pointer argument -- TODO confirm against the macro definition. */ +Code *backupOverRcode( Code *rcode ) +{ + Word len; + rcode -= SIZEOF_WORD; + read_word_p( len, rcode ); + rcode -= len; + return rcode; +} + +/* The top level of the stack is linked right-to-left. Trees underneath are + * linked left-to-right. 
*/ +void commitKid( Program *prg, PdaRun *pdaRun, Tree **root, ParseTree *lel, Code **rcode, long *causeReduce ) +{ + ParseTree *tree = 0; + Tree **sp = root; /* the traversal is finished once sp is back at root */ + //Tree *restore = 0; + +head: + /* Commit */ + debug( REALM_PARSE, "commit: visiting %s\n", + prg->rtd->lelInfo[lel->id].name ); + + /* Load up the parsed tree. */ + tree = lel; + + /* Check for reverse code. */ + //restore = 0; + if ( tree->flags & PF_HAS_RCODE ) { + /* If tree caused some reductions, now is not the right time to backup + * over the reverse code. We need to backup over the reductions first. Store + * the count of the reductions and do it when the count drops to zero. */ + if ( tree->causeReduce > 0 ) { + /* The top reduce block does not correspond to this alg. */ + debug( REALM_PARSE, "commit: causeReduce found, delaying backup: %ld\n", + (long)tree->causeReduce ); + *causeReduce = tree->causeReduce; + } + else { + *rcode = backupOverRcode( *rcode ); + + //if ( **rcode == IN_RESTORE_LHS ) { + // debug( REALM_PARSE, "commit: has restore_lhs\n" ); + // read_tree_p( restore, (*rcode+1) ); + //} + } + } + + //FIXME: what was this about? + //if ( restore != 0 ) + // tree = restore; + + /* All the parse algorithm data except for the RCODE flag is in the + * original. That is why we restore first, then we can clear the retry + * values. */ + + /* Check causeReduce, might be time to backup over the reverse code + * belonging to a nonterminal that caused previous reductions. Only + * nonterminals that are not terminal dups count towards it. */ + if ( *causeReduce > 0 && + tree->id >= prg->rtd->firstNonTermId && + !(tree->flags & PF_TERM_DUP) ) + { + *causeReduce -= 1; + + if ( *causeReduce == 0 ) { + debug( REALM_PARSE, "commit: causeReduce dropped to zero, backing up over rcode\n" ); + + /* Cause reduce just dropped down to zero. */ + *rcode = backupOverRcode( *rcode ); + } + } + + ///* FIXME: why was this here? + // * Reset retries. 
*/ + //if ( tree->flags & AF_PARSED ) { + // if ( tree->retryLower > 0 ) { + // pdaRun->numRetry -= 1; + // tree->retryLower = 0; + // } + // if ( tree->retryUpper > 0 ) { + // pdaRun->numRetry -= 1; + // tree->retryUpper = 0; + // } + //} + + tree->flags |= PF_COMMITTED; + + /* Do not recurse on trees that are terminal dups, named or artificial, + * or that have no children. */ + if ( !(tree->flags & PF_TERM_DUP) && + !(tree->flags & PF_NAMED) && + !(tree->flags & PF_ARTIFICIAL) && + tree->child != 0 ) + { + /* Push the parent, then every child in list order; the last child + * ends up on top and is visited first. */ + vm_push( (Tree*)lel ); + lel = tree->child; + + if ( lel != 0 ) { + while ( lel != 0 ) { + vm_push( (Tree*)lel ); + lel = lel->next; + } + } + } + +backup: + if ( sp != root ) { + ParseTree *next = (ParseTree*)vm_pop(); + if ( next->next == lel ) { + /* Moving backwards. */ + lel = next; + + if ( !beenCommitted( lel ) ) + goto head; + } + else { + /* Moving upwards. */ + lel = next; + } + + goto backup; + } + + pdaRun->numRetry = 0; + assert( sp == root ); +} + +/* Commit the whole parse stack: walks from pdaRun->stackTop along next, + * calling commitKid on each element until one that is already committed + * (or the end of the stack) is reached. rcode starts at the end of the + * reverse code vector and is moved backwards by commitKid; causeReduce is + * handed to commitKid by address, so its count carries over from one stack + * element to the next. The reverse code is downreffed afterwards only when + * pdaRun->revertOn is not set. */ +void commitFull( Program *prg, Tree **sp, PdaRun *pdaRun, long causeReduce ) +{ + /* Terminated with a newline like every other debug message in this file. */ + debug( REALM_PARSE, "running full commit\n" ); + + ParseTree *parseTree = pdaRun->stackTop; + Code *rcode = pdaRun->reverseCode.data + pdaRun->reverseCode.tabLen; + + /* The top level of the stack is linked right to left. This is the + * traversal order we need for committing. */ + while ( parseTree != 0 && !beenCommitted( parseTree ) ) { + commitKid( prg, pdaRun, sp, parseTree, &rcode, &causeReduce ); + parseTree = parseTree->next; + } + + /* We cannot always clear all the rcode here. We may need to backup over + * the parse statement. We depend on the context flag. */ + if ( !pdaRun->revertOn ) + rcodeDownrefAll( prg, sp, &pdaRun->reverseCode ); +} + +/* + * shift: retry goes into lower of shifted node. + * reduce: retry goes into upper of reduced node. 
+ * shift-reduce: cannot be a retry + */ + +/* Stops on: + * PcrReduction + * PcrRevToken + * PcrRevReduction + */ +long parseToken( Program *prg, Tree **sp, PdaRun *pdaRun, + FsmRun *fsmRun, InputStream *inputStream, long entry ) +{ + int pos; + unsigned int *action; + int rhsLen; + int owner; + int induceReject; + int indPos; + //LangElInfo *lelInfo = prg->rtd->lelInfo; + +switch ( entry ) { +case PcrStart: + + /* The scanner will send a null token if it can't find a token. */ + if ( pdaRun->parseInput == 0 ) + goto parseError; + + /* This will cause parseInput to be lost. This + * path should be traced. */ + if ( pdaRun->cs < 0 ) + return PcrDone; + + /* Record the state in the parse tree. */ + pdaRun->parseInput->state = pdaRun->cs; + +again: + if ( pdaRun->parseInput == 0 ) + goto _out; + + pdaRun->lel = pdaRun->parseInput; + pdaRun->curState = pdaRun->cs; + + if ( pdaRun->lel->id < pdaRun->tables->keys[pdaRun->curState<<1] || + pdaRun->lel->id > pdaRun->tables->keys[(pdaRun->curState<<1)+1] ) { + debug( REALM_PARSE, "parse error, no transition 1\n" ); + pushBtPoint( prg, pdaRun ); + goto parseError; + } + + indPos = pdaRun->tables->offsets[pdaRun->curState] + + (pdaRun->lel->id - pdaRun->tables->keys[pdaRun->curState<<1]); + + owner = pdaRun->tables->owners[indPos]; + if ( owner != pdaRun->curState ) { + debug( REALM_PARSE, "parse error, no transition 2\n" ); + pushBtPoint( prg, pdaRun ); + goto parseError; + } + + pos = pdaRun->tables->indicies[indPos]; + if ( pos < 0 ) { + debug( REALM_PARSE, "parse error, no transition 3\n" ); + pushBtPoint( prg, pdaRun ); + goto parseError; + } + + /* Checking complete. */ + + induceReject = false; + pdaRun->cs = pdaRun->tables->targs[pos]; + action = pdaRun->tables->actions + pdaRun->tables->actInds[pos]; + if ( pdaRun->lel->retryLower ) + action += pdaRun->lel->retryLower; + + /* + * Shift + */ + + if ( *action & act_sb ) { + debug( REALM_PARSE, "shifted: %s\n", + prg->rtd->lelInfo[pdaRun->lel->id].name ); + /* Consume. 
*/ + pdaRun->parseInput = pdaRun->parseInput->next; + + pdaRun->lel->state = pdaRun->curState; + + /* If its a token then attach ignores and record it in the token list + * of the next ignore attachment to use. */ + if ( pdaRun->lel->id < prg->rtd->firstNonTermId ) { + if ( pdaRun->lel->causeReduce == 0 ) + attachRightIgnore( prg, sp, pdaRun, pdaRun->stackTop ); + } + + pdaRun->lel->next = pdaRun->stackTop; + pdaRun->stackTop = pdaRun->lel; + + /* If its a token then attach ignores and record it in the token list + * of the next ignore attachment to use. */ + if ( pdaRun->lel->id < prg->rtd->firstNonTermId ) { + attachLeftIgnore( prg, sp, pdaRun, pdaRun->lel ); + + Ref *ref = (Ref*)kidAllocate( prg ); + ref->kid = pdaRun->lel->shadow; + //treeUpref( pdaRun->tree ); + ref->next = pdaRun->tokenList; + pdaRun->tokenList = ref; + } + + if ( action[1] == 0 ) + pdaRun->lel->retryLower = 0; + else { + debug( REALM_PARSE, "retry: %p\n", pdaRun->stackTop ); + pdaRun->lel->retryLower += 1; + assert( pdaRun->lel->retryUpper == 0 ); + /* FIXME: Has the retry already been counted? */ + pdaRun->numRetry += 1; + } + } + + /* + * Commit + */ + + if ( pdaRun->tables->commitLen[pos] != 0 ) { + long causeReduce = 0; + if ( pdaRun->parseInput != 0 ) { + if ( pdaRun->parseInput->flags & PF_HAS_RCODE ) + causeReduce = pdaRun->parseInput->causeReduce; + } + commitFull( prg, sp, pdaRun, causeReduce ); + } + + /* + * Reduce + */ + + if ( *action & act_rb ) { + int r, objectLength; + ParseTree *last, *child; + Kid *attrs; + Kid *dataLast, *dataChild; + + /* If there was shift don't attach again. 
*/ + if ( !( *action & act_sb ) && pdaRun->lel->id < prg->rtd->firstNonTermId ) + attachRightIgnore( prg, sp, pdaRun, pdaRun->stackTop ); + + pdaRun->reduction = *action >> 2; + + if ( pdaRun->parseInput != 0 ) + pdaRun->parseInput->causeReduce += 1; + + Kid *value = kidAllocate( prg ); + value->tree = treeAllocate( prg ); + value->tree->refs = 1; + value->tree->id = prg->rtd->prodInfo[pdaRun->reduction].lhsId; + value->tree->prodNum = prg->rtd->prodInfo[pdaRun->reduction].prodNum; + + pdaRun->redLel = parseTreeAllocate( prg ); + pdaRun->redLel->id = prg->rtd->prodInfo[pdaRun->reduction].lhsId; + pdaRun->redLel->next = 0; + pdaRun->redLel->causeReduce = 0; + pdaRun->redLel->retryLower = 0; + pdaRun->redLel->shadow = value; + + /* Transfer. */ + pdaRun->redLel->retryUpper = pdaRun->lel->retryLower; + pdaRun->lel->retryLower = 0; + + /* Allocate the attributes. */ + objectLength = prg->rtd->lelInfo[pdaRun->redLel->id].objectLength; + attrs = allocAttrs( prg, objectLength ); + + /* Build the list of children. We will be giving up a reference when we + * detach parse tree and data tree, but gaining the reference when we + * put the children under the new data tree. No need to alter refcounts + * here. */ + rhsLen = prg->rtd->prodInfo[pdaRun->reduction].length; + child = last = 0; + dataChild = dataLast = 0; + for ( r = 0; r < rhsLen; r++ ) { + + /* The child. */ + child = pdaRun->stackTop; + dataChild = child->shadow; + + /* Pop. */ + pdaRun->stackTop = pdaRun->stackTop->next; + + /* Detach the parse tree from the data. */ + child->shadow = 0; + + /* Reverse list. */ + child->next = last; + dataChild->next = dataLast; + + /* Track last for reversal. 
*/ + last = child; + dataLast = dataChild; + } + + pdaRun->redLel->child = child; + pdaRun->redLel->shadow->tree->child = kidListConcat( attrs, dataChild ); + + debug( REALM_PARSE, "reduced: %s rhsLen %d\n", + prg->rtd->prodInfo[pdaRun->reduction].name, rhsLen ); + if ( action[1] == 0 ) + pdaRun->redLel->retryUpper = 0; + else { + pdaRun->redLel->retryUpper += 1; + assert( pdaRun->lel->retryLower == 0 ); + pdaRun->numRetry += 1; + debug( REALM_PARSE, "retry: %p\n", pdaRun->redLel ); + } + + /* When the production is of zero length we stay in the same state. + * Otherwise we use the state stored in the first child. */ + pdaRun->cs = rhsLen == 0 ? pdaRun->curState : child->state; + + if ( prg->ctxDepParsing && prg->rtd->prodInfo[pdaRun->reduction].frameId >= 0 ) { + /* Frame info for reduction. */ + pdaRun->fi = &prg->rtd->frameInfo[prg->rtd->prodInfo[pdaRun->reduction].frameId]; + pdaRun->frameId = prg->rtd->prodInfo[pdaRun->reduction].frameId; + pdaRun->reject = false; + pdaRun->parsed = 0; + pdaRun->code = pdaRun->fi->codeWV; + +return PcrReduction; +case PcrReduction: + + if ( prg->induceExit ) + goto fail; + + /* If the lhs was stored and it changed then we need to restore the + * original upon backtracking, otherwise downref since we took a + * copy above. */ + if ( pdaRun->parsed != 0 ) { + if ( pdaRun->parsed != pdaRun->redLel->shadow->tree ) { + debug( REALM_PARSE, "lhs tree was modified, adding a restore instruction\n" ); +// +// /* Make it into a parse tree. */ +// Tree *newPt = prepParseTree( prg, sp, pdaRun->redLel->tree ); +// treeDownref( prg, sp, pdaRun->redLel->tree ); +// +// /* Copy it in. */ +// pdaRun->redLel->tree = newPt; +// treeUpref( pdaRun->redLel->tree ); + + /* Add the restore instruct. */ + append( &pdaRun->rcodeCollect, IN_RESTORE_LHS ); + appendWord( &pdaRun->rcodeCollect, (Word)pdaRun->parsed ); + append( &pdaRun->rcodeCollect, SIZEOF_CODE + SIZEOF_WORD ); + } + else { + /* Not changed. Done with parsed. 
*/ + treeDownref( prg, sp, pdaRun->parsed ); + } + pdaRun->parsed = 0; + } + + /* Pull out the reverse code, if any. */ + makeReverseCode( pdaRun ); + transferReverseCode( pdaRun, pdaRun->redLel ); + + /* Perhaps the execution environment is telling us we need to + * reject the reduction. */ + induceReject = pdaRun->reject; + } + + /* If the left hand side was replaced then the only parse algorithm + * data that is contained in it will the PF_HAS_RCODE flag. Everthing + * else will be in the original. This requires that we restore first + * when going backwards and when doing a commit. */ + + if ( induceReject ) { + debug( REALM_PARSE, "error induced during reduction of %s\n", + prg->rtd->lelInfo[pdaRun->redLel->id].name ); + pdaRun->redLel->state = pdaRun->curState; + pdaRun->redLel->next = pdaRun->stackTop; + pdaRun->stackTop = pdaRun->redLel; + /* FIXME: What is the right argument here? */ + pushBtPoint( prg, pdaRun ); + goto parseError; + } + + pdaRun->redLel->next = pdaRun->parseInput; + pdaRun->parseInput = pdaRun->redLel; + } + + goto again; + +parseError: + debug( REALM_PARSE, "hit error, backtracking\n" ); + + if ( pdaRun->numRetry == 0 ) { + debug( REALM_PARSE, "out of retries failing parse\n" ); + goto fail; + } + + while ( 1 ) { + if ( pdaRun->onDeck ) { + debug( REALM_BYTECODE, "dropping out for reverse code call\n" ); + + pdaRun->frameId = -1; + pdaRun->code = popReverseCode( &pdaRun->reverseCode ); + +return PcrReverse; +case PcrReverse: + + decrementSteps( pdaRun ); + } + else if ( pdaRun->checkNext ) { + pdaRun->checkNext = false; + + if ( pdaRun->next > 0 && pdaRun->tables->tokenRegions[pdaRun->next] != 0 ) { + debug( REALM_PARSE, "found a new region\n" ); + pdaRun->numRetry -= 1; + pdaRun->cs = stackTopTarget( prg, pdaRun ); + pdaRun->nextRegionInd = pdaRun->next; + return PcrDone; + } + } + else if ( pdaRun->checkStop ) { + pdaRun->checkStop = false; + + if ( pdaRun->stop ) { + debug( REALM_PARSE, "stopping the backtracking, steps is %d\n", 
pdaRun->steps ); + + pdaRun->cs = stackTopTarget( prg, pdaRun ); + goto _out; + } + } + else if ( pdaRun->parseInput != 0 ) { + /* Either we are dealing with a terminal that was + * shifted or a nonterminal that was reduced. */ + if ( pdaRun->parseInput->id < prg->rtd->firstNonTermId ) { + assert( pdaRun->parseInput->retryUpper == 0 ); + + if ( pdaRun->parseInput->retryLower != 0 ) { + debug( REALM_PARSE, "found retry targ: %p\n", pdaRun->parseInput ); + + pdaRun->numRetry -= 1; + pdaRun->cs = pdaRun->parseInput->state; + goto again; + } + + if ( pdaRun->parseInput->causeReduce != 0 ) { + pdaRun->undoLel = pdaRun->stackTop; + + /* Check if we've arrived at the stack sentinal. This guard + * is here to allow us to initially set numRetry to one to + * cause the parser to backup all the way to the beginning + * when an error occurs. */ + if ( pdaRun->undoLel->next == 0 ) + break; + + /* Either we are dealing with a terminal that was + * shifted or a nonterminal that was reduced. */ + assert( !(pdaRun->stackTop->id < prg->rtd->firstNonTermId) ); + + debug( REALM_PARSE, "backing up over non-terminal: %s\n", + prg->rtd->lelInfo[pdaRun->stackTop->id].name ); + + /* Pop the item from the stack. */ + pdaRun->stackTop = pdaRun->stackTop->next; + + /* Queue it as next parseInput item. */ + pdaRun->undoLel->next = pdaRun->parseInput; + pdaRun->parseInput = pdaRun->undoLel; + } + else { + long region = pdaRun->parseInput->region; + pdaRun->next = region > 0 ? region + 1 : 0; + pdaRun->checkNext = true; + pdaRun->checkStop = true; + + sendBack( prg, sp, pdaRun, fsmRun, inputStream, pdaRun->parseInput ); + + pdaRun->parseInput = 0; + } + } + else if ( pdaRun->parseInput->flags & PF_HAS_RCODE ) { + debug( REALM_PARSE, "tree has rcode, setting on deck\n" ); + pdaRun->onDeck = true; + pdaRun->parsed = 0; + + /* Only the RCODE flag was in the replaced lhs. All the rest is in + * the the original. We read it after restoring. 
*/ + + pdaRun->parseInput->flags &= ~PF_HAS_RCODE; + } + else { + /* Remove it from the input queue. */ + pdaRun->undoLel = pdaRun->parseInput; + pdaRun->parseInput = pdaRun->parseInput->next; + + /* Extract children from the child list. */ + ParseTree *first = pdaRun->undoLel->child; + pdaRun->undoLel->child = 0; + + /* This will skip the ignores/attributes, etc. */ + Kid *dataFirst = treeExtractChild( prg, pdaRun->undoLel->shadow->tree ); + + /* Walk the child list and and push the items onto the parsing + * stack one at a time. */ + while ( first != 0 ) { + /* Get the next item ahead of time. */ + ParseTree *next = first->next; + Kid *dataNext = dataFirst->next; + + /* Push onto the stack. */ + first->next = pdaRun->stackTop; + pdaRun->stackTop = first; + + /* Reattach the data and the parse tree. */ + first->shadow = dataFirst; + + first = next; + dataFirst = dataNext; + } + + /* If there is an parseInput queued, this is one less reduction it has + * caused. */ + if ( pdaRun->parseInput != 0 ) + pdaRun->parseInput->causeReduce -= 1; + + if ( pdaRun->undoLel->retryUpper != 0 ) { + /* There is always an parseInput item here because reduce + * conflicts only happen on a lookahead character. */ + assert( pdaRun->parseInput != pdaRun->undoLel ); + assert( pdaRun->parseInput != 0 ); + assert( pdaRun->undoLel->retryLower == 0 ); + assert( pdaRun->parseInput->retryUpper == 0 ); + + /* Transfer the retry from undoLel to parseInput. */ + pdaRun->parseInput->retryLower = pdaRun->undoLel->retryUpper; + pdaRun->parseInput->retryUpper = 0; + pdaRun->parseInput->state = stackTopTarget( prg, pdaRun ); + } + + /* Free the reduced item. */ + treeDownref( prg, sp, pdaRun->undoLel->shadow->tree ); + kidFree( prg, pdaRun->undoLel->shadow ); + parseTreeFree( prg, pdaRun->undoLel ); + + /* If the stacktop had right ignore attached, detach now. 
*/ + if ( pdaRun->stackTop->flags & PF_RIGHT_IL_ATTACHED ) + detachRightIgnore( prg, sp, pdaRun, pdaRun->stackTop ); + } + } + else if ( pdaRun->accumIgnore != 0 ) { + debug( REALM_PARSE, "have accumulated ignore to undo\n" ); + + /* Send back any accumulated ignore tokens, then trigger error + * in the the parser. */ + ParseTree *ignore = pdaRun->accumIgnore; + pdaRun->accumIgnore = pdaRun->accumIgnore->next; + ignore->next = 0; + + long region = ignore->region; + pdaRun->next = region > 0 ? region + 1 : 0; + pdaRun->checkNext = true; + pdaRun->checkStop = true; + + sendBackIgnore( prg, sp, pdaRun, fsmRun, inputStream, ignore ); + + treeDownref( prg, sp, ignore->shadow->tree ); + kidFree( prg, ignore->shadow ); + parseTreeFree( prg, ignore ); + } + else { + /* Now it is time to undo something. Pick an element from the top of + * the stack. */ + pdaRun->undoLel = pdaRun->stackTop; + + /* Check if we've arrived at the stack sentinal. This guard is + * here to allow us to initially set numRetry to one to cause the + * parser to backup all the way to the beginning when an error + * occurs. */ + if ( pdaRun->undoLel->next == 0 ) + break; + + /* Either we are dealing with a terminal that was + * shifted or a nonterminal that was reduced. */ + if ( pdaRun->stackTop->id < prg->rtd->firstNonTermId ) { + debug( REALM_PARSE, "backing up over effective terminal: %s\n", + prg->rtd->lelInfo[pdaRun->stackTop->id].name ); + + /* Pop the item from the stack. */ + pdaRun->stackTop = pdaRun->stackTop->next; + + /* Queue it as next parseInput item. */ + pdaRun->undoLel->next = pdaRun->parseInput; + pdaRun->parseInput = pdaRun->undoLel; + + /* Pop from the token list. 
*/ + Ref *ref = pdaRun->tokenList; + pdaRun->tokenList = ref->next; + kidFree( prg, (Kid*)ref ); + + assert( pdaRun->accumIgnore == 0 ); + detachLeftIgnore( prg, sp, pdaRun, fsmRun, pdaRun->parseInput ); + } + else { + debug( REALM_PARSE, "backing up over non-terminal: %s\n", + prg->rtd->lelInfo[pdaRun->stackTop->id].name ); + + /* Pop the item from the stack. */ + pdaRun->stackTop = pdaRun->stackTop->next; + + /* Queue it as next parseInput item. */ + pdaRun->undoLel->next = pdaRun->parseInput; + pdaRun->parseInput = pdaRun->undoLel; + } + + /* Undo attach of right ignore. */ + if ( pdaRun->stackTop->flags & PF_RIGHT_IL_ATTACHED ) + detachRightIgnore( prg, sp, pdaRun, pdaRun->stackTop ); + } + } + +fail: + pdaRun->cs = -1; + pdaRun->parseError = 1; + + /* If we failed parsing on tree we must free it. The caller expected us to + * either consume it or send it back to the parseInput. */ + if ( pdaRun->parseInput != 0 ) { + //treeDownref( prg, sp, (Tree*)pdaRun->parseInput->tree ); + //ptKidFree( prg, pdaRun->parseInput ); + pdaRun->parseInput = 0; + } + + /* FIXME: do we still need to fall through here? A fail is permanent now, + * no longer called into again. */ + + return PcrDone; + +_out: + pdaRun->nextRegionInd = pdaRun->tables->tokenRegionInds[pdaRun->cs]; + +case PcrDone: +break; } + + return PcrDone; +} diff --git a/src/pdarun.h b/src/pdarun.h new file mode 100644 index 00000000..1bdf651c --- /dev/null +++ b/src/pdarun.h @@ -0,0 +1,473 @@ +/* + * Copyright 2007-2012 Adrian Thurston <thurston@complang.org> + */ + +/* This file is part of Colm. + * + * Colm is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
 *
 * Colm is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Colm; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

#ifndef __COLM_PDARUN_H
#define __COLM_PDARUN_H

#include <input.h>
#include <fsmrun.h>
#include <defs.h>
#include <tree.h>

#ifdef __cplusplus
extern "C" {
#endif

struct ColmProgram;

/* Number of entries in the FsmRun mark array. */
#define MARK_SLOTS 32

/* Table data referenced from FsmRun through its tables member. Most array
 * members have a num* count further down.
 * NOTE(review): presumably these are the generated scanner tables; confirm
 * against the code that fills them in. */
typedef struct _FsmTables
{
	long *actions;
	long *keyOffsets;
	char *transKeys;
	long *singleLengths;
	long *rangeLengths;
	long *indexOffsets;
	long *transTargsWI;
	long *transActionsWI;
	long *toStateActions;
	long *fromStateActions;
	long *eofActions;
	long *eofTargs;
	long *entryByRegion;

	long numStates;
	long numActions;
	long numTransKeys;
	long numSingleLengths;
	long numRangeLengths;
	long numIndexOffsets;
	long numTransTargsWI;
	long numTransActionsWI;
	long numRegions;

	long startState;
	long firstFinal;
	long errorState;

	struct GenAction **actionSwitch;
	long numActionSwitch;
} FsmTables;

/* Per-run state that accompanies a PdaRun in the parsing entry points
 * declared in this header (parseToken, parseLoop, ...). */
typedef struct _FsmRun
{
	FsmTables *tables;

	RunBuf *runBuf;

	/* FsmRun State. */
	long region, preRegion;
	long cs, ncs, act;
	char *tokstart, *tokend;
	char *p, *pe, *peof;
	int returnResult;
	/* MARK_SLOTS saved positions. */
	char *mark[MARK_SLOTS];
	long matchedToken;

	/* NOTE(review): presumably maintained by attachInput/detachInput and
	 * attachSource/detachSource declared in this header; confirm. */
	InputStream *attachedInput;
	SourceStream *attachedSource;
} FsmRun;

void initFsmRun( FsmRun *fsmRun, struct ColmProgram *prg );
void clearFsmRun( struct ColmProgram *prg, FsmRun *fsmRun );
void updatePosition( InputStream *inputStream, const char *data, long length );
void undoPosition( InputStream *inputStream, const char *data, long length );
void sendBackRunBufHead( FsmRun *fsmRun, InputStream *inputStream );
void undoStreamPull( FsmRun *fsmRun, InputStream *inputStream, const char *data, long length );


/* appendWord() in this header emits four bytes, plus four more when
 * SIZEOF_LONG is 8. Any other size is rejected at compile time. */
#if SIZEOF_LONG != 4 && SIZEOF_LONG != 8
	#error "SIZEOF_LONG contained an unexpected value"
#endif

struct _Execution;

/* Vector of Code values. The append helpers in this header insert at index
 * tabLen, so tabLen is the position one past the last element.
 * NOTE(review): allocLen is presumably the allocated capacity; confirm in
 * the rtCodeVect implementation. */
typedef struct _RtCodeVect
{
	Code *data;
	long tabLen;
	long allocLen;

	/* FIXME: leak when freed. */
} RtCodeVect;

void listAddAfter( List *list, ListEl *prev_el, ListEl *new_el );
void listAddBefore( List *list, ListEl *next_el, ListEl *new_el );

void listPrepend( List *list, ListEl *new_el );
void listAppend( List *list, ListEl *new_el );

ListEl *listDetach( List *list, ListEl *el );
ListEl *listDetachFirst(List *list );
ListEl *listDetachLast(List *list );

long listLength(List *list);

typedef struct _FunctionInfo
{
	const char *name;
	long frameId;
	long argSize;
	long frameSize;
} FunctionInfo;

/*
 * Program Data.
 */

typedef struct _PatReplInfo
{
	long offset;
	long numBindings;
} PatReplInfo;

typedef struct _PatReplNode
{
	long id;
	long prodNum;
	long next;
	long child;
	long bindId;
	const char *data;
	long length;
	long leftIgnore;
	long rightIgnore;

	/* Just match nonterminal, don't go inside. */
	unsigned char stop;
} PatReplNode;

/* FIXME: should have a descriptor for object types to give the length. 
*/

/* Static description of one language element. parseToken indexes the
 * lelInfo array of these by element id to read name and objectLength. */
typedef struct _LangElInfo
{
	const char *name;
	const char *xmlTag;
	unsigned char repeat;
	unsigned char list;
	unsigned char literal;
	unsigned char ignore;

	long frameId;

	long objectTypeId;
	long ofiOffset;
	/* Passed to allocAttrs when a reduction builds its lhs tree. */
	long objectLength;

//	long contextTypeId;
//	long contextLength;

	long termDupId;
	long genericId;
	long markId;
	long captureAttr;
	long numCaptureAttr;
} LangElInfo;

typedef struct _ObjFieldInfo
{
	int typeId;
} ObjFieldInfo;

/* Static description of one production. parseToken indexes the prodInfo
 * array of these by the reduction number taken from the parse action. */
typedef struct _ProdInfo
{
	/* Language element id given to the tree built by the reduction. */
	unsigned long lhsId;
	short prodNum;
	/* Number of right-hand-side items popped from the parse stack. */
	long length;
	const char *name;
	/* Index into frameInfo; a negative value means no reduction action is
	 * run for this production. */
	long frameId;
	unsigned char lhsUpref;
	unsigned char *copy;
	long copyLen;
} ProdInfo;

typedef struct _FrameInfo
{
	/* parseToken points pdaRun->code at codeWV before returning
	 * PcrReduction. */
	Code *codeWV;
	long codeLenWV;
	Code *codeWC;
	long codeLenWC;
	char *trees;
	long treesLen;
	long argSize;
	long frameSize;
} FrameInfo;

typedef struct _RegionInfo
{
	const char *name;
	long defaultToken;
	long eofFrameId;
	int isIgnoreOnly;
	int isCiOnly;
	int ciLelId;
} RegionInfo;

typedef struct _CaptureAttr
{
	long mark_enter;
	long mark_leave;
	long offset;
} CaptureAttr;

typedef struct _PdaTables
{
	/* Parser table data. */
	int *indicies;
	int *owners;
	int *keys;
	unsigned int *offsets;
	unsigned int *targs;
	unsigned int *actInds;
	unsigned int *actions;
	/* A non-zero entry makes parseToken call commitFull. */
	int *commitLen;
	/* Indexed by parser state to obtain PdaRun.nextRegionInd. */
	int *tokenRegionInds;
	/* parseToken treats a zero entry as "no further region to try". */
	int *tokenRegions;
	int *tokenPreRegions;

	int numIndicies;
	int numKeys;
	int numStates;
	int numTargs;
	int numActInds;
	int numActions;
	int numCommitLen;
	int numRegionItems;
	int numPreRegionItems;
} PdaTables;

/* One chunk of pool storage; blocks are chained through next. */
typedef struct _PoolBlock
{
	void *data;
	struct _PoolBlock *next;
} PoolBlock;

/* Overlay used to link freed pool elements into a free list. */
typedef struct _PoolItem
{
	struct _PoolItem *next;
} PoolItem;

/* Fixed-size element allocator, implemented in pool.c. */
typedef struct _PoolAlloc
{
	/* Most recently allocated block; older blocks follow through next. */
	PoolBlock *head;
	/* Index of the next never-used element in head. The value FRESH_BLOCK
	 * makes the next allocation create a new block. */
	long nextel;
	/* Free list of elements handed back with poolAllocFree. */
	PoolItem *pool;
	/* Size in bytes of one element. */
	int sizeofT;
} PoolAlloc;

/* State of one parser run, shared between parseToken and its callers. */
typedef struct _PdaRun
{
	/* Number of recorded retry points. On a parse error parseToken fails
	 * immediately when this is zero. */
	int numRetry;
	/* Top of the parse stack; items are chained through ParseTree.next. */
	ParseTree *stackTop;
	/* One Ref pushed per shifted token, popped again when backing up. */
	Ref *tokenList;
	/* Current parser state; set to -1 when the parse fails. */
	int cs;
	int nextRegionInd;

	PdaTables *tables;
	int parserId;

	/* Reused. */
	RtCodeVect rcodeCollect;
	RtCodeVect reverseCode;

	int stopParsing;
	long stopTarget;

	ParseTree *accumIgnore;

	Kid *btPoint;

	struct Bindings *bindings;

	int revertOn;

	Tree *context;

	int stop;
	/* Set to 1 when the parse fails. */
	int parseError;

	long steps;
	long targetSteps;

	/* Set during backtracking when an item carrying PF_HAS_RCODE is found;
	 * parseToken then returns PcrReverse so its reverse code can run. */
	int onDeck;

	/*
	 * Data we added when refactoring the parsing engine into a coroutine.
	 */

	/* Queue of items waiting to be parsed, chained through next. */
	ParseTree *parseInput;
	FrameInfo *fi;
	int reduction;
	ParseTree *redLel;
	int curState;
	ParseTree *lel;
	int triggerUndo;

	int tokenId;
	Head *tokdata;
	int frameId;
	int next;
	ParseTree *undoLel;

	int checkNext;
	int checkStop;

	/* The lhs is sometimes saved before reduction actions in case it is
	 * replaced and we need to restore it on backtracking */
	Tree *parsed;

	int reject;

	/* Instruction pointer to use when we stop parsing and execute code. */
	Code *code;

	int rcBlockCount;
} PdaRun;

void rtCodeVectReplace( RtCodeVect *vect, long pos, const Code *val, long len );
void rtCodeVectEmpty( RtCodeVect *vect );
void rtCodeVectRemove( RtCodeVect *vect, long pos, long len );

void initRtCodeVect( RtCodeVect *codeVect );

//inline static void remove( RtCodeVect *vect, long pos );
inline static void append( RtCodeVect *vect, const Code val );
inline static void append2( RtCodeVect *vect, const Code *val, long len );
inline static void appendHalf( RtCodeVect *vect, Half half );
inline static void appendWord( RtCodeVect *vect, Word word );

/* Append len code values starting at val to the end of vect. */
inline static void append2( RtCodeVect *vect, const Code *val, long len )
{
	rtCodeVectReplace( vect, vect->tabLen, val, len );
}

/* Append the single code value val to the end of vect. */
inline static void append( RtCodeVect *vect, const Code val )
{
	rtCodeVectReplace( vect, vect->tabLen, &val, 1 );
}

/* Append half as two bytes, least significant byte first. */
inline static void appendHalf( RtCodeVect *vect, Half half )
{
	/* not optimal. */
	append( vect, half & 0xff );
	append( vect, (half>>8) & 0xff );
}

/* Append word one byte at a time, least significant byte first: four bytes,
 * plus four more when SIZEOF_LONG is 8. */
inline static void appendWord( RtCodeVect *vect, Word word )
{
	/* not optimal. */
	append( vect, word & 0xff );
	append( vect, (word>>8) & 0xff );
	append( vect, (word>>16) & 0xff );
	append( vect, (word>>24) & 0xff );
	#if SIZEOF_LONG == 8
	append( vect, (word>>32) & 0xff );
	append( vect, (word>>40) & 0xff );
	append( vect, (word>>48) & 0xff );
	append( vect, (word>>56) & 0xff );
	#endif
}

void incrementSteps( PdaRun *pdaRun );
void decrementSteps( PdaRun *pdaRun );

int makeReverseCode( PdaRun *pdaRun );
void transferReverseCode( PdaRun *pdaRun, ParseTree *tree );

void initPdaRun( PdaRun *pdaRun, struct ColmProgram *prg, PdaTables *tables,
		FsmRun *fsmRun, int parserId, long stopTarget, int revertOn, Tree *context );
void clearPdaRun( struct ColmProgram *prg, Tree **root, PdaRun *pdaRun );

void initInputStream( InputStream *inputStream );
void clearInputStream( struct ColmProgram *prg, Tree **sp, InputStream *inputStream );
void initSourceStream( SourceStream *in );
void clearSourceStream( struct ColmProgram *prg, Tree **sp, SourceStream *sourceStream );


void clearContext( PdaRun *pdaRun, Tree **sp );
Kid *extractIgnore( PdaRun *pdaRun );
long stackTopTarget( struct ColmProgram *prg, PdaRun *pdaRun );
void runCommit( PdaRun *pdaRun );
int isParserStopFinished( PdaRun *pdaRun );
void pdaRunMatch( PdaRun *pdaRun, Kid *tree, Kid *pattern );

/* Offset can be used to look at the next nextRegionInd. 
*/
int pdaRunGetNextRegion( PdaRun *pdaRun, int offset );
int pdaRunGetNextPreRegion( PdaRun *pdaRun );

/* Codes returned by parseToken. PcrReduction and PcrReverse are returned when
 * the parser stops so that the code at pdaRun->code can be executed;
 * parseToken has a case label for each so it can continue from that point.
 * NOTE(review): presumably the caller passes the returned code back as the
 * entry argument to resume; confirm against the top of parseToken. */
#define PcrStart 1
#define PcrDone 2
#define PcrReduction 3
#define PcrGeneration 4
#define PcrPreEof 5
#define PcrReverse 6

long parseToken( struct ColmProgram *prg, Tree **sp, PdaRun *pdaRun,
		FsmRun *fsmRun, InputStream *inputStream, long entry );

long undoParse( Tree **sp, PdaRun *pdaRun, FsmRun *fsmRun, InputStream *inputStream, Tree *tree );

Head *streamPull( struct ColmProgram *prg, FsmRun *fsmRun, InputStream *inputStream, long length );
Head *stringAllocPointer( struct ColmProgram *prg, const char *data, long length );

void streamPushText( FsmRun *fsmRun, InputStream *inputStream, const char *data, long length );
void streamPushTree( FsmRun *fsmRun, InputStream *inputStream, Tree *tree, int ignore );
void undoStreamPush( struct ColmProgram *prg, Tree **sp, FsmRun *fsmRun, InputStream *inputStream, long length );
void undoStreamAppend( struct ColmProgram *prg, Tree **sp, FsmRun *fsmRun, InputStream *inputStream, struct ColmTree *tree, long length );
Kid *makeTokenWithData( struct ColmProgram *prg, PdaRun *pdaRun, FsmRun *fsmRun,
		InputStream *inputStream, int id, Head *tokdata );

void pushBinding( PdaRun *pdaRun, ParseTree *parseTree );
void popBinding( PdaRun *pdaRun, ParseTree *parseTree );

void executeGenerationAction( struct ColmProgram *prg, Tree **sp, FsmRun *fsmRun, PdaRun *pdaRun,
		InputStream *inputStream, int frameId, Code *code, long id, Head *tokdata );
/* NOTE(review): extractIgnore is already declared earlier in this header and
 * extractMatch is declared twice in a row below. The repeats are harmless
 * but redundant. */
Kid *extractIgnore( PdaRun *pdaRun );
long sendBackQueuedIgnore( struct ColmProgram *prg, Tree **sp, InputStream *inputStream,
		FsmRun *fsmRun, PdaRun *pdaRun, long entry );
void clearIgnoreList( struct ColmProgram *prg, Tree **sp, Kid *kid );
Head *extractMatch( struct ColmProgram *prg, FsmRun *fsmRun, InputStream *inputStream );
Head *extractMatch( struct ColmProgram *prg, FsmRun *fsmRun, InputStream *inputStream );
void newToken( struct ColmProgram *prg, PdaRun *pdaRun, FsmRun *fsmRun );
void fsmExecute( FsmRun *fsmRun, InputStream *inputStream );
void sendNamedLangEl( struct ColmProgram *prg, Tree **sp, PdaRun *pdaRun, FsmRun *fsmRun, InputStream *inputStream );
long parseLoop( struct ColmProgram *prg, Tree **sp, PdaRun *pdaRun,
		FsmRun *fsmRun, InputStream *inputStream, long entry );
void initBindings( PdaRun *pdaRun );
Tree *getParsedRoot( PdaRun *pdaRun, int stop );
void undoParseStream( struct ColmProgram *prg, Tree **sp, InputStream *inputStream, FsmRun *fsmRun,
		PdaRun *pdaRun, long steps );

void clearBuffered( FsmRun *fsmRun );
void resetToken( FsmRun *fsmRun );

void detachInput( FsmRun *fsmRun, InputStream *is );
void attachInput( FsmRun *fsmRun, InputStream *is );
void detachSource( FsmRun *fsmRun, SourceStream *ss );
void attachSource( FsmRun *fsmRun, SourceStream *ss );

#ifdef __cplusplus
}
#endif

#endif
diff --git a/src/pool.c b/src/pool.c
new file mode 100644
index 00000000..e1c3c240
--- /dev/null
+++ b/src/pool.c
@@ -0,0 +1,330 @@
/*
 * Copyright 2010-2012 Adrian Thurston <thurston@complang.org>
 */

/* This file is part of Colm.
 *
 * Colm is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * Colm is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with Colm; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <string.h> +#include <stdlib.h> + +#include <pdarun.h> +#include <pool.h> +#include <debug.h> + +void initPoolAlloc( PoolAlloc *poolAlloc, int sizeofT ) +{ + poolAlloc->head = 0; + poolAlloc->nextel = FRESH_BLOCK; + poolAlloc->pool = 0; + poolAlloc->sizeofT = sizeofT; +} + +void *poolAllocAllocate( PoolAlloc *poolAlloc ) +{ + debug( REALM_POOL, "pool allocation\n" ); + +#ifdef POOL_MALLOC + void *res = malloc( poolAlloc->sizeofT ); + memset( res, 0, poolAlloc->sizeofT ); + return res; +#else + //#ifdef COLM_LOG_BYTECODE + //cerr << "allocating in: " << __PRETTY_FUNCTION__ << endl; + //#endif + + void *newEl = 0; + if ( poolAlloc->pool == 0 ) { + if ( poolAlloc->nextel == FRESH_BLOCK ) { + //#ifdef COLM_LOG_BYTECODE + //if ( colm_log_bytecode ) + // cerr << "allocating " << FRESH_BLOCK << " Elements of type T" << endl; + //#endif + + PoolBlock *newBlock = (PoolBlock*)malloc( sizeof(PoolBlock) ); + newBlock->data = malloc( poolAlloc->sizeofT * FRESH_BLOCK ); + newBlock->next = poolAlloc->head; + poolAlloc->head = newBlock; + poolAlloc->nextel = 0; + } + + newEl = (char*)poolAlloc->head->data + poolAlloc->sizeofT * poolAlloc->nextel++; + } + else { + newEl = poolAlloc->pool; + poolAlloc->pool = poolAlloc->pool->next; + } + memset( newEl, 0, poolAlloc->sizeofT ); + return newEl; +#endif +} + +void poolAllocFree( PoolAlloc *poolAlloc, void *el ) +{ + #if 0 + /* Some sanity checking. Best not to normally run with this on. 
*/ + char *p = (char*)el + sizeof(PoolItem*); + char *pe = (char*)el + sizeof(T); + for ( ; p < pe; p++ ) + assert( *p != 0xcc ); + memset( el, 0xcc, sizeof(T) ); + #endif + +#ifdef POOL_MALLOC + free( el ); +#else + PoolItem *pi = (PoolItem*) el; + pi->next = poolAlloc->pool; + poolAlloc->pool = pi; +#endif +} + +void poolAllocClear( PoolAlloc *poolAlloc ) +{ + PoolBlock *block = poolAlloc->head; + while ( block != 0 ) { + PoolBlock *next = block->next; + free( block->data ); + free( block ); + block = next; + } + + poolAlloc->head = 0; + poolAlloc->nextel = 0; + poolAlloc->pool = 0; +} + +long poolAllocNumLost( PoolAlloc *poolAlloc ) +{ + /* Count the number of items allocated. */ + long lost = 0; + PoolBlock *block = poolAlloc->head; + if ( block != 0 ) { + lost = poolAlloc->nextel; + block = block->next; + while ( block != 0 ) { + lost += FRESH_BLOCK; + block = block->next; + } + } + + /* Subtract. Items that are on the free list. */ + PoolItem *pi = poolAlloc->pool; + while ( pi != 0 ) { + lost -= 1; + pi = pi->next; + } + + return lost; +} + +/* + * Kid + */ + +Kid *kidAllocate( Program *prg ) +{ + return (Kid*) poolAllocAllocate( &prg->kidPool ); +} + +void kidFree( Program *prg, Kid *el ) +{ + poolAllocFree( &prg->kidPool, el ); +} + +void kidClear( Program *prg ) +{ + poolAllocClear( &prg->kidPool ); +} + +long kidNumLost( Program *prg ) +{ + return poolAllocNumLost( &prg->kidPool ); +} + +/* + * Tree + */ + +Tree *treeAllocate( Program *prg ) +{ + return (Tree*) poolAllocAllocate( &prg->treePool ); +} + +void treeFree( Program *prg, Tree *el ) +{ + poolAllocFree( &prg->treePool, el ); +} + +void treeClear( Program *prg ) +{ + poolAllocClear( &prg->treePool ); +} + +long treeNumLost( Program *prg ) +{ + return poolAllocNumLost( &prg->treePool ); +} + +/* + * ParseTree + */ + +ParseTree *parseTreeAllocate( Program *prg ) +{ + return (ParseTree*) poolAllocAllocate( &prg->parseTreePool ); +} + +void parseTreeFree( Program *prg, ParseTree *el ) +{ + 
poolAllocFree( &prg->parseTreePool, el ); +} + +void parseTreeClear( Program *prg ) +{ + poolAllocClear( &prg->parseTreePool ); +} + +long parseTreeNumLost( Program *prg ) +{ + return poolAllocNumLost( &prg->parseTreePool ); +} + +/* + * ListEl + */ + +ListEl *listElAllocate( Program *prg ) +{ + return (ListEl*) poolAllocAllocate( &prg->listElPool ); +} + +void listElFree( Program *prg, ListEl *el ) +{ + poolAllocFree( &prg->listElPool, el ); +} + +void listElClear( Program *prg ) +{ + poolAllocClear( &prg->listElPool ); +} + +long listElNumLost( Program *prg ) +{ + return poolAllocNumLost( &prg->listElPool ); +} + +/* + * MapEl + */ + +MapEl *mapElAllocate( Program *prg ) +{ + return (MapEl*) poolAllocAllocate( &prg->mapElPool ); +} + +void mapElFree( Program *prg, MapEl *el ) +{ + poolAllocFree( &prg->mapElPool, el ); +} + +void mapElClear( Program *prg ) +{ + poolAllocClear( &prg->mapElPool ); +} + +long mapElNumLost( Program *prg ) +{ + return poolAllocNumLost( &prg->mapElPool ); +} + +/* + * Head + */ + +Head *headAllocate( Program *prg ) +{ + return (Head*) poolAllocAllocate( &prg->headPool ); +} + +void headFree( Program *prg, Head *el ) +{ + poolAllocFree( &prg->headPool, el ); +} + +void headClear( Program *prg ) +{ + poolAllocClear( &prg->headPool ); +} + +long headNumLost( Program *prg ) +{ + return poolAllocNumLost( &prg->headPool ); +} + +/* + * Location + */ + +Location *locationAllocate( Program *prg ) +{ + return (Location*) poolAllocAllocate( &prg->locationPool ); +} + +void locationFree( Program *prg, Location *el ) +{ + poolAllocFree( &prg->locationPool, el ); +} + +void locationClear( Program *prg ) +{ + poolAllocClear( &prg->locationPool ); +} + +long locationNumLost( Program *prg ) +{ + return poolAllocNumLost( &prg->locationPool ); +} + +/* + * Stream + */ + +Stream *streamAllocate( Program *prg ) +{ + return (Stream*)mapElAllocate( prg ); +} + +void streamFree( Program *prg, Stream *stream ) +{ + mapElFree( prg, (MapEl*)stream ); +} + + +/* 
 * Input
 */

/* Input objects are carved out of the MapEl pool rather than a pool of their
 * own.
 * NOTE(review): colmRunProgram asserts that Map, List, Stream and Parser fit
 * in a MapEl, but no such check for Input is visible; confirm that
 * sizeof(Input) <= sizeof(MapEl). */
Input *inputAllocate( Program *prg )
{
	return (Input*)mapElAllocate( prg );
}

/* Return an Input obtained from inputAllocate to the MapEl pool. */
void inputFree( Program *prg, Input *accumStream )
{
	mapElFree( prg, (MapEl*)accumStream );
}
diff --git a/src/pool.h b/src/pool.h
new file mode 100644
index 00000000..ae647abd
--- /dev/null
+++ b/src/pool.h
@@ -0,0 +1,86 @@
/*
 * Copyright 2010-2012 Adrian Thurston <thurston@complang.org>
 */

/* This file is part of Colm.
 *
 * Colm is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * Colm is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Colm; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

#ifndef _POOL_H
#define _POOL_H

/* Allocation, number of items. This is the element count of each pool block,
 * and also the PoolAlloc.nextel value that makes the next allocation create
 * a new block. */
#define FRESH_BLOCK 8128

#include <pdarun.h>
#include <map.h>
#include <tree.h>

#ifdef __cplusplus
extern "C" {
#endif

void initPoolAlloc( PoolAlloc *poolAlloc, int sizeofT );

/* Each pooled type below has the same four operations: *Allocate returns a
 * zero-filled element, *Free gives one back, *Clear releases the whole pool
 * and *NumLost counts elements that were allocated but never freed. */
Kid *kidAllocate( Program *prg );
void kidFree( Program *prg, Kid *el );
void kidClear( Program *prg );
long kidNumLost( Program *prg );

Tree *treeAllocate( Program *prg );
void treeFree( Program *prg, Tree *el );
void treeClear( Program *prg );
long treeNumLost( Program *prg );

ParseTree *parseTreeAllocate( Program *prg );
void parseTreeFree( Program *prg, ParseTree *el );
void parseTreeClear( Program *prg );
long parseTreeNumLost( Program *prg );

ListEl *listElAllocate( Program *prg );
void listElFree( Program *prg, ListEl *el );
void listElClear( Program *prg );
long listElNumLost( Program *prg );

MapEl *mapElAllocate( Program *prg );
void mapElFree( Program *prg, MapEl *el );
void mapElClear( Program *prg );
long mapElNumLost( Program *prg );

Head *headAllocate( Program *prg );
void headFree( Program *prg, Head *el );
void headClear( Program *prg );
long headNumLost( Program *prg );

Location *locationAllocate( Program *prg );
void locationFree( Program *prg, Location *el );
void locationClear( Program *prg );
long locationNumLost( Program *prg );

/* Stream and Input share the MapEl pool, so they have no Clear or NumLost
 * of their own. */
Stream *streamAllocate( Program *prg );
void streamFree( Program *prg, Stream *stream );

Input *inputAllocate( Program *prg );
void inputFree( Program *prg, Input *stream );

/* Wrong place. */
TreePair mapRemove( Program *prg, Map *map, Tree *key );

#ifdef __cplusplus
}
#endif

#endif
diff --git a/src/program.c b/src/program.c
new file mode 100644
index 00000000..c17b8bb4
--- /dev/null
+++ b/src/program.c
@@ -0,0 +1,254 @@
/*
 * Copyright 2006-2012 Adrian Thurston <thurston@complang.org>
 */

/* This file is part of Colm. 
+ * + * Colm is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Colm is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Colm; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <pdarun.h> +#include <fsmrun.h> +#include <tree.h> +#include <bytecode.h> +#include <pool.h> +#include <debug.h> +#include <config.h> + +#include <alloca.h> +#include <sys/mman.h> +#include <string.h> +#include <assert.h> +#include <stdlib.h> + +void colmInit( long debugRealm ) +{ + /* Always on because because logging is controlled with ifdefs in\n" the + * runtime lib. */ + colm_log_bytecode = 1; + colm_log_parse = 1; + colm_log_match = 1; + colm_log_compile = 1; + colm_log_conds = 1; + colmActiveRealm = debugRealm; + initInputFuncs(); +} + +void colmRunProgram( Program *prg ) +{ + assert( sizeof(Int) <= sizeof(Tree) ); + assert( sizeof(Str) <= sizeof(Tree) ); + assert( sizeof(Pointer) <= sizeof(Tree) ); + assert( sizeof(Map) <= sizeof(MapEl) ); + assert( sizeof(List) <= sizeof(MapEl) ); + assert( sizeof(Stream) <= sizeof(MapEl) ); + assert( sizeof(Parser) <= sizeof(MapEl) ); + + /* Allocate the global variable. */ + allocGlobal( prg ); + + /* + * Allocate the VM stack. 
+ */ + + prg->vm_stack = stackAlloc(); + prg->vm_root = &prg->vm_stack[VM_STACK_SIZE]; + + /* + * Execute + */ + if ( prg->rtd->rootCodeLen > 0 ) { + //RtCodeVect rcodeCollect; + Execution execution; + + initExecution( &execution, 0, 0, 0, 0, prg->rtd->rootFrameId ); + mainExecution( prg, &execution, prg->rtd->rootCode ); + } +} + +void clearGlobal( Program *prg, Tree **sp ) +{ + /* Downref all the fields in the global object. */ + int g; + for ( g = 0; g < prg->rtd->globalSize; g++ ) { + //assert( getAttr( global, g )->refs == 1 ); + treeDownref( prg, sp, getAttr( prg->global, g ) ); + } + + /* Free the global object. */ + if ( prg->rtd->globalSize > 0 ) + freeAttrs( prg, prg->global->child ); + treeFree( prg, prg->global ); +} + +void allocGlobal( Program *prg ) +{ + /* Alloc the global. */ + Tree *tree = treeAllocate( prg ); + tree->child = allocAttrs( prg, prg->rtd->globalSize ); + tree->refs = 1; + prg->global = tree; +} + +Tree **stackAlloc() +{ + //return new Tree*[VM_STACK_SIZE]; + + return (Tree**)mmap( 0, sizeof(Tree*)*VM_STACK_SIZE, + PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, -1, 0 ); +} + +Tree **vm_root( struct ColmProgram *prg ) +{ + return prg->vm_root; +} + +Tree *returnVal( struct ColmProgram *prg ) +{ + return prg->returnVal; +} + + +Program *colmNewProgram( RuntimeData *rtd, int argc, const char **argv ) +{ + Program *prg = malloc(sizeof(Program)); + memset( prg, 0, sizeof(Program) ); + prg->argc = argc; + prg->argv = argv; + prg->rtd = rtd; + prg->ctxDepParsing = 1; + prg->global = 0; + prg->heap = 0; + prg->stdinVal = 0; + prg->stdoutVal = 0; + prg->stderrVal = 0; + prg->induceExit = 0; + prg->exitStatus = 0; + + initPoolAlloc( &prg->kidPool, sizeof(Kid) ); + initPoolAlloc( &prg->treePool, sizeof(Tree) ); + initPoolAlloc( &prg->parseTreePool, sizeof(ParseTree) ); + initPoolAlloc( &prg->listElPool, sizeof(ListEl) ); + initPoolAlloc( &prg->mapElPool, sizeof(MapEl) ); + initPoolAlloc( &prg->headPool, sizeof(Head) ); + initPoolAlloc( 
&prg->locationPool, sizeof(Location) ); + + Int *trueInt = (Int*) treeAllocate( prg ); + trueInt->id = LEL_ID_BOOL; + trueInt->refs = 1; + trueInt->value = 1; + + Int *falseInt = (Int*) treeAllocate( prg ); + falseInt->id = LEL_ID_BOOL; + falseInt->refs = 1; + falseInt->value = 0; + + prg->trueVal = (Tree*)trueInt; + prg->falseVal = (Tree*)falseInt; + + prg->allocRunBuf = 0; + prg->returnVal = 0; + prg->lastParseError = 0; + + return prg; +} + +int colmDeleteProgram( Program *prg ) +{ + Tree **sp = prg->vm_root; + int exitStatus = prg->exitStatus; + + #ifdef COLM_LOG_BYTECODE + if ( colm_log_bytecode ) { + cerr << "clearing the prg" << endl; + } + #endif + + treeDownref( prg, sp, prg->returnVal ); + treeDownref( prg, sp, prg->lastParseError ); + clearGlobal( prg, sp ); + + /* Clear the heap. */ + Kid *a = prg->heap; + while ( a != 0 ) { + Kid *next = a->next; + treeDownref( prg, sp, a->tree ); + kidFree( prg, a ); + a = next; + } + + //assert( trueVal->refs == 1 ); + //assert( falseVal->refs == 1 ); + treeDownref( prg, sp, prg->trueVal ); + treeDownref( prg, sp, prg->falseVal ); + + treeDownref( prg, sp, (Tree*)prg->stdinVal ); + treeDownref( prg, sp, (Tree*)prg->stdoutVal ); + treeDownref( prg, sp, (Tree*)prg->stderrVal ); + +#if DEBUG + long kidLost = kidNumLost( prg ); + long treeLost = treeNumLost( prg ); + long parseTreeLost = parseTreeNumLost( prg ); + long listLost = listElNumLost( prg ); + long mapLost = mapElNumLost( prg ); + long headLost = headNumLost( prg ); + long locationLost = locationNumLost( prg ); + + if ( kidLost ) + message( "warning: lost kids: %ld\n", kidLost ); + + if ( treeLost ) + message( "warning: lost trees: %ld\n", treeLost ); + + if ( parseTreeLost ) + message( "warning: lost parse trees: %ld\n", parseTreeLost ); + + if ( listLost ) + message( "warning: lost listEls: %ld\n", listLost ); + + if ( mapLost ) + message( "warning: lost mapEls: %ld\n", mapLost ); + + if ( headLost ) + message( "warning: lost heads: %ld\n", headLost ); + + if 
( locationLost ) + message( "warning: lost locations: %ld\n", locationLost ); +#endif + + kidClear( prg ); + treeClear( prg ); + headClear( prg ); + parseTreeClear( prg ); + listElClear( prg ); + mapElClear( prg ); + locationClear( prg ); + + //memset( vm_stack, 0, sizeof(Tree*) * VM_STACK_SIZE); + + RunBuf *rb = prg->allocRunBuf; + while ( rb != 0 ) { + RunBuf *next = rb->next; + free( rb ); + rb = next; + } + + free( prg ); + + return exitStatus; +} + + diff --git a/src/program.h b/src/program.h new file mode 100644 index 00000000..9a50274d --- /dev/null +++ b/src/program.h @@ -0,0 +1,128 @@ +/* + * Copyright 2007-2012 Adrian Thurston <thurston@complang.org> + */ + +/* This file is part of Colm. + * + * Colm is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Colm is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with Colm; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef __COLM_PROGRAM_H +#define __COLM_PROGRAM_H + +#include <pdarun.h> + +typedef struct ColmRuntimeData +{ + LangElInfo *lelInfo; + long numLangEls; + + ProdInfo *prodInfo; + long numProds; + + RegionInfo *regionInfo; + long numRegions; + + Code *rootCode; + long rootCodeLen; + long rootFrameId; + + FrameInfo *frameInfo; + long numFrames; + + FunctionInfo *functionInfo; + long numFunctions; + + PatReplInfo *patReplInfo; + long numPatterns; + + PatReplNode *patReplNodes; + long numPatternNodes; + + GenericInfo *genericInfo; + long numGenerics; + + long argvGenericId; + + const char **litdata; + long *litlen; + Head **literals; + long numLiterals; + + CaptureAttr *captureAttr; + long numCapturedAttr; + + FsmTables *fsmTables; + PdaTables *pdaTables; + int *startStates; + int *eofLelIds; + int *parserLelIds; + long numParsers; + + long globalSize; + + long firstNonTermId; + + long integerId; + long stringId; + long anyId; + long eofId; + long noTokenId; +} RuntimeData; + + +typedef struct ColmProgram +{ + int argc; + const char **argv; + + unsigned char ctxDepParsing; + RuntimeData *rtd; + Tree *global; + int induceExit; + int exitStatus; + + PoolAlloc kidPool; + PoolAlloc treePool; + PoolAlloc parseTreePool; + PoolAlloc listElPool; + PoolAlloc mapElPool; + PoolAlloc headPool; + PoolAlloc locationPool; + + Tree *trueVal; + Tree *falseVal; + + Kid *heap; + + Tree **se; + + Stream *stdinVal; + Stream *stdoutVal; + Stream *stderrVal; + + RunBuf *allocRunBuf; + + Tree **vm_stack; + Tree **vm_root; + + /* Returned from the main line. Should have exports instead. */ + Tree *returnVal; + + /* The most recent parse error. Should be returned from the parsing function. 
*/ + Tree *lastParseError; +} Program; + +#endif diff --git a/src/redbuild.cc b/src/redbuild.cc new file mode 100644 index 00000000..ae5faf38 --- /dev/null +++ b/src/redbuild.cc @@ -0,0 +1,650 @@ +/* + * Copyright 2006-2012 Adrian Thurston <thurston@complang.org> + */ + +/* This file is part of Colm. + * + * Colm is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Colm is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Colm; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + + +#include "global.h" +#include "redbuild.h" +#include "fsmgraph.h" +#include "redfsm.h" +#include "fsmcodegen.h" +#include <string.h> + +using namespace std; + +RedFsmBuild::RedFsmBuild( char *fsmName, Compiler *pd, FsmGraph *fsm ) +: + fsmName(fsmName), + pd(pd), + fsm(fsm), + nextActionTableId(0), + startState(-1), + errState(-1) +{ +} + +void RedFsmBuild::initActionList( unsigned long length ) +{ + redFsm->allActions = new GenAction[length]; + memset( redFsm->allActions, 0, sizeof(GenAction) * length ); + for ( unsigned long a = 0; a < length; a++ ) + redFsm->genActionList.append( redFsm->allActions+a ); +} + + +void RedFsmBuild::makeActionList() +{ + /* Determine which actions to write. 
*/ + int nextActionId = 0; + for ( ActionList::Iter act = pd->actionList; act.lte(); act++ ) { + if ( act->numRefs() > 0 || act->numCondRefs > 0 ) + act->actionId = nextActionId++; + } + + initActionList( nextActionId ); + curAction = 0; + + for ( ActionList::Iter act = pd->actionList; act.lte(); act++ ) { + if ( act->actionId >= 0 ) + makeAction( act ); + } +} + +void RedFsmBuild::initActionTableList( unsigned long length ) +{ + redFsm->allActionTables = new RedAction[length]; +} + +void RedFsmBuild::initStateList( unsigned long length ) +{ + redFsm->allStates = new RedState[length]; + for ( unsigned long s = 0; s < length; s++ ) + redFsm->stateList.append( redFsm->allStates+s ); + + /* We get the start state as an offset, set the pointer now. */ + assert( startState >= 0 ); + redFsm->startState = redFsm->allStates + startState; + if ( errState >= 0 ) + redFsm->errState = redFsm->allStates + errState; + for ( EntryIdVect::Iter en = redFsm->entryPointIds; en.lte(); en++ ) + redFsm->entryPoints.insert( redFsm->allStates + *en ); + + /* The nextStateId is no longer used to assign state ids (they come in set + * from the frontend now), however generation code still depends on it. + * Should eventually remove this variable. */ + redFsm->nextStateId = redFsm->stateList.length(); +} + +void RedFsmBuild::addEntryPoint( int entryId, char *name, unsigned long entryState ) +{ + redFsm->entryPointIds.append( entryState ); + redFsm->entryPointNames.append( name ); + redFsm->redEntryMap.insert( entryId, entryState ); +} + +void RedFsmBuild::addRegionToEntry( int regionId, int entryId ) +{ + assert( regionId == redFsm->regionToEntry.length() ); + redFsm->regionToEntry.append( entryId ); +} + +void RedFsmBuild::initTransList( int snum, unsigned long length ) +{ + /* Could preallocate the out range to save time growing it. For now do + * nothing. 
*/ +} + +void RedFsmBuild::newTrans( int snum, int tnum, Key lowKey, + Key highKey, long targ, long action ) +{ + /* Get the current state and range. */ + RedState *curState = redFsm->allStates + snum; + RedTransList &destRange = curState->outRange; + + if ( curState == redFsm->errState ) + return; + + /* Make the new transitions. */ + RedState *targState = targ >= 0 ? (redFsm->allStates + targ) : + redFsm->wantComplete ? redFsm->getErrorState() : 0; + RedAction *actionTable = action >= 0 ? (redFsm->allActionTables + action) : 0; + RedTrans *trans = redFsm->allocateTrans( targState, actionTable ); + RedTransEl transEl( lowKey, highKey, trans ); + + if ( redFsm->wantComplete ) { + /* If the machine is to be complete then we need to fill any gaps with + * the error transitions. */ + if ( destRange.length() == 0 ) { + /* Range is currently empty. */ + if ( keyOps->minKey < lowKey ) { + /* The first range doesn't start at the low end. */ + Key fillHighKey = lowKey; + fillHighKey.decrement(); + + /* Create the filler with the state's error transition. */ + RedTransEl newTel( keyOps->minKey, fillHighKey, redFsm->getErrorTrans() ); + destRange.append( newTel ); + } + } + else { + /* The range list is not empty, get the the last range. */ + RedTransEl *last = &destRange[destRange.length()-1]; + Key nextKey = last->highKey; + nextKey.increment(); + if ( nextKey < lowKey ) { + /* There is a gap to fill. Make the high key. */ + Key fillHighKey = lowKey; + fillHighKey.decrement(); + + /* Create the filler with the state's error transtion. */ + RedTransEl newTel( nextKey, fillHighKey, redFsm->getErrorTrans() ); + destRange.append( newTel ); + } + } + } + + /* Filler taken care of. Append the range. */ + destRange.append( RedTransEl( lowKey, highKey, trans ) ); +} + +void RedFsmBuild::finishTransList( int snum ) +{ + /* Get the current state and range. 
*/ + RedState *curState = redFsm->allStates + snum; + RedTransList &destRange = curState->outRange; + + if ( curState == redFsm->errState ) + return; + + /* If building a complete machine we may need filler on the end. */ + if ( redFsm->wantComplete ) { + /* Check if there are any ranges already. */ + if ( destRange.length() == 0 ) { + /* Fill with the whole alphabet. */ + /* Add the range on the lower and upper bound. */ + RedTransEl newTel( keyOps->minKey, keyOps->maxKey, redFsm->getErrorTrans() ); + destRange.append( newTel ); + } + else { + /* Get the last and check for a gap on the end. */ + RedTransEl *last = &destRange[destRange.length()-1]; + if ( last->highKey < keyOps->maxKey ) { + /* Make the high key. */ + Key fillLowKey = last->highKey; + fillLowKey.increment(); + + /* Create the new range with the error trans and append it. */ + RedTransEl newTel( fillLowKey, keyOps->maxKey, redFsm->getErrorTrans() ); + destRange.append( newTel ); + } + } + } +} + +void RedFsmBuild::setId( int snum, int id ) +{ + RedState *curState = redFsm->allStates + snum; + curState->id = id; +} + +void RedFsmBuild::setEofTrans( int snum, int eofTarget, int actId ) +{ + RedState *curState = redFsm->allStates + snum; + RedState *targState = redFsm->allStates + eofTarget; + RedAction *eofAct = redFsm->allActionTables + actId; + curState->eofTrans = redFsm->allocateTrans( targState, eofAct ); +} + +void RedFsmBuild::setFinal( int snum ) +{ + RedState *curState = redFsm->allStates + snum; + curState->isFinal = true; +} + + +void RedFsmBuild::setStateActions( int snum, long toStateAction, + long fromStateAction, long eofAction ) +{ + RedState *curState = redFsm->allStates + snum; + if ( toStateAction >= 0 ) + curState->toStateAction = redFsm->allActionTables + toStateAction; + if ( fromStateAction >= 0 ) + curState->fromStateAction = redFsm->allActionTables + fromStateAction; + if ( eofAction >= 0 ) + curState->eofAction = redFsm->allActionTables + eofAction; +} + +void 
RedFsmBuild::closeMachine() +{ + //for ( GenActionList::Iter a = redFsm->actionList; a.lte(); a++ ) + // resolveTargetStates( a->inlineList ); + + /* Note that even if we want a complete graph we do not give the error + * state a default transition. All machines break out of the processing + * loop when in the error state. */ + + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + for ( GenStateCondList::Iter sci = st->stateCondList; sci.lte(); sci++ ) + st->stateCondVect.append( sci ); + } +} + +void RedFsmBuild::initCondSpaceList( ulong length ) +{ + redFsm->allCondSpaces = new GenCondSpace[length]; + for ( ulong c = 0; c < length; c++ ) + redFsm->condSpaceList.append( redFsm->allCondSpaces + c ); +} + +void RedFsmBuild::newCondSpace( int cnum, int condSpaceId, Key baseKey ) +{ + GenCondSpace *cond = redFsm->allCondSpaces + cnum; + cond->condSpaceId = condSpaceId; + cond->baseKey = baseKey; +} + +void RedFsmBuild::condSpaceItem( int cnum, long condActionId ) +{ + GenCondSpace *cond = redFsm->allCondSpaces + cnum; + cond->condSet.append( redFsm->allActions + condActionId ); +} + +void RedFsmBuild::initStateCondList( int snum, ulong length ) +{ + /* Could preallocate these, as we could with transitions. */ +} + +void RedFsmBuild::addStateCond( int snum, Key lowKey, Key highKey, long condNum ) +{ + RedState *curState = redFsm->allStates + snum; + + /* Create the new state condition. */ + GenStateCond *stateCond = new GenStateCond; + stateCond->lowKey = lowKey; + stateCond->highKey = highKey; + + /* Assign it a cond space. 
*/ + GenCondSpace *condSpace = redFsm->allCondSpaces + condNum; + stateCond->condSpace = condSpace; + + curState->stateCondList.append( stateCond ); +} + + +void RedFsmBuild::setForcedErrorState() +{ + redFsm->forcedErrorState = true; +} + +Key RedFsmBuild::findMaxKey() +{ + Key maxKey = keyOps->maxKey; + for ( RedStateList::Iter st = redFsm->stateList; st.lte(); st++ ) { + assert( st->outSingle.length() == 0 ); + assert( st->defTrans == 0 ); + + long rangeLen = st->outRange.length(); + if ( rangeLen > 0 ) { + Key highKey = st->outRange[rangeLen-1].highKey; + if ( highKey > maxKey ) + maxKey = highKey; + } + } + return maxKey; +} + + +void RedFsmBuild::makeActionTableList() +{ + /* Must first order the action tables based on their id. */ + int numTables = nextActionTableId; + RedActionTable **tables = new RedActionTable*[numTables]; + for ( ActionTableMap::Iter at = actionTableMap; at.lte(); at++ ) + tables[at->id] = at; + + initActionTableList( numTables ); + curActionTable = 0; + + for ( int t = 0; t < numTables; t++ ) { + long length = tables[t]->key.length(); + + /* Collect the action table. */ + RedAction *redAct = redFsm->allActionTables + curActionTable; + redAct->actListId = curActionTable; + redAct->key.setAsNew( length ); + + int pos = 0; + for ( ActionTable::Iter atel = tables[t]->key; atel.lte(); atel++ ) { + int actionId = atel->value->actionId; + redAct->key[pos].key = 0; + redAct->key[pos].value = redFsm->allActions+actionId; + pos += 1; + } + + /* Insert into the action table map. */ + redFsm->actionMap.insert( redAct ); + + curActionTable += 1; + + } + + delete[] tables; +} + +void RedFsmBuild::reduceActionTables() +{ + /* Reduce the actions tables to a set. */ + for ( StateList::Iter st = fsm->stateList; st.lte(); st++ ) { + RedActionTable *actionTable = 0; + + /* Reduce To State Actions. 
*/ + if ( st->toStateActionTable.length() > 0 ) { + if ( actionTableMap.insert( st->toStateActionTable, &actionTable ) ) + actionTable->id = nextActionTableId++; + } + + /* Reduce From State Actions. */ + if ( st->fromStateActionTable.length() > 0 ) { + if ( actionTableMap.insert( st->fromStateActionTable, &actionTable ) ) + actionTable->id = nextActionTableId++; + } + + /* Reduce EOF actions. */ + if ( st->eofActionTable.length() > 0 ) { + if ( actionTableMap.insert( st->eofActionTable, &actionTable ) ) + actionTable->id = nextActionTableId++; + } + + /* Loop the transitions and reduce their actions. */ + for ( TransList::Iter trans = st->outList; trans.lte(); trans++ ) { + if ( trans->actionTable.length() > 0 ) { + if ( actionTableMap.insert( trans->actionTable, &actionTable ) ) + actionTable->id = nextActionTableId++; + } + } + } +} + +void RedFsmBuild::appendTrans( TransListVect &outList, Key lowKey, + Key highKey, FsmTrans *trans ) +{ + if ( trans->toState != 0 || trans->actionTable.length() > 0 ) + outList.append( TransEl( lowKey, highKey, trans ) ); +} + +void RedFsmBuild::makeTrans( Key lowKey, Key highKey, FsmTrans *trans ) +{ + /* First reduce the action. */ + RedActionTable *actionTable = 0; + if ( trans->actionTable.length() > 0 ) + actionTable = actionTableMap.find( trans->actionTable ); + + long targ = trans->toState == 0 ? -1 : trans->toState->alg.stateNum; + long action = actionTable == 0 ? -1 : actionTable->id; + + newTrans( curState, curTrans++, lowKey, highKey, targ, action ); +} + +void RedFsmBuild::makeTransList( FsmState *state ) +{ + TransListVect outList; + + /* If there is only are no ranges the task is simple. */ + if ( state->outList.length() > 0 ) { + /* Loop each source range. */ + for ( TransList::Iter trans = state->outList; trans.lte(); trans++ ) { + /* Reduce the transition. If it reduced to anything then add it. 
/*
 * Write len characters starting at data to out, replacing the XML markup
 * characters '<', '>' and '&' with their predefined entity references.
 * Every other character is copied through unchanged.
 *
 * out:  destination stream.
 * data: characters to write; need not be NUL terminated.
 * len:  number of characters to write; nothing is written when it is zero
 *       or negative.
 */
void xmlEscapeHost( std::ostream &out, char *data, int len )
{
	for ( int pos = 0; pos < len; pos++ ) {
		/* Each entity is spelled as two adjacent literals so that a tool
		 * which decodes entities in source text cannot silently turn it
		 * back into the raw character. */
		switch ( data[pos] ) {
			case '<': out << "&" "lt;"; break;
			case '>': out << "&" "gt;"; break;
			case '&': out << "&" "amp;"; break;
			default: out << data[pos]; break;
		}
	}
}
long eofAction = -1; + + if ( toStateActions != 0 ) + toStateAction = toStateActions->id; + if ( fromStateActions != 0 ) + fromStateAction = fromStateActions->id; + if ( eofActions != 0 ) + eofAction = eofActions->id; + + setStateActions( curState, toStateAction, + fromStateAction, eofAction ); + } +} + +void RedFsmBuild::makeStateConditions( FsmState *state ) +{ + if ( state->stateCondList.length() > 0 ) { + + long length = state->stateCondList.length(); + initStateCondList( curState, length ); + curStateCond = 0; + + for ( StateCondList::Iter scdi = state->stateCondList; scdi.lte(); scdi++ ) { + Key lowKey = scdi->lowKey; + Key highKey = scdi->highKey; + long condId = scdi->condSpace->condSpaceId; + addStateCond( curState, lowKey, highKey, condId ); + } + } +} + +void RedFsmBuild::makeStateList() +{ + /* Write the list of states. */ + long length = fsm->stateList.length(); + initStateList( length ); + curState = 0; + + for ( StateList::Iter st = fsm->stateList; st.lte(); st++ ) { + /* Both or neither should be set. */ + assert( !( (st->eofTarget != 0) xor (st->eofActionTable.length() > 0) ) ); + + makeStateActions( st ); + makeStateConditions( st ); + makeTransList( st ); + + setId( curState, st->alg.stateNum ); + if ( st->isFinState() ) + setFinal( curState ); + + /* If there is an eof target, make an eof transition. */ + if ( st->eofTarget != 0 ) { + /* Find the eof actions. */ + RedActionTable *eofActions = 0; + eofActions = actionTableMap.find( st->eofActionTable ); + setEofTrans( curState, st->eofTarget->alg.stateNum, eofActions->id ); + } + + curState += 1; + } +} + +void RedFsmBuild::makeEntryPoints() +{ + if ( fsm->lmRequiresErrorState ) + setForcedErrorState(); + + for ( EntryMap::Iter en = fsm->entryPoints; en.lte(); en++ ) { + /* Get the name instantiation from nameIndex. 
*/ + NameInst *nameInst = fsm->nameIndex[en->key]; + FsmState *state = en->value; + char *name = nameInst->name; + long entry = state->alg.stateNum; + addEntryPoint( en->key, name, entry ); + } + + for ( RegionList::Iter reg = pd->regionList; reg.lte(); reg++ ) { + assert( reg->regionNameInst != 0 ); + + TokenRegion *use = reg; + + if ( use->isCiOnly ) + use = use->derivedFrom->ignoreOnlyRegion; + + NameInst *regionName = use->regionNameInst; + addRegionToEntry( reg->id, regionName->id ); + } +} + +void RedFsmBuild::makeMachine() +{ + /* Action tables. */ + reduceActionTables(); + + makeActionList(); + makeActionTableList(); + makeConditions(); + + /* Start state. */ + startState = fsm->startState->alg.stateNum; + + /* Error state. */ + if ( fsm->errState != 0 ) + errState = fsm->errState->alg.stateNum; + + makeEntryPoints(); + makeStateList(); +} + +void RedFsmBuild::makeConditions() +{ + if ( condData->condSpaceMap.length() > 0 ) { + long nextCondSpaceId = 0; + for ( CondSpaceMap::Iter cs = condData->condSpaceMap; cs.lte(); cs++ ) + cs->condSpaceId = nextCondSpaceId++; + + long length = condData->condSpaceMap.length(); + initCondSpaceList( length ); + curCondSpace = 0; + + for ( CondSpaceMap::Iter cs = condData->condSpaceMap; cs.lte(); cs++ ) { + long condSpaceId = cs->condSpaceId; + Key baseKey = cs->baseKey; + + newCondSpace( curCondSpace, condSpaceId, baseKey ); + for ( CondSet::Iter csi = cs->condSet; csi.lte(); csi++ ) { + long actionOffset = (*csi)->actionId; + condSpaceItem( curCondSpace, actionOffset ); + } + + curCondSpace += 1; + } + } +} + +RedFsm *RedFsmBuild::reduceMachine() +{ + redFsm = new RedFsm(); + redFsm->wantComplete = true; + + /* Open the definition. */ + makeMachine(); + + /* Do this before distributing transitions out to singles and defaults + * makes life easier. */ + redFsm->maxKey = findMaxKey(); + + redFsm->assignActionLocs(); + + /* Find the first final state (The final state with the lowest id). 
*/ + redFsm->findFirstFinState(); + + /* Choose default transitions and the single transition. */ + redFsm->chooseDefaultSpan(); + + /* Maybe do flat expand, otherwise choose single. */ + redFsm->chooseSingle(); + + /* Set up incoming transitions. */ + redFsm->setInTrans(); + + /* Anlayze Machine will find the final action reference counts, among + * other things. We will use these in reporting the usage + * of fsm directives in action code. */ + redFsm->analyzeMachine(); + + return redFsm; +} + diff --git a/src/redbuild.h b/src/redbuild.h new file mode 100644 index 00000000..dbbb3e19 --- /dev/null +++ b/src/redbuild.h @@ -0,0 +1,161 @@ +/* + * Copyright 2006-2012 Adrian Thurston <thurston@complang.org> + */ + +/* This file is part of Colm. + * + * Colm is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Colm is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Colm; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _FSMREDUCE_H +#define _FSMREDUCE_H + +#include <iostream> +#include "avltree.h" +#include "fsmgraph.h" +#include "parsedata.h" +#include "fsmrun.h" + +/* Forwards. 
*/ +struct FsmTrans; +struct FsmGraph; +struct Compiler; +struct FsmCodeGen; +struct RedFsm; +struct GenCondSpace; +struct Condition; + +struct RedActionTable +: + public AvlTreeEl<RedActionTable> +{ + RedActionTable( const ActionTable &key ) + : + key(key), + id(0) + { } + + const ActionTable &getKey() + { return key; } + + ActionTable key; + int id; +}; + +typedef AvlTree<RedActionTable, ActionTable, CmpActionTable> ActionTableMap; + +struct NextRedTrans +{ + Key lowKey, highKey; + FsmTrans *trans; + FsmTrans *next; + + void load() { + if ( trans != 0 ) { + next = trans->next; + lowKey = trans->lowKey; + highKey = trans->highKey; + } + } + + NextRedTrans( FsmTrans *t ) { + trans = t; + load(); + } + + void increment() { + trans = next; + load(); + } +}; + +class RedFsmBuild +{ +public: + RedFsmBuild( char *fsmName, Compiler *pd, FsmGraph *fsm ); + RedFsm *reduceMachine( ); + +private: + void appendTrans( TransListVect &outList, Key lowKey, Key highKey, FsmTrans *trans ); + void makeStateActions( FsmState *state ); + void makeStateList(); + void makeStateConditions( FsmState *state ); + + void initActionList( unsigned long length ); + void newAction( int anum, char *name, int line, int col, Action *action ); + void initActionTableList( unsigned long length ); + void initCondSpaceList( ulong length ); + void condSpaceItem( int cnum, long condActionId ); + void newCondSpace( int cnum, int condSpaceId, Key baseKey ); + void initStateCondList( int snum, ulong length ); + void addStateCond( int snum, Key lowKey, Key highKey, long condNum ); + void initStateList( unsigned long length ); + void addRegionToEntry( int regionId, int entryId ); + void addEntryPoint( int entryId, char *name, unsigned long entryState ); + void setId( int snum, int id ); + void initTransList( int snum, unsigned long length ); + void newTrans( int snum, int tnum, Key lowKey, Key highKey, + long targ, long act ); + void finishTransList( int snum ); + void setFinal( int snum ); + void setEofTrans( 
int snum, int eofTarget, int actId ); + void setStateActions( int snum, long toStateAction, + long fromStateAction, long eofAction ); + void setForcedErrorState(); + void closeMachine(); + Key findMaxKey(); + + + void makeEntryPoints(); + void makeGetKeyExpr(); + void makeAccessExpr(); + void makeCurStateExpr(); + void makeConditions(); + void makeInlineList( InlineList *inlineList, InlineItem *context ); + void makeActionList(); + void makeActionTableList(); + void reduceTrans( FsmTrans *trans ); + void reduceActionTables(); + void makeTransList( FsmState *state ); + void makeTrans( Key lowKey, Key highKey, FsmTrans *defTrans ); + void makeAction( Action *action ); + void makeLmSwitch( InlineItem *item ); + void makeMachine(); + void makeActionExec( InlineItem *item ); + void makeActionExecTE( InlineItem *item ); + + char *fsmName; + Compiler *pd; + FsmGraph *fsm; + ActionTableMap actionTableMap; + int nextActionTableId; + + int startState; + int errState; + +public: + RedFsm *redFsm; + +private: + int curAction; + int curActionTable; + int curTrans; + int curState; + int curCondSpace; + int curStateCond; +}; + + +#endif /* _FSMREDUCE_H */ diff --git a/src/redfsm.cc b/src/redfsm.cc new file mode 100644 index 00000000..d3a65b7c --- /dev/null +++ b/src/redfsm.cc @@ -0,0 +1,1112 @@ +/* + * Copyright 2006-2012 Adrian Thurston <thurston@complang.org> + */ + +/* This file is part of Colm. + * + * Colm is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Colm is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with Colm; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <iostream> +#include <sstream> +#include "redfsm.h" +#include "avlmap.h" +#include "mergesort.h" +#include "fsmgraph.h" +#include "parsetree.h" +#include "fsmrun.h" + +using std::ostringstream; + +string nameOrLoc( GenAction *genAction ) +{ + if ( genAction->name != 0 ) + return string(genAction->name); + else { + ostringstream ret; + ret << genAction->loc.line << ":" << genAction->loc.col; + return ret.str(); + } +} + +RedFsm::RedFsm() +: + wantComplete(false), + forcedErrorState(false), + nextActionId(0), + nextTransId(0), + errState(0), + errTrans(0), + firstFinState(0), + numFinStates(0), + allActions(0), + allActionTables(0), + allConditions(0), + allCondSpaces(0), + allStates(0), + bAnyToStateActions(false), + bAnyFromStateActions(false), + bAnyRegActions(false), + bAnyEofActions(false), + bAnyActionGotos(false), + bAnyActionCalls(false), + bAnyActionRets(false), + bAnyRegActionRets(false), + bAnyRegActionByValControl(false), + bAnyRegNextStmt(false), + bAnyRegCurStateRef(false), + bAnyRegBreak(false), + bAnyLmSwitchError(false), + bAnyConditions(false) +{ +} + +/* Does the machine have any actions. */ +bool RedFsm::anyActions() +{ + return actionMap.length() > 0; +} + +void RedFsm::depthFirstOrdering( RedState *state ) +{ + /* Nothing to do if the state is already on the list. */ + if ( state->onStateList ) + return; + + /* Doing depth first, put state on the list. */ + state->onStateList = true; + stateList.append( state ); + +// /* At this point transitions should only be in ranges. */ +// assert( state->outSingle.length() == 0 ); +// assert( state->defTrans == 0 ); + + /* Recurse on singles. 
*/ + for ( RedTransList::Iter stel = state->outSingle; stel.lte(); stel++ ) { + if ( stel->value->targ != 0 ) + depthFirstOrdering( stel->value->targ ); + } + + /* Recurse on everything ranges. */ + for ( RedTransList::Iter rtel = state->outRange; rtel.lte(); rtel++ ) { + if ( rtel->value->targ != 0 ) + depthFirstOrdering( rtel->value->targ ); + } + + if ( state->defTrans != 0 && state->defTrans->targ != 0 ) + depthFirstOrdering( state->defTrans->targ ); +} + +/* Ordering states by transition connections. */ +void RedFsm::depthFirstOrdering() +{ + /* Init on state list flags. */ + for ( RedStateList::Iter st = stateList; st.lte(); st++ ) + st->onStateList = false; + + /* Clear out the state list, we will rebuild it. */ + int stateListLen = stateList.length(); + stateList.abandon(); + + /* Add back to the state list from the start state and all other entry + * points. */ + depthFirstOrdering( startState ); + for ( RedStateSet::Iter en = entryPoints; en.lte(); en++ ) + depthFirstOrdering( *en ); + if ( forcedErrorState ) + depthFirstOrdering( errState ); + + /* Make sure we put everything back on. */ + assert( stateListLen == stateList.length() ); +} + +/* Assign state ids by appearance in the state list. */ +void RedFsm::sequentialStateIds() +{ + /* Table based machines depend on the state numbers starting at zero. */ + nextStateId = 0; + for ( RedStateList::Iter st = stateList; st.lte(); st++ ) + st->id = nextStateId++; +} + +/* Stable sort the states by final state status. */ +void RedFsm::sortStatesByFinal() +{ + /* Move forward through the list and throw final states onto the end. */ + RedState *state = 0; + RedState *next = stateList.head; + RedState *last = stateList.tail; + while ( state != last ) { + /* Move forward and load up the next. */ + state = next; + next = state->next; + + /* Throw to the end? */ + if ( state->isFinal ) { + stateList.detach( state ); + stateList.append( state ); + } + } +} + +/* Assign state ids by final state state status. 
*/ +void RedFsm::sortStateIdsByFinal() +{ + /* Table based machines depend on this starting at zero. */ + nextStateId = 0; + + /* First pass to assign non final ids. */ + for ( RedStateList::Iter st = stateList; st.lte(); st++ ) { + if ( ! st->isFinal ) + st->id = nextStateId++; + } + + /* Second pass to assign final ids. */ + for ( RedStateList::Iter st = stateList; st.lte(); st++ ) { + if ( st->isFinal ) + st->id = nextStateId++; + } +} + +struct CmpStateById +{ + static int compare( RedState *st1, RedState *st2 ) + { + if ( st1->id < st2->id ) + return -1; + else if ( st1->id > st2->id ) + return 1; + else + return 0; + } +}; + +void RedFsm::sortByStateId() +{ + /* Make the array. */ + int pos = 0; + RedState **ptrList = new RedState*[stateList.length()]; + for ( RedStateList::Iter st = stateList; st.lte(); st++ ) + ptrList[pos++] = st; + + MergeSort<RedState*, CmpStateById> mergeSort; + mergeSort.sort( ptrList, stateList.length() ); + + stateList.abandon(); + for ( int st = 0; st < pos; st++ ) + stateList.append( ptrList[st] ); + + delete[] ptrList; +} + +/* Find the final state with the lowest id. */ +void RedFsm::findFirstFinState() +{ + for ( RedStateList::Iter st = stateList; st.lte(); st++ ) { + if ( st->isFinal && (firstFinState == 0 || st->id < firstFinState->id) ) + firstFinState = st; + } +} + +void RedFsm::assignActionLocs() +{ + int nextLocation = 0; + for ( GenActionTableMap::Iter act = actionMap; act.lte(); act++ ) { + /* Store the loc, skip over the array and a null terminator. */ + act->location = nextLocation; + nextLocation += act->key.length() + 1; + } +} + +/* Check if we can extend the current range by displacing any ranges + * ahead to the singles. */ +bool RedFsm::canExtend( const RedTransList &list, int pos ) +{ + /* Get the transition that we want to extend. */ + RedTrans *extendTrans = list[pos].value; + + /* Look ahead in the transition list. 
*/ + for ( int next = pos + 1; next < list.length(); pos++, next++ ) { + /* If they are not continuous then cannot extend. */ + Key nextKey = list[next].lowKey; + nextKey.decrement(); + if ( list[pos].highKey != nextKey ) + break; + + /* Check for the extenstion property. */ + if ( extendTrans == list[next].value ) + return true; + + /* If the span of the next element is more than one, then don't keep + * checking, it won't be moved to single. */ + unsigned long long nextSpan = keyOps->span( list[next].lowKey, list[next].highKey ); + if ( nextSpan > 1 ) + break; + } + return false; +} + +/* Move ranges to the singles list. */ +void RedFsm::moveTransToSingle( RedState *state ) +{ + RedTransList &range = state->outRange; + RedTransList &single = state->outSingle; + for ( int rpos = 0; rpos < range.length(); ) { + /* Check if this is a range we can extend. */ + if ( canExtend( range, rpos ) ) { + /* Transfer singles over. */ + while ( range[rpos].value != range[rpos+1].value ) { + /* Transfer the range to single. */ + single.append( range[rpos+1] ); + range.remove( rpos+1 ); + } + + /* Extend. */ + range[rpos].highKey = range[rpos+1].highKey; + range.remove( rpos+1 ); + } + /* Maybe move it to the singles. */ + else if ( keyOps->span( range[rpos].lowKey, range[rpos].highKey ) == 1 ) { + single.append( range[rpos] ); + range.remove( rpos ); + } + else { + /* Keeping it in the ranges. */ + rpos += 1; + } + } +} + +/* Look through ranges and choose suitable single character transitions. */ +void RedFsm::chooseSingle() +{ + /* Loop the states. */ + for ( RedStateList::Iter st = stateList; st.lte(); st++ ) { + /* Rewrite the transition list taking out the suitable single + * transtions. 
*/ + moveTransToSingle( st ); + } +} + +void RedFsm::makeFlat() +{ + for ( RedStateList::Iter st = stateList; st.lte(); st++ ) { + if ( st->stateCondList.length() == 0 ) { + st->condLowKey = 0; + st->condHighKey = 0; + } + else { + st->condLowKey = st->stateCondList.head->lowKey; + st->condHighKey = st->stateCondList.tail->highKey; + + unsigned long long span = keyOps->span( st->condLowKey, st->condHighKey ); + st->condList = new GenCondSpace*[ span ]; + memset( st->condList, 0, sizeof(GenCondSpace*)*span ); + + for ( GenStateCondList::Iter sci = st->stateCondList; sci.lte(); sci++ ) { + unsigned long long base, trSpan; + base = keyOps->span( st->condLowKey, sci->lowKey )-1; + trSpan = keyOps->span( sci->lowKey, sci->highKey ); + for ( unsigned long long pos = 0; pos < trSpan; pos++ ) + st->condList[base+pos] = sci->condSpace; + } + } + + if ( st->outRange.length() == 0 ) { + st->lowKey = st->highKey = 0; + st->transList = 0; + } + else { + st->lowKey = st->outRange[0].lowKey; + st->highKey = st->outRange[st->outRange.length()-1].highKey; + unsigned long long span = keyOps->span( st->lowKey, st->highKey ); + st->transList = new RedTrans*[ span ]; + memset( st->transList, 0, sizeof(RedTrans*)*span ); + + for ( RedTransList::Iter trans = st->outRange; trans.lte(); trans++ ) { + unsigned long long base, trSpan; + base = keyOps->span( st->lowKey, trans->lowKey )-1; + trSpan = keyOps->span( trans->lowKey, trans->highKey ); + for ( unsigned long long pos = 0; pos < trSpan; pos++ ) + st->transList[base+pos] = trans->value; + } + + /* Fill in the gaps with the default transition. */ + for ( unsigned long long pos = 0; pos < span; pos++ ) { + if ( st->transList[pos] == 0 ) + st->transList[pos] = st->defTrans; + } + } + } +} + + +/* A default transition has been picked, move it from the outRange to the + * default pointer. */ +void RedFsm::moveToDefault( RedTrans *defTrans, RedState *state ) +{ + /* Rewrite the outRange, omitting any ranges that use + * the picked default. 
*/ + RedTransList outRange; + for ( RedTransList::Iter rtel = state->outRange; rtel.lte(); rtel++ ) { + /* If it does not take the default, copy it over. */ + if ( rtel->value != defTrans ) + outRange.append( *rtel ); + } + + /* Save off the range we just created into the state's range. */ + state->outRange.transfer( outRange ); + + /* Store the default. */ + state->defTrans = defTrans; +} + +bool RedFsm::alphabetCovered( RedTransList &outRange ) +{ + /* Cannot cover without any out ranges. */ + if ( outRange.length() == 0 ) + return false; + + /* If the first range doesn't start at the the lower bound then the + * alphabet is not covered. */ + RedTransList::Iter rtel = outRange; + if ( keyOps->minKey < rtel->lowKey ) + return false; + + /* Check that every range is next to the previous one. */ + rtel.increment(); + for ( ; rtel.lte(); rtel++ ) { + Key highKey = rtel[-1].highKey; + highKey.increment(); + if ( highKey != rtel->lowKey ) + return false; + } + + /* The last must extend to the upper bound. */ + RedTransEl *last = &outRange[outRange.length()-1]; + if ( last->highKey < keyOps->maxKey ) + return false; + + return true; +} + +RedTrans *RedFsm::chooseDefaultSpan( RedState *state ) +{ + /* Make a set of transitions from the outRange. */ + RedTransPtrSet stateTransSet; + for ( RedTransList::Iter rtel = state->outRange; rtel.lte(); rtel++ ) + stateTransSet.insert( rtel->value ); + + /* For each transition in the find how many alphabet characters the + * transition spans. */ + unsigned long long *span = new unsigned long long[stateTransSet.length()]; + memset( span, 0, sizeof(unsigned long long) * stateTransSet.length() ); + for ( RedTransList::Iter rtel = state->outRange; rtel.lte(); rtel++ ) { + /* Lookup the transition in the set. */ + RedTrans **inSet = stateTransSet.find( rtel->value ); + int pos = inSet - stateTransSet.data; + span[pos] += keyOps->span( rtel->lowKey, rtel->highKey ); + } + + /* Find the max span, choose it for making the default. 
*/ + RedTrans *maxTrans = 0; + unsigned long long maxSpan = 0; + for ( RedTransPtrSet::Iter rtel = stateTransSet; rtel.lte(); rtel++ ) { + if ( span[rtel.pos()] > maxSpan ) { + maxSpan = span[rtel.pos()]; + maxTrans = *rtel; + } + } + + delete[] span; + return maxTrans; +} + +/* Pick default transitions from ranges for the states. */ +void RedFsm::chooseDefaultSpan() +{ + /* Loop the states. */ + for ( RedStateList::Iter st = stateList; st.lte(); st++ ) { + /* Only pick a default transition if the alphabet is covered. This + * avoids any transitions in the out range that go to error and avoids + * the need for an ERR state. */ + if ( alphabetCovered( st->outRange ) ) { + /* Pick a default transition by largest span. */ + RedTrans *defTrans = chooseDefaultSpan( st ); + + /* Rewrite the transition list taking out the transition we picked + * as the default and store the default. */ + moveToDefault( defTrans, st ); + } + } +} + +RedTrans *RedFsm::chooseDefaultGoto( RedState *state ) +{ + /* Make a set of transitions from the outRange. */ + RedTransPtrSet stateTransSet; + for ( RedTransList::Iter rtel = state->outRange; rtel.lte(); rtel++ ) { + if ( rtel->value->targ == state->next ) + return rtel->value; + } + return 0; +} + +void RedFsm::chooseDefaultGoto() +{ + /* Loop the states. */ + for ( RedStateList::Iter st = stateList; st.lte(); st++ ) { + /* Pick a default transition. */ + RedTrans *defTrans = chooseDefaultGoto( st ); + if ( defTrans == 0 ) + defTrans = chooseDefaultSpan( st ); + + /* Rewrite the transition list taking out the transition we picked + * as the default and store the default. */ + moveToDefault( defTrans, st ); + } +} + +RedTrans *RedFsm::chooseDefaultNumRanges( RedState *state ) +{ + /* Make a set of transitions from the outRange. 
*/ + RedTransPtrSet stateTransSet; + for ( RedTransList::Iter rtel = state->outRange; rtel.lte(); rtel++ ) + stateTransSet.insert( rtel->value ); + + /* For each transition in the find how many ranges use the transition. */ + int *numRanges = new int[stateTransSet.length()]; + memset( numRanges, 0, sizeof(int) * stateTransSet.length() ); + for ( RedTransList::Iter rtel = state->outRange; rtel.lte(); rtel++ ) { + /* Lookup the transition in the set. */ + RedTrans **inSet = stateTransSet.find( rtel->value ); + numRanges[inSet - stateTransSet.data] += 1; + } + + /* Find the max number of ranges. */ + RedTrans *maxTrans = 0; + int maxNumRanges = 0; + for ( RedTransPtrSet::Iter rtel = stateTransSet; rtel.lte(); rtel++ ) { + if ( numRanges[rtel.pos()] > maxNumRanges ) { + maxNumRanges = numRanges[rtel.pos()]; + maxTrans = *rtel; + } + } + + delete[] numRanges; + return maxTrans; +} + +void RedFsm::chooseDefaultNumRanges() +{ + /* Loop the states. */ + for ( RedStateList::Iter st = stateList; st.lte(); st++ ) { + /* Pick a default transition. */ + RedTrans *defTrans = chooseDefaultNumRanges( st ); + + /* Rewrite the transition list taking out the transition we picked + * as the default and store the default. */ + moveToDefault( defTrans, st ); + } +} + +RedTrans *RedFsm::getErrorTrans( ) +{ + /* If the error trans has not been made aready, make it. */ + if ( errTrans == 0 ) { + /* This insert should always succeed since no transition created by + * the user can point to the error state. */ + errTrans = new RedTrans( getErrorState(), 0, nextTransId++ ); + RedTrans *inRes = transSet.insert( errTrans ); + assert( inRes != 0 ); + } + return errTrans; +} + +RedState *RedFsm::getErrorState() +{ + /* Something went wrong. An error state is needed but one was not supplied + * by the frontend. 
*/ + assert( errState != 0 ); + return errState; +} + + +RedTrans *RedFsm::allocateTrans( RedState *targ, RedAction *action ) +{ + /* Create a reduced trans and look for it in the transiton set. */ + RedTrans redTrans( targ, action, 0 ); + RedTrans *inDict = transSet.find( &redTrans ); + if ( inDict == 0 ) { + inDict = new RedTrans( targ, action, nextTransId++ ); + transSet.insert( inDict ); + } + return inDict; +} + +void RedFsm::partitionFsm( int nparts ) +{ + /* At this point the states are ordered by a depth-first traversal. We + * will allocate to partitions based on this ordering. */ + this->nParts = nparts; + int partSize = stateList.length() / nparts; + int remainder = stateList.length() % nparts; + int numInPart = partSize; + int partition = 0; + if ( remainder-- > 0 ) + numInPart += 1; + for ( RedStateList::Iter st = stateList; st.lte(); st++ ) { + st->partition = partition; + + numInPart -= 1; + if ( numInPart == 0 ) { + partition += 1; + numInPart = partSize; + if ( remainder-- > 0 ) + numInPart += 1; + } + } +} + +void RedFsm::setInTrans() +{ + /* First pass counts the number of transitions. */ + for ( RedTransSet::Iter trans = transSet; trans.lte(); trans++ ) + trans->targ->numInTrans += 1; + + /* Pass over states to allocate the needed memory. Reset the counts so we + * can use them as the current size. */ + for ( RedStateList::Iter st = stateList; st.lte(); st++ ) { + st->inTrans = new RedTrans*[st->numInTrans]; + st->numInTrans = 0; + } + + /* Second pass over transitions copies pointers into the in trans list. 
*/ + for ( RedTransSet::Iter trans = transSet; trans.lte(); trans++ ) + trans->targ->inTrans[trans->targ->numInTrans++] = trans; +} + +GenCondSpace *RedFsm::findCondSpace( Key lowKey, Key highKey ) +{ + for ( CondSpaceList::Iter cs = condSpaceList; cs.lte(); cs++ ) { + Key csHighKey = cs->baseKey; + csHighKey += keyOps->alphSize() * (1 << cs->condSet.length()); + + if ( lowKey >= cs->baseKey && highKey <= csHighKey ) + return cs; + } + return 0; +} + +Condition *RedFsm::findCondition( Key key ) +{ + for ( ConditionList::Iter cond = conditionList; cond.lte(); cond++ ) { + Key upperKey = cond->baseKey + (1 << cond->condSet.length()); + if ( cond->baseKey <= key && key <= upperKey ) + return cond; + } + return 0; +} + +void RedFsm::setValueLimits() +{ + maxSingleLen = 0; + maxRangeLen = 0; + maxKeyOffset = 0; + maxIndexOffset = 0; + maxActListId = 0; + maxActionLoc = 0; + maxActArrItem = 0; + maxSpan = 0; + maxCondSpan = 0; + maxFlatIndexOffset = 0; + maxCondOffset = 0; + maxCondLen = 0; + maxCondSpaceId = 0; + maxCondIndexOffset = 0; + + /* In both of these cases the 0 index is reserved for no value, so the max + * is one more than it would be if they started at 0. */ + maxIndex = transSet.length(); + maxCond = condSpaceList.length(); + + /* The nextStateId - 1 is the last state id assigned. */ + maxState = nextStateId - 1; + + for ( CondSpaceList::Iter csi = condSpaceList; csi.lte(); csi++ ) { + if ( csi->condSpaceId > maxCondSpaceId ) + maxCondSpaceId = csi->condSpaceId; + } + + for ( RedStateList::Iter st = stateList; st.lte(); st++ ) { + /* Maximum cond length. */ + if ( st->stateCondList.length() > maxCondLen ) + maxCondLen = st->stateCondList.length(); + + /* Maximum single length. */ + if ( st->outSingle.length() > maxSingleLen ) + maxSingleLen = st->outSingle.length(); + + /* Maximum range length. 
*/ + if ( st->outRange.length() > maxRangeLen ) + maxRangeLen = st->outRange.length(); + + /* The key offset index offset for the state after last is not used, skip it.. */ + if ( ! st.last() ) { + maxCondOffset += st->stateCondList.length(); + maxKeyOffset += st->outSingle.length() + st->outRange.length()*2; + maxIndexOffset += st->outSingle.length() + st->outRange.length() + 1; + } + + /* Max cond span. */ + if ( st->condList != 0 ) { + unsigned long long span = keyOps->span( st->condLowKey, st->condHighKey ); + if ( span > maxCondSpan ) + maxCondSpan = span; + } + + /* Max key span. */ + if ( st->transList != 0 ) { + unsigned long long span = keyOps->span( st->lowKey, st->highKey ); + if ( span > maxSpan ) + maxSpan = span; + } + + /* Max cond index offset. */ + if ( ! st.last() ) { + if ( st->condList != 0 ) + maxCondIndexOffset += keyOps->span( st->condLowKey, st->condHighKey ); + } + + /* Max flat index offset. */ + if ( ! st.last() ) { + if ( st->transList != 0 ) + maxFlatIndexOffset += keyOps->span( st->lowKey, st->highKey ); + maxFlatIndexOffset += 1; + } + } + + for ( GenActionTableMap::Iter at = actionMap; at.lte(); at++ ) { + /* Maximum id of action lists. */ + if ( at->actListId+1 > maxActListId ) + maxActListId = at->actListId+1; + + /* Maximum location of items in action array. */ + if ( at->location+1 > maxActionLoc ) + maxActionLoc = at->location+1; + + /* Maximum values going into the action array. */ + if ( at->key.length() > maxActArrItem ) + maxActArrItem = at->key.length(); + for ( GenActionTable::Iter item = at->key; item.lte(); item++ ) { + if ( item->value->actionId > maxActArrItem ) + maxActArrItem = item->value->actionId; + } + } +} + +void RedFsm::findFinalActionRefs() +{ + for ( RedStateList::Iter st = stateList; st.lte(); st++ ) { + /* Rerence count out of single transitions. 
*/ + for ( RedTransList::Iter rtel = st->outSingle; rtel.lte(); rtel++ ) { + if ( rtel->value->action != 0 ) { + rtel->value->action->numTransRefs += 1; + for ( GenActionTable::Iter item = rtel->value->action->key; item.lte(); item++ ) + item->value->numTransRefs += 1; + } + } + + /* Reference count out of range transitions. */ + for ( RedTransList::Iter rtel = st->outRange; rtel.lte(); rtel++ ) { + if ( rtel->value->action != 0 ) { + rtel->value->action->numTransRefs += 1; + for ( GenActionTable::Iter item = rtel->value->action->key; item.lte(); item++ ) + item->value->numTransRefs += 1; + } + } + + /* Reference count default transition. */ + if ( st->defTrans != 0 && st->defTrans->action != 0 ) { + st->defTrans->action->numTransRefs += 1; + for ( GenActionTable::Iter item = st->defTrans->action->key; item.lte(); item++ ) + item->value->numTransRefs += 1; + } + + /* Reference count to state actions. */ + if ( st->toStateAction != 0 ) { + st->toStateAction->numToStateRefs += 1; + for ( GenActionTable::Iter item = st->toStateAction->key; item.lte(); item++ ) + item->value->numToStateRefs += 1; + } + + /* Reference count from state actions. */ + if ( st->fromStateAction != 0 ) { + st->fromStateAction->numFromStateRefs += 1; + for ( GenActionTable::Iter item = st->fromStateAction->key; item.lte(); item++ ) + item->value->numFromStateRefs += 1; + } + + /* Reference count EOF actions. */ + if ( st->eofAction != 0 ) { + st->eofAction->numEofRefs += 1; + for ( GenActionTable::Iter item = st->eofAction->key; item.lte(); item++ ) + item->value->numEofRefs += 1; + } + } +} + +void RedFsm::analyzeAction( GenAction *act, InlineList *inlineList ) +{ + for ( InlineList::Iter item = *inlineList; item.lte(); item++ ) { + /* Check for various things in regular actions. 
*/ + if ( act->numTransRefs > 0 || act->numToStateRefs > 0 || + act->numFromStateRefs > 0 || act->numEofRefs > 0 ) + { + if ( item->type == InlineItem::LmSwitch && + item->tokenRegion->lmSwitchHandlesError ) + { + bAnyLmSwitchError = true; + } + } + + if ( item->children != 0 ) + analyzeAction( act, item->children ); + } +} + +void RedFsm::analyzeActionList( RedAction *redAct, InlineList *inlineList ) +{ + for ( InlineList::Iter item = *inlineList; item.lte(); item++ ) { + if ( item->children != 0 ) + analyzeActionList( redAct, item->children ); + } +} + +/* Assign ids to referenced actions. */ +void RedFsm::assignActionIds() +{ + int nextActionId = 0; + for ( GenActionList::Iter act = genActionList; act.lte(); act++ ) { + /* Only ever interested in referenced actions. */ + if ( numRefs( act ) > 0 ) + act->actionId = nextActionId++; + } +} + +/* Gather various info on the machine. */ +void RedFsm::analyzeMachine() +{ + /* Find the true count of action references. */ + findFinalActionRefs(); + + /* Check if there are any calls in action code. */ + for ( GenActionList::Iter act = genActionList; act.lte(); act++ ) { + /* Record the occurrence of various kinds of actions. */ + if ( act->numToStateRefs > 0 ) + bAnyToStateActions = true; + if ( act->numFromStateRefs > 0 ) + bAnyFromStateActions = true; + if ( act->numEofRefs > 0 ) + bAnyEofActions = true; + if ( act->numTransRefs > 0 ) + bAnyRegActions = true; + + /* Recurse through the action's parse tree looking for various things. */ + analyzeAction( act, act->inlineList ); + } + + /* Analyze reduced action lists. */ + for ( GenActionTableMap::Iter redAct = actionMap; redAct.lte(); redAct++ ) { + for ( GenActionTable::Iter act = redAct->key; act.lte(); act++ ) + analyzeActionList( redAct, act->value->inlineList ); + } + + /* Find states that have transitions with actions that have next + * statements. */ + for ( RedStateList::Iter st = stateList; st.lte(); st++ ) { + /* Check any actions out of outSinge. 
*/ + for ( RedTransList::Iter rtel = st->outSingle; rtel.lte(); rtel++ ) { + if ( rtel->value->action != 0 && rtel->value->action->anyCurStateRef() ) + st->bAnyRegCurStateRef = true; + } + + /* Check any actions out of outRange. */ + for ( RedTransList::Iter rtel = st->outRange; rtel.lte(); rtel++ ) { + if ( rtel->value->action != 0 && rtel->value->action->anyCurStateRef() ) + st->bAnyRegCurStateRef = true; + } + + /* Check any action out of default. */ + if ( st->defTrans != 0 && st->defTrans->action != 0 && + st->defTrans->action->anyCurStateRef() ) + st->bAnyRegCurStateRef = true; + + if ( st->stateCondList.length() > 0 ) + bAnyConditions = true; + } + + /* Assign ids to actions that are referenced. */ + assignActionIds(); + + /* Set the maximums of various values used for deciding types. */ + setValueLimits(); +} + +int transAction( RedTrans *trans ) +{ + int retAct = 0; + if ( trans->action != 0 ) + retAct = trans->action->location+1; + return retAct; +} + +int toStateAction( RedState *state ) +{ + int act = 0; + if ( state->toStateAction != 0 ) + act = state->toStateAction->location+1; + return act; +} + +int fromStateAction( RedState *state ) +{ + int act = 0; + if ( state->fromStateAction != 0 ) + act = state->fromStateAction->location+1; + return act; +} + +int eofAction( RedState *state ) +{ + int act = 0; + if ( state->eofAction != 0 ) + act = state->eofAction->location+1; + return act; +} + + +FsmTables *RedFsm::makeFsmTables() +{ + /* The fsm runtime needs states sorted by id. 
*/ + sortByStateId(); + + int pos, curKeyOffset, curIndOffset; + FsmTables *fsmTables = new FsmTables; + fsmTables->numStates = stateList.length(); + + /* + * actions + */ + + fsmTables->numActions = 1; + for ( GenActionTableMap::Iter act = actionMap; act.lte(); act++ ) + fsmTables->numActions += 1 + act->key.length(); + + pos = 0; + fsmTables->actions = new long[fsmTables->numActions]; + fsmTables->actions[pos++] = 0; + for ( GenActionTableMap::Iter act = actionMap; act.lte(); act++ ) { + fsmTables->actions[pos++] = act->key.length(); + for ( GenActionTable::Iter item = act->key; item.lte(); item++ ) + fsmTables->actions[pos++] = item->value->actionId; + } + + /* + * keyOffset + */ + pos = 0, curKeyOffset = 0; + fsmTables->keyOffsets = new long[fsmTables->numStates]; + for ( RedStateList::Iter st = stateList; st.lte(); st++ ) { + /* Store the current offset. */ + fsmTables->keyOffsets[pos++] = curKeyOffset; + + /* Move the key offset ahead. */ + curKeyOffset += st->outSingle.length() + st->outRange.length()*2; + } + + /* + * transKeys + */ + fsmTables->numTransKeys = 0; + for ( RedStateList::Iter st = stateList; st.lte(); st++ ) { + fsmTables->numTransKeys += st->outSingle.length(); + fsmTables->numTransKeys += 2 * st->outRange.length(); + } + + pos = 0; + fsmTables->transKeys = new char[fsmTables->numTransKeys]; + for ( RedStateList::Iter st = stateList; st.lte(); st++ ) { + for ( RedTransList::Iter stel = st->outSingle; stel.lte(); stel++ ) + fsmTables->transKeys[pos++] = stel->lowKey.getVal(); + for ( RedTransList::Iter rtel = st->outRange; rtel.lte(); rtel++ ) { + fsmTables->transKeys[pos++] = rtel->lowKey.getVal(); + fsmTables->transKeys[pos++] = rtel->highKey.getVal(); + } + } + + /* + * singleLengths + */ + pos = 0; + fsmTables->singleLengths = new long[fsmTables->numStates]; + for ( RedStateList::Iter st = stateList; st.lte(); st++ ) + fsmTables->singleLengths[pos++] = st->outSingle.length(); + + /* + * rangeLengths + */ + pos = 0; + 
fsmTables->rangeLengths = new long[fsmTables->numStates]; + for ( RedStateList::Iter st = stateList; st.lte(); st++ ) + fsmTables->rangeLengths[pos++] = st->outRange.length(); + + /* + * indexOffsets + */ + pos = 0, curIndOffset = 0; + fsmTables->indexOffsets = new long[fsmTables->numStates]; + for ( RedStateList::Iter st = stateList; st.lte(); st++ ) { + fsmTables->indexOffsets[pos++] = curIndOffset; + + curIndOffset += st->outSingle.length() + st->outRange.length(); + if ( st->defTrans != 0 ) + curIndOffset += 1; + } + + /* + * transTargsWI + */ + fsmTables->numTransTargsWI = 0; + for ( RedStateList::Iter st = stateList; st.lte(); st++ ) { + fsmTables->numTransTargsWI += st->outSingle.length(); + fsmTables->numTransTargsWI += st->outRange.length(); + if ( st->defTrans != 0 ) + fsmTables->numTransTargsWI += 1; + } + + pos = 0; + fsmTables->transTargsWI = new long[fsmTables->numTransTargsWI]; + for ( RedStateList::Iter st = stateList; st.lte(); st++ ) { + for ( RedTransList::Iter stel = st->outSingle; stel.lte(); stel++ ) + fsmTables->transTargsWI[pos++] = stel->value->targ->id; + + for ( RedTransList::Iter rtel = st->outRange; rtel.lte(); rtel++ ) + fsmTables->transTargsWI[pos++] = rtel->value->targ->id; + + if ( st->defTrans != 0 ) + fsmTables->transTargsWI[pos++] = st->defTrans->targ->id; + } + + /* + * transActionsWI + */ + fsmTables->numTransActionsWI = 0; + for ( RedStateList::Iter st = stateList; st.lte(); st++ ) { + fsmTables->numTransActionsWI += st->outSingle.length(); + fsmTables->numTransActionsWI += st->outRange.length(); + if ( st->defTrans != 0 ) + fsmTables->numTransActionsWI += 1; + } + + pos = 0; + fsmTables->transActionsWI = new long[fsmTables->numTransActionsWI]; + for ( RedStateList::Iter st = stateList; st.lte(); st++ ) { + for ( RedTransList::Iter stel = st->outSingle; stel.lte(); stel++ ) + fsmTables->transActionsWI[pos++] = transAction( stel->value ); + + for ( RedTransList::Iter rtel = st->outRange; rtel.lte(); rtel++ ) + 
fsmTables->transActionsWI[pos++] = transAction( rtel->value ); + + if ( st->defTrans != 0 ) + fsmTables->transActionsWI[pos++] = transAction( st->defTrans ); + } + + /* + * toStateActions + */ + pos = 0; + fsmTables->toStateActions = new long[fsmTables->numStates]; + for ( RedStateList::Iter st = stateList; st.lte(); st++ ) + fsmTables->toStateActions[pos++] = toStateAction( st ); + + /* + * fromStateActions + */ + pos = 0; + fsmTables->fromStateActions = new long[fsmTables->numStates]; + for ( RedStateList::Iter st = stateList; st.lte(); st++ ) + fsmTables->fromStateActions[pos++] = fromStateAction( st ); + + /* + * eofActions + */ + pos = 0; + fsmTables->eofActions = new long[fsmTables->numStates]; + for ( RedStateList::Iter st = stateList; st.lte(); st++ ) + fsmTables->eofActions[pos++] = eofAction( st ); + + /* + * eofTargs + */ + pos = 0; + fsmTables->eofTargs = new long[fsmTables->numStates]; + for ( RedStateList::Iter st = stateList; st.lte(); st++ ) { + int targ = -1; + if ( st->eofTrans != 0 ) + targ = st->eofTrans->targ->id; + fsmTables->eofTargs[pos++] = targ; + } + + /* Start state. */ + fsmTables->startState = startState->id; + + /* First final state. */ + fsmTables->firstFinal = ( firstFinState != 0 ) ? + firstFinState->id : nextStateId; + + /* The error state. */ + fsmTables->errorState = ( errState != 0 ) ? + errState->id : -1; + + /* The array pointing to actions. */ + pos = 0; + fsmTables->numActionSwitch = genActionList.length(); + fsmTables->actionSwitch = new GenAction*[fsmTables->numActionSwitch]; + for ( GenActionList::Iter act = genActionList; act.lte(); act++ ) + fsmTables->actionSwitch[pos++] = act; + + /* + * entryByRegion + */ + + fsmTables->numRegions = regionToEntry.length()+1; + fsmTables->entryByRegion = new long[fsmTables->numRegions]; + fsmTables->entryByRegion[0] = fsmTables->errorState; + + pos = 1; + for ( RegionToEntry::Iter en = regionToEntry; en.lte(); en++ ) { + /* Find the entry state from the entry id. 
*/ + RedEntryMapEl *entryMapEl = redEntryMap.find( *en ); + + /* Save it off. */ + fsmTables->entryByRegion[pos++] = entryMapEl != 0 ? entryMapEl->value + : fsmTables->errorState; + } + + return fsmTables; +} + + diff --git a/src/redfsm.h b/src/redfsm.h new file mode 100644 index 00000000..39b98d5f --- /dev/null +++ b/src/redfsm.h @@ -0,0 +1,524 @@ +/* + * Copyright 2006-2012 Adrian Thurston <thurston@complang.org> + */ + +/* This file is part of Colm. + * + * Colm is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Colm is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Colm; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _REDFSM_H +#define _REDFSM_H + +#include <assert.h> +#include <string.h> +#include <string> +#include "keyops.h" +#include "vector.h" +#include "dlist.h" +#include "compare.h" +#include "bstmap.h" +#include "bstset.h" +#include "avlmap.h" +#include "avltree.h" +#include "avlbasic.h" +#include "mergesort.h" +#include "sbstmap.h" +#include "sbstset.h" +#include "sbsttable.h" +#include "global.h" +#include "pdarun.h" + +#define TRANS_ERR_TRANS 0 +#define STATE_ERR_STATE 0 +#define FUNC_NO_FUNC 0 + +using std::string; + +struct RedState; +struct InlineList; +struct Compiler; +struct ObjField; + +/* Element in list of actions. Contains the string for the code to exectute. */ +struct GenAction +{ + /* Data collected during parse. 
*/ + InputLoc loc; + char *name; + InlineList *inlineList; + int actionId; + MarkType markType; + ObjField *objField; + long markId; + + int numTransRefs; + int numToStateRefs; + int numFromStateRefs; + int numEofRefs; + + GenAction *prev, *next; +}; + +typedef DList<GenAction> GenActionList; +string nameOrLoc( GenAction *genAction ); + +/* Number of references in the final machine. */ +inline int numRefs( GenAction *genAction ) +{ + return genAction->numTransRefs + + genAction->numToStateRefs + + genAction->numFromStateRefs + + genAction->numEofRefs; +} + + +/* Forwards. */ +struct RedState; +struct FsmState; + +/* Transistion GenAction Element. */ +typedef SBstMapEl< int, GenAction* > GenActionTableEl; + +/* Transition GenAction Table. */ +struct GenActionTable + : public SBstMap< int, GenAction*, CmpOrd<int> > +{ + void setAction( int ordering, GenAction *action ); + void setActions( int *orderings, GenAction **actions, int nActs ); + void setActions( const GenActionTable &other ); +}; + +/* Compare of a whole action table element (key & value). */ +struct GenCmpActionTableEl +{ + static int compare( const GenActionTableEl &action1, + const GenActionTableEl &action2 ) + { + if ( action1.key < action2.key ) + return -1; + else if ( action1.key > action2.key ) + return 1; + else if ( action1.value < action2.value ) + return -1; + else if ( action1.value > action2.value ) + return 1; + return 0; + } +}; + +/* Compare for GenActionTable. */ +typedef CmpSTable< GenActionTableEl, GenCmpActionTableEl > GenCmpActionTable; + +/* Set of states. */ +typedef BstSet<RedState*> RedStateSet; +typedef BstSet<int> IntSet; + +/* Reduced action. 
*/ +struct RedAction +: + public AvlTreeEl<RedAction> +{ + /* Every scalar member gets a defined starting value. The initializers are + * listed in member declaration order. */ + RedAction( ) + : + key(), + actListId(0), + location(0), + eofRefs(0), + numTransRefs(0), + numToStateRefs(0), + numFromStateRefs(0), + numEofRefs(0), + bAnyNextStmt(false), + bAnyCurStateRef(false), + bAnyBreakStmt(false) + { } + + /* The action table this element is keyed on. */ + const GenActionTable &getKey() + { return key; } + + GenActionTable key; + int actListId; + int location; + IntSet *eofRefs; + + /* Number of references in the final machine. Returned as an int so the + * count is not collapsed to true/false; it still converts to bool for + * callers that only test for non-zero. */ + int numRefs() + { return numTransRefs + numToStateRefs + numFromStateRefs + numEofRefs; } + int numTransRefs; + int numToStateRefs; + int numFromStateRefs; + int numEofRefs; + + bool anyNextStmt() { return bAnyNextStmt; } + bool anyCurStateRef() { return bAnyCurStateRef; } + bool anyBreakStmt() { return bAnyBreakStmt; } + + bool bAnyNextStmt; + bool bAnyCurStateRef; + bool bAnyBreakStmt; +}; +typedef AvlTree<RedAction, GenActionTable, GenCmpActionTable> GenActionTableMap; + +/* Reduced transition. */ +struct RedTrans +: + public AvlTreeEl<RedTrans> +{ + /* partitionBoundary starts out false so it is never read uninitialized. */ + RedTrans( RedState *targ, RedAction *action, int id ) + : targ(targ), action(action), id(id), + partitionBoundary(false), labelNeeded(true) { } + + RedState *targ; + RedAction *action; + int id; + bool partitionBoundary; + bool labelNeeded; +}; + +/* Compare of transitions for the final reduction of transitions. Comparison + * is on target and the pointer to the shared action table. It is assumed that + * when this is used the action tables have been reduced. */ +struct CmpRedTrans +{ + static int compare( const RedTrans &t1, const RedTrans &t2 ) + { + if ( t1.targ < t2.targ ) + return -1; + else if ( t1.targ > t2.targ ) + return 1; + else if ( t1.action < t2.action ) + return -1; + else if ( t1.action > t2.action ) + return 1; + else + return 0; + } +}; + +typedef AvlBasic<RedTrans, CmpRedTrans> RedTransSet; + +/* Element in out range. */ +struct RedTransEl +{ + /* Constructors. 
*/ + RedTransEl( Key lowKey, Key highKey, RedTrans *value ) + : lowKey(lowKey), highKey(highKey), value(value) { } + + Key lowKey, highKey; + RedTrans *value; +}; + +typedef Vector<RedTransEl> RedTransList; +typedef Vector<RedState*> RedStateVect; + +typedef BstMapEl<RedState*, unsigned long long> RedSpanMapEl; +typedef BstMap<RedState*, unsigned long long> RedSpanMap; + +/* Compare used by span map sort. Reverse sorts by the span. */ +struct CmpRedSpanMapEl +{ + static int compare( const RedSpanMapEl &smel1, const RedSpanMapEl &smel2 ) + { + if ( smel1.value > smel2.value ) + return -1; + else if ( smel1.value < smel2.value ) + return 1; + else + return 0; + } +}; + +/* Sorting state-span map entries by span. */ +typedef MergeSort<RedSpanMapEl, CmpRedSpanMapEl> RedSpanMapSort; + +/* Set of entry ids that go into this state. */ +typedef Vector<int> EntryIdVect; +typedef Vector<char*> EntryNameVect; + +/* Maps entry ids (defined by the frontend, to reduced state ids. */ +typedef BstMap<int, int> RedEntryMap; +typedef BstMapEl<int, int> RedEntryMapEl; + +typedef Vector<int> RegionToEntry; + +typedef Vector< GenAction* > GenCondSet; + +struct Condition +{ + Condition( ) + : key(0), baseKey(0) {} + + Key key; + Key baseKey; + GenCondSet condSet; + + Condition *next, *prev; +}; +typedef DList<Condition> ConditionList; + +struct GenCondSpace +{ + Key baseKey; + GenCondSet condSet; + int condSpaceId; + + GenCondSpace *next, *prev; +}; +typedef DList<GenCondSpace> CondSpaceList; + +struct GenStateCond +{ + Key lowKey; + Key highKey; + + GenCondSpace *condSpace; + + GenStateCond *prev, *next; +}; +typedef DList<GenStateCond> GenStateCondList; +typedef Vector<GenStateCond*> StateCondVect; + +/* Reduced state. 
*/ +struct RedState +{ + RedState() + : + defTrans(0), + condList(0), + transList(0), + isFinal(false), + labelNeeded(false), + outNeeded(false), + onStateList(false), + toStateAction(0), + fromStateAction(0), + eofAction(0), + eofTrans(0), + id(0), + bAnyRegCurStateRef(false), + partitionBoundary(false), + inTrans(0), + numInTrans(0) + { } + + /* Transitions out. */ + RedTransList outSingle; + RedTransList outRange; + RedTrans *defTrans; + + /* For flat conditions. */ + Key condLowKey, condHighKey; + GenCondSpace **condList; + + /* For flat keys. */ + Key lowKey, highKey; + RedTrans **transList; + + /* The list of states that transitions from this state go to. */ + RedStateVect targStates; + + bool isFinal; + bool labelNeeded; + bool outNeeded; + bool onStateList; + RedAction *toStateAction; + RedAction *fromStateAction; + RedAction *eofAction; + RedTrans *eofTrans; + int id; + GenStateCondList stateCondList; + StateCondVect stateCondVect; + + /* Pointers for the list of states. */ + RedState *prev, *next; + + bool anyRegCurStateRef() { return bAnyRegCurStateRef; } + bool bAnyRegCurStateRef; + + int partition; + bool partitionBoundary; + + RedTrans **inTrans; + int numInTrans; +}; + +/* List of states. */ +typedef DList<RedState> RedStateList; + +/* Set of reduced transitons. Comparison is by pointer. */ +typedef BstSet< RedTrans*, CmpOrd<RedTrans*> > RedTransPtrSet; + +/* Next version of the fsm machine. */ +struct RedFsm +{ + RedFsm(); + + bool wantComplete; + bool forcedErrorState; + + int nextActionId; + int nextTransId; + + /* Next State Id doubles as the total number of state ids. 
*/ + int nextStateId; + + RedTransSet transSet; + GenActionTableMap actionMap; + RedStateList stateList; + RedStateSet entryPoints; + RedState *startState; + RedState *errState; + RedTrans *errTrans; + RedTrans *errActionTrans; + RedState *firstFinState; + int numFinStates; + int nParts; + + GenAction *allActions; + RedAction *allActionTables; + Condition *allConditions; + GenCondSpace *allCondSpaces; + RedState *allStates; + GenActionList genActionList; + ConditionList conditionList; + CondSpaceList condSpaceList; + EntryIdVect entryPointIds; + EntryNameVect entryPointNames; + RedEntryMap redEntryMap; + RegionToEntry regionToEntry; + + bool bAnyToStateActions; + bool bAnyFromStateActions; + bool bAnyRegActions; + bool bAnyEofActions; + bool bAnyActionGotos; + bool bAnyActionCalls; + bool bAnyActionRets; + bool bAnyRegActionRets; + bool bAnyRegActionByValControl; + bool bAnyRegNextStmt; + bool bAnyRegCurStateRef; + bool bAnyRegBreak; + bool bAnyLmSwitchError; + bool bAnyConditions; + + int maxState; + int maxSingleLen; + int maxRangeLen; + int maxKeyOffset; + int maxIndexOffset; + int maxIndex; + int maxActListId; + int maxActionLoc; + int maxActArrItem; + unsigned long long maxSpan; + unsigned long long maxCondSpan; + int maxFlatIndexOffset; + Key maxKey; + int maxCondOffset; + int maxCondLen; + int maxCondSpaceId; + int maxCondIndexOffset; + int maxCond; + + bool anyActions(); + bool anyToStateActions() { return bAnyToStateActions; } + bool anyFromStateActions() { return bAnyFromStateActions; } + bool anyRegActions() { return bAnyRegActions; } + bool anyEofActions() { return bAnyEofActions; } + bool anyActionGotos() { return bAnyActionGotos; } + bool anyActionCalls() { return bAnyActionCalls; } + bool anyActionRets() { return bAnyActionRets; } + bool anyRegActionRets() { return bAnyRegActionRets; } + bool anyRegActionByValControl() { return bAnyRegActionByValControl; } + bool anyRegNextStmt() { return bAnyRegNextStmt; } + bool anyRegCurStateRef() { return 
bAnyRegCurStateRef; } + bool anyRegBreak() { return bAnyRegBreak; } + bool anyLmSwitchError() { return bAnyLmSwitchError; } + bool anyConditions() { return bAnyConditions; } + + GenCondSpace *findCondSpace( Key lowKey, Key highKey ); + Condition *findCondition( Key key ); + + /* Is is it possible to extend a range by bumping ranges that span only + * one character to the singles array. */ + bool canExtend( const RedTransList &list, int pos ); + + /* Pick single transitions from the ranges. */ + void moveTransToSingle( RedState *state ); + void chooseSingle(); + + void makeFlat(); + + /* Move a selected transition from ranges to default. */ + void moveToDefault( RedTrans *defTrans, RedState *state ); + + /* Pick a default transition by largest span. */ + RedTrans *chooseDefaultSpan( RedState *state ); + void chooseDefaultSpan(); + + /* Pick a default transition by most number of ranges. */ + RedTrans *chooseDefaultNumRanges( RedState *state ); + void chooseDefaultNumRanges(); + + /* Pick a default transition tailored towards goto driven machine. */ + RedTrans *chooseDefaultGoto( RedState *state ); + void chooseDefaultGoto(); + + /* Ordering states by transition connections. */ + void optimizeStateOrdering( RedState *state ); + void optimizeStateOrdering(); + + /* Ordering states by transition connections. */ + void depthFirstOrdering( RedState *state ); + void depthFirstOrdering(); + + /* Set state ids. */ + void sequentialStateIds(); + void sortStateIdsByFinal(); + + /* Arrange states in by final id. This is a stable sort. */ + void sortStatesByFinal(); + + /* Sorting states by id. */ + void sortByStateId(); + + /* Locating the first final state. This is the final state with the lowest + * id. */ + void findFirstFinState(); + + void assignActionLocs(); + + RedTrans *getErrorTrans(); + RedState *getErrorState(); + + /* Is every char in the alphabet covered? 
*/ + bool alphabetCovered( RedTransList &outRange ); + + RedTrans *allocateTrans( RedState *targState, RedAction *actionTable ); + + void partitionFsm( int nParts ); + + void setInTrans(); + void setValueLimits(); + void assignActionIds(); + void analyzeActionList( RedAction *redAct, InlineList *inlineList ); + void analyzeAction( GenAction *act, InlineList *inlineList ); + void findFinalActionRefs(); + void analyzeMachine(); + + FsmTables *makeFsmTables(); +}; + + +#endif /* _REDFSM_H */ diff --git a/src/resolve.cc b/src/resolve.cc new file mode 100644 index 00000000..a661e68e --- /dev/null +++ b/src/resolve.cc @@ -0,0 +1,805 @@ +/* + * Copyright 2009-2012 Adrian Thurston <thurston@complang.org> + */ + +/* This file is part of Colm. + * + * Colm is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Colm is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Colm; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "bytecode.h" +#include "parsedata.h" +#include "fsmrun.h" +#include <iostream> +#include <assert.h> + +using std::cout; +using std::cerr; +using std::endl; + +UniqueType *TypeRef::lookupTypeName( Compiler *pd ) +{ + /* Lookup up the qualifiction and then the name. */ + nspace = nspaceQual->getQual( pd ); + + if ( nspace == 0 ) + error(loc) << "do not have region for resolving reference" << endp; + + while ( nspace != 0 ) { + /* Search for the token in the region by typeName. 
*/ + TypeMapEl *inDict = nspace->typeMap.find( typeName ); + + if ( inDict != 0 ) { + switch ( inDict->type ) { + /* Defer to the typeRef we are an alias of. We need to guard against loops here. */ + case TypeMapEl::TypeAliasType: + return inDict->typeRef->lookupType( pd ); + + case TypeMapEl::LangElType: + return pd->findUniqueType( TYPE_TREE, inDict->value ); + } + } + + nspace = nspace->parentNamespace; + } + + error(loc) << "unknown type in typeof expression" << endp; + return 0; +} + +UniqueType *TypeRef::lookupTypeLiteral( Compiler *pd ) +{ + /* Lookup up the qualifiction and then the name. */ + nspace = nspaceQual->getQual( pd ); + + if ( nspace == 0 ) + error(loc) << "do not have region for resolving reference" << endp; + + /* Interpret escape sequences and remove quotes. */ + bool unusedCI; + String interp; + prepareLitString( interp, unusedCI, pdaLiteral->token.data, + pdaLiteral->token.loc ); + + while ( nspace != 0 ) { + LiteralDictEl *ldel = nspace->literalDict.find( interp ); + + if ( ldel != 0 ) + return pd->findUniqueType( TYPE_TREE, ldel->value->tdLangEl ); + + nspace = nspace->parentNamespace; + } + + error(loc) << "unknown type in typeof expression" << endp; + return 0; +} + +UniqueType *TypeRef::lookupTypeMap( Compiler *pd ) +{ + /* Lookup up the qualifiction and then the name. */ + nspace = nspaceQual->getQual( pd ); + + UniqueType *utKey = typeRef1->lookupType( pd ); + UniqueType *utValue = typeRef2->lookupType( pd ); + + UniqueMap searchKey( utKey, utValue ); + UniqueMap *inMap = pd->uniqueMapMap.find( &searchKey ); + if ( inMap == 0 ) { + inMap = new UniqueMap( utKey, utValue ); + pd->uniqueMapMap.insert( inMap ); + + /* FIXME: Need uniqe name allocator for types. 
*/ + static int mapId = 0; + String name( 36, "__map%d", mapId++ ); + + GenericType *generic = new GenericType( name, GEN_MAP, + pd->nextGenericId++, 0/*langEl*/, typeRef2 ); + generic->keyTypeArg = typeRef1; + + nspace->genericList.append( generic ); + + generic->declare( pd, nspace ); + + inMap->generic = generic; + } + + generic = inMap->generic; + return pd->findUniqueType( TYPE_TREE, inMap->generic->langEl ); +} + +UniqueType *TypeRef::lookupTypeList( Compiler *pd ) +{ + /* Lookup up the qualifiction and then the name. */ + nspace = nspaceQual->getQual( pd ); + + UniqueType *utValue = typeRef1->lookupType( pd ); + + UniqueList searchKey( utValue ); + UniqueList *inMap = pd->uniqueListMap.find( &searchKey ); + if ( inMap == 0 ) { + inMap = new UniqueList( utValue ); + pd->uniqueListMap.insert( inMap ); + + /* FIXME: Need uniqe name allocator for types. */ + static int listId = 0; + String name( 36, "__list%d", listId++ ); + + GenericType *generic = new GenericType( name, GEN_LIST, + pd->nextGenericId++, 0/*langEl*/, typeRef1 ); + + nspace->genericList.append( generic ); + + generic->declare( pd, nspace ); + + inMap->generic = generic; + } + + generic = inMap->generic; + return pd->findUniqueType( TYPE_TREE, inMap->generic->langEl ); +} + +UniqueType *TypeRef::lookupTypeVector( Compiler *pd ) +{ + /* Lookup up the qualifiction and then the name. */ + nspace = nspaceQual->getQual( pd ); + + UniqueType *utValue = typeRef1->lookupType( pd ); + + UniqueVector searchKey( utValue ); + UniqueVector *inMap = pd->uniqueVectorMap.find( &searchKey ); + if ( inMap == 0 ) { + inMap = new UniqueVector( utValue ); + pd->uniqueVectorMap.insert( inMap ); + + /* FIXME: Need uniqe name allocator for types. 
*/ + static int vectorId = 0; + String name( 36, "__vector%d", vectorId++ ); + + GenericType *generic = new GenericType( name, GEN_VECTOR, + pd->nextGenericId++, 0/*langEl*/, typeRef1 ); + + nspace->genericList.append( generic ); + + generic->declare( pd, nspace ); + + inMap->generic = generic; + } + + generic = inMap->generic; + return pd->findUniqueType( TYPE_TREE, inMap->generic->langEl ); +} + +UniqueType *TypeRef::lookupTypeParser( Compiler *pd ) +{ + /* Lookup up the qualifiction and then the name. */ + nspace = nspaceQual->getQual( pd ); + + UniqueType *utParse = typeRef1->lookupType( pd ); + + UniqueParser searchKey( utParse ); + UniqueParser *inMap = pd->uniqueParserMap.find( &searchKey ); + if ( inMap == 0 ) { + inMap = new UniqueParser( utParse ); + pd->uniqueParserMap.insert( inMap ); + + /* FIXME: Need uniqe name allocator for types. */ + static int accumId = 0; + String name( 36, "__accum%d", accumId++ ); + + GenericType *generic = new GenericType( name, GEN_PARSER, + pd->nextGenericId++, 0/*langEl*/, typeRef1 ); + + nspace->genericList.append( generic ); + + generic->declare( pd, nspace ); + + inMap->generic = generic; + } + + generic = inMap->generic; + return pd->findUniqueType( TYPE_TREE, inMap->generic->langEl ); +} + +UniqueType *TypeRef::lookupTypePtr( Compiler *pd ) +{ + typeRef1->lookupType( pd ); + return pd->findUniqueType( TYPE_PTR, typeRef1->uniqueType->langEl ); +} + +UniqueType *TypeRef::lookupTypeRef( Compiler *pd ) +{ + typeRef1->lookupType( pd ); + return pd->findUniqueType( TYPE_REF, typeRef1->uniqueType->langEl ); +} + +void TypeRef::resolveRepeat( Compiler *pd ) +{ + if ( uniqueType->typeId != TYPE_TREE ) + error(loc) << "cannot repeat non-tree type" << endp; + + UniqueRepeat searchKey( repeatType, uniqueType->langEl ); + UniqueRepeat *uniqueRepeat = pd->uniqeRepeatMap.find( &searchKey ); + if ( uniqueRepeat == 0 ) { + uniqueRepeat = new UniqueRepeat( repeatType, uniqueType->langEl ); + pd->uniqeRepeatMap.insert( uniqueRepeat ); + 
+ LangEl *declLangEl = 0; + + switch ( repeatType ) { + case RepeatRepeat: { + /* If the factor is a repeat, create the repeat element and link the + * factor to it. */ + String repeatName( 128, "_repeat_%s", typeName.data ); + declLangEl = pd->makeRepeatProd( nspace, repeatName, nspaceQual, typeName ); + break; + } + case RepeatList: { + /* If the factor is a repeat, create the repeat element and link the + * factor to it. */ + String listName( 128, "_list_%s", typeName.data ); + declLangEl = pd->makeListProd( nspace, listName, nspaceQual, typeName ); + break; + } + case RepeatOpt: { + /* If the factor is an opt, create the opt element and link the factor + * to it. */ + String optName( 128, "_opt_%s", typeName.data ); + declLangEl = pd->makeOptProd( nspace, optName, nspaceQual, typeName ); + break; + } + + case RepeatNone: + break; + } + + uniqueRepeat->declLangEl = declLangEl; + declLangEl->repeatOf = uniqueRepeat->langEl; + } + + uniqueType = pd->findUniqueType( TYPE_TREE, uniqueRepeat->declLangEl ); +} + + +UniqueType *TypeRef::lookupType( Compiler *pd ) +{ + if ( uniqueType != 0 ) + return uniqueType; + + /* Not an iterator. May be a reference. */ + switch ( type ) { + case Name: + uniqueType = lookupTypeName( pd ); + break; + case Literal: + uniqueType = lookupTypeLiteral( pd ); + break; + case Map: + uniqueType = lookupTypeMap( pd ); + break; + case List: + uniqueType = lookupTypeList( pd ); + break; + case Vector: + uniqueType = lookupTypeVector( pd ); + break; + case Parser: + uniqueType = lookupTypeParser( pd ); + break; + case Ptr: + uniqueType = lookupTypePtr( pd ); + break; + case Ref: + uniqueType = lookupTypeRef( pd ); + break; + case Iterator: + case Unspecified: + /* No lookup needed, unique type(s) set when constructed. 
*/ + break; + } + + if ( repeatType != RepeatNone ) + resolveRepeat( pd ); + + return uniqueType; +} + +void Compiler::resolveFactor( ProdEl *fact ) +{ + fact->typeRef->lookupType( this ); + fact->langEl = fact->typeRef->uniqueType->langEl; +} + +void LangTerm::resolve( Compiler *pd ) +{ + switch ( type ) { + case ConstructType: + typeRef->lookupType( pd ); + + /* Evaluate the initialization expressions. */ + if ( fieldInitArgs != 0 ) { + for ( FieldInitVect::Iter pi = *fieldInitArgs; pi.lte(); pi++ ) + (*pi)->expr->resolve( pd ); + } + break; + case VarRefType: + break; + + case MakeTreeType: + case MakeTokenType: + case MethodCallType: + if ( args != 0 ) { + for ( ExprVect::Iter pe = *args; pe.lte(); pe++ ) + (*pe)->resolve( pd ); + } + break; + + case NumberType: + case StringType: + case MatchType: + break; + case NewType: + expr->resolve( pd ); + break; + case TypeIdType: + typeRef->lookupType( pd ); + break; + case SearchType: + typeRef->lookupType( pd ); + break; + case NilType: + case TrueType: + case FalseType: + break; + + case ParseType: + case ParseStopType: + typeRef->lookupType( pd ); + parserTypeRef->lookupType( pd ); + generic = parserTypeRef->generic; + break; + + case EmbedStringType: + break; + } +} + +void LangVarRef::resolve( Compiler *pd ) const +{ + +} + +void LangExpr::resolve( Compiler *pd ) const +{ + switch ( type ) { + case BinaryType: { + left->resolve( pd ); + right->resolve( pd ); + break; + } + case UnaryType: { + right->resolve( pd ); + break; + } + case TermType: { + term->resolve( pd ); + break; + } + } +} + +void LangStmt::resolveParserItems( Compiler *pd ) const +{ + /* Assign bind ids to the variables in the replacement. 
*/ + for ( ReplItemList::Iter item = *parserText->list; item.lte(); item++ ) { + varRef->resolve( pd ); + + switch ( item->type ) { + case ReplItem::FactorType: + break; + case ReplItem::InputText: + break; + case ReplItem::ExprType: + item->expr->resolve( pd ); + break; + } + } +} + +void LangStmt::resolve( Compiler *pd ) const +{ + switch ( type ) { + case PrintType: + case PrintXMLACType: + case PrintXMLType: + case PrintStreamType: { + /* Push the args backwards. */ + for ( ExprVect::Iter pex = exprPtrVect->last(); pex.gtb(); pex-- ) + (*pex)->resolve( pd ); + break; + } + case ExprType: { + /* Evaluate the exrepssion, then pop it immediately. */ + expr->resolve( pd ); + break; + } + case IfType: { + /* Evaluate the test. */ + expr->resolve( pd ); + + /* Analyze the if true branch. */ + for ( StmtList::Iter stmt = *stmtList; stmt.lte(); stmt++ ) + stmt->resolve( pd ); + + if ( elsePart != 0 ) + elsePart->resolve( pd ); + break; + } + case ElseType: { + for ( StmtList::Iter stmt = *stmtList; stmt.lte(); stmt++ ) + stmt->resolve( pd ); + break; + } + case RejectType: + break; + case WhileType: { + expr->resolve( pd ); + + /* Compute the while block. */ + for ( StmtList::Iter stmt = *stmtList; stmt.lte(); stmt++ ) + stmt->resolve( pd ); + break; + } + case AssignType: { + /* Evaluate the exrepssion. */ +// cout << "Assign Type" << endl; + expr->resolve( pd ); + break; + } + case ForIterType: { + typeRef->lookupType( pd ); + + /* Evaluate and push the arguments. */ + langTerm->resolve( pd ); + + /* Compile the contents. */ + for ( StmtList::Iter stmt = *stmtList; stmt.lte(); stmt++ ) + stmt->resolve( pd ); + + break; + } + case ReturnType: { + /* Evaluate the exrepssion. */ + expr->resolve( pd ); + break; + } + case BreakType: { + break; + } + case YieldType: { + /* take a reference and yield it. Immediately reset the referece. 
*/ + varRef->resolve( pd ); + break; + } + case ParserType: { + //for ( ) + break; + } + } +} + +void ObjectDef::resolve( Compiler *pd ) +{ + for ( ObjFieldList::Iter fli = *objFieldList; fli.lte(); fli++ ) { + ObjField *field = fli->value; + + if ( field->typeRef != 0 ) { + field->typeRef->lookupType( pd ); + } + } +} + +void CodeBlock::resolve( Compiler *pd ) const +{ + if ( localFrame != 0 ) + localFrame->resolve( pd ); + + for ( StmtList::Iter stmt = *stmtList; stmt.lte(); stmt++ ) + stmt->resolve( pd ); +} + +void Compiler::resolveFunction( Function *func ) +{ + CodeBlock *block = func->codeBlock; + block->resolve( this ); +} + +void Compiler::resolveUserIter( Function *func ) +{ + CodeBlock *block = func->codeBlock; + block->resolve( this ); +} + +void Compiler::resolvePreEof( TokenRegion *region ) +{ + CodeBlock *block = region->preEofBlock; + block->resolve( this ); +} + +void Compiler::resolveRootBlock() +{ + rootLocalFrame->resolve( this ); + + CodeBlock *block = rootCodeBlock; + block->resolve( this ); +} + +void Compiler::resolveTranslateBlock( LangEl *langEl ) +{ + CodeBlock *block = langEl->transBlock; + block->resolve( this ); +} + +void Compiler::resolveReductionCode( Definition *prod ) +{ + CodeBlock *block = prod->redBlock; + block->resolve( this ); +} + +void Compiler::resolveParseTree() +{ + /* Compile functions. */ + for ( FunctionList::Iter f = functionList; f.lte(); f++ ) { + if ( f->isUserIter ) + resolveUserIter( f ); + else + resolveFunction( f ); + + if ( f->typeRef != 0 ) + f->typeRef->lookupType( this ); + + for ( ParameterList::Iter param = *f->paramList; param.lte(); param++ ) + param->typeRef->lookupType( this ); + } + + /* Compile the reduction code. */ + for ( DefList::Iter prod = prodList; prod.lte(); prod++ ) { + if ( prod->redBlock != 0 ) + resolveReductionCode( prod ); + } + + /* Compile the token translation code. 
*/ + for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) { + if ( lel->transBlock != 0 ) + resolveTranslateBlock( lel ); + } + + /* Compile preeof blocks. */ + for ( RegionList::Iter r = regionList; r.lte(); r++ ) { + if ( r->preEofBlock != 0 ) + resolvePreEof( r ); + } + + /* Compile the init code */ + resolveRootBlock( ); + + /* Init all user object fields (need consistent size). */ + for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) { + ObjectDef *objDef = lel->objectDef; + if ( objDef != 0 ) { + /* Init all fields of the object. */ + for ( ObjFieldList::Iter f = *objDef->objFieldList; f.lte(); f++ ) { + f->value->typeRef->lookupType( this ); + } + } + } + + /* Init all fields of the global object. */ + for ( ObjFieldList::Iter f = *globalObjectDef->objFieldList; f.lte(); f++ ) { + f->value->typeRef->lookupType( this ); + } + +} + + +void Compiler::resolveUses() +{ + for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) { + if ( lel->objectDefUses != 0 ) { + /* Look for the production's associated region. */ + Namespace *nspace = lel->objectDefUsesQual->getQual( this ); + + if ( nspace == 0 ) + error() << "do not have namespace for resolving reference" << endp; + + /* Look up the language element in the region. */ + LangEl *langEl = findType( this, nspace, lel->objectDefUses ); + lel->objectDef = langEl->objectDef; + } + } +} + +void Compiler::resolvePatternEls() +{ + for ( PatternList::Iter pat = patternList; pat.lte(); pat++ ) { + for ( PatternItemList::Iter item = *pat->list; item.lte(); item++ ) { + switch ( item->type ) { + case PatternItem::FactorType: + /* Use pdaFactor reference resolving. */ + resolveFactor( item->factor ); + break; + case PatternItem::InputText: + /* Nothing to do here. 
*/ + break; + } + } + } +} + +void Compiler::resolveReplacementEls() +{ + for ( ReplList::Iter repl = replList; repl.lte(); repl++ ) { + for ( ReplItemList::Iter item = *repl->list; item.lte(); item++ ) { + switch ( item->type ) { + case ReplItem::FactorType: + /* Use pdaFactor reference resolving. */ + resolveFactor( item->factor ); + break; + case ReplItem::InputText: + case ReplItem::ExprType: + break; + } + } + } +} + +void Compiler::resolveParserEls() +{ + for ( ParserTextList::Iter accum = parserTextList; accum.lte(); accum++ ) { + for ( ReplItemList::Iter item = *accum->list; item.lte(); item++ ) { + switch ( item->type ) { + case ReplItem::FactorType: + resolveFactor( item->factor ); + break; + case ReplItem::InputText: + case ReplItem::ExprType: + break; + } + } + } +} + +/* Resolves production els and computes the precedence of each prod. */ +void Compiler::resolveProductionEls() +{ + /* NOTE: as we process this list it may be growing! */ + for ( DefList::Iter prod = prodList; prod.lte(); prod++ ) { + /* First resolve. */ + for ( ProdElList::Iter fact = *prod->prodElList; fact.lte(); fact++ ) + resolveFactor( fact ); + + /* If there is no explicit precdence ... */ + if ( prod->predOf == 0 ) { + /* Compute the precedence of the productions. */ + for ( ProdElList::Iter fact = prod->prodElList->last(); fact.gtb(); fact-- ) { + /* Production inherits the precedence of the last terminal with + * precedence. 
*/ + if ( fact->langEl->predType != PredNone ) { + prod->predOf = fact->langEl; + break; + } + } + } + } +} + +void Compiler::resolveGenericTypes() +{ + for ( NamespaceList::Iter ns = namespaceList; ns.lte(); ns++ ) { + for ( GenericList::Iter gen = ns->genericList; gen.lte(); gen++ ) { +// cout << __PRETTY_FUNCTION__ << " " << gen->name.data << " " << gen->typeArg << endl; + + gen->utArg = gen->typeArg->lookupType( this ); + + if ( gen->typeId == GEN_MAP ) + gen->keyUT = gen->keyTypeArg->lookupType( this ); + } + } +} + +void Compiler::makeTerminalWrappers() +{ + /* Make terminal language elements corresponding to each nonterminal in + * the grammar. */ + for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) { + if ( lel->type == LangEl::NonTerm ) { + String name( lel->name.length() + 5, "_T_%s", lel->name.data ); + LangEl *termDup = new LangEl( lel->nspace, name, LangEl::Term ); + + /* Give the dup the attributes of the nonterminal. This ensures + * that the attributes are allocated when patterns and + * constructors are parsed. */ + termDup->objectDef = lel->objectDef; + + langEls.append( termDup ); + lel->termDup = termDup; + termDup->termDup = lel; + } + } +} + +void Compiler::makeEofElements() +{ + /* Make eof language elements for each user terminal. This is a bit excessive and + * need to be reduced to the ones that we need parsers for, but we don't know that yet. + * Another pass before this one is needed. 
*/ + for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) { + if ( lel->eofLel == 0 && + lel != eofLangEl && + lel != errorLangEl && + lel != noTokenLangEl && + !( lel->tokenDef != 0 && lel->tokenDef->dupOf != 0 ) ) + { + String name( lel->name.length() + 5, "_eof_%s", lel->name.data ); + LangEl *eofLel = new LangEl( lel->nspace, name, LangEl::Term ); + + langEls.append( eofLel ); + lel->eofLel = eofLel; + eofLel->eofLel = lel; + eofLel->isEOF = true; + } + } +} + +void Compiler::makeIgnoreCollectors() +{ + for ( RegionList::Iter region = regionList; region.lte(); region++ ) { + if ( region->isFullRegion ) { + String name( region->name.length() + 5, "_ign_%s", region->name.data ); + LangEl *ignLel = new LangEl( rootNamespace, name, LangEl::Term ); + langEls.append( ignLel ); + ignLel->isCI = true; + ignLel->ciRegion = region; + + region->ciLel = ignLel; + } + } +} + +void Compiler::typeResolve() +{ + /* + * Type Resolving. + */ + + /* Resolve uses statements. */ + resolveUses(); + + /* Resolve pattern and replacement elements. */ + resolvePatternEls(); + resolveReplacementEls(); + resolveParserEls(); + + resolveParseTree(); + + resolveGenericTypes(); + + argvTypeRef->lookupType( this ); + + /* We must do this as the last step in the type resolution process because + * all type resolves can cause new language elments with associated + * productions. They get tacked onto the end of the list of productions. + * Doing it at the end results processing a growing list. */ + resolveProductionEls(); +} diff --git a/src/rtvector.h b/src/rtvector.h new file mode 100644 index 00000000..e03a17f9 --- /dev/null +++ b/src/rtvector.h @@ -0,0 +1,34 @@ +/* + * Copyright 2002, 2006, 2009 Adrian Thurston <thurston@complang.org> + */ + +/* This file is part of Aapl. 
+ * + * Aapl is free software; you can redistribute it and/or modify it under the + * terms of the GNU Lesser General Public License as published by the Free + * Software Foundation; either version 2.1 of the License, or (at your option) + * any later version. + * + * Aapl is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for + * more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with Aapl; if not, write to the Free Software Foundation, Inc., 59 + * Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _RT_VECTOR_H +#define _RT_VECTOR_H + +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef __cplusplus +} +#endif + +#endif + diff --git a/src/string.c b/src/string.c new file mode 100644 index 00000000..7508a39a --- /dev/null +++ b/src/string.c @@ -0,0 +1,240 @@ +/* + * Copyright 2007-2012 Adrian Thurston <thurston@complang.org> + */ + +/* This file is part of Colm. + * + * Colm is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Colm is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with Colm; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <pool.h> +#include <pdarun.h> +#include <bytecode.h> + +#include <assert.h> +#include <string.h> +#include <stdio.h> +#include <stdlib.h> +#include <ctype.h> + +/* + * In this system strings are not null terminated. Often strings come from a + * parse, in which case the string is just a pointer into the the data string. + * A block in a parsed stream can house many tokens and there is no room for + * nulls. + */ + +Head *stringCopy( Program *prg, Head *head ) +{ + Head *result = 0; + if ( head != 0 ) { + if ( (char*)(head+1) == head->data ) + result = stringAllocFull( prg, head->data, head->length ); + else + result = stringAllocPointer( prg, head->data, head->length ); + } + return result; +} + +void stringFree( Program *prg, Head *head ) +{ + if ( head != 0 ) { + if ( head->location != 0 ) + locationFree( prg, head->location ); + + if ( (char*)(head+1) == head->data ) { + /* Full string allocation. */ + free( head ); + } + else { + /* Just a string head. */ + headFree( prg, head ); + } + } +} + +const char *stringData( Head *head ) +{ + if ( head == 0 ) + return 0; + return head->data; +} + +long stringLength( Head *head ) +{ + if ( head == 0 ) + return 0; + return head->length; +} + +void stringShorten( Head *head, long newlen ) +{ + assert( newlen <= head->length ); + head->length = newlen; +} + +Head *initStrSpace( long length ) +{ + /* Find the length and allocate the space for the shared string. */ + Head *head = (Head*) malloc( sizeof(Head) + length ); + //if ( head == 0 ) + // throw std::bad_alloc(); + + /* Init the header. */ + head->data = (char*)(head+1); + head->length = length; + head->location = 0; + + /* Save the pointer to the data. */ + return head; +} + +/* Create from a c-style string. 
*/ +Head *stringAllocFull( Program *prg, const char *data, long length ) +{ + /* Init space for the data. */ + Head *head = initStrSpace( length ); + + /* Copy in the data. */ + memcpy( (head+1), data, length ); + + return head; +} + +/* Create from a c-style string. */ +Head *stringAllocPointer( Program *prg, const char *data, long length ) +{ + /* Find the length and allocate the space for the shared string. */ + Head *head = headAllocate( prg ); + + /* Init the header. */ + head->data = data; + head->length = length; + + return head; +} + +Head *concatStr( Head *s1, Head *s2 ) +{ + long s1Len = s1->length; + long s2Len = s2->length; + + /* Init space for the data. */ + Head *head = initStrSpace( s1Len + s2Len ); + + /* Copy in the data. */ + memcpy( (head+1), s1->data, s1Len ); + memcpy( (char*)(head+1) + s1Len, s2->data, s2Len ); + + return head; +} + +Head *stringToUpper( Head *s ) +{ + /* Init space for the data. */ + long len = s->length; + Head *head = initStrSpace( len ); + + /* Copy in the data. */ + const char *src = s->data; + char *dst = (char*)(head+1); + int i; + for ( i = 0; i < len; i++ ) + *dst++ = toupper( *src++ ); + + return head; +} + +Head *stringToLower( Head *s ) +{ + /* Init space for the data. */ + long len = s->length; + Head *head = initStrSpace( len ); + + /* Copy in the data. */ + const char *src = s->data; + char *dst = (char*)(head+1); + int i; + for ( i = 0; i < len; i++ ) + *dst++ = tolower( *src++ ); + + return head; +} + + +/* Compare two strings. If identical returns 1, otherwise 0. */ +Word cmpString( Head *s1, Head *s2 ) +{ + if ( s1->length < s2->length ) + return -1; + else if ( s1->length > s2->length ) + return 1; + else { + char *d1 = (char*)(s1->data); + char *d2 = (char*)(s2->data); + return memcmp( d1, d2, s1->length ); + } +} + +Word strAtoi( Head *str ) +{ + /* FIXME: need to implement this by hand. There is no null terminator. 
*/ + char *nulled = (char*)malloc( str->length + 1 ); + memcpy( nulled, str->data, str->length ); + nulled[str->length] = 0; + int res = atoi( nulled ); + free( nulled ); + return res; +} + +Head *intToStr( Program *prg, Word i ) +{ + char data[20]; + sprintf( data, "%ld", i ); + return stringAllocFull( prg, data, strlen(data) ); +} + +Word strUord16( Head *head ) +{ + uchar *data = (uchar*)(head->data); + ulong res; + res = (ulong)data[1]; + res |= ((ulong)data[0]) << 8; + return res; +} + +Word strUord8( Head *head ) +{ + uchar *data = (uchar*)(head->data); + ulong res = (ulong)data[0]; + return res; +} + +Head *makeLiteral( Program *prg, long offset ) +{ + return stringAllocPointer( prg, + prg->rtd->litdata[offset], + prg->rtd->litlen[offset] ); +} + +Head *stringSprintf( Program *prg, Str *format, Int *integer ) +{ + Head *formatHead = format->value; + long written = snprintf( 0, 0, stringData(formatHead), integer->value ); + Head *head = initStrSpace( written+1 ); + written = snprintf( (char*)head->data, written+1, stringData(formatHead), integer->value ); + head->length -= 1; + return head; +} diff --git a/src/synthesis.cc b/src/synthesis.cc new file mode 100644 index 00000000..794927ad --- /dev/null +++ b/src/synthesis.cc @@ -0,0 +1,3277 @@ +/* + * Copyright 2007-2012 Adrian Thurston <thurston@complang.org> + */ + +/* This file is part of Colm. + * + * Colm is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Colm is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with Colm; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "bytecode.h" +#include "parsedata.h" +#include "fsmrun.h" +#include "pdarun.h" +#include "input.h" +#include <iostream> +#include <assert.h> + +using std::cout; +using std::cerr; +using std::endl; + +void Compiler::initUniqueTypes( ) +{ + uniqueTypeNil = new UniqueType( TYPE_NIL ); + uniqueTypePtr = new UniqueType( TYPE_TREE, ptrLangEl ); + uniqueTypeBool = new UniqueType( TYPE_TREE, boolLangEl ); + uniqueTypeInt = new UniqueType( TYPE_TREE, intLangEl ); + uniqueTypeStr = new UniqueType( TYPE_TREE, strLangEl ); + uniqueTypeStream = new UniqueType( TYPE_TREE, streamLangEl ); + uniqueTypeInput = new UniqueType( TYPE_TREE, inputLangEl ); + uniqueTypeIgnore = new UniqueType( TYPE_TREE, ignoreLangEl ); + uniqueTypeAny = new UniqueType( TYPE_TREE, anyLangEl ); + + uniqeTypeMap.insert( uniqueTypeNil ); + uniqeTypeMap.insert( uniqueTypePtr ); + uniqeTypeMap.insert( uniqueTypeBool ); + uniqeTypeMap.insert( uniqueTypeInt ); + uniqeTypeMap.insert( uniqueTypeStr ); + uniqeTypeMap.insert( uniqueTypeStream ); + uniqeTypeMap.insert( uniqueTypeInput ); + uniqeTypeMap.insert( uniqueTypeIgnore ); + uniqeTypeMap.insert( uniqueTypeAny ); +} + +IterDef::IterDef( Type type ) : + type(type), + func(0), + useFuncId(false), + useSearchUT(false) +{ + switch ( type ) { + case Tree: + inCreateWV = IN_TRITER_FROM_REF; + inCreateWC = IN_TRITER_FROM_REF; + inDestroy = IN_TRITER_DESTROY; + inAdvance = IN_TRITER_ADVANCE; + + inGetCurR = IN_TRITER_GET_CUR_R; + inGetCurWC = IN_TRITER_GET_CUR_WC; + inSetCurWC = IN_TRITER_SET_CUR_WC; + inRefFromCur = IN_TRITER_REF_FROM_CUR; + useSearchUT = true; + break; + case Child: + inCreateWV = IN_TRITER_FROM_REF; + inCreateWC = IN_TRITER_FROM_REF; + inDestroy = IN_TRITER_DESTROY; + inAdvance = IN_TRITER_NEXT_CHILD; + + inGetCurR = 
IN_TRITER_GET_CUR_R; + inGetCurWC = IN_TRITER_GET_CUR_WC; + inSetCurWC = IN_TRITER_SET_CUR_WC; + inRefFromCur = IN_TRITER_REF_FROM_CUR; + useSearchUT = true; + break; + case RevChild: + inCreateWV = IN_REV_TRITER_FROM_REF; + inCreateWC = IN_REV_TRITER_FROM_REF; + inDestroy = IN_REV_TRITER_DESTROY; + inAdvance = IN_REV_TRITER_PREV_CHILD; + + inGetCurR = IN_TRITER_GET_CUR_R; + inGetCurWC = IN_TRITER_GET_CUR_WC; + inSetCurWC = IN_TRITER_SET_CUR_WC; + inRefFromCur = IN_TRITER_REF_FROM_CUR; + useSearchUT = true; + break; + + case Repeat: + inCreateWV = IN_TRITER_FROM_REF; + inCreateWC = IN_TRITER_FROM_REF; + inDestroy = IN_TRITER_DESTROY; + inAdvance = IN_TRITER_NEXT_REPEAT; + + inGetCurR = IN_TRITER_GET_CUR_R; + inGetCurWC = IN_TRITER_GET_CUR_WC; + inSetCurWC = IN_TRITER_SET_CUR_WC; + inRefFromCur = IN_TRITER_REF_FROM_CUR; + useSearchUT = true; + break; + + case RevRepeat: + inCreateWV = IN_TRITER_FROM_REF; + inCreateWC = IN_TRITER_FROM_REF; + inDestroy = IN_TRITER_DESTROY; + inAdvance = IN_TRITER_PREV_REPEAT; + + inGetCurR = IN_TRITER_GET_CUR_R; + inGetCurWC = IN_TRITER_GET_CUR_WC; + inSetCurWC = IN_TRITER_SET_CUR_WC; + inRefFromCur = IN_TRITER_REF_FROM_CUR; + useSearchUT = true; + break; + + case User: + assert(false); + } +} + +IterDef::IterDef( Type type, Function *func ) : + type(type), + func(func), + useFuncId(true), + useSearchUT(true), + inCreateWV(IN_UITER_CREATE_WV), + inCreateWC(IN_UITER_CREATE_WC), + inDestroy(IN_UITER_DESTROY), + inAdvance(IN_UITER_ADVANCE), + inGetCurR(IN_UITER_GET_CUR_R), + inGetCurWC(IN_UITER_GET_CUR_WC), + inSetCurWC(IN_UITER_SET_CUR_WC), + inRefFromCur(IN_UITER_REF_FROM_CUR) +{} + +ObjMethod *initFunction( UniqueType *retType, ObjectDef *obj, + const String &name, int methIdWV, int methIdWC, bool isConst ) +{ + ObjMethod *objMethod = new ObjMethod( retType, name, + methIdWV, methIdWC, 0, 0, 0, isConst ); + obj->objMethodMap->insert( name, objMethod ); + return objMethod; +} + +ObjMethod *initFunction( UniqueType *retType, ObjectDef 
*obj, + const String &name, int methIdWV, int methIdWC, UniqueType *arg1, bool isConst ) +{ + UniqueType *args[] = { arg1 }; + ObjMethod *objMethod = new ObjMethod( retType, name, + methIdWV, methIdWC, 1, args, 0, isConst ); + obj->objMethodMap->insert( name, objMethod ); + return objMethod; +} + +ObjMethod *initFunction( UniqueType *retType, ObjectDef *obj, + const String &name, int methIdWV, int methIdWC, + UniqueType *arg1, UniqueType *arg2, bool isConst ) +{ + UniqueType *args[] = { arg1, arg2 }; + ObjMethod *objMethod = new ObjMethod( retType, name, + methIdWV, methIdWC, 2, args, 0, isConst ); + obj->objMethodMap->insert( name, objMethod ); + return objMethod; +} + +IterDef *Compiler::findIterDef( IterDef::Type type, Function *func ) +{ + IterDefSetEl *el = iterDefSet.find( IterDef( type, func ) ); + if ( el == 0 ) + el = iterDefSet.insert( IterDef( type, func ) ); + return &el->key; +} + +IterDef *Compiler::findIterDef( IterDef::Type type ) +{ + IterDefSetEl *el = iterDefSet.find( IterDef( type ) ); + if ( el == 0 ) + el = iterDefSet.insert( IterDef( type ) ); + return &el->key; +} + +UniqueType *Compiler::findUniqueType( int typeId ) +{ + UniqueType searchKey( typeId ); + UniqueType *uniqueType = uniqeTypeMap.find( &searchKey ); + if ( uniqueType == 0 ) { + uniqueType = new UniqueType( typeId ); + uniqeTypeMap.insert( uniqueType ); + } + return uniqueType; +} + +UniqueType *Compiler::findUniqueType( int typeId, LangEl *langEl ) +{ + UniqueType searchKey( typeId, langEl ); + UniqueType *uniqueType = uniqeTypeMap.find( &searchKey ); + if ( uniqueType == 0 ) { + uniqueType = new UniqueType( typeId, langEl ); + uniqeTypeMap.insert( uniqueType ); + } + return uniqueType; +} + +UniqueType *Compiler::findUniqueType( int typeId, IterDef *iterDef ) +{ + UniqueType searchKey( typeId, iterDef ); + UniqueType *uniqueType = uniqeTypeMap.find( &searchKey ); + if ( uniqueType == 0 ) { + uniqueType = new UniqueType( typeId, iterDef ); + uniqeTypeMap.insert( uniqueType ); + 
} + return uniqueType; +} + +void ObjectDef::iterPushScope() +{ + //cout << "iter push scope "; + if ( scope->childIter == 0 ) { + scope->childIter = scope->children.head; + } + else { + scope->childIter = scope->childIter->next; + /* Resetting. */ + if ( scope->childIter == 0 ) + scope ->childIter = scope->children.head; + } + + scope = scope->childIter; +} + +void ObjectDef::iterPopScope() +{ + //cout << "iter pop scope" << endl; + scope = scope->parentScope; +} + +void ObjectDef::pushScope() +{ + ObjNameScope *newScope = new ObjNameScope; + newScope->objFieldMap = new ObjFieldMap; + + newScope->parentScope = scope; + scope->children.append( newScope ); + + scope = newScope; +} + +void ObjectDef::popScope() +{ + scope = scope->parentScope; +} + +void ObjectDef::insertField( const String &name, ObjField *value ) +{ + scope->objFieldMap->insert( name, value ); + objFieldList->append( value ); +} + +/* Recurisve find through a single object def's scope. */ +ObjField *ObjectDef::findFieldInScope( const String &name, ObjNameScope *inScope ) +{ + ObjFieldMapEl *objDefMapEl = inScope->objFieldMap->find( name ); + if ( objDefMapEl != 0 ) + return objDefMapEl->value; + if ( inScope->parentScope != 0 ) + return findFieldInScope( name, inScope->parentScope ); + return 0; +} + +ObjField *ObjectDef::checkRedecl( const String &name ) +{ + //cout << "looking for " << name << endl; + ObjFieldMapEl *objDefMapEl = scope->objFieldMap->find( name ); + if ( objDefMapEl != 0 ) + return objDefMapEl->value; + return 0; + +} + +/* 0-based. 
*/ +ObjField *ObjectDef::findFieldNum( long offset ) +{ + int fn = 0; + ObjFieldList::Iter field = *objFieldList; + while ( fn < offset ) { + fn++; + field++; + } + return field->value; +} + +ObjField *ObjectDef::findField( const String &name ) +{ + //cout << "looking for " << name << endl; + ObjField *objField = findFieldInScope( name, scope ); + if ( objField != 0 ) + return objField; + return 0; +} + +ObjMethod *ObjectDef::findMethod( const String &name ) +{ + ObjMethodMapEl *objMethodMapEl = objMethodMap->find( name ); + if ( objMethodMapEl != 0 ) + return objMethodMapEl->value; + return 0; +} + +long sizeOfField( UniqueType *fieldUT ) +{ + long size = 0; + if ( fieldUT->typeId == TYPE_ITER ) { + /* Select on the iterator type. */ + switch ( fieldUT->iterDef->type ) { + case IterDef::Tree: + case IterDef::Child: + case IterDef::Repeat: + case IterDef::RevRepeat: + size = sizeof(TreeIter) / sizeof(Word); + break; + case IterDef::RevChild: + size = sizeof(RevTreeIter) / sizeof(Word); + break; + + case IterDef::User: + /* User iterators are just a pointer to the UserIter struct. The + * struct needs to go right beneath the call to the user iterator + * so it can be found by a yield. It is therefore allocated on the + * stack right before the call. 
*/ + size = 1; + break; + } + } + else if ( fieldUT->typeId == TYPE_REF ) + size = 2; + else + size = 1; + + return size; +} + +void ObjectDef::referenceField( Compiler *pd, ObjField *field ) +{ + field->beenReferenced = true; + initField( pd, field ); +} + +void ObjectDef::initField( Compiler *pd, ObjField *field ) +{ + if ( !field->beenInitialized ) { + field->beenInitialized = true; + UniqueType *fieldUT = field->typeRef->uniqueType; + + if ( type == FrameType ) { + nextOffset += sizeOfField( fieldUT ); + field->offset = -nextOffset; + + pd->initLocalInstructions( field ); + } + else if ( field->isRhsGet ) { + field->useOffset = false; + field->inGetR = IN_GET_RHS_VAL_R; + field->inGetWC = IN_GET_RHS_VAL_WC; + field->inGetWV = IN_GET_RHS_VAL_WV; + field->inSetWC = IN_SET_RHS_VAL_WC; + field->inSetWV = IN_SET_RHS_VAL_WC; + } + else { + field->offset = nextOffset; + nextOffset += sizeOfField( fieldUT ); + + /* Initialize the instructions. */ + pd->initFieldInstructions( field ); + } + } +} + +UniqueType *LangVarRef::loadFieldInstr( Compiler *pd, CodeVect &code, + ObjectDef *inObject, ObjField *el, bool forWriting, bool revert ) const +{ + /* Ensure that the field is referenced. */ + inObject->referenceField( pd, el ); + + UniqueType *elUT = el->typeRef->uniqueType; + + /* If it's a reference then we load it read always. */ + if ( forWriting ) { + /* The instruction, depends on whether or not we are reverting. */ + if ( elUT->typeId == TYPE_ITER ) + code.append( elUT->iterDef->inGetCurWC ); + else if ( pd->revertOn && revert ) + code.append( el->inGetWV ); + else + code.append( el->inGetWC ); + } + else { + /* Loading something for writing */ + if ( elUT->typeId == TYPE_ITER ) + code.append( elUT->iterDef->inGetCurR ); + else + code.append( el->inGetR ); + } + + if ( el->useOffset ) { + /* Gets of locals and fields require offsets. Fake vars like token + * data and lhs don't require it. 
*/ + code.appendHalf( el->offset ); + } + else if ( el->isRhsGet ) { + /* Need to place the array computing the val. */ + code.append( el->rhsVal.length() ); + for ( Vector<RhsVal>::Iter rg = el->rhsVal; rg.lte(); rg++ ) { + code.append( rg->prodNum ); + code.append( rg->childNum ); + } + } + + /* If we are dealing with an iterator then dereference it. */ + if ( elUT->typeId == TYPE_ITER ) + elUT = el->typeRef->searchUniqueType; + + return elUT; +} + +ObjectDef *objDefFromUT( Compiler *pd, UniqueType *ut ) +{ + ObjectDef *objDef = 0; + if ( ut->typeId == TYPE_TREE || ut->typeId == TYPE_REF ) + objDef = ut->langEl->objectDef; + else { + /* This should have generated a compiler error. */ + assert(false); + } + return objDef; +} + +/* The qualification must start at a local frame. There cannot be any pointer. */ +long LangVarRef::loadQualificationRefs( Compiler *pd, CodeVect &code ) const +{ + long count = 0; + ObjectDef *rootObj = pd->curLocalFrame; + + /* Start the search from the root object. */ + ObjectDef *searchObjDef = rootObj; + + for ( QualItemVect::Iter qi = *qual; qi.lte(); qi++ ) { + /* Lookup the field in the current qualification. 
*/ + ObjField *el = searchObjDef->findField( qi->data ); + if ( el == 0 ) + error(qi->loc) << "cannot resolve qualification " << qi->data << endp; + + if ( qi.pos() > 0 ) { + code.append( IN_REF_FROM_QUAL_REF ); + code.appendHalf( 0 ); + code.appendHalf( el->offset ); + } + else if ( el->typeRef->iterDef != 0 ) { + code.append( el->typeRef->iterDef->inRefFromCur ); + code.appendHalf( el->offset ); + } + else if ( el->typeRef->type == TypeRef::Ref ) { + code.append( IN_REF_FROM_REF ); + code.appendHalf( el->offset ); + } + else { + code.append( IN_REF_FROM_LOCAL ); + code.appendHalf( el->offset ); + } + + UniqueType *elUT = el->typeRef->uniqueType; + if ( elUT->typeId == TYPE_ITER ) + elUT = el->typeRef->searchUniqueType; + + assert( qi->type == QualItem::Dot ); + + searchObjDef = objDefFromUT( pd, elUT ); + count += 1; + } + return count; +} + +void LangVarRef::loadQualification( Compiler *pd, CodeVect &code, + ObjectDef *rootObj, int lastPtrInQual, bool forWriting, bool revert ) const +{ + /* Start the search from the root object. */ + ObjectDef *searchObjDef = rootObj; + + for ( QualItemVect::Iter qi = *qual; qi.lte(); qi++ ) { + /* Lookup the field int the current qualification. */ + ObjField *el = searchObjDef->findField( qi->data ); + if ( el == 0 ) + error(qi->loc) << "cannot resolve qualification " << qi->data << endp; + + if ( forWriting && el->refActive ) + error(qi->loc) << "reference active, cannot write to object" << endp; + + bool lfForWriting = forWriting; + bool lfRevert = revert; + + /* If there is a pointer in the qualification, we need to compute + * forWriting and revert. */ + if ( lastPtrInQual >= 0 ) { + if ( qi.pos() <= lastPtrInQual ) { + /* If we are before or at the pointer we are strictly read + * only, regardless of the origin. */ + lfForWriting = false; + lfRevert = false; + } + else { + /* If we are past the pointer then we are always reverting + * because the object is global. Forwriting is as passed in. 
+ * */ + lfRevert = true; + } + } + + UniqueType *qualUT = loadFieldInstr( pd, code, searchObjDef, + el, lfForWriting, lfRevert ); + + if ( qi->type == QualItem::Dot ) { + /* Cannot a reference. Iterator yes (access of the iterator not + * hte current) */ + if ( qualUT->typeId == TYPE_PTR ) + error(loc) << "dot cannot be used to access a pointer" << endp; + } + else if ( qi->type == QualItem::Arrow ) { + if ( qualUT->typeId == TYPE_PTR ) { + /* Always dereference references when used for qualification. If + * this is the last one then we must start with the reverse + * execution business. */ + if ( pd->revertOn && qi.pos() == lastPtrInQual && forWriting ) { + /* This is like a global load. */ + code.append( IN_PTR_DEREF_WV ); + } + else { + /* If reading or not yet the last in ref then we only need a + * reading deref. */ + code.append( IN_PTR_DEREF_R ); + } + + qualUT = pd->findUniqueType( TYPE_TREE, qualUT->langEl ); + } + else { + error(loc) << "arrow operator cannot be used to access this type" << endp; + } + } + + searchObjDef = objDefFromUT( pd, qualUT ); + } +} + +void LangVarRef::loadContextObj( Compiler *pd, CodeVect &code, + int lastPtrInQual, bool forWriting ) const +{ + /* Start the search in the global object. */ + ObjectDef *rootObj = pd->context->contextObjDef; + + if ( forWriting && lastPtrInQual < 0 ) { + /* If we are writing an no reference was found in the qualification + * then load the gloabl with a revert. */ + if ( pd->revertOn ) + code.append( IN_LOAD_CONTEXT_WV ); + else + code.append( IN_LOAD_CONTEXT_WC ); + } + else { + /* Either we are reading or we are loading a pointer that will be + * dereferenced. */ + code.append( IN_LOAD_CONTEXT_R ); + } + + loadQualification( pd, code, rootObj, lastPtrInQual, forWriting, true ); +} + +void LangVarRef::loadGlobalObj( Compiler *pd, CodeVect &code, + int lastPtrInQual, bool forWriting ) const +{ + /* Start the search in the global object. 
*/ + ObjectDef *rootObj = pd->globalObjectDef; + + if ( forWriting && lastPtrInQual < 0 ) { + /* If we are writing an no reference was found in the qualification + * then load the gloabl with a revert. */ + if ( pd->revertOn ) + code.append( IN_LOAD_GLOBAL_WV ); + else + code.append( IN_LOAD_GLOBAL_WC ); + } + else { + /* Either we are reading or we are loading a pointer that will be + * dereferenced. */ + code.append( IN_LOAD_GLOBAL_R ); + } + + loadQualification( pd, code, rootObj, lastPtrInQual, forWriting, true ); +} + +void LangVarRef::loadCustom( Compiler *pd, CodeVect &code, + int lastPtrInQual, bool forWriting ) const +{ + /* Start the search in the local frame. */ + loadQualification( pd, code, pd->curLocalFrame, lastPtrInQual, forWriting, true ); +} + +void LangVarRef::loadLocalObj( Compiler *pd, CodeVect &code, + int lastPtrInQual, bool forWriting ) const +{ + /* Start the search in the local frame. */ + loadQualification( pd, code, pd->curLocalFrame, lastPtrInQual, forWriting, false ); +} + +bool LangVarRef::isLocalRef( Compiler *pd ) const +{ + if ( qual->length() > 0 ) { + if ( pd->curLocalFrame->findField( qual->data[0].data ) != 0 ) + return true; + } + else if ( pd->curLocalFrame->findField( name ) != 0 ) + return true; + else if ( pd->curLocalFrame->findMethod( name ) != 0 ) + return true; + + return false; +} + +bool LangVarRef::isContextRef( Compiler *pd ) const +{ + if ( pd->context != 0 ) { + if ( qual->length() > 0 ) { + if ( pd->context->contextObjDef->findField( qual->data[0].data ) != 0 ) + return true; + } + else if ( pd->context->contextObjDef->findField( name ) != 0 ) + return true; + else if ( pd->context->contextObjDef->findMethod( name ) != 0 ) + return true; + } + + return false; +} + +bool LangVarRef::isCustom( Compiler *pd ) const +{ + if ( qual->length() > 0 ) { + ObjField *field = pd->curLocalFrame->findField( qual->data[0].data ); + if ( field != 0 && field->isCustom ) + return true; + } + else { + ObjField *field = 
pd->curLocalFrame->findField( name ); + if ( field != 0 ) { + if ( field->isCustom ) + return true; + } + else { + ObjMethod *method = pd->curLocalFrame->findMethod( name ); + if ( method != 0 && method->isCustom ) + return true; + } + + } + return false; +} + +void LangVarRef::loadObj( Compiler *pd, CodeVect &code, + int lastPtrInQual, bool forWriting ) const +{ + if ( isCustom( pd ) ) + loadCustom( pd, code, lastPtrInQual, forWriting ); + else if ( isLocalRef( pd ) ) + loadLocalObj( pd, code, lastPtrInQual, forWriting ); + else if ( isContextRef( pd ) ) + loadContextObj( pd, code, lastPtrInQual, forWriting ); + else + loadGlobalObj( pd, code, lastPtrInQual, forWriting ); +} + +VarRefLookup LangVarRef::lookupQualification( Compiler *pd, ObjectDef *rootDef ) const +{ + int lastPtrInQual = -1; + ObjectDef *searchObjDef = rootDef; + int firstConstPart = -1; + + for ( QualItemVect::Iter qi = *qual; qi.lte(); qi++ ) { + /* Lookup the field int the current qualification. */ + ObjField *el = searchObjDef->findField( qi->data ); + if ( el == 0 ) + error(qi->loc) << "cannot resolve qualification " << qi->data << endp; + + /* Lookup the type of the field. */ + UniqueType *qualUT = el->typeRef->uniqueType; + + /* If we are dealing with an iterator then dereference it. */ + if ( qualUT->typeId == TYPE_ITER ) + qualUT = el->typeRef->searchUniqueType; + + /* Is it const? */ + if ( firstConstPart < 0 && el->isConst ) + firstConstPart = qi.pos(); + + /* Check for references. When loop is done we will have the last one + * present, if any. */ + if ( qualUT->typeId == TYPE_PTR ) + lastPtrInQual = qi.pos(); + + if ( qi->type == QualItem::Dot ) { + /* Cannot dot a reference. 
Iterator yes (access of the iterator + * not the current) */ + if ( qualUT->typeId == TYPE_PTR ) + error(loc) << "dot cannot be used to access a pointer" << endp; + } + else if ( qi->type == QualItem::Arrow ) { + if ( qualUT->typeId == TYPE_ITER ) + qualUT = el->typeRef->searchUniqueType; + else if ( qualUT->typeId == TYPE_PTR ) + qualUT = pd->findUniqueType( TYPE_TREE, qualUT->langEl ); + } + + searchObjDef = objDefFromUT( pd, qualUT ); + } + + return VarRefLookup( lastPtrInQual, firstConstPart, searchObjDef ); +} + +VarRefLookup LangVarRef::lookupObj( Compiler *pd ) const +{ + ObjectDef *rootDef; + if ( isLocalRef( pd ) ) + rootDef = pd->curLocalFrame; + else if ( isContextRef( pd ) ) + rootDef = pd->context->contextObjDef; + else + rootDef = pd->globalObjectDef; + + return lookupQualification( pd, rootDef ); +} + +VarRefLookup LangVarRef::lookupField( Compiler *pd ) const +{ + /* Lookup the object that the field is in. */ + VarRefLookup lookup = lookupObj( pd ); + + /* Lookup the field. */ + ObjField *field = lookup.inObject->findField( name ); + if ( field == 0 ) + error(loc) << "cannot find name " << name << " in object" << endp; + + lookup.objField = field; + lookup.uniqueType = field->typeRef->uniqueType; + + if ( field->typeRef->searchUniqueType != 0 ) + lookup.iterSearchUT = field->typeRef->searchUniqueType; + + return lookup; +} + + +VarRefLookup LangVarRef::lookupMethod( Compiler *pd ) +{ + /* Lookup the object that the field is in. */ + VarRefLookup lookup = lookupObj( pd ); + + /* Find the method. */ + assert( lookup.inObject->objMethodMap != 0 ); + ObjMethod *method = lookup.inObject->findMethod( name ); + if ( method == 0 ) { + /* Not found as a method, try it as an object on which we will call a + * default function. */ + qual->append( QualItem( InputLoc(), name, QualItem::Dot ) ); + name = "finish"; + + /* Lookup the object that the field is in. */ + VarRefLookup lookup = lookupObj( pd ); + + /* Find the method. 
*/ + assert( lookup.inObject->objMethodMap != 0 ); + method = lookup.inObject->findMethod( name ); + if ( method == 0 ) + error(loc) << "cannot find " << name << "(...) in object" << endp; + } + + lookup.objMethod = method; + lookup.uniqueType = method->returnUT; + + return lookup; +} + +void LangVarRef::setFieldInstr( Compiler *pd, CodeVect &code, + ObjectDef *inObject, ObjField *el, UniqueType *exprUT, bool revert ) const +{ + /* Ensure that the field is referenced. */ + inObject->referenceField( pd, el ); + + if ( pd->revertOn && revert ) + code.append( el->inSetWV ); + else + code.append( el->inSetWC ); + + /* Maybe write out an offset. */ + if ( el->useOffset ) + code.appendHalf( el->offset ); +} + +bool castAssignment( Compiler *pd, CodeVect &code, UniqueType *destUT, + UniqueType *destSearchUT, UniqueType *srcUT ) +{ + if ( destUT == srcUT ) + return true; + + /* Casting trees to any. */ + if ( destUT->typeId == TYPE_TREE && destUT->langEl == pd->anyLangEl && + srcUT->typeId == TYPE_TREE ) + return true; + + /* Setting a reference from a tree. */ + if ( destUT->typeId == TYPE_REF && srcUT->typeId == TYPE_TREE && + destUT->langEl == srcUT->langEl ) + return true; + + /* Setting a tree from a reference. */ + if ( destUT->typeId == TYPE_TREE && srcUT->typeId == TYPE_REF && + destUT->langEl == srcUT->langEl ) + return true; + + /* Setting an iterator from a tree. */ + if ( destUT->typeId == TYPE_ITER && srcUT->typeId == TYPE_TREE && + destSearchUT->langEl == srcUT->langEl ) + return true; + + /* Assigning nil to a tree. */ + if ( destUT->typeId == TYPE_TREE && srcUT->typeId == TYPE_NIL ) + return true; + + /* Assigning nil to a pointer. 
*/ + if ( destUT->typeId == TYPE_PTR && srcUT->typeId == TYPE_NIL ) + return true; + + return false; +} + +void LangVarRef::setField( Compiler *pd, CodeVect &code, + ObjectDef *inObject, UniqueType *exprUT, bool revert ) const +{ + ObjField *el = inObject->findField( name ); + if ( el == 0 ) + error(loc) << "cannot find name " << name << " in object" << endp; + + setFieldInstr( pd, code, inObject, el, exprUT, revert ); +} + +void LangVarRef::setFieldIter( Compiler *pd, CodeVect &code, + ObjectDef *inObject, UniqueType *objUT, UniqueType *exprType, bool revert ) const +{ + ObjField *el = inObject->findField( name ); + if ( el == 0 ) + error(loc) << "cannot find name " << name << " in object" << endp; + + code.append( objUT->iterDef->inSetCurWC ); + code.appendHalf( el->offset ); +} + +UniqueType *LangVarRef::evaluate( Compiler *pd, CodeVect &code, bool forWriting ) const +{ + /* Lookup the loadObj. */ + VarRefLookup lookup = lookupField( pd ); + + /* Load the object, if any. */ + loadObj( pd, code, lookup.lastPtrInQual, forWriting ); + + /* Load the field. */ + UniqueType *ut = loadFieldInstr( pd, code, lookup.inObject, + lookup.objField, forWriting, false ); + + return ut; +} + +void LangVarRef::canTakeRef( Compiler *pd, VarRefLookup &lookup ) const +{ + bool canTake = false; + + /* If the var is not a local, it must be an attribute accessed + * via a local and attributes. */ + if ( lookup.inObject->type == ObjectDef::FrameType ) + canTake = true; + else if ( isLocalRef(pd) && lookup.lastPtrInQual < 0 && lookup.uniqueType->typeId != TYPE_PTR ) + canTake = true; + + if ( !canTake ) { + error(loc) << "can only take references of locals or " + "attributes accessed via a local" << endp; + } + + if ( lookup.objField->refActive ) + error(loc) << "reference currently active, cannot take another" << endp; +} + +/* Return the field referenced. 
*/ +ObjField *LangVarRef::preEvaluateRef( Compiler *pd, CodeVect &code ) const +{ + VarRefLookup lookup = lookupField( pd ); + + canTakeRef( pd, lookup ); + + loadQualificationRefs( pd, code ); + + return lookup.objField; +} + +/* Return the field referenced. */ +ObjField *LangVarRef::evaluateRef( Compiler *pd, CodeVect &code, long pushCount ) const +{ + VarRefLookup lookup = lookupField( pd ); + + canTakeRef( pd, lookup ); + + /* Ensure that the field is referenced. */ + lookup.inObject->referenceField( pd, lookup.objField ); + + /* Note that we could have modified children. */ + if ( qual->length() == 0 ) + lookup.objField->refActive = true; + + /* Whenever we take a reference we have to assume writing and that the + * tree is dirty. */ + lookup.objField->dirtyTree = true; + + if ( qual->length() > 0 ) { + code.append( IN_REF_FROM_QUAL_REF ); + code.appendHalf( pushCount ); + code.appendHalf( lookup.objField->offset ); + } + else if ( lookup.objField->typeRef->iterDef != 0 ) { + code.append( lookup.objField->typeRef->iterDef->inRefFromCur ); + code.appendHalf( lookup.objField->offset ); + } + else if ( lookup.objField->typeRef->type == TypeRef::Ref ) { + code.append( IN_REF_FROM_REF ); + code.appendHalf( lookup.objField->offset ); + } + else { + code.append( IN_REF_FROM_LOCAL ); + code.appendHalf( lookup.objField->offset ); + } + + return lookup.objField; +} + +ObjField **LangVarRef::evaluateArgs( Compiler *pd, CodeVect &code, + VarRefLookup &lookup, ExprVect *args ) const +{ + /* Parameter list is given only for user defined methods. Otherwise it + * will be null. */ + ParameterList *paramList = lookup.objMethod->paramList; + + /* Match the number of arguments. */ + int numArgs = args != 0 ? args->length() : 0; + if ( numArgs != lookup.objMethod->numParams ) + error(loc) << "wrong number of arguments" << endp; + + /* This is for storing the object fields used by references. 
*/ + ObjField **paramRefs = new ObjField*[numArgs]; + memset( paramRefs, 0, sizeof(ObjField*) * numArgs ); + + /* Evaluate and push the args. */ + if ( args != 0 ) { + /* We use this only if there is a paramter list. */ + ParameterList::Iter p; + long pushCount = 0; + + /* First pass we need to push object loads for reference parameters. */ + paramList != 0 && ( p = *paramList ); + for ( ExprVect::Iter pe = *args; pe.lte(); pe++ ) { + /* Get the expression and the UT for the arg. */ + LangExpr *expression = *pe; + UniqueType *paramUT = lookup.objMethod->paramUTs[pe.pos()]; + + if ( paramUT->typeId == TYPE_REF ) { + /* Make sure we are dealing with a variable reference. */ + if ( expression->type != LangExpr::TermType ) + error(loc) << "not a term: argument must be a local variable" << endp; + if ( expression->term->type != LangTerm::VarRefType ) + error(loc) << "not a variable: argument must be a local variable" << endp; + + /* Lookup the field. */ + LangVarRef *varRef = expression->term->varRef; + + ObjField *refOf = varRef->preEvaluateRef( pd, code ); + paramRefs[pe.pos()] = refOf; + + pushCount += varRef->qual->length() * 2; + } + + /* Advance the parameter list iterator if we have it. */ + paramList != 0 && p.increment(); + } + + paramList != 0 && ( p = *paramList ); + for ( ExprVect::Iter pe = *args; pe.lte(); pe++ ) { + /* Get the expression and the UT for the arg. */ + LangExpr *expression = *pe; + UniqueType *paramUT = lookup.objMethod->paramUTs[pe.pos()]; + + if ( paramUT->typeId == TYPE_REF ) { + + /* Make sure we are dealing with a variable reference. */ + if ( expression->type != LangExpr::TermType ) + error(loc) << "not a term: argument must be a local variable" << endp; + if ( expression->term->type != LangTerm::VarRefType ) + error(loc) << "not a variable: argument must be a local variable" << endp; + + /* Lookup the field. 
*/ + LangVarRef *varRef = expression->term->varRef; + + pushCount -= varRef->qual->length() * 2; + + ObjField *refOf = varRef->evaluateRef( pd, code, pushCount ); + paramRefs[pe.pos()] = refOf; + + pushCount += 2; + } + else { + UniqueType *exprUT = expression->evaluate( pd, code ); + + if ( !castAssignment( pd, code, paramUT, 0, exprUT ) ) + error(loc) << "arg " << pe.pos()+1 << " is of the wrong type" << endp; + + pushCount += 1; + } + + /* Advance the parameter list iterator if we have it. */ + paramList != 0 && p.increment(); + } + } + + return paramRefs; +} + +void LangVarRef::resetActiveRefs( Compiler *pd, VarRefLookup &lookup, ObjField **paramRefs ) const +{ + /* Parameter list is given only for user defined methods. Otherwise it + * will be null. */ + for ( long p = 0; p < lookup.objMethod->numParams; p++ ) { + if ( paramRefs[p] != 0 ) + paramRefs[p]->refActive = false; + } +} + + +void LangVarRef::callOperation( Compiler *pd, CodeVect &code, VarRefLookup &lookup ) const +{ + /* This is for writing if it is a non-const builtin. */ + bool forWriting = lookup.objMethod->func == 0 && + !lookup.objMethod->isConst; + + if ( lookup.objMethod->useCallObj ) { + /* Load the object, if any. */ + loadObj( pd, code, lookup.lastPtrInQual, forWriting ); + } + + /* Check if we need to revert the function. If it operates on a reference + * or if it is not local then we need to revert it. */ + bool revert = lookup.lastPtrInQual >= 0 || !isLocalRef(pd); + + /* The call instruction. 
*/ + if ( pd->revertOn && revert ) { + if ( lookup.objMethod->opcodeWV == IN_PARSE_FINISH_WV ) { + code.append( IN_PARSE_SAVE_STEPS ); + code.append( IN_PARSE_LOAD_START ); + code.append( IN_PARSE_FINISH_WV ); + code.appendHalf( 0 ); + code.append( IN_PCR_CALL ); + code.append( IN_PARSE_FINISH_WV3 ); + } + else { + code.append( lookup.objMethod->opcodeWV ); + } + } + else { + if ( lookup.objMethod->opcodeWC == IN_PARSE_FINISH_WC ) { + code.append( IN_PARSE_SAVE_STEPS ); + code.append( IN_PARSE_LOAD_START ); + code.append( IN_PARSE_FINISH_WC ); + code.appendHalf( 0 ); + code.append( IN_PCR_CALL ); + code.append( IN_PARSE_FINISH_WC3 ); + } + else { + code.append( lookup.objMethod->opcodeWC ); + } + } + + if ( lookup.objMethod->useFuncId ) + code.appendHalf( lookup.objMethod->funcId ); +} + +void LangVarRef::popRefQuals( Compiler *pd, CodeVect &code, + VarRefLookup &lookup, ExprVect *args ) const +{ + long popCount = 0; + + /* Evaluate and push the args. */ + if ( args != 0 ) { + /* We use this only if there is a paramter list. */ + for ( ExprVect::Iter pe = *args; pe.lte(); pe++ ) { + /* Get the expression and the UT for the arg. */ + LangExpr *expression = *pe; + UniqueType *paramUT = lookup.objMethod->paramUTs[pe.pos()]; + + if ( paramUT->typeId == TYPE_REF ) { + /* Lookup the field. */ + LangVarRef *varRef = expression->term->varRef; + popCount += varRef->qual->length() * 2; + } + } + if ( popCount > 0 ) { + code.append( IN_POP_N_WORDS ); + code.appendHalf( (short)popCount ); + } + } +} + +UniqueType *LangVarRef::evaluateCall( Compiler *pd, CodeVect &code, ExprVect *args ) +{ + /* Evaluate the object. */ + VarRefLookup lookup = lookupMethod( pd ); + + /* Evaluate and push the arguments. */ + ObjField **paramRefs = evaluateArgs( pd, code, lookup, args ); + + /* Write the call opcode. 
*/ + callOperation( pd, code, lookup ); + + popRefQuals( pd, code, lookup, args ); + + resetActiveRefs( pd, lookup, paramRefs); + delete[] paramRefs; + + /* Return the type to the expression. */ + return lookup.uniqueType; +} + +UniqueType *LangTerm::evaluateMatch( Compiler *pd, CodeVect &code ) const +{ + /* Add the vars bound by the pattern into the local scope. */ + for ( PatternItemList::Iter item = *pattern->list; item.lte(); item++ ) { + if ( item->varRef != 0 ) + item->bindId = pattern->nextBindId++; + } + + UniqueType *ut = varRef->evaluate( pd, code ); + if ( ut->typeId != TYPE_TREE ) + error(varRef->loc) << "expected match against a tree type" << endp; + + /* Store the language element type in the pattern. This is needed by + * the pattern parser. */ + pattern->langEl = ut->langEl; + + code.append( IN_MATCH ); + code.appendHalf( pattern->patRepId ); + + for ( PatternItemList::Iter item = pattern->list->last(); item.gtb(); item-- ) { + if ( item->varRef != 0 ) { + /* Compute the unique type. */ + UniqueType *exprType = pd->findUniqueType( TYPE_TREE, item->factor->langEl ); + + /* Get the type of the variable being assigned to. */ + VarRefLookup lookup = item->varRef->lookupField( pd ); + + item->varRef->loadObj( pd, code, lookup.lastPtrInQual, false ); + item->varRef->setField( pd, code, lookup.inObject, exprType, false ); + } + } + + return ut; +} + +UniqueType *LangTerm::evaluateNew( Compiler *pd, CodeVect &code ) const +{ + /* Evaluate the expression. */ + UniqueType *ut = expr->evaluate( pd, code ); + if ( ut->typeId != TYPE_TREE ) + error() << "new can only be applied to tree types" << endp; + + code.append( IN_TREE_NEW ); + return pd->findUniqueType( TYPE_PTR, ut->langEl ); +} + +void LangTerm::assignFieldArgs( Compiler *pd, CodeVect &code, UniqueType *replUT ) const +{ + /* Now assign the field initializations. Note that we need to do this in + * reverse because the last expression evaluated is at the top of the + * stack. 
*/ + if ( fieldInitArgs != 0 && fieldInitArgs->length() > 0 ) { + ObjectDef *objDef = objDefFromUT( pd, replUT ); + /* Note the reverse traversal. */ + for ( FieldInitVect::Iter pi = fieldInitArgs->last(); pi.gtb(); pi-- ) { + FieldInit *fieldInit = *pi; + ObjField *field = objDef->findFieldNum( pi.pos() ); + if ( field == 0 ) { + error(fieldInit->loc) << "failed to find init pos " << + pi.pos() << " in object" << endp; + } + + /* Lookup the type of the field and compare it to the type of the + * expression. */ + UniqueType *fieldUT = field->typeRef->uniqueType; + if ( !castAssignment( pd, code, fieldUT, 0, fieldInit->exprUT ) ) + error(fieldInit->loc) << "type mismatch in initialization" << endp; + + /* The set field instruction must leave the object on the top of + * the stack. */ + code.append( IN_SET_FIELD_LEAVE_WC ); + code.appendHalf( field->offset ); + } + } +} + +UniqueType *LangTerm::evaluateConstruct( Compiler *pd, CodeVect &code ) const +{ + /* Evaluate the initialization expressions. */ + if ( fieldInitArgs != 0 && fieldInitArgs->length() > 0 ) { + for ( FieldInitVect::Iter pi = *fieldInitArgs; pi.lte(); pi++ ) { + FieldInit *fieldInit = *pi; + fieldInit->exprUT = fieldInit->expr->evaluate( pd, code ); + } + } + + /* Assign bind ids to the variables in the replacement. */ + for ( ReplItemList::Iter item = *replacement->list; item.lte(); item++ ) { + if ( item->expr != 0 ) + item->bindId = replacement->nextBindId++; + } + + /* Evaluate variable references. */ + for ( ReplItemList::Iter item = replacement->list->last(); item.gtb(); item-- ) { + if ( item->type == ReplItem::ExprType ) { + UniqueType *ut = item->expr->evaluate( pd, code ); + + if ( ut->typeId != TYPE_TREE ) + error() << "variables used in replacements must be trees" << endp; + + item->langEl = ut->langEl; + } + } + + /* Construct the tree using the tree information stored in the compiled + * code. 
*/ + code.append( IN_CONSTRUCT ); + code.appendHalf( replacement->patRepId ); + + /* Lookup the type of the replacement and store it in the replacement + * object so that replacement parsing has a target. */ + UniqueType *replUT = typeRef->uniqueType; + if ( replUT->typeId != TYPE_TREE ) + error(loc) << "don't know how to construct this type" << endp; + + if ( replUT->langEl->generic != 0 && replUT->langEl->generic->typeId == GEN_PARSER ) { + code.append( IN_CONSTRUCT_INPUT ); + code.append( IN_DUP_TOP_OFF ); + code.appendHalf( 1 ); + code.append( IN_SET_INPUT ); + } + + replacement->langEl = replUT->langEl; + assignFieldArgs( pd, code, replUT ); + + if ( varRef != 0 ) { + code.append( IN_DUP_TOP ); + + /* Get the type of the variable being assigned to. */ + VarRefLookup lookup = varRef->lookupField( pd ); + + varRef->loadObj( pd, code, lookup.lastPtrInQual, false ); + varRef->setField( pd, code, lookup.inObject, replUT, false ); + } + + return replUT; +} + +UniqueType *LangTerm::evaluateParse( Compiler *pd, CodeVect &code, bool stop ) const +{ + UniqueType *ut = typeRef->uniqueType; + assert( ut != 0 ); + + if ( ut->typeId != TYPE_TREE ) + error(loc) << "can only parse trees" << endl; + + /* Should be one arg, a stream. */ + if ( args == 0 || ( args->length() != 1 && args->length() != 2 ) ) + error(loc) << "expecting one or two args" << endp; + + int context, input; + if ( ut->langEl->contextIn == 0 ) { + if ( args->length() != 1 ) + error(loc) << "parse command requires just input" << endp; + context = -1; + input = 0; + } + else { + if ( args->length() != 2 ) + error(loc) << "parse command requires context and input" << endp; + context = 0; + input = 1; + } + + /* + * Make the parser. + */ + code.append( IN_CONSTRUCT ); + code.appendHalf( replacement->patRepId ); + + /* Dup once for the context load, again for the argument load, again for + * the parse frag, leaving the original there for the finish. 
*/ + code.append( IN_DUP_TOP ); +// code.append( IN_DUP_TOP ); +// code.append( IN_DUP_TOP ); + + /* + * First load the context into the parser. + */ + if ( context < 0 ) { + code.append( IN_LOAD_NIL ); + } + else { + UniqueType *argUT = args->data[context]->evaluate( pd, code ); + if ( argUT != pd->uniqueTypeStream && argUT->typeId != TYPE_TREE ) + error(loc) << "context argument must be a stream or a tree" << endp; + } + + /* FIXME: need to select right one here. */ + code.append( IN_DUP_TOP_OFF ); + code.appendHalf( 1 ); + code.append( IN_SET_ACCUM_CTX_WC ); + + /* + * Evaluate the parse arg. + */ + + /* Evaluate the parse args. */ + UniqueType *argUT = args->data[input]->evaluate( pd, code ); + if ( argUT != pd->uniqueTypeStream && argUT->typeId != TYPE_TREE ) + error(loc) << "input argument must be a stream or a tree" << endp; + + /* Allocate a parser id. This will cause a parser to be built for + * the type. */ + if ( ut->langEl->parserId < 0 ) + ut->langEl->parserId = pd->nextParserId++; + + /* If this is a parse stop then we need to verify that the type is + * compatible with parse stop. */ + if ( stop ) + ut->langEl->parseStop = true; + + if ( argUT != pd->uniqueTypeInput ) { + code.append( IN_CONSTRUCT_INPUT ); + if ( pd->revertOn ) + code.append( IN_INPUT_APPEND_WV ); + else + code.append( IN_INPUT_APPEND_WC ); + } + + code.append( IN_DUP_TOP_OFF ); + code.appendHalf( 1 ); + code.append( IN_SET_INPUT ); + + int stopId = stop ? ut->langEl->id : 0; + + /* Parse instruction, dependent on whether or not we are producing revert + * or commit code. */ + if ( pd->revertOn ) { + code.append( IN_PARSE_SAVE_STEPS ); + code.append( IN_PARSE_LOAD_START ); + code.append( IN_PARSE_FRAG_WV ); + code.appendHalf( stopId ); + code.append( IN_PCR_CALL ); + code.append( IN_PARSE_FRAG_WV3 ); + + /* Finish immediately. 
*/ + code.append( IN_PARSE_SAVE_STEPS ); + code.append( IN_PARSE_LOAD_START ); + code.append( IN_PARSE_FINISH_WV ); + code.appendHalf( stopId ); + code.append( IN_PCR_CALL ); + code.append( IN_PARSE_FINISH_WV3 ); + } + else { + code.append( IN_PARSE_SAVE_STEPS ); + code.append( IN_PARSE_LOAD_START ); + code.append( IN_PARSE_FRAG_WC ); + code.appendHalf( stopId ); + code.append( IN_PCR_CALL ); + code.append( IN_PARSE_FRAG_WC3 ); + + /* Finish immediately. */ + code.append( IN_PARSE_SAVE_STEPS ); + code.append( IN_PARSE_LOAD_START ); + code.append( IN_PARSE_FINISH_WC ); + code.appendHalf( stopId ); + code.append( IN_PCR_CALL ); + code.append( IN_PARSE_FINISH_WC3 ); + } + + /* Lookup the type of the replacement and store it in the replacement + * object so that replacement parsing has a target. */ + replacement->langEl = generic->langEl; + + if ( varRef != 0 ) { + code.append( IN_DUP_TOP ); + + /* Get the type of the variable being assigned to. */ + VarRefLookup lookup = varRef->lookupField( pd ); + + varRef->loadObj( pd, code, lookup.lastPtrInQual, false ); + varRef->setField( pd, code, lookup.inObject, ut, false ); + } + + return ut; +} + +UniqueType *LangTerm::evaluateEmbedString( Compiler *pd, CodeVect &code ) const +{ + /* Assign bind ids to the variables in the replacement. */ + for ( ReplItemList::Iter item = *replItemList; item.lte(); item++ ) { + switch ( item->type ) { + case ReplItem::FactorType: { + String result; + bool unusedCI; + prepareLitString( result, unusedCI, + item->factor->typeRef->pdaLiteral->token.data, + item->factor->typeRef->pdaLiteral->token.loc ); + + /* Make sure we have this string. */ + StringMapEl *mapEl = 0; + if ( pd->literalStrings.insert( result, &mapEl ) ) + mapEl->value = pd->literalStrings.length()-1; + + code.append( IN_LOAD_STR ); + code.appendWord( mapEl->value ); + break; + } + case ReplItem::InputText: { + /* Make sure we have this string. 
*/ + StringMapEl *mapEl = 0; + if ( pd->literalStrings.insert( item->data, &mapEl ) ) + mapEl->value = pd->literalStrings.length()-1; + + code.append( IN_LOAD_STR ); + code.appendWord( mapEl->value ); + break; + } + case ReplItem::ExprType: + item->expr->evaluate( pd, code ); + break; + } + + } + + long items = replItemList->length(); + for ( long i = 0; i < items-1; i++ ) + code.append( IN_CONCAT_STR ); + + return pd->uniqueTypeStr; +} + +UniqueType *LangTerm::evaluate( Compiler *pd, CodeVect &code ) const +{ + switch ( type ) { + case VarRefType: + return varRef->evaluate( pd, code ); + case MethodCallType: + return varRef->evaluateCall( pd, code, args ); + case NilType: + code.append( IN_LOAD_NIL ); + return pd->uniqueTypeNil; + case TrueType: + code.append( IN_LOAD_TRUE ); + return pd->uniqueTypeBool; + case FalseType: + code.append( IN_LOAD_FALSE ); + return pd->uniqueTypeBool; + case MakeTokenType: + return evaluateMakeToken( pd, code ); + case MakeTreeType: + return evaluateMakeTree( pd, code ); + case NumberType: { + unsigned int n = atoi( data ); + code.append( IN_LOAD_INT ); + code.appendWord( n ); + return pd->uniqueTypeInt; + } + case StringType: { + String interp; + bool unused; + prepareLitString( interp, unused, data, InputLoc() ); + + /* Make sure we have this string. */ + StringMapEl *mapEl = 0; + if ( pd->literalStrings.insert( interp, &mapEl ) ) + mapEl->value = pd->literalStrings.length()-1; + + code.append( IN_LOAD_STR ); + code.appendWord( mapEl->value ); + return pd->uniqueTypeStr; + } + case MatchType: + return evaluateMatch( pd, code ); + case ParseType: + return evaluateParse( pd, code, false ); + case ParseStopType: + return evaluateParse( pd, code, true ); + case ConstructType: + return evaluateConstruct( pd, code ); + case NewType: + return evaluateNew( pd, code ); + case TypeIdType: { + /* Evaluate the expression. 
*/ + UniqueType *ut = typeRef->uniqueType; + if ( ut->typeId != TYPE_TREE ) + error() << "typeid can only be applied to tree types" << endp; + + code.append( IN_LOAD_INT ); + code.appendWord( ut->langEl->id ); + return pd->uniqueTypeInt; + } + case SearchType: { + /* Evaluate the expression. */ + UniqueType *ut = typeRef->uniqueType; + if ( ut->typeId != TYPE_TREE ) + error(loc) << "can only search for tree types" << endp; + + UniqueType *treeUT = varRef->evaluate( pd, code ); + if ( treeUT->typeId != TYPE_TREE ) + error(loc) << "search can be applied only to tree types" << endl; + + code.append( IN_TREE_SEARCH ); + code.appendWord( ut->langEl->id ); + return ut; + }; + case EmbedStringType: { + return evaluateEmbedString( pd, code ); + } + } + return 0; +} + +UniqueType *LangExpr::evaluate( Compiler *pd, CodeVect &code ) const +{ + switch ( type ) { + case BinaryType: { + switch ( op ) { + case '+': { + UniqueType *lt = left->evaluate( pd, code ); + UniqueType *rt = right->evaluate( pd, code ); + + if ( lt == pd->uniqueTypeInt && rt == pd->uniqueTypeInt ) { + code.append( IN_ADD_INT ); + return pd->uniqueTypeInt; + } + + if ( lt == pd->uniqueTypeStr && rt == pd->uniqueTypeStr ) { + code.append( IN_CONCAT_STR ); + return pd->uniqueTypeStr; + } + + error(loc) << "do not have an addition operator for these types" << endp; + break; + } + case '-': { + UniqueType *lt = left->evaluate( pd, code ); + UniqueType *rt = right->evaluate( pd, code ); + + if ( lt == pd->uniqueTypeInt && rt == pd->uniqueTypeInt ) { + code.append( IN_SUB_INT ); + return pd->uniqueTypeInt; + } + + error(loc) << "do not have an addition operator for these types" << endp; + break; + } + case '*': { + UniqueType *lt = left->evaluate( pd, code ); + UniqueType *rt = right->evaluate( pd, code ); + + if ( lt == pd->uniqueTypeInt && rt == pd->uniqueTypeInt ) { + code.append( IN_MULT_INT ); + return pd->uniqueTypeInt; + } + + error(loc) << "do not have an multiplication " + "operator for these types" << 
endp; + break; + } + case '/': { + UniqueType *lt = left->evaluate( pd, code ); + UniqueType *rt = right->evaluate( pd, code ); + + if ( lt == pd->uniqueTypeInt && rt == pd->uniqueTypeInt ) { + code.append( IN_DIV_INT ); + return pd->uniqueTypeInt; + } + + error(loc) << "do not have an division" + "operator for these types" << endp; + break; + } + case OP_DoubleEql: { + UniqueType *lt = left->evaluate( pd, code ); + UniqueType *rt = right->evaluate( pd, code ); + + if ( lt != rt ) + error(loc) << "comparison of different types" << endp; + + code.append( IN_TST_EQL ); + return pd->uniqueTypeBool; + } + case OP_NotEql: { + UniqueType *lt = left->evaluate( pd, code ); + UniqueType *rt = right->evaluate( pd, code ); + + if ( lt != rt ) + error(loc) << "comparison of different types" << endp; + + code.append( IN_TST_NOT_EQL ); + return pd->uniqueTypeBool; + } + case '<': { + left->evaluate( pd, code ); + right->evaluate( pd, code ); + + code.append( IN_TST_LESS ); + return pd->uniqueTypeBool; + } + case '>': { + left->evaluate( pd, code ); + right->evaluate( pd, code ); + + code.append( IN_TST_GRTR ); + return pd->uniqueTypeBool; + } + case OP_LessEql: { + left->evaluate( pd, code ); + right->evaluate( pd, code ); + + code.append( IN_TST_LESS_EQL ); + return pd->uniqueTypeBool; + } + case OP_GrtrEql: { + left->evaluate( pd, code ); + right->evaluate( pd, code ); + + code.append( IN_TST_GRTR_EQL ); + return pd->uniqueTypeBool; + } + case OP_LogicalAnd: { + /* Evaluate the left and duplicate it. */ + left->evaluate( pd, code ); + code.append( IN_DUP_TOP ); + + /* Jump over the right if false, leaving the original left + * result on the top of the stack. We don't know the + * distance yet so record the position of the jump. */ + long jump = code.length(); + code.append( IN_JMP_FALSE ); + code.appendHalf( 0 ); + + /* Evauluate the right, add the test. Store it separately. 
*/ + right->evaluate( pd, code ); + code.append( IN_TST_LOGICAL_AND ); + + /* Set the distance of the jump. */ + long distance = code.length() - jump - 3; + code.setHalf( jump+1, distance ); + + return pd->uniqueTypeInt; + } + case OP_LogicalOr: { + /* Evaluate the left and duplicate it. */ + left->evaluate( pd, code ); + code.append( IN_DUP_TOP ); + + /* Jump over the right if true, leaving the original left + * result on the top of the stack. We don't know the + * distance yet so record the position of the jump. */ + long jump = code.length(); + code.append( IN_JMP_TRUE ); + code.appendHalf( 0 ); + + /* Evauluate the right, add the test. */ + right->evaluate( pd, code ); + code.append( IN_TST_LOGICAL_OR ); + + /* Set the distance of the jump. */ + long distance = code.length() - jump - 3; + code.setHalf( jump+1, distance ); + + return pd->uniqueTypeInt; + } + } + + assert(false); + return 0; + } + case UnaryType: { + switch ( op ) { + case '!': { + /* Evaluate the left and duplicate it. */ + right->evaluate( pd, code ); + code.append( IN_NOT ); + return pd->uniqueTypeBool; + } + case '$': { + right->evaluate( pd, code ); + code.append( IN_TREE_TO_STR ); + return pd->uniqueTypeStr; + + } + case '%': { + right->evaluate( pd, code ); + code.append( IN_TREE_TO_STR_NOTRIM ); + return pd->uniqueTypeStr; + } + case '^': { + UniqueType *rt = right->evaluate( pd, code ); + code.append( IN_TREE_TRIM ); + return rt; + } + case OP_Deref: { + UniqueType *ut = right->evaluate( pd, code ); + if ( ut->typeId != TYPE_PTR ) + error(loc) << "can only dereference pointers" << endl; + + code.append( IN_PTR_DEREF_R ); + ut = pd->findUniqueType( TYPE_TREE, ut->langEl ); + return ut; + } + default: + assert(false); + } + return 0; + } + case TermType: { + return term->evaluate( pd, code ); + } + } + return 0; +} + +void LangVarRef::assignValue( Compiler *pd, CodeVect &code, + UniqueType *exprUT ) const +{ + /* Lookup the left hand side of the assignment. 
*/ + VarRefLookup lookup = lookupField( pd ); + + if ( lookup.objField->refActive ) + error(loc) << "reference active, cannot write to object" << endp; + + if ( lookup.firstConstPart >= 0 ) { + error(loc) << "left hand side qualification \"" << + qual->data[lookup.firstConstPart].data << "\" is const" << endp; + } + + if ( lookup.objField->isConst ) + error(loc) << "field \"" << name << "\" is const" << endp; + + /* Writing guarantees the field is dirty. tree is dirty. */ + lookup.objField->dirtyTree = true; + + /* Check the types of the assignment and possibly cast. */ + UniqueType *objUT = lookup.objField->typeRef->uniqueType; + assert( lookup.uniqueType == lookup.objField->typeRef->uniqueType ); + if ( !castAssignment( pd, code, objUT, lookup.iterSearchUT, exprUT ) ) + error(loc) << "type mismatch in assignment" << endp; + + /* Decide if we need to revert the assignment. */ + bool revert = lookup.lastPtrInQual >= 0 || !isLocalRef(pd); + + /* Load the object and generate the field setting code. */ + loadObj( pd, code, lookup.lastPtrInQual, true ); + + if ( lookup.uniqueType->typeId == TYPE_ITER ) + setFieldIter( pd, code, lookup.inObject, lookup.uniqueType, exprUT, false ); + else + setField( pd, code, lookup.inObject, exprUT, revert ); +} + +UniqueType *LangTerm::evaluateMakeToken( Compiler *pd, CodeVect &code ) const +{ +// if ( pd->compileContext != Compiler::CompileTranslation ) +// error(loc) << "make_token can be used only in a translation block" << endp; + + /* Match the number of arguments. */ + int numArgs = args != 0 ? args->length() : 0; + if ( numArgs < 2 ) + error(loc) << "need at least two arguments" << endp; + + for ( ExprVect::Iter pe = *args; pe.lte(); pe++ ) { + /* Evaluate. 
*/ + UniqueType *exprUT = (*pe)->evaluate( pd, code ); + + if ( pe.pos() == 0 && exprUT != pd->uniqueTypeInt ) + error(loc) << "first arg, id, must be an int" << endp; + + if ( pe.pos() == 1 && exprUT != pd->uniqueTypeStr ) + error(loc) << "second arg, length, must be a string" << endp; + } + + /* The token is now created, send it. */ + code.append( IN_MAKE_TOKEN ); + code.append( args->length() ); + + return pd->uniqueTypeAny; +} + +UniqueType *LangTerm::evaluateMakeTree( Compiler *pd, CodeVect &code ) const +{ + if ( pd->compileContext != Compiler::CompileTranslation ) + error(loc) << "make_tree can be used only in a translation block" << endp; + + /* Match the number of arguments. */ + int numArgs = args != 0 ? args->length() : 0; + if ( numArgs < 1 ) + error(loc) << "need at least one argument" << endp; + + for ( ExprVect::Iter pe = *args; pe.lte(); pe++ ) { + /* Evaluate. */ + UniqueType *exprUT = (*pe)->evaluate( pd, code ); + + if ( pe.pos() == 0 && exprUT != pd->uniqueTypeInt ) + error(loc) << "first arg, nonterm id, must be an int" << endp; + } + + /* The token is now created, send it. */ + code.append( IN_MAKE_TREE ); + code.append( args->length() ); + + return pd->uniqueTypeAny; +} + +void LangStmt::compileForIterBody( Compiler *pd, + CodeVect &code, UniqueType *iterUT ) const +{ + /* Remember the top of the loop. */ + long top = code.length(); + + /* Advance */ + code.append( iterUT->iterDef->inAdvance ); + code.appendHalf( objField->offset ); + + /* Test: jump past the while block if false. Note that we don't have the + * distance yet. */ + long jumpFalse = code.length(); + code.append( IN_JMP_FALSE ); + code.appendHalf( 0 ); + + /* + * Set up the loop cleanup code. + */ + + /* Set up the current loop cleanup. */ + CodeVect loopCleanup; + if ( pd->loopCleanup != 0 ) + loopCleanup.setAs( *pd->loopCleanup ); + + /* Add the cleanup for the current loop. 
*/ + loopCleanup.append( iterUT->iterDef->inDestroy ); + loopCleanup.appendHalf( objField->offset ); + + /* Push the loop cleanup. */ + CodeVect *oldLoopCleanup = pd->loopCleanup; + pd->loopCleanup = &loopCleanup; + + /* Compile the contents. */ + for ( StmtList::Iter stmt = *stmtList; stmt.lte(); stmt++ ) + stmt->compile( pd, code ); + + pd->loopCleanup = oldLoopCleanup; + + /* Jump back to the top to retest. */ + long retestDist = code.length() - top + 3; + code.append( IN_JMP ); + code.appendHalf( -retestDist ); + + /* Set the jump false distance. */ + long falseDist = code.length() - jumpFalse - 3; + code.setHalf( jumpFalse+1, falseDist ); + + /* Compute the jump distance for the break jumps. */ + for ( LongVect::Iter brk = pd->breakJumps; brk.lte(); brk++ ) { + long distance = code.length() - *brk - 3; + code.setHalf( *brk+1, distance ); + } + pd->breakJumps.empty(); + + /* Destroy the iterator. */ + code.append( iterUT->iterDef->inDestroy ); + code.appendHalf( objField->offset ); + + /* Clean up any prepush args. */ +} + +LangTerm *LangStmt::chooseDefaultIter( Compiler *pd, LangTerm *fromVarRef ) const +{ + /* Lookup the lang term and decide what iterator to use based + * on its type. */ + VarRefLookup lookup = fromVarRef->varRef->lookupField( pd ); + + if ( lookup.inObject->type != ObjectDef::FrameType ) + error(loc) << "root of iteration must be a local" << endp; + + LangVarRef *callVarRef = 0; + if ( lookup.uniqueType->typeId == TYPE_TREE || + lookup.uniqueType->typeId == TYPE_REF || + lookup.uniqueType->typeId == TYPE_ITER || + lookup.uniqueType->typeId == TYPE_PTR ) + { + /* The iterator name. */ + callVarRef = new LangVarRef( loc, new QualItemVect, "triter" ); + } + else { + error(loc) << "there is no default iterator for a " + "root of that type" << endp; + } + + /* The parameters. 
*/ + ExprVect *callExprVect = new ExprVect; + LangExpr *callExpr = new LangExpr( new LangTerm( + LangTerm::VarRefType, fromVarRef->varRef ) ); + callExprVect->append( callExpr ); + + LangTerm *callLangTerm = new LangTerm( callVarRef, callExprVect ); + + return callLangTerm; +} + +void LangStmt::compileForIter( Compiler *pd, CodeVect &code ) const +{ + pd->curLocalFrame->iterPushScope(); + + LangTerm *iterCallTerm = langTerm; + if ( iterCallTerm->type != LangTerm::MethodCallType ) + iterCallTerm = chooseDefaultIter( pd, langTerm ); + + /* The type we are searching for. */ + UniqueType *searchUT = typeRef->uniqueType; + + /* + * Declare the iterator variable. + */ + VarRefLookup lookup = iterCallTerm->varRef->lookupMethod( pd ); + if ( lookup.objMethod->iterDef == 0 ) { + error(loc) << "attempt to iterate using something " + "that is not an iterator" << endp; + } + + /* Now that we have done the iterator call lookup we can make the type + * reference for the object field. */ + UniqueType *iterUniqueType = pd->findUniqueType( TYPE_ITER, lookup.objMethod->iterDef ); + objField->typeRef = new TypeRef( loc, lookup.objMethod->iterDef, iterUniqueType, searchUT ); + + /* Also force the field to be initialized. */ + pd->curLocalFrame->initField( pd, objField ); + + /* + * Create the iterator from the local var. + */ + + UniqueType *iterUT = objField->typeRef->uniqueType; + + /* Evaluate and push the arguments. 
*/ + ObjField **paramRefs = iterCallTerm->varRef->evaluateArgs( + pd, code, lookup, iterCallTerm->args ); + + if ( pd->revertOn ) + code.append( iterUT->iterDef->inCreateWV ); + else + code.append( iterUT->iterDef->inCreateWC ); + + code.appendHalf( objField->offset ); + if ( lookup.objMethod->func != 0 ) + code.appendHalf( lookup.objMethod->func->funcId ); + + if ( iterUT->iterDef->useSearchUT ) { + if ( searchUT->typeId == TYPE_PTR ) + code.appendHalf( pd->uniqueTypePtr->langEl->id ); + else + code.appendHalf( searchUT->langEl->id ); + } + + compileForIterBody( pd, code, iterUT ); + + iterCallTerm->varRef->popRefQuals( pd, code, lookup, iterCallTerm->args ); + + iterCallTerm->varRef->resetActiveRefs( pd, lookup, paramRefs ); + delete[] paramRefs; + + pd->curLocalFrame->iterPopScope(); +} + +void LangStmt::compileWhile( Compiler *pd, CodeVect &code ) const +{ + pd->curLocalFrame->iterPushScope(); + + /* Generate code for the while test. Remember the top. */ + long top = code.length(); + expr->evaluate( pd, code ); + + /* Jump past the while block if false. Note that we don't have the + * distance yet. */ + long jumpFalse = code.length(); + code.append( IN_JMP_FALSE ); + code.appendHalf( 0 ); + + /* Compute the while block. */ + for ( StmtList::Iter stmt = *stmtList; stmt.lte(); stmt++ ) + stmt->compile( pd, code ); + + /* Jump back to the top to retest. */ + long retestDist = code.length() - top + 3; + code.append( IN_JMP ); + code.appendHalf( -retestDist ); + + /* Set the jump false distance. */ + long falseDist = code.length() - jumpFalse - 3; + code.setHalf( jumpFalse+1, falseDist ); + + /* Compute the jump distance for the break jumps. 
*/ + for ( LongVect::Iter brk = pd->breakJumps; brk.lte(); brk++ ) { + long distance = code.length() - *brk - 3; + code.setHalf( *brk+1, distance ); + } + pd->breakJumps.empty(); + + pd->curLocalFrame->iterPopScope(); +} + +/* Emit code for a parser statement: evaluate varRef, then for every item in + * parserText->list load the item, emit the input-append instructions and the + * parse-fragment instruction sequence. A final pop is emitted after the + * loop. */ +void LangStmt::evaluateParserItems( Compiler *pd, CodeVect &code ) const +{ + varRef->evaluate( pd, code ); + + /* Emit the load, append and parse code for each item of the parser + * text. */ + for ( ReplItemList::Iter item = *parserText->list; item.lte(); item++ ) { + switch ( item->type ) { + case ReplItem::FactorType: { + String result; + bool unusedCI; + prepareLitString( result, unusedCI, + item->factor->typeRef->pdaLiteral->token.data, + item->factor->typeRef->pdaLiteral->token.loc ); + + /* Make sure we have this string. */ + StringMapEl *mapEl = 0; + if ( pd->literalStrings.insert( result, &mapEl ) ) + mapEl->value = pd->literalStrings.length()-1; + + code.append( IN_LOAD_STR ); + code.appendWord( mapEl->value ); + break; + } + case ReplItem::InputText: { + /* Make sure we have this string. */ + StringMapEl *mapEl = 0; + if ( pd->literalStrings.insert( item->data, &mapEl ) ) + mapEl->value = pd->literalStrings.length()-1; + + code.append( IN_LOAD_STR ); + code.appendWord( mapEl->value ); + break; + } + case ReplItem::ExprType: + item->expr->evaluate( pd, code ); + break; + } + + code.append( IN_DUP_TOP_OFF ); + code.appendHalf( 1 ); + + /* Not a stream. Get the input first. */ + code.append( IN_GET_INPUT ); + if ( pd->revertOn ) + code.append( IN_INPUT_APPEND_WV ); + else + code.append( IN_INPUT_APPEND_WC ); + code.append( IN_POP ); + + code.append( IN_DUP_TOP ); + + /* Parse instruction, dependent on whether or not we are producing + * revert or commit code. 
*/ + if ( pd->revertOn ) { + code.append( IN_PARSE_SAVE_STEPS ); + code.append( IN_PARSE_LOAD_START ); + code.append( IN_PARSE_FRAG_WV ); + code.appendHalf( 0 ); + code.append( IN_PCR_CALL ); + code.append( IN_PARSE_FRAG_WV3 ); + } + else { + code.append( IN_PARSE_SAVE_STEPS ); + code.append( IN_PARSE_LOAD_START ); + code.append( IN_PARSE_FRAG_WC ); + code.appendHalf( 0 ); + code.append( IN_PCR_CALL ); + code.append( IN_PARSE_FRAG_WC3 ); + } + } + code.append( IN_POP ); +} + +/* Emit bytecode for this statement, dispatching on the statement type. */ +void LangStmt::compile( Compiler *pd, CodeVect &code ) const +{ + switch ( type ) { + case PrintType: + case PrintXMLACType: + case PrintXMLType: + case PrintStreamType: { + /* NOTE(review): types is filled below but not otherwise read before + * it is deleted. */ + UniqueType **types = new UniqueType*[exprPtrVect->length()]; + + /* Push the args backwards. */ + for ( ExprVect::Iter pex = exprPtrVect->last(); pex.gtb(); pex-- ) + types[pex.pos()] = (*pex)->evaluate( pd, code ); + + /* Run the printing forwards. */ + if ( type == PrintType ) { + code.append( IN_PRINT ); + code.append( exprPtrVect->length() ); + } + else if ( type == PrintXMLACType ) { + code.append( IN_PRINT_XML_AC ); + code.append( exprPtrVect->length() ); + } + else if ( type == PrintXMLType ) { + code.append( IN_PRINT_XML ); + code.append( exprPtrVect->length() ); + } + else if ( type == PrintStreamType ) { + /* Minus one because the first arg is the stream. */ + code.append( IN_PRINT_STREAM ); + code.append( exprPtrVect->length() - 1 ); + } + + delete[] types; + + break; + } + case ExprType: { + /* Evaluate the expression, then pop it immediately. */ + expr->evaluate( pd, code ); + code.append( IN_POP ); + break; + } + case IfType: { + pd->curLocalFrame->iterPushScope(); + + long jumpFalse = 0, jumpPastElse = 0, distance = 0; + + /* Evaluate the test. */ + expr->evaluate( pd, code ); + + /* Jump past the if block if false. We don't know the distance + * yet so store the location of the jump. */ + jumpFalse = code.length(); + code.append( IN_JMP_FALSE ); + code.appendHalf( 0 ); + + /* Compile the if true branch. 
*/ + for ( StmtList::Iter stmt = *stmtList; stmt.lte(); stmt++ ) + stmt->compile( pd, code ); + + if ( elsePart != 0 ) { + /* Jump past the else code for the if true branch. */ + jumpPastElse = code.length(); + code.append( IN_JMP ); + code.appendHalf( 0 ); + } + + /* Set the distance for the jump false case. */ + distance = code.length() - jumpFalse - 3; + code.setHalf( jumpFalse+1, distance ); + + pd->curLocalFrame->iterPopScope(); + + if ( elsePart != 0 ) { + /* Compile the else branch. */ + elsePart->compile( pd, code ); + + /* Set the distance for jump over the else part. */ + distance = code.length() - jumpPastElse - 3; + code.setHalf( jumpPastElse+1, distance ); + } + + break; + } + case ElseType: { + pd->curLocalFrame->iterPushScope(); + + /* Compile the else branch. */ + for ( StmtList::Iter stmt = *stmtList; stmt.lte(); stmt++ ) + stmt->compile( pd, code ); + + pd->curLocalFrame->iterPopScope(); + break; + } + case RejectType: { + code.append( IN_REJECT ); + break; + } + case WhileType: { + compileWhile( pd, code ); + break; + } + case AssignType: { + /* Evaluate the expression. */ + UniqueType *exprUT = expr->evaluate( pd, code ); + + /* Do the assignment. */ + varRef->assignValue( pd, code, exprUT ); + break; + } + case ForIterType: { + compileForIter( pd, code ); + break; + } + case ReturnType: { + /* Evaluate the expression. */ + UniqueType *exprUT = expr->evaluate( pd, code ); + + if ( pd->curFunction == 0 ) { + /* In the main function */ + pd->mainReturnUT = exprUT; + } + else { + UniqueType *resUT = pd->curFunction->typeRef->uniqueType; + if ( !castAssignment( pd, code, resUT, 0, exprUT ) ) + error(loc) << "return value wrong type" << endp; + } + + code.append( IN_SAVE_RET ); + + /* The loop cleanup code. */ + if ( pd->loopCleanup != 0 ) + code.append( *pd->loopCleanup ); + + /* Jump to the return label. The distnacnce will be filled in + * later. 
*/ + pd->returnJumps.append( code.length() ); + code.append( IN_JMP ); + code.appendHalf( 0 ); + break; + } + case BreakType: { + pd->breakJumps.append( code.length() ); + code.append( IN_JMP ); + code.appendHalf( 0 ); + break; + } + case YieldType: { + /* Take a reference and yield it. Immediately reset the reference. */ + varRef->preEvaluateRef( pd, code ); + ObjField *objField = varRef->evaluateRef( pd, code, 0 ); + code.append( IN_YIELD ); + + if ( varRef->qual->length() > 0 ) { + code.append( IN_POP_N_WORDS ); + code.appendHalf( (short)(varRef->qual->length()*2) ); + } + + objField->refActive = false; + break; + } + case ParserType: { + evaluateParserItems( pd, code ); + break; + } + } +} + +/* Compile every statement of the block, in order, into code. */ +void CodeBlock::compile( Compiler *pd, CodeVect &code ) const +{ + for ( StmtList::Iter stmt = *stmtList; stmt.lte(); stmt++ ) + stmt->compile( pd, code ); +} + +/* Add a constant int field named "match_length" to frame. NOTE(review): the + * lel parameter is not used in the body. */ +void Compiler::addMatchLength( ObjectDef *frame, LangEl *lel ) +{ + /* Make the type ref. */ + TypeRef *typeRef = new TypeRef( InputLoc(), uniqueTypeInt ); + + /* Create the field and insert it into the map. */ + ObjField *el = new ObjField( InputLoc(), typeRef, "match_length" ); + el->beenReferenced = true; + el->beenInitialized = true; + el->isConst = true; + el->useOffset = false; + el->inGetR = IN_GET_MATCH_LENGTH_R; + frame->insertField( el->name, el ); +} + +/* Add a constant str field named "match_text" to frame. NOTE(review): the + * lel parameter is not used in the body. */ +void Compiler::addMatchText( ObjectDef *frame, LangEl *lel ) +{ + /* Make the type ref. */ + TypeRef *typeRef = new TypeRef( InputLoc(), uniqueTypeStr ); + + /* Create the field and insert it into the map. */ + ObjField *el = new ObjField( InputLoc(), typeRef, "match_text" ); + el->beenReferenced = true; + el->beenInitialized = true; + el->isConst = true; + el->useOffset = false; + el->inGetR = IN_GET_MATCH_TEXT_R; + frame->insertField( el->name, el ); +} + +void Compiler::addInput( ObjectDef *frame ) +{ + /* Make the type ref. */ + TypeRef *typeRef = new TypeRef( InputLoc(), uniqueTypeInput ); + + /* Create the field and insert it into the map. 
*/ + ObjField *el = new ObjField( InputLoc(), typeRef, "input" ); + el->beenReferenced = true; + el->beenInitialized = true; + el->isConst = false; + el->useOffset = false; + el->isCustom = true; + el->inGetR = IN_LOAD_INPUT_R; + el->inGetWV = IN_LOAD_INPUT_WV; + el->inGetWC = IN_LOAD_INPUT_WC; + frame->insertField( el->name, el ); +} + +/* Add a non-constant, custom-load field named "ctx" to frame. */ +void Compiler::addCtx( ObjectDef *frame ) +{ + /* Make the type ref. */ + TypeRef *typeRef = new TypeRef( InputLoc(), uniqueTypeStream ); + + /* Create the field and insert it into the map. */ + ObjField *el = new ObjField( InputLoc(), typeRef, "ctx" ); + el->beenReferenced = true; + el->beenInitialized = true; + el->isConst = false; + el->useOffset = false; + el->isCustom = true; + el->inGetR = IN_LOAD_CTX_R; + el->inGetWV = IN_LOAD_CTX_WV; + el->inGetWC = IN_LOAD_CTX_WC; + frame->insertField( el->name, el ); +} + +/* Assign the object-field get/set opcodes (R, WC and WV variants) to el. */ +void Compiler::initFieldInstructions( ObjField *el ) +{ + el->inGetR = IN_GET_FIELD_R; + el->inGetWC = IN_GET_FIELD_WC; + el->inGetWV = IN_GET_FIELD_WV; + el->inSetWC = IN_SET_FIELD_WC; + el->inSetWV = IN_SET_FIELD_WV; +} + +/* Assign the local-variable get/set opcodes to el. Only the R and WC variants + * are set here; inGetWV and inSetWV are not assigned, unlike in + * initFieldInstructions. */ +void Compiler::initLocalInstructions( ObjField *el ) +{ + el->inGetR = IN_GET_LOCAL_R; + el->inGetWC = IN_GET_LOCAL_WC; + el->inSetWC = IN_SET_LOCAL_WC; +} + +/* Assign the local-reference get/set opcodes to el. As with + * initLocalInstructions, no WV variants are assigned. */ +void Compiler::initLocalRefInstructions( ObjField *el ) +{ + el->inGetR = IN_GET_LOCAL_REF_R; + el->inGetWC = IN_GET_LOCAL_REF_WC; + el->inSetWC = IN_SET_LOCAL_REF_WC; +} + +/* Create the builtin "int" object and register its to_string function. */ +void Compiler::initIntObject( ) +{ + intObj = new ObjectDef( ObjectDef::BuiltinType, "int", nextObjectId++ ); + intLangEl->objectDef = intObj; + + initFunction( uniqueTypeStr, intObj, "to_string", IN_INT_TO_STR, IN_INT_TO_STR, true ); +} + +/* Add a constant length field to the object. + * Opcode supplied by the caller. */ +void Compiler::addLengthField( ObjectDef *objDef, Code getLength ) +{ + /* Create the "length" field. 
*/ + TypeRef *typeRef = new TypeRef( InputLoc(), uniqueTypeInt ); + ObjField *el = new ObjField( InputLoc(), typeRef, "length" ); + el->beenReferenced = true; + el->beenInitialized = true; + el->isConst = true; + el->useOffset = false; + el->inGetR = getLength; + + objDef->insertField( el->name, el ); +} + +/* Create the builtin "str" object, register its conversion functions and + * length field, and register sprintf on the global object. */ +void Compiler::initStrObject( ) +{ + strObj = new ObjectDef( ObjectDef::BuiltinType, "str", nextObjectId++ ); + strLangEl->objectDef = strObj; + + initFunction( uniqueTypeInt, strObj, "atoi", IN_STR_ATOI, IN_STR_ATOI, true ); + initFunction( uniqueTypeInt, strObj, "uord8", IN_STR_UORD8, IN_STR_UORD8, true ); + initFunction( uniqueTypeInt, strObj, "sord8", IN_STR_SORD8, IN_STR_SORD8, true ); + initFunction( uniqueTypeInt, strObj, "uord16", IN_STR_UORD16, IN_STR_UORD16, true ); + initFunction( uniqueTypeInt, strObj, "sord16", IN_STR_SORD16, IN_STR_SORD16, true ); + initFunction( uniqueTypeInt, strObj, "uord32", IN_STR_UORD32, IN_STR_UORD32, true ); + initFunction( uniqueTypeInt, strObj, "sord32", IN_STR_SORD32, IN_STR_SORD32, true ); + addLengthField( strObj, IN_STR_LENGTH ); + + initFunction( uniqueTypeStr, globalObjectDef, "sprintf", + IN_SPRINTF, IN_SPRINTF, uniqueTypeStr, uniqueTypeInt, true ); +} + +/* Create the builtin "stream" object. No functions or fields are added. */ +void Compiler::initStreamObject( ) +{ + streamObj = new ObjectDef( ObjectDef::BuiltinType, + "stream", nextObjectId++ ); + streamLangEl->objectDef = streamObj; +} + +/* Create the builtin input object (named "accum_stream") and register pull, + * push and push_ignore on it. NOTE(review): both opcode arguments of each + * registration are the _WV variant -- confirm this is intended. */ +void Compiler::initInputObject( ) +{ + inputObj = new ObjectDef( ObjectDef::BuiltinType, + "accum_stream", nextObjectId++ ); + inputLangEl->objectDef = inputObj; + + initFunction( uniqueTypeStr, inputObj, "pull", + IN_INPUT_PULL_WV, IN_INPUT_PULL_WV, uniqueTypeInt, false ); + initFunction( uniqueTypeStr, inputObj, "push", + IN_INPUT_PUSH_WV, IN_INPUT_PUSH_WV, uniqueTypeAny, false ); + initFunction( uniqueTypeStr, inputObj, "push_ignore", + IN_INPUT_PUSH_IGNORE_WV, IN_INPUT_PUSH_IGNORE_WV, uniqueTypeAny, false ); +} + +ObjField *Compiler::makeDataEl() +{ + /* Create the "data" field. 
*/ + TypeRef *typeRef = new TypeRef( InputLoc(), uniqueTypeStr ); + ObjField *el = new ObjField( InputLoc(), typeRef, "data" ); + + /* Setting beenReferenced to true prevents us from assigning instructions + * and an offset to the field. */ + + el->beenReferenced = true; + el->beenInitialized = true; + el->useOffset = false; + el->inGetR = IN_GET_TOKEN_DATA_R; + el->inSetWC = IN_SET_TOKEN_DATA_WC; + el->inSetWV = IN_SET_TOKEN_DATA_WV; + return el; +} + +ObjField *Compiler::makePosEl() +{ + /* Create the "pos" field. */ + TypeRef *typeRef = new TypeRef( InputLoc(), uniqueTypeInt ); + ObjField *el = new ObjField( InputLoc(), typeRef, "pos" ); + + /* Setting beenReferenced to true prevents us from assigning instructions + * and an offset to the field. */ + + el->isConst = true; + el->beenReferenced = true; + el->beenInitialized = true; + el->useOffset = false; + el->inGetR = IN_GET_TOKEN_POS_R; + return el; +} + +ObjField *Compiler::makeLineEl() +{ + /* Create the "line" field. */ + TypeRef *typeRef = new TypeRef( InputLoc(), uniqueTypeInt ); + ObjField *el = new ObjField( InputLoc(), typeRef, "line" ); + + /* Setting beenReferenced to true prevents us from assigning instructions + * and an offset to the field. */ + + el->isConst = true; + el->beenReferenced = true; + el->beenInitialized = true; + el->useOffset = false; + el->inGetR = IN_GET_TOKEN_LINE_R; + return el; +} + +/* Build the default token object with data, pos and line fields, then make + * sure every user terminal has those fields: terminals without an object + * definition share the default one, the others get their own copies. */ +void Compiler::initTokenObjects( ) +{ + /* Make a default object Definition. */ + tokenObj = new ObjectDef( ObjectDef::BuiltinType, "token", nextObjectId++ ); + + ObjField *dataEl = makeDataEl(); + tokenObj->insertField( dataEl->name, dataEl ); + + ObjField *posEl = makePosEl(); + tokenObj->insertField( posEl->name, posEl ); + + ObjField *lineEl = makeLineEl(); + tokenObj->insertField( lineEl->name, lineEl ); + + /* Give all user terminals the token object type. 
*/ + for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) { + if ( lel->isUserTerm ) { + if ( lel->objectDef == 0 ) + lel->objectDef = tokenObj; + else { + /* Create the "data" field. */ + ObjField *dataEl = makeDataEl(); + lel->objectDef->insertField( dataEl->name, dataEl ); + + /* Create the "pos" field. */ + ObjField *posEl = makePosEl(); + lel->objectDef->insertField( posEl->name, posEl ); + + /* Create the "line" field. */ + ObjField *lineEl = makeLineEl(); + lel->objectDef->insertField( lineEl->name, lineEl ); + } + } + } +} + +/* Collect into trees the offsets of the tree and pointer typed fields of the + * current local frame that use an offset, are not the lhs element, and have + * been referenced or are parameters. */ +void Compiler::findLocalTrees( CharSet &trees ) +{ + /* We exclude "lhs" from being downrefed because we need to use it after + * the frame is cleaned and so it must survive. */ + for ( ObjFieldList::Iter ol = *curLocalFrame->objFieldList; ol.lte(); ol++ ) { + ObjField *el = ol->value; + /* FIXME: This test needs to be improved. Match_text was getting + * through before useOffset was tested. What will? */ + if ( el->useOffset && !el->isLhsEl && ( el->beenReferenced || el->isParam ) ) { + UniqueType *ut = el->typeRef->uniqueType; + if ( ut->typeId == TYPE_TREE || ut->typeId == TYPE_PTR ) + trees.insert( el->offset ); + } + } +} + +/* For each production element with a capture field, append the pair + * (capture field offset, element position) to prod->copy. */ +void Compiler::makeProdCopies( Definition *prod ) +{ + int pos = 0; + for ( ProdElList::Iter pel = *prod->prodElList; pel.lte(); pel++, pos++) { + if ( pel->captureField != 0 ) { + prod->copy.append( pel->captureField->offset ); + prod->copy.append( pos ); + } + } +} + +void Compiler::compileReductionCode( Definition *prod ) +{ + CodeBlock *block = prod->redBlock; + + /* Init the compilation context. */ + compileContext = CompileReduction; + curLocalFrame = block->localFrame; + revertOn = true; + block->frameId = nextFrameId++; + + CodeVect &code = block->codeWV; + + /* Add the alloc frame opcode. We don't have the right + * frame size yet. We will fill it in later. */ + code.append( IN_INIT_LOCALS ); + code.appendHalf( 0 ); + long afterInit = code.length(); + + /* Compile the reduce block. 
*/ + block->compile( this, code ); + + /* We have the frame size now. Set in the alloc frame instruction. */ + long frameSize = curLocalFrame->size(); + code.setHalf( 1, frameSize ); + + /* Might need to load right hand side values. */ + addProdRHSLoads( prod, code, afterInit ); + + addProdLHSLoad( prod, code, afterInit ); + addPushBackLHS( prod, code, afterInit ); + + code.append( IN_PCR_RET ); + + /* Now that compilation is done variables are referenced. Make the local + * trees descriptor. */ + findLocalTrees( block->trees ); +} + +/* Compile the token translation block of langEl into its revert (WV) code + * vector, with the match and input fields added to the local frame first. */ +void Compiler::compileTranslateBlock( LangEl *langEl ) +{ + CodeBlock *block = langEl->transBlock; + + /* Set up compilation context. */ + compileContext = CompileTranslation; + curLocalFrame = block->localFrame; + revertOn = true; + block->frameId = nextFrameId++; + + /* References to the reduce item. */ + addMatchLength( curLocalFrame, langEl ); + addMatchText( curLocalFrame, langEl ); + addInput( curLocalFrame ); + addCtx( curLocalFrame ); + + CodeVect &code = block->codeWV; + + /* Add the alloc frame opcode. We don't have the right + * frame size yet. We will fill it in later. */ + code.append( IN_INIT_LOCALS ); + code.appendHalf( 0 ); + + if ( langEl->tokenDef->reCaptureVect.length() > 0 ) { + code.append( IN_INIT_CAPTURES ); + code.append( langEl->tokenDef->reCaptureVect.length() ); + + ObjFieldList::Iter f = *curLocalFrame->objFieldList; + for ( int i = 0; i < langEl->tokenDef->reCaptureVect.length(); i++, f++ ) + curLocalFrame->referenceField( this, f->value ); + } + + /* Compile the translation block. */ + block->compile( this, code ); + + /* We have the frame size now. Set in the alloc frame instruction. */ + long frameSize = curLocalFrame->size(); + code.setHalf( 1, frameSize ); + + code.append( IN_PCR_RET ); + + /* Now that compilation is done variables are referenced. Make the local + * trees descriptor. 
*/ + findLocalTrees( block->trees ); +} + +/* Compile the pre-eof block of region into its revert (WV) code vector. */ +void Compiler::compilePreEof( TokenRegion *region ) +{ + CodeBlock *block = region->preEofBlock; + + /* Set up compilation context. */ + compileContext = CompileTranslation; + curLocalFrame = region->preEofBlock->localFrame; + revertOn = true; + block->frameId = nextFrameId++; + + addInput( curLocalFrame ); + addCtx( curLocalFrame ); + + CodeVect &code = block->codeWV; + + /* Add the alloc frame opcode. We don't have the right + * frame size yet. We will fill it in later. */ + code.append( IN_INIT_LOCALS ); + code.appendHalf( 0 ); + + /* Compile the pre-eof block. */ + block->compile( this, code ); + + /* We have the frame size now. Set in the alloc frame instruction. */ + long frameSize = curLocalFrame->size(); + code.setHalf( 1, frameSize ); + + code.append( IN_PCR_RET ); + + /* Now that compilation is done variables are referenced. Make the local + * trees descriptor. */ + findLocalTrees( block->trees ); +} + +/* Compile the root code block into its commit (WC) code vector, loading argv + * first and ending with a stop instruction. */ +void Compiler::compileRootBlock( ) +{ + CodeBlock *block = rootCodeBlock; + + /* The root block never needs to be reverted. */ + + /* Set up the compile context. No locals are needed for the root code + * block, but we need an empty local frame for the compile. */ + compileContext = CompileRoot; + curLocalFrame = rootLocalFrame; + revertOn = false; + + /* The block needs a frame id. */ + block->frameId = nextFrameId++; + + /* The root block is not reverted. */ + CodeVect &code = block->codeWC; + + /* Add the alloc frame opcode. We don't have the right + * frame size yet. We will fill it in later. */ + code.append( IN_INIT_LOCALS ); + code.appendHalf( 0 ); + + code.append( IN_LOAD_ARGV ); + code.appendHalf( argvOffset() ); + + block->compile( this, code ); + + /* We have the frame size now. Store it in frame init. */ + long frameSize = curLocalFrame->size(); + code.setHalf( 1, frameSize ); + + code.append( IN_STOP ); + + /* Make the local trees descriptor. 
*/ + findLocalTrees( block->trees ); +} + +/* Run initField over every field of every language element's object + * definition and of the global object. */ +void Compiler::initAllLanguageObjects() +{ + /* Init all user object fields (need consistent size). */ + for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) { + ObjectDef *objDef = lel->objectDef; + if ( objDef != 0 ) { + /* Init all fields of the object. */ + for ( ObjFieldList::Iter f = *objDef->objFieldList; f.lte(); f++ ) + objDef->initField( this, f->value ); + } + } + + /* Init all fields of the global object. */ + for ( ObjFieldList::Iter f = *globalObjectDef->objFieldList; f.lte(); f++ ) + globalObjectDef->initField( this, f->value ); +} + +/* Register the length field and the find, insert, store and remove functions + * on a map generic. */ +void Compiler::initMapFunctions( GenericType *gen ) +{ + addLengthField( gen->objDef, IN_MAP_LENGTH ); + initFunction( gen->utArg, gen->objDef, "find", + IN_MAP_FIND, IN_MAP_FIND, gen->keyUT, true ); + initFunction( uniqueTypeInt, gen->objDef, "insert", + IN_MAP_INSERT_WV, IN_MAP_INSERT_WC, gen->keyUT, gen->utArg, false ); + initFunction( uniqueTypeInt, gen->objDef, "store", + IN_MAP_STORE_WV, IN_MAP_STORE_WC, gen->keyUT, gen->utArg, false ); + initFunction( gen->utArg, gen->objDef, "remove", + IN_MAP_REMOVE_WV, IN_MAP_REMOVE_WC, gen->keyUT, false ); +} + +/* Register the length field and the list functions on a list generic. */ +void Compiler::initListFunctions( GenericType *gen ) +{ + addLengthField( gen->objDef, IN_LIST_LENGTH ); + + /* "push" is registered with the same opcodes as "append". */ + initFunction( uniqueTypeInt, gen->objDef, "append", + IN_LIST_APPEND_WV, IN_LIST_APPEND_WC, gen->utArg, false ); + initFunction( uniqueTypeInt, gen->objDef, "push", + IN_LIST_APPEND_WV, IN_LIST_APPEND_WC, gen->utArg, false ); + + /* "pop" is registered with the same opcodes as "remove_end". */ + initFunction( gen->utArg, gen->objDef, "remove_end", + IN_LIST_REMOVE_END_WV, IN_LIST_REMOVE_END_WC, false ); + initFunction( gen->utArg, gen->objDef, "pop", + IN_LIST_REMOVE_END_WV, IN_LIST_REMOVE_END_WC, false ); +} + +void Compiler::initListField( GenericType *gen, const char *name, int offset ) +{ + /* Make the type ref and create the field. 
*/ + TypeRef *typeRef = new TypeRef( InputLoc(), gen->utArg ); + ObjField *el = new ObjField( InputLoc(), typeRef, name ); + + el->inGetR = IN_GET_LIST_MEM_R; + el->inGetWC = IN_GET_LIST_MEM_WC; + el->inGetWV = IN_GET_LIST_MEM_WV; + el->inSetWC = IN_SET_LIST_MEM_WC; + el->inSetWV = IN_SET_LIST_MEM_WV; + + gen->objDef->insertField( el->name, el ); + + el->useOffset = true; + el->beenReferenced = true; + el->beenInitialized = true; + + /* Zero for head, One for tail. */ + el->offset = offset; +} + +/* Register the list member fields. "top" shares offset 1 with "tail". */ +void Compiler::initListFields( GenericType *gen ) +{ + initListField( gen, "head", 0 ); + initListField( gen, "tail", 1 ); + initListField( gen, "top", 1 ); +} + +/* Register the length field and the append and insert functions on a vector + * generic. */ +void Compiler::initVectorFunctions( GenericType *gen ) +{ + addLengthField( gen->objDef, IN_VECTOR_LENGTH ); + initFunction( uniqueTypeInt, gen->objDef, "append", + IN_VECTOR_APPEND_WV, IN_VECTOR_APPEND_WC, gen->utArg, false ); + initFunction( uniqueTypeInt, gen->objDef, "insert", + IN_VECTOR_INSERT_WV, IN_VECTOR_INSERT_WC, uniqueTypeInt, gen->utArg, false ); +} + +/* Register the finish function on a parser generic. */ +void Compiler::initParserFunctions( GenericType *gen ) +{ + initFunction( gen->utArg, gen->objDef, "finish", + IN_PARSE_FINISH_WV, IN_PARSE_FINISH_WC, true ); +} + +void Compiler::initCtxField( GenericType *gen ) +{ + LangEl *langEl = gen->utArg->langEl; + Context *context = langEl->contextIn; + + /* Make the type ref and create the field. 
*/ + UniqueType *ctxUT = findUniqueType( TYPE_TREE, context->lel ); + TypeRef *typeRef = new TypeRef( InputLoc(), ctxUT ); + ObjField *el = new ObjField( InputLoc(), typeRef, "ctx" ); + + el->inGetR = IN_GET_ACCUM_CTX_R; + el->inGetWC = IN_GET_ACCUM_CTX_WC; + el->inGetWV = IN_GET_ACCUM_CTX_WV; + el->inSetWC = IN_SET_ACCUM_CTX_WC; + el->inSetWV = IN_SET_ACCUM_CTX_WV; + + gen->objDef->insertField( el->name, el ); + + el->useOffset = false; + el->beenReferenced = true; + el->beenInitialized = true; +} + +/* Add the "ctx" field to a parser generic, but only when the parsed language + * element has a context. */ +void Compiler::initParserFields( GenericType *gen ) +{ + LangEl *langEl = gen->utArg->langEl; + if ( langEl->contextIn != 0 ) + initCtxField( gen ); +} + +/* For every generic in every namespace: resolve its argument types, create + * its builtin object definition and register the functions and fields that + * belong to its kind (map, list, vector or parser). */ +void Compiler::initGenericTypes() +{ + for ( NamespaceList::Iter ns = namespaceList; ns.lte(); ns++ ) { + for ( GenericList::Iter gen = ns->genericList; gen.lte(); gen++ ) { + gen->utArg = gen->typeArg->uniqueType; + + if ( gen->typeId == GEN_MAP ) + gen->keyUT = gen->keyTypeArg->uniqueType; + + gen->objDef = new ObjectDef( ObjectDef::BuiltinType, + gen->name, nextObjectId++ ); + + switch ( gen->typeId ) { + case GEN_MAP: + initMapFunctions( gen ); + break; + case GEN_LIST: + initListFunctions( gen ); + initListFields( gen ); + break; + case GEN_VECTOR: + initVectorFunctions( gen ); + break; + case GEN_PARSER: + /* Need to generate a parser for the type. */ + gen->utArg->langEl->parserId = nextParserId++; + initParserFunctions( gen ); + initParserFields( gen ); + break; + } + + gen->langEl->objectDef = gen->objDef; + } + } +} + +void Compiler::makeFuncVisible( Function *func, bool isUserIter ) +{ + func->localFrame = func->codeBlock->localFrame; + + /* Set up the parameters. 
*/ + long paramPos = 0, paramListSize = 0; + UniqueType **paramUTs = new UniqueType*[func->paramList->length()]; + /* First pass: record each parameter's type, insert the parameter into the + * local frame and accumulate the total size of the parameter list. */ + for ( ParameterList::Iter param = *func->paramList; param.lte(); param++ ) { + paramUTs[paramPos] = param->typeRef->uniqueType; + + if ( func->localFrame->findField( param->name ) != 0 ) + error(param->loc) << "parameter " << param->name << " redeclared" << endp; + + func->localFrame->insertField( param->name, param ); + param->beenInitialized = true; + param->pos = paramPos; + + /* Initialize the object field as a local variable. We also want trees + * downreffed. */ + if ( paramUTs[paramPos]->typeId == TYPE_REF ) + initLocalRefInstructions( param ); + else + initLocalInstructions( param ); + + paramListSize += sizeOfField( paramUTs[paramPos] ); + paramPos += 1; + } + + /* Param offset is relative to one past the last item in the array of + * words containing the args. */ + long paramOffset = 0; + for ( ParameterList::Iter param = *func->paramList; param.lte(); param++ ) { + /* Moving downward, and need the offset to point to the lower half of + * the argument. */ + paramOffset -= sizeOfField( paramUTs[param->pos] ); + + /* How much space do we need to make for call overhead. */ + long frameAfterArgs = isUserIter ? IFR_AA : FR_AA; + + /* Going up first we have the frame data, then maybe + * the user iterator, then the args from high to low. */ + param->offset = frameAfterArgs + + ( isUserIter ? ( sizeof(UserIter) / sizeof(Word) ) : 0 ) + + paramListSize + paramOffset; + } + + func->paramListSize = paramListSize; + func->paramUTs = paramUTs; + + /* Insert the function into the global function map. The return type + * defaults to int when the function has no type reference. */ + UniqueType *returnUT = func->typeRef != 0 ? 
+ func->typeRef->uniqueType : uniqueTypeInt; + ObjMethod *objMethod = new ObjMethod( returnUT, func->name, + IN_CALL_WV, IN_CALL_WC, + func->paramList->length(), paramUTs, func->paramList, false ); + objMethod->funcId = func->funcId; + objMethod->useFuncId = true; + objMethod->useCallObj = false; + objMethod->func = func; + + if ( isUserIter ) { + IterDef *uiter = findIterDef( IterDef::User, func ); + objMethod->iterDef = uiter; + } + + globalObjectDef->objMethodMap->insert( func->name, objMethod ); +} + +/* Called for each type of user iterator compile: revert and commit. */ +void Compiler::compileUserIter( Function *func, CodeVect &code ) +{ + CodeBlock *block = func->codeBlock; + + /* Add the alloc frame opcode. We don't have the right + * frame size yet. We will fill it in later. */ + code.append( IN_INIT_LOCALS ); + code.appendHalf( 0 ); + + /* Compile the block. */ + block->compile( this, code ); + + /* We have the frame size now. Set in the alloc frame instruction. */ + int frameSize = func->localFrame->size(); + code.setHalf( 1, frameSize ); + + /* Check whether the block ends in a yield statement. */ + if ( block->stmtList->length() == 0 || + block->stmtList->tail->type != LangStmt::YieldType ) + { + /* It does not: emit a final yield of nil. */ + code.append( IN_LOAD_NIL ); + code.append( IN_YIELD ); + } +} + +/* Compile a user iterator twice, once into the revert (WV) code vector and + * once into the commit (WC) code vector. */ +void Compiler::compileUserIter( Function *func ) +{ + CodeBlock *block = func->codeBlock; + + /* Set up the context. */ + compileContext = CompileFunction; + curFunction = func; + block->frameId = nextFrameId++; + + /* Need an object for the local frame. */ + curLocalFrame = func->codeBlock->localFrame; + + /* Compile for revert and commit. */ + revertOn = true; + compileUserIter( func, block->codeWV ); + + revertOn = false; + compileUserIter( func, block->codeWC ); + + /* Now that compilation is done variables are referenced. Make the local + * trees descriptor. */ + findLocalTrees( block->trees ); + + /* FIXME: Need to deal with the freeing of local trees. */ +} + +/* Called for each type of function compile: revert and commit. 
*/ +void Compiler::compileFunction( Function *func, CodeVect &code ) +{ + CodeBlock *block = func->codeBlock; + + /* Add the alloc frame opcode. We don't have the right + * frame size yet. We will fill it in later. */ + code.append( IN_INIT_LOCALS ); + code.appendHalf( 0 ); + + /* Compile the block. */ + block->compile( this, code ); + + /* We have the frame size now. Set in the alloc frame instruction. */ + int frameSize = func->localFrame->size(); + code.setHalf( 1, frameSize ); + + /* Check for a return statement. */ + if ( block->stmtList->length() == 0 || + block->stmtList->tail->type != LangStmt::ReturnType ) + { + /* Push the return value. */ + code.append( IN_LOAD_NIL ); + code.append( IN_SAVE_RET ); + } + + /* Compute the jump distance for the return jumps. NOTE(review): the + * constant 3 is presumably the encoded size of a jump (opcode plus half + * operand) -- confirm. */ + for ( LongVect::Iter rj = returnJumps; rj.lte(); rj++ ) { + long distance = code.length() - *rj - 3; + code.setHalf( *rj+1, distance ); + } + + /* Reset the vector of return jumps. */ + returnJumps.empty(); + + /* Return cleans up the stack (including the args) and leaves the return + * value on the top. */ + code.append( IN_RET ); +} + +/* Compile a function twice, once into the revert (WV) code vector and once + * into the commit (WC) code vector, then build its local trees descriptor. */ +void Compiler::compileFunction( Function *func ) +{ + CodeBlock *block = func->codeBlock; + + /* Set up the compilation context. */ + compileContext = CompileFunction; + curFunction = func; + + /* Assign a frame Id. */ + block->frameId = nextFrameId++; + + /* Need an object for the local frame. */ + curLocalFrame = func->codeBlock->localFrame; + + /* Compile once for revert. */ + revertOn = true; + compileFunction( func, block->codeWV ); + + /* Compile once for commit. */ + revertOn = false; + compileFunction( func, block->codeWC ); + + /* Now that compilation is done variables are referenced. Make the local + * trees descriptor. */ + findLocalTrees( block->trees ); +} + +void Compiler::makeDefaultIterators() +{ + /* Tree iterator. 
*/ + { + UniqueType *anyRefUT = findUniqueType( TYPE_REF, anyLangEl ); + ObjMethod *objMethod = initFunction( uniqueTypeAny, globalObjectDef, + "triter", IN_HALT, IN_HALT, anyRefUT, true ); + + IterDef *triter = findIterDef( IterDef::Tree ); + objMethod->iterDef = triter; + } + + /* Child iterator. The local is named triter in every block below, + * whatever the iterator kind. */ + { + UniqueType *anyRefUT = findUniqueType( TYPE_REF, anyLangEl ); + ObjMethod *objMethod = initFunction( uniqueTypeAny, globalObjectDef, + "child", IN_HALT, IN_HALT, anyRefUT, true ); + + IterDef *triter = findIterDef( IterDef::Child ); + objMethod->iterDef = triter; + } + + /* Reverse child iterator. */ + { + UniqueType *anyRefUT = findUniqueType( TYPE_REF, anyLangEl ); + ObjMethod *objMethod = initFunction( uniqueTypeAny, globalObjectDef, + "rev_child", IN_HALT, IN_HALT, anyRefUT, true ); + + IterDef *triter = findIterDef( IterDef::RevChild ); + objMethod->iterDef = triter; + } + + /* Repeat iterator. */ + { + UniqueType *anyRefUT = findUniqueType( TYPE_REF, anyLangEl ); + ObjMethod *objMethod = initFunction( uniqueTypeAny, globalObjectDef, + "repeat", IN_HALT, IN_HALT, anyRefUT, true ); + + IterDef *triter = findIterDef( IterDef::Repeat ); + objMethod->iterDef = triter; + } + + /* Reverse repeat iterator. */ + { + UniqueType *anyRefUT = findUniqueType( TYPE_REF, anyLangEl ); + ObjMethod *objMethod = initFunction( uniqueTypeAny, globalObjectDef, + "rev_repeat", IN_HALT, IN_HALT, anyRefUT, true ); + + IterDef *triter = findIterDef( IterDef::RevRepeat ); + objMethod->iterDef = triter; + } +} + +/* Add a constant stream field named "stdin" to the global object. */ +void Compiler::addStdin() +{ + /* Make the type ref. */ + TypeRef *typeRef = new TypeRef( InputLoc(), uniqueTypeStream ); + + /* Create the field and insert it into the map. */ + ObjField *el = new ObjField( InputLoc(), typeRef, "stdin" ); + el->beenReferenced = true; + el->beenInitialized = true; + el->isConst = true; + el->useOffset = false; + el->inGetR = IN_GET_STDIN; + globalObjectDef->insertField( el->name, el ); +} + +void Compiler::addStdout() +{ + /* Make the type ref. 
*/ + TypeRef *typeRef = new TypeRef( InputLoc(), uniqueTypeStr ); + + /* Create the field and insert it into the map. NOTE(review): the field is + * registered under the name "stout", possibly a typo for "stdout", and is + * typed str where addStdin uses the stream type -- confirm both. */ + ObjField *el = new ObjField( InputLoc(), typeRef, "stout" ); + el->beenReferenced = true; + el->beenInitialized = true; + el->isConst = true; + el->useOffset = false; + el->inGetR = IN_GET_STDOUT; + globalObjectDef->insertField( el->name, el ); +} + +/* Add a constant str field named "stderr" to the global object. */ +void Compiler::addStderr() +{ + /* Make the type ref. */ + TypeRef *typeRef = new TypeRef( InputLoc(), uniqueTypeStr ); + + /* Create the field and insert it into the map. */ + ObjField *el = new ObjField( InputLoc(), typeRef, "stderr" ); + el->beenReferenced = true; + el->beenInitialized = true; + el->isConst = true; + el->useOffset = false; + el->inGetR = IN_GET_STDERR; + globalObjectDef->insertField( el->name, el ); +} + +/* Add the constant "argv" field to the global object and flag it as argv so + * that argvOffset can find it. */ +void Compiler::addArgv() +{ + /* Create the field and insert it into the map. */ + ObjField *el = new ObjField( InputLoc(), argvTypeRef, "argv" ); + el->isArgv = true; + el->isConst = true; + globalObjectDef->insertField( el->name, el ); +} + +/* Find the global field flagged as argv, mark it referenced and return its + * offset. NOTE(review): no return follows the final assert, so if assertions + * are compiled out and no argv field exists, control falls off the end of a + * non-void function. */ +int Compiler::argvOffset() +{ + for ( ObjFieldList::Iter field = *globalObjectDef->objFieldList; + field.lte(); field++ ) + { + if ( field->value->isArgv ) { + globalObjectDef->referenceField( this, field->value ); + return field->value->offset; + } + } + assert(false); +} + +void Compiler::initGlobalFunctions() +{ + ObjMethod *method; + + method = initFunction( uniqueTypeStream, globalObjectDef, "open", + IN_OPEN_FILE, IN_OPEN_FILE, uniqueTypeStr, uniqueTypeStr, true ); + method->useCallObj = false; + + method = initFunction( uniqueTypeStr, globalObjectDef, "tolower", + IN_TO_LOWER, IN_TO_LOWER, uniqueTypeStr, true ); + method->useCallObj = false; + + method = initFunction( uniqueTypeStr, globalObjectDef, "toupper", + IN_TO_UPPER, IN_TO_UPPER, uniqueTypeStr, true ); + method->useCallObj = false; + + method = initFunction( uniqueTypeInt, globalObjectDef, "exit", + IN_EXIT, IN_EXIT, uniqueTypeInt, true ); + + method = initFunction( 
uniqueTypeStr, globalObjectDef, "error", + IN_ERROR, IN_ERROR, true ); + + addStdin(); + addStdout(); + addStderr(); + addArgv(); +} + +void Compiler::removeNonUnparsableRepls() +{ + for ( ReplList::Iter repl = replList; repl.lte(); ) { + Replacement *maybeDel = repl++; + if ( !maybeDel->parse ) + replList.detach( maybeDel ); + } +} + +void Compiler::compileByteCode() +{ +// initUniqueTypes(); + initIntObject(); + initStrObject(); + initStreamObject(); + initInputObject(); + initTokenObjects(); + makeDefaultIterators(); + initAllLanguageObjects(); + initGenericTypes(); + + initGlobalFunctions(); + + for ( FunctionList::Iter f = functionList; f.lte(); f++ ) + makeFuncVisible( f, f->isUserIter ); + + /* This may be comment rot: The function info structure relies on functions + * being compiled first, then iterators. */ + + /* Compile functions. */ + for ( FunctionList::Iter f = functionList; f.lte(); f++ ) { + if ( f->inContext != 0 ) + context = f->inContext; + if ( f->isUserIter ) + compileUserIter( f ); + else + compileFunction( f ); + context = 0; + } + + /* Compile the reduction code. */ + for ( DefList::Iter prod = prodList; prod.lte(); prod++ ) { + makeProdCopies( prod ); + if ( prod->redBlock != 0 ) { + if ( prod->redBlock->context != 0 ) + context = prod->redBlock->context; + compileReductionCode( prod ); + context = 0; + } + } + + /* Compile the token translation code. */ + for ( LelList::Iter lel = langEls; lel.lte(); lel++ ) { + if ( lel->transBlock != 0 ) { + if ( lel->transBlock->context != 0 ) + context = lel->transBlock->context; + compileTranslateBlock( lel ); + context = 0; + } + } + + /* Compile preeof blocks. 
*/ + for ( RegionList::Iter r = regionList; r.lte(); r++ ) { + if ( r->preEofBlock != 0 ) + compilePreEof( r ); + } + + /* Compile the init code */ + compileRootBlock( ); + removeNonUnparsableRepls(); +} diff --git a/src/tree.c b/src/tree.c new file mode 100644 index 00000000..14f7d81f --- /dev/null +++ b/src/tree.c @@ -0,0 +1,2484 @@ +/* + * Copyright 2008-2012 Adrian Thurston <thurston@complang.org> + */ + +/* This file is part of Colm. + * + * Colm is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Colm is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Colm; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <pdarun.h> +#include <tree.h> +#include <pool.h> +#include <bytecode.h> +#include <debug.h> +#include <map.h> +#include <string.h> +#include <stdlib.h> +#include <assert.h> + +#define true 1 +#define false 0 + +#define BUFFER_INITIAL_SIZE 4096 + +void listPrepend( List *list, ListEl *new_el) { listAddBefore(list, list->head, new_el); } +void listAppend( List *list, ListEl *new_el) { listAddAfter(list, list->tail, new_el); } + +ListEl *listDetach( List *list, ListEl *el ); +ListEl *listDetachFirst(List *list ) { return listDetach(list, list->head); } +ListEl *listDetachLast(List *list ) { return listDetach(list, list->tail); } + +long listLength(List *list) + { return list->listLen; } + +void initTreeIter( TreeIter *treeIter, const Ref *rootRef, int searchId, Tree **stackRoot ) +{ + treeIter->rootRef = *rootRef; + 
treeIter->searchId = searchId; + treeIter->stackRoot = stackRoot; + treeIter->stackSize = 0; + treeIter->ref.kid = 0; + treeIter->ref.next = 0; +} + +void initRevTreeIter( RevTreeIter *revTriter, const Ref *rootRef, + int searchId, Tree **stackRoot, int children ) +{ + revTriter->rootRef = *rootRef; + revTriter->searchId = searchId; + revTriter->stackRoot = stackRoot; + revTriter->stackSize = children; + revTriter->kidAtYield = 0; + revTriter->children = children; + revTriter->ref.kid = 0; + revTriter->ref.next = 0; +} + +void initUserIter( UserIter *userIter, Tree **stackRoot, long argSize, long searchId ) +{ + userIter->stackRoot = stackRoot; + userIter->argSize = argSize; + userIter->stackSize = 0; + userIter->resume = 0; + userIter->frame = 0; + userIter->searchId = searchId; + + userIter->ref.kid = 0; + userIter->ref.next = 0; +} + +Kid *allocAttrs( Program *prg, long length ) +{ + Kid *cur = 0; + long i; + for ( i = 0; i < length; i++ ) { + Kid *next = cur; + cur = kidAllocate( prg ); + cur->next = next; + } + return cur; +} + +void freeAttrs( Program *prg, Kid *attrs ) +{ + Kid *cur = attrs; + while ( cur != 0 ) { + Kid *next = cur->next; + kidFree( prg, cur ); + cur = next; + } +} + +void freeKidList( Program *prg, Kid *kid ) +{ + while ( kid != 0 ) { + Kid *next = kid->next; + kidFree( prg, kid ); + kid = next; + } +} + +void setAttr( Tree *tree, long pos, Tree *val ) +{ + long i; + Kid *kid = tree->child; + + if ( tree->flags & AF_LEFT_IGNORE ) + kid = kid->next; + if ( tree->flags & AF_RIGHT_IGNORE ) + kid = kid->next; + + for ( i = 0; i < pos; i++ ) + kid = kid->next; + kid->tree = val; +} + +Tree *getGlobal( Program *prg, long pos ) + { return getAttr( prg->global, pos ); } + +Tree *getAttr( Tree *tree, long pos ) +{ + long i; + Kid *kid = tree->child; + + if ( tree->flags & AF_LEFT_IGNORE ) + kid = kid->next; + if ( tree->flags & AF_RIGHT_IGNORE ) + kid = kid->next; + + for ( i = 0; i < pos; i++ ) + kid = kid->next; + return kid->tree; +} + + +Tree 
*getRepeatNext( Tree *tree ) +{ + Kid *kid = tree->child; + + if ( tree->flags & AF_LEFT_IGNORE ) + kid = kid->next; + if ( tree->flags & AF_RIGHT_IGNORE ) + kid = kid->next; + + return kid->next->tree; +} + +Tree *getRepeatVal( Tree *tree ) +{ + Kid *kid = tree->child; + + if ( tree->flags & AF_LEFT_IGNORE ) + kid = kid->next; + if ( tree->flags & AF_RIGHT_IGNORE ) + kid = kid->next; + + return kid->tree; +} + +int repeatEnd( Tree *tree ) +{ + Kid *kid = tree->child; + + if ( tree->flags & AF_LEFT_IGNORE ) + kid = kid->next; + if ( tree->flags & AF_RIGHT_IGNORE ) + kid = kid->next; + + return kid == 0; +} + +int listLast( Tree *tree ) +{ + Kid *kid = tree->child; + + if ( tree->flags & AF_LEFT_IGNORE ) + kid = kid->next; + if ( tree->flags & AF_RIGHT_IGNORE ) + kid = kid->next; + + return kid->next == 0; +} + +Kid *getAttrKid( Tree *tree, long pos ) +{ + long i; + Kid *kid = tree->child; + + if ( tree->flags & AF_LEFT_IGNORE ) + kid = kid->next; + if ( tree->flags & AF_RIGHT_IGNORE ) + kid = kid->next; + + for ( i = 0; i < pos; i++ ) + kid = kid->next; + return kid; +} + +Kid *kidListConcat( Kid *list1, Kid *list2 ) +{ + if ( list1 == 0 ) + return list2; + else if ( list2 == 0 ) + return list1; + + Kid *dest = list1; + while ( dest->next != 0 ) + dest = dest->next; + dest->next = list2; + return list1; +} + + +Stream *openStreamFile( Program *prg, FILE *file ) +{ + Stream *res = (Stream*)mapElAllocate( prg ); + res->id = LEL_ID_STREAM; + res->file = file; + res->in = newSourceStreamFile( file ); + initSourceStream( res->in ); + return res; +} + +Stream *openStreamFd( Program *prg, long fd ) +{ + Stream *res = (Stream*)mapElAllocate( prg ); + res->id = LEL_ID_STREAM; + res->in = newSourceStreamFd( fd ); + initSourceStream( res->in ); + return res; +} + +Stream *openFile( Program *prg, Tree *name, Tree *mode ) +{ + Head *headName = ((Str*)name)->value; + Head *headMode = ((Str*)mode)->value; + + const char *givenMode = stringData(headMode); + const char *fopenMode = 
0; + if ( memcmp( givenMode, "r", stringLength(headMode) ) == 0 ) + fopenMode = "rb"; + else if ( memcmp( givenMode, "w", stringLength(headMode) ) == 0 ) + fopenMode = "wb"; + else { + fatal( "unknown file open mode: %s\n", givenMode ); + } + + /* Need to make a C-string (null terminated). */ + char *fileName = (char*)malloc(stringLength(headName)+1); + memcpy( fileName, stringData(headName), stringLength(headName) ); + fileName[stringLength(headName)] = 0; + FILE *file = fopen( fileName, fopenMode ); + free(fileName); + return openStreamFile( prg, file ); +} + +Tree *constructInteger( Program *prg, long i ) +{ + Int *integer = (Int*) treeAllocate( prg ); + integer->id = LEL_ID_INT; + integer->value = i; + + return (Tree*)integer; +} + +Tree *constructString( Program *prg, Head *s ) +{ + Str *str = (Str*) treeAllocate( prg ); + str->id = LEL_ID_STR; + str->value = s; + + return (Tree*)str; +} + +Tree *constructPointer( Program *prg, Tree *tree ) +{ + Kid *kid = kidAllocate( prg ); + kid->tree = tree; + kid->next = prg->heap; + prg->heap = kid; + + Pointer *pointer = (Pointer*) treeAllocate( prg ); + pointer->id = LEL_ID_PTR; + pointer->value = kid; + + return (Tree*)pointer; +} + +Tree *constructTerm( Program *prg, Word id, Head *tokdata ) +{ + LangElInfo *lelInfo = prg->rtd->lelInfo; + + Tree *tree = treeAllocate( prg ); + tree->id = id; + tree->refs = 0; + tree->tokdata = tokdata; + + int objectLength = lelInfo[tree->id].objectLength; + tree->child = allocAttrs( prg, objectLength ); + + return tree; +} + +Tree *constructInput( Program *prg ) +{ + Input *input = inputAllocate( prg ); + input->refs = 0; + input->id = LEL_ID_INPUT; + input->in = malloc( sizeof(InputStream) ); + initInputStream( input->in ); + return (Tree*)input; +} + +Kid *constructReplacementKid( Tree **bindings, Program *prg, Kid *prev, long pat ); + +static Kid *constructIgnoreList( Program *prg, long ignoreInd ) +{ + PatReplNode *nodes = prg->rtd->patReplNodes; + + Kid *first = 0, *last = 0; + 
while ( ignoreInd >= 0 ) { + Head *ignoreData = stringAllocPointer( prg, nodes[ignoreInd].data, nodes[ignoreInd].length ); + + Tree *ignTree = treeAllocate( prg ); + ignTree->refs = 1; + ignTree->id = nodes[ignoreInd].id; + ignTree->tokdata = ignoreData; + + Kid *ignKid = kidAllocate( prg ); + ignKid->tree = ignTree; + ignKid->next = 0; + + if ( last == 0 ) + first = ignKid; + else + last->next = ignKid; + + ignoreInd = nodes[ignoreInd].next; + last = ignKid; + } + + return first; +} + +static Kid *constructLeftIgnoreList( Program *prg, long pat ) +{ + PatReplNode *nodes = prg->rtd->patReplNodes; + return constructIgnoreList( prg, nodes[pat].leftIgnore ); +} + +static Kid *constructRightIgnoreList( Program *prg, long pat ) +{ + PatReplNode *nodes = prg->rtd->patReplNodes; + return constructIgnoreList( prg, nodes[pat].rightIgnore ); +} + +static void insLeftIgnore( Program *prg, Tree *tree, Tree *ignoreList ) +{ + assert( ! (tree->flags & AF_LEFT_IGNORE) ); + + /* Allocate. */ + Kid *kid = kidAllocate( prg ); + kid->tree = ignoreList; + treeUpref( ignoreList ); + + /* Attach it. */ + kid->next = tree->child; + tree->child = kid; + + tree->flags |= AF_LEFT_IGNORE; +} + +static void insRightIgnore( Program *prg, Tree *tree, Tree *ignoreList ) +{ + assert( ! (tree->flags & AF_RIGHT_IGNORE) ); + + /* Insert an ignore head in the child list. */ + Kid *kid = kidAllocate( prg ); + kid->tree = ignoreList; + treeUpref( ignoreList ); + + /* Attach it. */ + if ( tree->flags & AF_LEFT_IGNORE ) { + kid->next = tree->child->next; + tree->child->next = kid; + } + else { + kid->next = tree->child; + tree->child = kid; + } + + tree->flags |= AF_RIGHT_IGNORE; +} + +Tree *pushRightIgnore( Program *prg, Tree *pushTo, Tree *rightIgnore ) +{ + /* About to alter the data tree. Split first. */ + pushTo = splitTree( prg, pushTo ); + + if ( pushTo->flags & AF_RIGHT_IGNORE ) { + /* The previous token already has a right ignore. Merge by + * attaching it as a left ignore of the new list. 
*/ + Kid *curIgnore = treeRightIgnoreKid( prg, pushTo ); + insLeftIgnore( prg, rightIgnore, curIgnore->tree ); + + /* Replace the current ignore. Safe to access refs here because we just + * upreffed it in insLeftIgnore. */ + curIgnore->tree->refs -= 1; + curIgnore->tree = rightIgnore; + treeUpref( rightIgnore ); + } + else { + /* Attach The ignore list. */ + insRightIgnore( prg, pushTo, rightIgnore ); + } + + return pushTo; +} + +Tree *pushLeftIgnore( Program *prg, Tree *pushTo, Tree *leftIgnore ) +{ + pushTo = splitTree( prg, pushTo ); + + /* Attach as left ignore to the token we are sending. */ + if ( pushTo->flags & AF_LEFT_IGNORE ) { + /* The token already has a left-ignore. Merge by attaching it as a + * right ignore of the new list. */ + Kid *curIgnore = treeLeftIgnoreKid( prg, pushTo ); + insRightIgnore( prg, leftIgnore, curIgnore->tree ); + + /* Replace the current ignore. Safe to upref here because we just + * upreffed it in insRightIgnore. */ + curIgnore->tree->refs -= 1; + curIgnore->tree = leftIgnore; + treeUpref( leftIgnore ); + } + else { + /* Attach the ignore list. 
*/ + insLeftIgnore( prg, pushTo, leftIgnore ); + } + + return pushTo; +} + +static void remLeftIgnore( Program *prg, Tree **sp, Tree *tree ) +{ + assert( tree->flags & AF_LEFT_IGNORE ); + + Kid *next = tree->child->next; + treeDownref( prg, sp, tree->child->tree ); + kidFree( prg, tree->child ); + tree->child = next; + + tree->flags &= ~AF_LEFT_IGNORE; +} + +static void remRightIgnore( Program *prg, Tree **sp, Tree *tree ) +{ + assert( tree->flags & AF_RIGHT_IGNORE ); + + if ( tree->flags & AF_LEFT_IGNORE ) { + Kid *next = tree->child->next->next; + treeDownref( prg, sp, tree->child->next->tree ); + kidFree( prg, tree->child->next ); + tree->child->next = next; + } + else { + Kid *next = tree->child->next; + treeDownref( prg, sp, tree->child->tree ); + kidFree( prg, tree->child ); + tree->child = next; + } + + tree->flags &= ~AF_RIGHT_IGNORE; +} + +Tree *popRightIgnore( Program *prg, Tree **sp, Tree *popFrom, Tree **rightIgnore ) +{ + /* Modifying the tree we are detaching from. */ + popFrom = splitTree( prg, popFrom ); + + Kid *riKid = treeRightIgnoreKid( prg, popFrom ); + + /* If the right ignore has a left ignore, then that was the original + * right ignore. */ + Kid *li = treeLeftIgnoreKid( prg, riKid->tree ); + if ( li != 0 ) { + treeUpref( li->tree ); + remLeftIgnore( prg, sp, riKid->tree ); + *rightIgnore = riKid->tree; + treeUpref( *rightIgnore ); + riKid->tree = li->tree; + } + else { + *rightIgnore = riKid->tree; + treeUpref( *rightIgnore ); + remRightIgnore( prg, sp, popFrom ); + } + + return popFrom; +} + +Tree *popLeftIgnore( Program *prg, Tree **sp, Tree *popFrom, Tree **leftIgnore ) +{ + /* Modifying, make the write safe. */ + popFrom = splitTree( prg, popFrom ); + + Kid *liKid = treeLeftIgnoreKid( prg, popFrom ); + + /* If the left ignore has a right ignore, then that was the original + * left ignore. 
*/ + Kid *ri = treeRightIgnoreKid( prg, liKid->tree ); + if ( ri != 0 ) { + treeUpref( ri->tree ); + remRightIgnore( prg, sp, liKid->tree ); + *leftIgnore = liKid->tree; + treeUpref( *leftIgnore ); + liKid->tree = ri->tree; + } + else { + *leftIgnore = liKid->tree; + treeUpref( *leftIgnore ); + remLeftIgnore( prg, sp, popFrom ); + } + + return popFrom; +} + + +/* Returns an uprefed tree. Saves us having to downref and bindings to zero to + * return a zero-ref tree. */ +Tree *constructReplacementTree( Kid *kid, Tree **bindings, Program *prg, long pat ) +{ + PatReplNode *nodes = prg->rtd->patReplNodes; + LangElInfo *lelInfo = prg->rtd->lelInfo; + Tree *tree = 0; + + if ( nodes[pat].bindId > 0 ) { + /* All bindings have been uprefed. */ + tree = bindings[nodes[pat].bindId]; + + long ignore = nodes[pat].leftIgnore; + Tree *leftIgnore = 0; + if ( ignore >= 0 ) { + Kid *ignore = constructLeftIgnoreList( prg, pat ); + + leftIgnore = treeAllocate( prg ); + leftIgnore->id = LEL_ID_IGNORE; + leftIgnore->child = ignore; + + tree = pushLeftIgnore( prg, tree, leftIgnore ); + } + + ignore = nodes[pat].rightIgnore; + Tree *rightIgnore = 0; + if ( ignore >= 0 ) { + Kid *ignore = constructRightIgnoreList( prg, pat ); + + rightIgnore = treeAllocate( prg ); + rightIgnore->id = LEL_ID_IGNORE; + rightIgnore->child = ignore; + + tree = pushRightIgnore( prg, tree, rightIgnore ); + } + } + else { + tree = treeAllocate( prg ); + tree->id = nodes[pat].id; + tree->refs = 1; + tree->tokdata = nodes[pat].length == 0 ? 0 : + stringAllocPointer( prg, + nodes[pat].data, nodes[pat].length ); + + int objectLength = lelInfo[tree->id].objectLength; + + Kid *attrs = allocAttrs( prg, objectLength ); + Kid *child = constructReplacementKid( bindings, prg, + 0, nodes[pat].child ); + + tree->child = kidListConcat( attrs, child ); + + /* Right first, then left. 
*/ + Kid *ignore = constructRightIgnoreList( prg, pat ); + if ( ignore != 0 ) { + Tree *ignoreList = treeAllocate( prg ); + ignoreList->id = LEL_ID_IGNORE; + ignoreList->refs = 1; + ignoreList->child = ignore; + + Kid *ignoreHead = kidAllocate( prg ); + ignoreHead->tree = ignoreList; + ignoreHead->next = tree->child; + tree->child = ignoreHead; + + tree->flags |= AF_RIGHT_IGNORE; + } + + ignore = constructLeftIgnoreList( prg, pat ); + if ( ignore != 0 ) { + Tree *ignoreList = treeAllocate( prg ); + ignoreList->id = LEL_ID_IGNORE; + ignoreList->refs = 1; + ignoreList->child = ignore; + + Kid *ignoreHead = kidAllocate( prg ); + ignoreHead->tree = ignoreList; + ignoreHead->next = tree->child; + tree->child = ignoreHead; + + tree->flags |= AF_LEFT_IGNORE; + } + + int i; + for ( i = 0; i < lelInfo[tree->id].numCaptureAttr; i++ ) { + long ci = pat+1+i; + CaptureAttr *ca = prg->rtd->captureAttr + lelInfo[tree->id].captureAttr + i; + Tree *attr = treeAllocate( prg ); + attr->id = nodes[ci].id; + attr->refs = 1; + attr->tokdata = nodes[ci].length == 0 ? 0 : + stringAllocPointer( prg, + nodes[ci].data, nodes[ci].length ); + + setAttr( tree, ca->offset, attr ); + } + } + + return tree; +} + +Kid *constructReplacementKid( Tree **bindings, Program *prg, Kid *prev, long pat ) +{ + PatReplNode *nodes = prg->rtd->patReplNodes; + Kid *kid = 0; + + if ( pat != -1 ) { + kid = kidAllocate( prg ); + kid->tree = constructReplacementTree( kid, bindings, prg, pat ); + + /* Recurse down next. 
*/ + Kid *next = constructReplacementKid( bindings, prg, + kid, nodes[pat].next ); + + kid->next = next; + } + + return kid; +} + +Tree *constructToken( Program *prg, Tree **root, long nargs ) +{ + Tree **const sp = root; + Tree **base = vm_ptop() + nargs; + + Int *idInt = (Int*)base[-1]; + Str *textStr = (Str*)base[-2]; + + long id = idInt->value; + Head *tokdata = stringCopy( prg, textStr->value ); + + LangElInfo *lelInfo = prg->rtd->lelInfo; + Tree *tree; + + if ( lelInfo[id].ignore ) { + tree = treeAllocate( prg ); + tree->refs = 1; + tree->id = id; + tree->tokdata = tokdata; + } + else { + long objectLength = lelInfo[id].objectLength; + Kid *attrs = allocAttrs( prg, objectLength ); + + tree = treeAllocate( prg ); + tree->id = id; + tree->refs = 1; + tree->tokdata = tokdata; + + tree->child = attrs; + + assert( nargs-2 <= objectLength ); + long id; + for ( id = 0; id < nargs-2; id++ ) { + setAttr( tree, id, base[-3-id] ); + treeUpref( getAttr( tree, id) ); + } + } + return tree; +} + +Tree *makeTree( Program *prg, Tree **root, long nargs ) +{ + Tree **const sp = root; + Tree **base = vm_ptop() + nargs; + + Int *idInt = (Int*)base[-1]; + + long id = idInt->value; + LangElInfo *lelInfo = prg->rtd->lelInfo; + + Tree *tree = treeAllocate( prg ); + tree->id = id; + tree->refs = 1; + + long objectLength = lelInfo[id].objectLength; + Kid *attrs = allocAttrs( prg, objectLength ); + + Kid *last = 0, *child = 0; + for ( id = 0; id < nargs-1; id++ ) { + Kid *kid = kidAllocate( prg ); + kid->tree = base[-2-id]; + treeUpref( kid->tree ); + + if ( last == 0 ) + child = kid; + else + last->next = kid; + + last = kid; + } + + tree->child = kidListConcat( attrs, child ); + + return tree; +} + +int testFalse( Program *prg, Tree *tree ) +{ + int flse = ( + tree == 0 || + tree == prg->falseVal || + ( tree->id == LEL_ID_INT && ((Int*)tree)->value == 0 ) ); + return flse; +} + +Kid *copyIgnoreList( Program *prg, Kid *ignoreHeader ) +{ + Kid *newHeader = kidAllocate( prg ); + Kid 
*last = 0, *ic = (Kid*)ignoreHeader->tree; + while ( ic != 0 ) { + Kid *newIc = kidAllocate( prg ); + + newIc->tree = ic->tree; + newIc->tree->refs += 1; + + /* List pointers. */ + if ( last == 0 ) + newHeader->tree = (Tree*)newIc; + else + last->next = newIc; + + ic = ic->next; + last = newIc; + } + return newHeader; +} + +Kid *copyKidList( Program *prg, Kid *kidList ) +{ + Kid *newList = 0, *last = 0, *ic = kidList; + + while ( ic != 0 ) { + Kid *newIc = kidAllocate( prg ); + + newIc->tree = ic->tree; + treeUpref( newIc->tree ); + + /* List pointers. */ + if ( last == 0 ) + newList = newIc; + else + last->next = newIc; + + ic = ic->next; + last = newIc; + } + return newList; +} + +/* New tree has zero ref. */ +Tree *copyRealTree( Program *prg, Tree *tree, Kid *oldNextDown, Kid **newNextDown ) +{ + /* Need to keep a lookout for next down. If + * copying it, return the copy. */ + Tree *newTree = treeAllocate( prg ); + + newTree->id = tree->id; + newTree->tokdata = stringCopy( prg, tree->tokdata ); + + /* Copy the child list. Start with ignores, then the list. */ + Kid *child = tree->child, *last = 0; + + /* Left ignores. */ + if ( tree->flags & AF_LEFT_IGNORE ) { + newTree->flags |= AF_LEFT_IGNORE; +// Kid *newHeader = copyIgnoreList( prg, child ); +// +// /* Always the head. */ +// newTree->child = newHeader; +// +// child = child->next; +// last = newHeader; + } + + /* Right ignores. */ + if ( tree->flags & AF_RIGHT_IGNORE ) { + newTree->flags |= AF_RIGHT_IGNORE; +// Kid *newHeader = copyIgnoreList( prg, child ); +// if ( last == 0 ) +// newTree->child = newHeader; +// else +// last->next = newHeader; +// child = child->next; +// last = newHeader; + } + + /* Attributes and children. */ + while ( child != 0 ) { + Kid *newKid = kidAllocate( prg ); + + /* Watch out for next down. */ + if ( child == oldNextDown ) + *newNextDown = newKid; + + newKid->tree = child->tree; + newKid->next = 0; + + /* May be an attribute. 
*/ + if ( newKid->tree != 0 ) + newKid->tree->refs += 1; + + /* Store the first child. */ + if ( last == 0 ) + newTree->child = newKid; + else + last->next = newKid; + + child = child->next; + last = newKid; + } + + return newTree; +} + +List *copyList( Program *prg, List *list, Kid *oldNextDown, Kid **newNextDown ) +{ +// #ifdef COLM_LOG_BYTECODE +// if ( colm_log_bytecode ) { +// cerr << "splitting list: " << list << " refs: " << +// list->refs << endl; +// } +// #endif + + /* Not a need copy. */ + List *newList = (List*)mapElAllocate( prg ); + newList->id = list->genericInfo->langElId; + newList->genericInfo = list->genericInfo; + + ListEl *src = list->head; + while( src != 0 ) { + ListEl *newEl = listElAllocate( prg ); + newEl->value = src->value; + treeUpref( newEl->value ); + + listAppend( newList, newEl ); + + /* Watch out for next down. */ + if ( (Kid*)src == oldNextDown ) + *newNextDown = (Kid*)newEl; + + src = src->next; + } + + return newList; +} + +Map *copyMap( Program *prg, Map *map, Kid *oldNextDown, Kid **newNextDown ) +{ +// #ifdef COLM_LOG_BYTECODE +// if ( colm_log_bytecode ) { +// cerr << "splitting map: " << map << " refs: " << +// map->refs << endl; +// } +// #endif + + Map *newMap = (Map*)mapElAllocate( prg ); + newMap->id = map->genericInfo->langElId; + newMap->genericInfo = map->genericInfo; + newMap->treeSize = map->treeSize; + newMap->root = 0; + + /* If there is a root, copy the tree. 
*/ + if ( map->root != 0 ) { + newMap->root = mapCopyBranch( prg, newMap, map->root, + oldNextDown, newNextDown ); + } + MapEl *el; + for ( el = newMap->head; el != 0; el = el->next ) { + assert( map->genericInfo->typeArg == TYPE_TREE ); + treeUpref( el->tree ); + } + + return newMap; +} + +Tree *copyTree( Program *prg, Tree *tree, Kid *oldNextDown, Kid **newNextDown ) +{ + LangElInfo *lelInfo = prg->rtd->lelInfo; + long genericId = lelInfo[tree->id].genericId; + if ( genericId > 0 ) { + GenericInfo *generic = &prg->rtd->genericInfo[genericId]; + if ( generic->type == GEN_LIST ) + tree = (Tree*) copyList( prg, (List*) tree, oldNextDown, newNextDown ); + else if ( generic->type == GEN_MAP ) + tree = (Tree*) copyMap( prg, (Map*) tree, oldNextDown, newNextDown ); + else if ( generic->type == GEN_PARSER ) { + /* Need to figure out the semantics here. */ + fatal( "ATTEMPT TO COPY PARSER\n" ); + assert(false); + } + } + else if ( tree->id == LEL_ID_PTR ) + assert(false); + else if ( tree->id == LEL_ID_BOOL ) + assert(false); + else if ( tree->id == LEL_ID_INT ) + assert(false); + else if ( tree->id == LEL_ID_STR ) + assert(false); + else if ( tree->id == LEL_ID_STREAM ) + assert(false); + else { + tree = copyRealTree( prg, tree, oldNextDown, newNextDown ); + } + + assert( tree->refs == 0 ); + return tree; +} + +Tree *splitTree( Program *prg, Tree *tree ) +{ + if ( tree != 0 ) { + assert( tree->refs >= 1 ); + + if ( tree->refs > 1 ) { + Kid *oldNextDown = 0, *newNextDown = 0; + Tree *newTree = copyTree( prg, tree, oldNextDown, &newNextDown ); + treeUpref( newTree ); + + /* Downref the original. Don't need to consider freeing because + * refs were > 1. 
*/ + tree->refs -= 1; + + tree = newTree; + } + + assert( tree->refs == 1 ); + } + return tree; +} + +Tree *createGeneric( Program *prg, long genericId ) +{ + GenericInfo *genericInfo = &prg->rtd->genericInfo[genericId]; + Tree *newGeneric = 0; + switch ( genericInfo->type ) { + case GEN_MAP: { + Map *map = (Map*)mapElAllocate( prg ); + map->id = genericInfo->langElId; + map->genericInfo = genericInfo; + newGeneric = (Tree*) map; + break; + } + case GEN_LIST: { + List *list = (List*)mapElAllocate( prg ); + list->id = genericInfo->langElId; + list->genericInfo = genericInfo; + newGeneric = (Tree*) list; + break; + } + case GEN_PARSER: { + Parser *parser = (Parser*)mapElAllocate( prg ); + parser->id = genericInfo->langElId; + parser->genericInfo = genericInfo; + parser->fsmRun = malloc( sizeof(FsmRun) ); + parser->pdaRun = malloc( sizeof(PdaRun) ); + + /* Start off the parsing process. */ + initPdaRun( parser->pdaRun, prg, prg->rtd->pdaTables, + parser->fsmRun, genericInfo->parserId, false, false, 0 ); + initFsmRun( parser->fsmRun, prg ); + newToken( prg, parser->pdaRun, parser->fsmRun ); + + newGeneric = (Tree*) parser; + break; + } + default: + assert(false); + return 0; + } + + return newGeneric; +} + + +/* We can't make recursive calls here since the tree we are freeing may be + * very large. Need the VM stack. 
*/ +void treeFreeRec( Program *prg, Tree **sp, Tree *tree ) +{ + Tree **top = sp; + LangElInfo *lelInfo; + long genericId; + +free_tree: + lelInfo = prg->rtd->lelInfo; + genericId = lelInfo[tree->id].genericId; + if ( genericId > 0 ) { + GenericInfo *generic = &prg->rtd->genericInfo[genericId]; + if ( generic->type == GEN_LIST ) { + List *list = (List*) tree; + ListEl *el = list->head; + while ( el != 0 ) { + ListEl *next = el->next; + vm_push( el->value ); + listElFree( prg, el ); + el = next; + } + mapElFree( prg, (MapEl*)list ); + } + else if ( generic->type == GEN_MAP ) { + Map *map = (Map*)tree; + MapEl *el = map->head; + while ( el != 0 ) { + MapEl *next = el->next; + vm_push( el->key ); + vm_push( el->tree ); + mapElFree( prg, el ); + el = next; + } + mapElFree( prg, (MapEl*)map ); + } + else if ( generic->type == GEN_PARSER ) { + Parser *parser = (Parser*)tree; + clearFsmRun( prg, parser->fsmRun ); + clearPdaRun( prg, sp, parser->pdaRun ); + free( parser->pdaRun ); + free( parser->fsmRun ); + treeDownref( prg, sp, (Tree*)parser->input ); + mapElFree( prg, (MapEl*)parser ); + } + else { + assert(false); + } + } + else { + if ( tree->id == LEL_ID_STR ) { + Str *str = (Str*) tree; + stringFree( prg, str->value ); + treeFree( prg, tree ); + } + else if ( tree->id == LEL_ID_BOOL || tree->id == LEL_ID_INT ) + treeFree( prg, tree ); + else if ( tree->id == LEL_ID_PTR ) + treeFree( prg, tree ); + else if ( tree->id == LEL_ID_STREAM ) { + Stream *stream = (Stream*)tree; + clearSourceStream( prg, sp, stream->in ); + free( stream->in ); + if ( stream->file != 0 ) + fclose( stream->file ); + streamFree( prg, stream ); + } + else if ( tree->id == LEL_ID_INPUT ) { + Input *input = (Input*)tree; + clearInputStream( prg, sp, input->in ); + free( input->in ); + inputFree( prg, input ); + } + else { + if ( tree->id != LEL_ID_IGNORE ) + stringFree( prg, tree->tokdata ); + + /* Attributes and grammar-based children. 
*/ + Kid *child = tree->child; + while ( child != 0 ) { + Kid *next = child->next; + vm_push( child->tree ); + kidFree( prg, child ); + child = next; + } + + treeFree( prg, tree ); + } + } + + /* Any trees to downref? */ + while ( sp != top ) { + tree = vm_pop(); + if ( tree != 0 ) { + assert( tree->refs > 0 ); + tree->refs -= 1; + if ( tree->refs == 0 ) + goto free_tree; + } + } +} + +void treeUpref( Tree *tree ) +{ + if ( tree != 0 ) + tree->refs += 1; +} + +void treeDownref( Program *prg, Tree **sp, Tree *tree ) +{ + if ( tree != 0 ) { + assert( tree->refs > 0 ); + tree->refs -= 1; + if ( tree->refs == 0 ) + treeFreeRec( prg, sp, tree ); + } +} + +/* Find the first child of a tree. */ +Kid *treeChild( Program *prg, const Tree *tree ) +{ + LangElInfo *lelInfo = prg->rtd->lelInfo; + Kid *kid = tree->child; + + if ( tree->flags & AF_LEFT_IGNORE ) + kid = kid->next; + if ( tree->flags & AF_RIGHT_IGNORE ) + kid = kid->next; + + /* Skip over attributes. */ + long objectLength = lelInfo[tree->id].objectLength; + long a; + for ( a = 0; a < objectLength; a++ ) + kid = kid->next; + + return kid; +} + +/* Detach at the first real child of a tree. */ +Kid *treeExtractChild( Program *prg, Tree *tree ) +{ + LangElInfo *lelInfo = prg->rtd->lelInfo; + Kid *kid = tree->child, *last = 0; + + if ( tree->flags & AF_LEFT_IGNORE ) + kid = kid->next; + if ( tree->flags & AF_RIGHT_IGNORE ) + kid = kid->next; + + /* Skip over attributes. */ + long a, objectLength = lelInfo[tree->id].objectLength; + for ( a = 0; a < objectLength; a++ ) { + last = kid; + kid = kid->next; + } + + if ( last == 0 ) + tree->child = 0; + else + last->next = 0; + + return kid; +} + + +/* Find the first child of a tree. 
*/ +Kid *treeAttr( Program *prg, const Tree *tree ) +{ + Kid *kid = tree->child; + + if ( tree->flags & AF_LEFT_IGNORE ) + kid = kid->next; + if ( tree->flags & AF_RIGHT_IGNORE ) + kid = kid->next; + + return kid; +} + +Tree *treeLeftIgnore( Program *prg, Tree *tree ) +{ + if ( tree->flags & AF_LEFT_IGNORE ) + return tree->child->tree; + return 0; +} + +Tree *treeRightIgnore( Program *prg, Tree *tree ) +{ + if ( tree->flags & AF_RIGHT_IGNORE ) { + if ( tree->flags & AF_LEFT_IGNORE ) + return tree->child->next->tree; + else + return tree->child->tree; + } + return 0; +} + +Kid *treeLeftIgnoreKid( Program *prg, Tree *tree ) +{ + if ( tree->flags & AF_LEFT_IGNORE ) + return tree->child; + return 0; +} + +Kid *treeRightIgnoreKid( Program *prg, Tree *tree ) +{ + if ( tree->flags & AF_RIGHT_IGNORE ) { + if ( tree->flags & AF_LEFT_IGNORE ) + return tree->child->next; + else + return tree->child; + } + return 0; +} + +Tree *treeIterDerefCur( TreeIter *iter ) +{ + return iter->ref.kid == 0 ? 0 : iter->ref.kid->tree; +} + +void refSetValue( Ref *ref, Tree *v ) +{ + Kid *firstKid = ref->kid; + while ( ref != 0 && ref->kid == firstKid ) { + ref->kid->tree = v; + ref = ref->next; + } +} + +Tree *getRhsEl( Program *prg, Tree *lhs, long position ) +{ + Kid *pos = treeChild( prg, lhs ); + while ( position > 0 ) { + pos = pos->next; + position -= 1; + } + return pos->tree; +} + +Tree *getRhsVal( Program *prg, Tree *tree, int *a ) +{ + int i, len = a[0]; + for ( i = 0; i < len; i++ ) { + int prodNum = a[1 + i * 2]; + int childNum = a[1 + i * 2 + 1]; + if ( tree->prodNum == prodNum ) + return getRhsEl( prg, tree, childNum ); + } + return 0; +} + +void setField( Program *prg, Tree *tree, long field, Tree *value ) +{ + assert( tree->refs == 1 ); + if ( value != 0 ) + assert( value->refs >= 1 ); + setAttr( tree, field, value ); +} + +Tree *getField( Tree *tree, Word field ) +{ + return getAttr( tree, field ); +} + +Kid *getFieldKid( Tree *tree, Word field ) +{ + return getAttrKid( tree, 
field ); +} + +Tree *getFieldSplit( Program *prg, Tree *tree, Word field ) +{ + Tree *val = getAttr( tree, field ); + Tree *split = splitTree( prg, val ); + setAttr( tree, field, split ); + return split; +} + +void setUiterCur( Program *prg, UserIter *uiter, Tree *tree ) +{ + uiter->ref.kid->tree = tree; +} + +void setTriterCur( Program *prg, TreeIter *iter, Tree *tree ) +{ + iter->ref.kid->tree = tree; +} + +Tree *getPtrVal( Pointer *ptr ) +{ + return ptr->value->tree; +} + +Tree *getPtrValSplit( Program *prg, Pointer *ptr ) +{ + Tree *val = ptr->value->tree; + Tree *split = splitTree( prg, val ); + ptr->value->tree = split; + return split; +} + +/* This must traverse in the same order that the bindId assignments are done + * in. */ +int matchPattern( Tree **bindings, Program *prg, long pat, Kid *kid, int checkNext ) +{ + PatReplNode *nodes = prg->rtd->patReplNodes; + +// #ifdef COLM_LOG_MATCH +// if ( colm_log_match ) { +// LangElInfo *lelInfo = prg->rtd->lelInfo; +// cerr << "match pattern " << ( pat == -1 ? "NULL" : lelInfo[nodes[pat].id].name ) << +// " vs " << ( kid == 0 ? "NULL" : lelInfo[kid->tree->id].name ) << endl; +// } +// #endif + + /* match node, recurse on children. */ + if ( pat != -1 && kid != 0 ) { + if ( nodes[pat].id == kid->tree->id ) { + /* If the pattern node has data, then this means we need to match + * the data against the token data. */ + if ( nodes[pat].data != 0 ) { + /* Check the length of token text. */ + if ( nodes[pat].length != stringLength( kid->tree->tokdata ) ) + return false; + + /* Check the token text data. */ + if ( nodes[pat].length > 0 && memcmp( nodes[pat].data, + stringData( kid->tree->tokdata ), nodes[pat].length ) != 0 ) + return false; + } + + /* No failure, all okay. 
*/ + if ( nodes[pat].bindId > 0 ) { +// #ifdef COLM_LOG_MATCH +// if ( colm_log_match ) { +// cerr << "bindId: " << nodes[pat].bindId << endl; +// } +// #endif + bindings[nodes[pat].bindId] = kid->tree; + } + + /* If we didn't match a terminal duplicate of a nonterm then check + * down the children. */ + if ( !nodes[pat].stop ) { + /* Check for failure down child branch. */ + int childCheck = matchPattern( bindings, prg, + nodes[pat].child, treeChild( prg, kid->tree ), true ); + if ( ! childCheck ) + return false; + } + + /* If checking next, then look for failure there. */ + if ( checkNext ) { + int nextCheck = matchPattern( bindings, prg, + nodes[pat].next, kid->next, true ); + if ( ! nextCheck ) + return false; + } + + return true; + } + } + else if ( pat == -1 && kid == 0 ) { + /* Both null is a match. */ + return 1; + } + + return false; +} + + +long cmpTree( Program *prg, const Tree *tree1, const Tree *tree2 ) +{ + long cmpres = 0; + if ( tree1 == 0 ) { + if ( tree2 == 0 ) + return 0; + else + return -1; + } + else if ( tree2 == 0 ) + return 1; + else if ( tree1->id < tree2->id ) + return -1; + else if ( tree1->id > tree2->id ) + return 1; + else if ( tree1->id == LEL_ID_PTR ) { + if ( ((Pointer*)tree1)->value < ((Pointer*)tree2)->value ) + return -1; + else if ( ((Pointer*)tree1)->value > ((Pointer*)tree2)->value ) + return 1; + } + else if ( tree1->id == LEL_ID_INT ) { + if ( ((Int*)tree1)->value < ((Int*)tree2)->value ) + return -1; + else if ( ((Int*)tree1)->value > ((Int*)tree2)->value ) + return 1; + } + else if ( tree1->id == LEL_ID_STR ) { + cmpres = cmpString( ((Str*)tree1)->value, ((Str*)tree2)->value ); + if ( cmpres != 0 ) + return cmpres; + } + else { + if ( tree1->tokdata == 0 && tree2->tokdata != 0 ) + return -1; + else if ( tree1->tokdata != 0 && tree2->tokdata == 0 ) + return 1; + else if ( tree1->tokdata != 0 && tree2->tokdata != 0 ) { + cmpres = cmpString( tree1->tokdata, tree2->tokdata ); + if ( cmpres != 0 ) + return cmpres; + } + } + + 
Kid *kid1 = treeChild( prg, tree1 ); + Kid *kid2 = treeChild( prg, tree2 ); + + while ( true ) { + if ( kid1 == 0 && kid2 == 0 ) + return 0; + else if ( kid1 == 0 && kid2 != 0 ) + return -1; + else if ( kid1 != 0 && kid2 == 0 ) + return 1; + else { + cmpres = cmpTree( prg, kid1->tree, kid2->tree ); + if ( cmpres != 0 ) + return cmpres; + } + kid1 = kid1->next; + kid2 = kid2->next; + } +} + + +void splitRef( Program *prg, Tree ***psp, Ref *fromRef ) +{ + /* Go up the chain of kids, turing the pointers down. */ + Ref *last = 0, *ref = fromRef, *next = 0; + while ( ref->next != 0 ) { + next = ref->next; + ref->next = last; + last = ref; + ref = next; + } + ref->next = last; + + /* Now traverse the list, which goes down. */ + while ( ref != 0 ) { + if ( ref->kid->tree->refs > 1 ) { +// #ifdef COLM_LOG_BYTECODE +// if ( colm_log_bytecode ) { +// cerr << "splitting tree: " << ref->kid << " refs: " << +// ref->kid->tree->refs << endl; +// } +// #endif + + Ref *nextDown = ref->next; + while ( nextDown != 0 && nextDown->kid == ref->kid ) + nextDown = nextDown->next; + + Kid *oldNextKidDown = nextDown != 0 ? nextDown->kid : 0; + Kid *newNextKidDown = 0; + + Tree *newTree = copyTree( prg, ref->kid->tree, + oldNextKidDown, &newNextKidDown ); + treeUpref( newTree ); + + /* Downref the original. Don't need to consider freeing because + * refs were > 1. */ + ref->kid->tree->refs -= 1; + + while ( ref != 0 && ref != nextDown ) { + next = ref->next; + ref->next = 0; + + ref->kid->tree = newTree; + ref = next; + } + + /* Correct kid pointers down from ref. */ + while ( nextDown != 0 && nextDown->kid == oldNextKidDown ) { + nextDown->kid = newNextKidDown; + nextDown = nextDown->next; + } + } + else { + /* Reset the list as we go down. 
*/ + next = ref->next; + ref->next = 0; + ref = next; + } + } +} + +void splitIterCur( Program *prg, Tree ***psp, TreeIter *iter ) +{ + if ( iter->ref.kid == 0 ) + return; + + splitRef( prg, psp, &iter->ref ); +} + +Tree *setListMem( List *list, Half field, Tree *value ) +{ + assert( list->refs == 1 ); + if ( value != 0 ) + assert( value->refs >= 1 ); + + Tree *existing = 0; + switch ( field ) { + case 0: + existing = list->head->value; + list->head->value = value; + break; + case 1: + existing = list->tail->value; + list->tail->value = value; + break; + default: + assert( false ); + break; + } + return existing; +} + +TreePair mapRemove( Program *prg, Map *map, Tree *key ) +{ + MapEl *mapEl = mapImplFind( prg, map, key ); + TreePair result = { 0, 0 }; + if ( mapEl != 0 ) { + mapDetach( prg, map, mapEl ); + result.key = mapEl->key; + result.val = mapEl->tree; + mapElFree( prg, mapEl ); + } + + return result; +} + +Tree *mapUnstore( Program *prg, Map *map, Tree *key, Tree *existing ) +{ + Tree *stored = 0; + if ( existing == 0 ) { + MapEl *mapEl = mapDetachByKey( prg, map, key ); + stored = mapEl->tree; + mapElFree( prg, mapEl ); + } + else { + MapEl *mapEl = mapImplFind( prg, map, key ); + stored = mapEl->tree; + mapEl->tree = existing; + } + return stored; +} + +Tree *mapFind( Program *prg, Map *map, Tree *key ) +{ + MapEl *mapEl = mapImplFind( prg, map, key ); + return mapEl == 0 ? 
0 : mapEl->tree; +} + +long mapLength( Map *map ) +{ + return map->treeSize; +} + +void listAppend2( Program *prg, List *list, Tree *val ) +{ + assert( list->refs == 1 ); + if ( val != 0 ) + assert( val->refs >= 1 ); + ListEl *listEl = listElAllocate( prg ); + listEl->value = val; + listAppend( list, listEl ); +} + +Tree *listRemoveEnd( Program *prg, List *list ) +{ + Tree *tree = list->tail->value; + listElFree( prg, listDetachLast( list ) ); + return tree; +} + +Tree *getListMem( List *list, Word field ) +{ + Tree *result = 0; + switch ( field ) { + case 0: + result = list->head->value; + break; + case 1: + result = list->tail->value; + break; + default: + assert( false ); + break; + } + return result; +} + +Tree *getListMemSplit( Program *prg, List *list, Word field ) +{ + Tree *sv = 0; + switch ( field ) { + case 0: + sv = splitTree( prg, list->head->value ); + list->head->value = sv; + break; + case 1: + sv = splitTree( prg, list->tail->value ); + list->tail->value = sv; + break; + default: + assert( false ); + break; + } + return sv; +} + + +int mapInsert( Program *prg, Map *map, Tree *key, Tree *element ) +{ + MapEl *mapEl = mapInsertKey( prg, map, key, 0 ); + + if ( mapEl != 0 ) { + mapEl->tree = element; + return true; + } + + return false; +} + +void mapUnremove( Program *prg, Map *map, Tree *key, Tree *element ) +{ + MapEl *mapEl = mapInsertKey( prg, map, key, 0 ); + assert( mapEl != 0 ); + mapEl->tree = element; +} + +Tree *mapUninsert( Program *prg, Map *map, Tree *key ) +{ + MapEl *el = mapDetachByKey( prg, map, key ); + Tree *val = el->tree; + mapElFree( prg, el ); + return val; +} + +Tree *mapStore( Program *prg, Map *map, Tree *key, Tree *element ) +{ + Tree *oldTree = 0; + MapEl *elInTree = 0; + MapEl *mapEl = mapInsertKey( prg, map, key, &elInTree ); + + if ( mapEl != 0 ) + mapEl->tree = element; + else { + /* Element with key exists. Overwriting the value. 
*/ + oldTree = elInTree->tree; + elInTree->tree = element; + } + + return oldTree; +} + +void iterFind( Program *prg, Tree ***psp, TreeIter *iter, int tryFirst ) +{ + int anyTree = iter->searchId == prg->rtd->anyId; + Tree **top = iter->stackRoot; + Kid *child; + Tree **sp = *psp; + +rec_call: + if ( tryFirst && ( iter->ref.kid->tree->id == iter->searchId || anyTree ) ) { + *psp = sp; + return; + } + else { + child = treeChild( prg, iter->ref.kid->tree ); + if ( child != 0 ) { + vm_push( (SW) iter->ref.next ); + vm_push( (SW) iter->ref.kid ); + iter->ref.kid = child; + iter->ref.next = (Ref*)vm_ptop(); + while ( iter->ref.kid != 0 ) { + tryFirst = true; + goto rec_call; + rec_return: + iter->ref.kid = iter->ref.kid->next; + } + iter->ref.kid = (Kid*)vm_pop(); + iter->ref.next = (Ref*)vm_pop(); + } + } + + if ( top != vm_ptop() ) + goto rec_return; + + iter->ref.kid = 0; + *psp = sp; +} + +Tree *treeIterAdvance( Program *prg, Tree ***psp, TreeIter *iter ) +{ + assert( iter->stackSize == iter->stackRoot - *psp ); + + if ( iter->ref.kid == 0 ) { + /* Kid is zero, start from the root. */ + iter->ref = iter->rootRef; + iterFind( prg, psp, iter, true ); + } + else { + /* Have a previous item, continue searching from there. */ + iterFind( prg, psp, iter, false ); + } + + iter->stackSize = iter->stackRoot - *psp; + + return (iter->ref.kid ? prg->trueVal : prg->falseVal ); +} + +Tree *treeIterNextChild( Program *prg, Tree ***psp, TreeIter *iter ) +{ + Tree **sp = *psp; + assert( iter->stackSize == iter->stackRoot - vm_ptop() ); + Kid *kid = 0; + + if ( iter->ref.kid == 0 ) { + /* Kid is zero, start from the first child. */ + Kid *child = treeChild( prg, iter->rootRef.kid->tree ); + + if ( child == 0 ) + iter->ref.next = 0; + else { + /* Make a reference to the root. */ + vm_push( (SW) iter->rootRef.next ); + vm_push( (SW) iter->rootRef.kid ); + iter->ref.next = (Ref*)vm_ptop(); + + kid = child; + } + } + else { + /* Start at next. 
*/ + kid = iter->ref.kid->next; + } + + if ( iter->searchId != prg->rtd->anyId ) { + /* Have a previous item, go to the next sibling. */ + while ( kid != 0 && kid->tree->id != iter->searchId ) + kid = kid->next; + } + + iter->ref.kid = kid; + iter->stackSize = iter->stackRoot - vm_ptop(); + *psp = sp; + return ( iter->ref.kid ? prg->trueVal : prg->falseVal ); +} + +Tree *treeRevIterPrevChild( Program *prg, Tree ***psp, RevTreeIter *iter ) +{ + Tree **sp = *psp; + + assert( iter->stackSize == iter->stackRoot - vm_ptop() ); + + if ( iter->kidAtYield != iter->ref.kid ) { + /* Need to reload the kids. */ + Kid *kid = treeChild( prg, iter->rootRef.kid->tree ); + Kid **dst = (Kid**)iter->stackRoot - 1; + while ( kid != 0 ) { + *dst-- = kid; + kid = kid->next; + } + } + + if ( iter->ref.kid == 0 ) + iter->cur = (Kid**)iter->stackRoot - iter->children; + else + iter->cur += 1; + + if ( iter->searchId != prg->rtd->anyId ) { + /* Have a previous item, go to the next sibling. */ + while ( iter->cur != (Kid**)iter->stackRoot && (*iter->cur)->tree->id != iter->searchId ) + iter->cur += 1; + } + + if ( iter->cur == (Kid**)iter->stackRoot ) { + iter->ref.next = 0; + iter->ref.kid = 0; + } + else { + iter->ref.next = &iter->rootRef; + iter->ref.kid = *iter->cur; + } + + /* We will use this to detect a split above the iterated tree. */ + iter->kidAtYield = iter->ref.kid; + + iter->stackSize = iter->stackRoot - vm_ptop(); + + *psp = sp; + + return (iter->ref.kid ? 
prg->trueVal : prg->falseVal ); +} + +void iterFindRepeat( Program *prg, Tree ***psp, TreeIter *iter, int tryFirst ) +{ + Tree **sp = *psp; + int anyTree = iter->searchId == prg->rtd->anyId; + Tree **top = iter->stackRoot; + Kid *child; + +rec_call: + if ( tryFirst && ( iter->ref.kid->tree->id == iter->searchId || anyTree ) ) { + *psp = sp; + return; + } + else { + /* The repeat iterator is just like the normal top-down-left-right, + * execept it only goes into the children of a node if the node is the + * root of the iteration, or if does not have any neighbours to the + * right. */ + if ( top == vm_ptop() || iter->ref.kid->next == 0 ) { + child = treeChild( prg, iter->ref.kid->tree ); + if ( child != 0 ) { + vm_push( (SW) iter->ref.next ); + vm_push( (SW) iter->ref.kid ); + iter->ref.kid = child; + iter->ref.next = (Ref*)vm_ptop(); + while ( iter->ref.kid != 0 ) { + tryFirst = true; + goto rec_call; + rec_return: + iter->ref.kid = iter->ref.kid->next; + } + iter->ref.kid = (Kid*)vm_pop(); + iter->ref.next = (Ref*)vm_pop(); + } + } + } + + if ( top != vm_ptop() ) + goto rec_return; + + iter->ref.kid = 0; + *psp = sp; +} + +Tree *treeIterNextRepeat( Program *prg, Tree ***psp, TreeIter *iter ) +{ + assert( iter->stackSize == iter->stackRoot - *psp ); + + if ( iter->ref.kid == 0 ) { + /* Kid is zero, start from the root. */ + iter->ref = iter->rootRef; + iterFindRepeat( prg, psp, iter, true ); + } + else { + /* Have a previous item, continue searching from there. */ + iterFindRepeat( prg, psp, iter, false ); + } + + iter->stackSize = iter->stackRoot - *psp; + + return (iter->ref.kid ? 
prg->trueVal : prg->falseVal ); +} + +void iterFindRevRepeat( Program *prg, Tree ***psp, TreeIter *iter, int tryFirst ) +{ + Tree **sp = *psp; + int anyTree = iter->searchId == prg->rtd->anyId; + Tree **top = iter->stackRoot; + Kid *child; + + if ( tryFirst ) { + while ( true ) { + if ( top == vm_ptop() || iter->ref.kid->next == 0 ) { + child = treeChild( prg, iter->ref.kid->tree ); + + if ( child == 0 ) + break; + vm_push( (SW) iter->ref.next ); + vm_push( (SW) iter->ref.kid ); + iter->ref.kid = child; + iter->ref.next = (Ref*)vm_ptop(); + } + else { + /* Not the top and not there is a next, go over to it. */ + iter->ref.kid = iter->ref.kid->next; + } + } + + goto first; + } + + while ( true ) { + if ( top == vm_ptop() ) { + iter->ref.kid = 0; + return; + } + + if ( iter->ref.kid->next == 0 ) { + /* Go up one and then down. Remember we can't use iter->ref.next + * because the chain may have been split, setting it null (to + * prevent repeated walks up). */ + Ref *ref = (Ref*)vm_ptop(); + iter->ref.kid = treeChild( prg, ref->kid->tree ); + } + else { + iter->ref.kid = (Kid*)vm_pop(); + iter->ref.next = (Ref*)vm_pop(); + } +first: + if ( iter->ref.kid->tree->id == iter->searchId || anyTree ) { + *psp = sp; + return; + } + } + *psp = sp; + return; +} + + +Tree *treeIterPrevRepeat( Program *prg, Tree ***psp, TreeIter *iter ) +{ + assert( iter->stackSize == iter->stackRoot - *psp ); + + if ( iter->ref.kid == 0 ) { + /* Kid is zero, start from the root. */ + iter->ref = iter->rootRef; + iterFindRevRepeat( prg, psp, iter, true ); + } + else { + /* Have a previous item, continue searching from there. */ + iterFindRevRepeat( prg, psp, iter, false ); + } + + iter->stackSize = iter->stackRoot - *psp; + + return (iter->ref.kid ? prg->trueVal : prg->falseVal ); +} + +Tree *treeSearch( Program *prg, Kid *kid, long id ) +{ + /* This node the one? */ + if ( kid->tree->id == id ) + return kid->tree; + + Tree *res = 0; + + /* Search children. 
*/ + Kid *child = treeChild( prg, kid->tree ); + if ( child != 0 ) + res = treeSearch( prg, child, id ); + + /* Search siblings. */ + if ( res == 0 && kid->next != 0 ) + res = treeSearch( prg, kid->next, id ); + + return res; +} + +Tree *treeSearch2( Program *prg, Tree *tree, long id ) +{ + Tree *res = 0; + if ( tree->id == id ) + res = tree; + else { + Kid *child = treeChild( prg, tree ); + if ( child != 0 ) + res = treeSearch( prg, child, id ); + } + return res; +} + +void xmlEscapeData( struct ColmPrintArgs *printArgs, const char *data, long len ) +{ + int i; + for ( i = 0; i < len; i++ ) { + if ( data[i] == '<' ) + printArgs->out( printArgs, "<", 4 ); + else if ( data[i] == '>' ) + printArgs->out( printArgs, ">", 4 ); + else if ( data[i] == '&' ) + printArgs->out( printArgs, "&", 5 ); + else if ( (32 <= data[i] && data[i] <= 126) || data[i] == '\t' || data[i] == '\n' || data[i] == '\r' ) + printArgs->out( printArgs, &data[i], 1 ); + else { + char out[64]; + sprintf( out, "&#%u;", ((unsigned)data[i]) ); + printArgs->out( printArgs, out, strlen(out) ); + } + } +} + +void initStrCollect( StrCollect *collect ) +{ + collect->data = (char*) malloc( BUFFER_INITIAL_SIZE ); + collect->allocated = BUFFER_INITIAL_SIZE; + collect->length = 0; +} + +void strCollectDestroy( StrCollect *collect ) +{ + free( collect->data ); +} + +void strCollectAppend( StrCollect *collect, const char *data, long len ) +{ + long newLen = collect->length + len; + if ( newLen > collect->allocated ) { + collect->allocated *= newLen * 2; + collect->data = (char*) realloc( collect->data, collect->allocated ); + } + memcpy( collect->data + collect->length, data, len ); + collect->length += len; +} + +void strCollectClear( StrCollect *collect ) +{ + collect->length = 0; +} + +#define INT_SZ 32 + +void printStr( struct ColmPrintArgs *printArgs, Head *str ) +{ + printArgs->out( printArgs, (char*)(str->data), str->length ); +} + +void appendCollect( struct ColmPrintArgs *args, const char *data, int 
length ) +{ + strCollectAppend( (StrCollect*) args->arg, data, length ); +} + +void appendFile( struct ColmPrintArgs *args, const char *data, int length ) +{ + fwrite( data, length, 1, (FILE*)args->arg ); +} + +Tree *treeTrim( struct ColmProgram *prg, Tree **sp, Tree *tree ) +{ + debug( REALM_PARSE, "attaching left ignore\n" ); + + /* Make the ignore list for the left-ignore. */ + Tree *leftIgnore = treeAllocate( prg ); + leftIgnore->id = LEL_ID_IGNORE; + leftIgnore->flags |= AF_SUPPRESS_RIGHT; + + tree = pushLeftIgnore( prg, tree, leftIgnore ); + + debug( REALM_PARSE, "attaching ignore right\n" ); + + /* Copy the ignore list first if we need to attach it as a right + * ignore. */ + Tree *rightIgnore = 0; + rightIgnore = treeAllocate( prg ); + rightIgnore->id = LEL_ID_IGNORE; + rightIgnore->flags |= AF_SUPPRESS_LEFT; + + tree = pushRightIgnore( prg, tree, rightIgnore ); + + return tree; +} + +enum ReturnType +{ + Done = 1, + CollectIgnoreLeft, + CollectIgnoreRight, + RecIgnoreList, + ChildPrint +}; + +enum VisitType +{ + IgnoreWrapper, + IgnoreData, + Term, + NonTerm, +}; + +#define TF_TERM_SEEN 0x1 + +void printKid( Program *prg, Tree **sp, struct ColmPrintArgs *printArgs, Kid *kid ) +{ + enum ReturnType rt; + Kid *parent = 0; + Kid *leadingIgnore = 0; + enum VisitType visitType; + int flags = 0; + + /* Iterate the kids passed in. We are expecting a next, which will allow us + * to print the trailing ignore list. */ + while ( kid != 0 ) { + vm_push( (SW) Done ); + goto rec_call; + rec_return_top: + kid = kid->next; + } + + return; + +rec_call: + if ( kid->tree == 0 ) + goto skip_null; + + /* If not currently skipping ignore data, then print it. Ignore data can + * be associated with terminals and nonterminals. 
*/ + if ( kid->tree->flags & AF_LEFT_IGNORE ) { + vm_push( (SW)parent ); + vm_push( (SW)kid ); + parent = kid; + kid = treeLeftIgnoreKid( prg, kid->tree ); + vm_push( (SW) CollectIgnoreLeft ); + goto rec_call; + rec_return_ign_left: + kid = (Kid*)vm_pop(); + parent = (Kid*)vm_pop(); + } + + if ( kid->tree->id == LEL_ID_IGNORE ) + visitType = IgnoreWrapper; + else if ( parent != 0 && parent->tree->id == LEL_ID_IGNORE ) + visitType = IgnoreData; + else if ( kid->tree->id < prg->rtd->firstNonTermId ) + visitType = Term; + else + visitType = NonTerm; + + debug( REALM_PRINT, "visit type: %d\n", visitType ); + + if ( visitType == IgnoreData ) { + debug( REALM_PRINT, "putting %p on ignore list\n", kid->tree ); + Kid *newIgnore = kidAllocate( prg ); + newIgnore->next = leadingIgnore; + leadingIgnore = newIgnore; + leadingIgnore->tree = kid->tree; + goto skip_node; + } + + if ( visitType == IgnoreWrapper ) { + Kid *newIgnore = kidAllocate( prg ); + newIgnore->next = leadingIgnore; + leadingIgnore = newIgnore; + leadingIgnore->tree = kid->tree; + /* Don't skip. */ + } + + /* print leading ignore? Triggered by terminals. */ + if ( visitType == Term ) { + /* Reverse the leading ignore list. */ + if ( leadingIgnore != 0 ) { + Kid *ignore = 0, *last = 0; + + /* Reverse the list and take the opportunity to implement the + * suppress left. */ + while ( true ) { + Kid *next = leadingIgnore->next; + leadingIgnore->next = last; + + if ( leadingIgnore->tree->flags & AF_SUPPRESS_LEFT ) { + /* We are moving left. Chop off the tail. */ + debug( REALM_PRINT, "suppressing left\n" ); + freeKidList( prg, next ); + break; + } + + if ( next == 0 ) + break; + + last = leadingIgnore; + leadingIgnore = next; + } + + /* Print the leading ignore list. Also implement the suppress right + * in the process. 
*/ + if ( printArgs->comm && (!printArgs->trim || (flags & TF_TERM_SEEN && kid->tree->id > 0)) ) { + ignore = leadingIgnore; + while ( ignore != 0 ) { + if ( ignore->tree->flags & AF_SUPPRESS_RIGHT ) + break; + + if ( ignore->tree->id != LEL_ID_IGNORE ) { + vm_push( (SW)visitType ); + vm_push( (SW)leadingIgnore ); + vm_push( (SW)ignore ); + vm_push( (SW)parent ); + vm_push( (SW)kid ); + + leadingIgnore = 0; + kid = ignore; + parent = 0; + + debug( REALM_PRINT, "rec call on %p\n", kid->tree ); + vm_push( (SW) RecIgnoreList ); + goto rec_call; + rec_return_il: + + kid = (Kid*)vm_pop(); + parent = (Kid*)vm_pop(); + ignore = (Kid*)vm_pop(); + leadingIgnore = (Kid*)vm_pop(); + visitType = (enum VisitType)vm_pop(); + } + + ignore = ignore->next; + } + } + + /* Free the leading ignore list. */ + freeKidList( prg, leadingIgnore ); + leadingIgnore = 0; + } + } + + if ( visitType == Term || visitType == NonTerm ) { + /* Open the tree. */ + printArgs->openTree( prg, sp, printArgs, parent, kid ); + } + + if ( visitType == Term ) + flags |= TF_TERM_SEEN; + + if ( visitType == Term || visitType == IgnoreData ) { + /* Print contents. */ + if ( kid->tree->id < prg->rtd->firstNonTermId ) { + debug( REALM_PRINT, "printing terminal %p\n", kid->tree ); + if ( kid->tree->id != 0 ) + printArgs->printTerm( prg, sp, printArgs, kid ); + } + } + + /* Print children. */ + Kid *child = printArgs->attr ? + treeAttr( prg, kid->tree ) : + treeChild( prg, kid->tree ); + + if ( child != 0 ) { + vm_push( (SW)visitType ); + vm_push( (SW)parent ); + vm_push( (SW)kid ); + parent = kid; + kid = child; + while ( kid != 0 ) { + vm_push( (SW) ChildPrint ); + goto rec_call; + rec_return: + kid = kid->next; + } + kid = (Kid*)vm_pop(); + parent = (Kid*)vm_pop(); + visitType = (enum VisitType)vm_pop(); + } + + if ( visitType == Term || visitType == NonTerm ) { + /* close the tree. 
*/ + printArgs->closeTree( prg, sp, printArgs, parent, kid ); + } + +skip_node: + + /* If not currently skipping ignore data, then print it. Ignore data can + * be associated with terminals and nonterminals. */ + if ( kid->tree->flags & AF_RIGHT_IGNORE ) { + debug( REALM_PRINT, "right ignore\n" ); + vm_push( (SW)parent ); + vm_push( (SW)kid ); + parent = kid; + kid = treeRightIgnoreKid( prg, kid->tree ); + vm_push( (SW) CollectIgnoreRight ); + goto rec_call; + rec_return_ign_right: + kid = (Kid*)vm_pop(); + parent = (Kid*)vm_pop(); + } + +/* For skiping over content on null. */ +skip_null: + + rt = (enum ReturnType)vm_pop(); + switch ( rt ) { + case Done: + debug( REALM_PRINT, "return: done\n" ); + goto rec_return_top; + break; + case CollectIgnoreLeft: + debug( REALM_PRINT, "return: ignore left\n" ); + goto rec_return_ign_left; + case CollectIgnoreRight: + debug( REALM_PRINT, "return: ignore right\n" ); + goto rec_return_ign_right; + case RecIgnoreList: + debug( REALM_PRINT, "return: ignore list\n" ); + goto rec_return_il; + case ChildPrint: + debug( REALM_PRINT, "return: child print\n" ); + goto rec_return; + } +} + +void printTreeArgs( Program *prg, Tree **sp, struct ColmPrintArgs *printArgs, Tree *tree ) +{ + if ( tree == 0 ) + printArgs->out( printArgs, "NIL", 3 ); + else { + /* This term tree allows us to print trailing ignores. 
*/ + Tree termTree; + memset( &termTree, 0, sizeof(termTree) ); + + Kid kid, term; + term.tree = &termTree; + term.next = 0; + term.flags = 0; + + kid.tree = tree; + kid.next = &term; + kid.flags = 0; + + printKid( prg, sp, printArgs, &kid ); + } +} + +void printTermTree( Program *prg, Tree **sp, struct ColmPrintArgs *printArgs, Kid *kid ) +{ + debug( REALM_PRINT, "printing term %p\n", kid->tree ); + + if ( kid->tree->id == LEL_ID_INT ) { + char buf[INT_SZ]; + sprintf( buf, "%ld", ((Int*)kid->tree)->value ); + printArgs->out( printArgs, buf, strlen(buf) ); + } + else if ( kid->tree->id == LEL_ID_BOOL ) { + if ( ((Int*)kid->tree)->value ) + printArgs->out( printArgs, "true", 4 ); + else + printArgs->out( printArgs, "false", 5 ); + } + else if ( kid->tree->id == LEL_ID_PTR ) { + char buf[INT_SZ]; + printArgs->out( printArgs, "#", 1 ); + sprintf( buf, "%p", (void*) ((Pointer*)kid->tree)->value ); + printArgs->out( printArgs, buf, strlen(buf) ); + } + else if ( kid->tree->id == LEL_ID_STR ) { + printStr( printArgs, ((Str*)kid->tree)->value ); + } + else if ( kid->tree->id == LEL_ID_STREAM ) { + char buf[INT_SZ]; + printArgs->out( printArgs, "#", 1 ); + sprintf( buf, "%p", (void*) ((Stream*)kid->tree)->file ); + printArgs->out( printArgs, buf, strlen(buf) ); + } + else if ( kid->tree->tokdata != 0 && + stringLength( kid->tree->tokdata ) > 0 ) + { + printArgs->out( printArgs, stringData( kid->tree->tokdata ), + stringLength( kid->tree->tokdata ) ); + } +} + + +void printNull( Program *prg, Tree **sp, struct ColmPrintArgs *args, Kid *parent, Kid *kid ) +{ +} + +void openTreeXml( Program *prg, Tree **sp, struct ColmPrintArgs *args, Kid *parent, Kid *kid ) +{ + /* Skip the terminal that is for forcing trailing ignores out. */ + if ( kid->tree->id == 0 ) + return; + + LangElInfo *lelInfo = prg->rtd->lelInfo; + + /* List flattening: skip the repeats and lists that are a continuation of + * the list. 
*/ + if ( parent != 0 && parent->tree->id == kid->tree->id && kid->next == 0 && + ( lelInfo[parent->tree->id].repeat || lelInfo[parent->tree->id].list ) ) + { + return; + } + + const char *name = lelInfo[kid->tree->id].xmlTag; + args->out( args, "<", 1 ); + args->out( args, name, strlen( name ) ); + args->out( args, ">", 1 ); +} + +void printTermXml( Program *prg, Tree **sp, struct ColmPrintArgs *printArgs, Kid *kid ) +{ + //Kid *child; + + /*child = */ treeChild( prg, kid->tree ); + if ( kid->tree->id == LEL_ID_PTR ) { + char ptr[32]; + sprintf( ptr, "%p\n", (void*)((Pointer*)kid->tree)->value ); + printArgs->out( printArgs, ptr, strlen(ptr) ); + } + else if ( kid->tree->id == LEL_ID_BOOL ) { + if ( ((Int*)kid->tree)->value ) + printArgs->out( printArgs, "true", 4 ); + else + printArgs->out( printArgs, "false", 5 ); + } + else if ( kid->tree->id == LEL_ID_INT ) { + char ptr[32]; + sprintf( ptr, "%ld", ((Int*)kid->tree)->value ); + printArgs->out( printArgs, ptr, strlen(ptr) ); + } + else if ( kid->tree->id == LEL_ID_STR ) { + Head *head = (Head*) ((Str*)kid->tree)->value; + + xmlEscapeData( printArgs, (char*)(head->data), head->length ); + } + else if ( 0 < kid->tree->id && kid->tree->id < prg->rtd->firstNonTermId && + kid->tree->id != LEL_ID_IGNORE && + kid->tree->tokdata != 0 && + stringLength( kid->tree->tokdata ) > 0 ) + { + xmlEscapeData( printArgs, stringData( kid->tree->tokdata ), + stringLength( kid->tree->tokdata ) ); + } +} + + +void closeTreeXml( Program *prg, Tree **sp, struct ColmPrintArgs *args, Kid *parent, Kid *kid ) +{ + /* Skip the terminal that is for forcing trailing ignores out. */ + if ( kid->tree->id == 0 ) + return; + + LangElInfo *lelInfo = prg->rtd->lelInfo; + + /* List flattening: skip the repeats and lists that are a continuation of + * the list. 
*/ + if ( parent != 0 && parent->tree->id == kid->tree->id && kid->next == 0 && + ( lelInfo[parent->tree->id].repeat || lelInfo[parent->tree->id].list ) ) + { + return; + } + + const char *name = lelInfo[kid->tree->id].xmlTag; + args->out( args, "</", 2 ); + args->out( args, name, strlen( name ) ); + args->out( args, ">", 1 ); +} + +void printTreeCollect( Program *prg, Tree **sp, StrCollect *collect, Tree *tree, int trim ) +{ + struct ColmPrintArgs printArgs = { collect, true, false, trim, &appendCollect, + &printNull, &printTermTree, &printNull }; + printTreeArgs( prg, sp, &printArgs, tree ); +} + +void printTreeFile( Program *prg, Tree **sp, FILE *out, Tree *tree, int trim ) +{ + struct ColmPrintArgs printArgs = { out, true, false, trim, &appendFile, + &printNull, &printTermTree, &printNull }; + printTreeArgs( prg, sp, &printArgs, tree ); +} + +void printXmlStdout( Program *prg, Tree **sp, Tree *tree, int commAttr, int trim ) +{ + struct ColmPrintArgs printArgs = { stdout, commAttr, commAttr, trim, &appendFile, + &openTreeXml, &printTermXml, &closeTreeXml }; + printTreeArgs( prg, sp, &printArgs, tree ); +} + diff --git a/src/tree.h b/src/tree.h new file mode 100644 index 00000000..29c0ee74 --- /dev/null +++ b/src/tree.h @@ -0,0 +1,355 @@ +/* + * Copyright 2010-2012 Adrian Thurston <thurston@complang.org> + */ + +/* This file is part of Colm. + * + * Colm is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Colm is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Colm; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+/* Include guard for the tree declarations.
+ * NOTE(review): identifiers that begin with an underscore followed by an
+ * uppercase letter or a second underscore are reserved in C; that covers
+ * this guard name and the _Xxx struct tags below. Renaming them would reach
+ * beyond this header, so it is only flagged here. */
+#ifndef __COLM_TREE_H
+#define __COLM_TREE_H
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+#include <colm.h>
+/* Scalar aliases used by the declarations below.
+ * NOTE(review): Half has the same underlying type as Word (unsigned long)
+ * -- confirm that is intended.
+ * NOTE(review): FILE, SourceStream, InputStream and struct ColmProgram are
+ * used further down but are not declared in this header; presumably they
+ * come from <colm.h> or an earlier include -- TODO confirm. */
+typedef unsigned char Code;
+typedef unsigned long Word;
+typedef unsigned long Half;
+struct Bindings; /* Forward declaration only; nothing else in this header refers to it. */
+/* File: node with prev and next links to other File nodes. No other
+ * fields are declared here. */
+typedef struct _File
+{
+	struct _File *prev;
+	struct _File *next;
+} File;
+/* Location: a File pointer plus line, column and byte values. */
+typedef struct _Location
+{
+	File *file;
+	long line;
+	long column;
+	long byte;
+} Location;
+
+/* Header located just before string data.
+ * Holds a const data pointer, a length and a Location pointer. */
+typedef struct _Head
+{
+	const char *data;
+	long length;
+	Location *location;
+} Head;
+/* Kid: a Tree pointer, a next link to another Kid and a flags byte.
+ * The field order is significant; see the comment inside the struct. */
+typedef struct ColmKid
+{
+	/* The tree needs to be first since pointers to kids are used to reference
+	 * trees on the stack. A pointer to the word that is a Tree* is cast to
+	 * a Kid*. */
+	struct ColmTree *tree;
+	struct ColmKid *next;
+	unsigned char flags;
+} Kid;
+/* Ref: a Kid pointer chained to further Ref records through next. */
+typedef struct _Ref
+{
+	struct ColmKid *kid;
+	struct _Ref *next;
+} Ref;
+/* Tree: the common layout repeated by the overlay structs below.
+ * Int, Pointer, Str, Stream, Input and Parser start with the same four
+ * fields (id, flags, refs, child). List starts with id, flags and refs and
+ * then declares ListEl *head in the fourth position. */
+typedef struct ColmTree
+{
+	/* First four will be overlaid in other structures. */
+	short id;
+	unsigned short flags;
+	long refs;
+	Kid *child;
+
+	Head *tokdata;
+
+	/* FIXME: this needs to go somewhere else. Will do for now. */
+	unsigned short prodNum;
+} Tree;
+
+/* TreePair: a key Tree pointer and a val Tree pointer. */
+typedef struct _TreePair
+{
+	Tree *key;
+	Tree *val;
+} TreePair;
+/* ParseTree: begins with id and flags like Tree, but it has no refs field
+ * and its child/next/leftIgnore/rightIgnore links point to other ParseTree
+ * nodes, so it does not repeat the Tree layout past the first two fields.
+ * shadow is the only Kid pointer it declares. */
+typedef struct _ParseTree
+{
+	short id;
+	unsigned short flags;
+
+	struct _ParseTree *child;
+	struct _ParseTree *next;
+	struct _ParseTree *leftIgnore;
+	struct _ParseTree *rightIgnore;
+	Kid *shadow;
+
+	/* Parsing algorithm. */
+	long state;
+	long region;
+	short causeReduce;
+
+	/* FIXME: unify probably. */
+	char retryLower;
+	char retryUpper;
+} ParseTree;
+/* Int: Tree overlay whose additional field is a long value. */
+typedef struct _Int
+{
+	/* Must overlay Tree. */
+	short id;
+	unsigned short flags;
+	long refs;
+	Kid *child;
+
+	long value;
+} Int;
+/* Pointer: Tree overlay whose additional field is a Kid pointer. */
+typedef struct _Pointer
+{
+	/* Must overlay Tree. */
+	short id;
+	unsigned short flags;
+	long refs;
+	Kid *child;
+
+	Kid *value;
+} Pointer;
+/* Str: Tree overlay whose additional field is a Head pointer. */
+typedef struct _Str
+{
+	/* Must overlay Tree. */
+	short id;
+	unsigned short flags;
+	long refs;
+	Kid *child;
+
+	Head *value;
+} Str;
+/* ListEl: list node with next and prev links. Its first two fields (a Tree
+ * pointer, then a next pointer) line up with the first two fields of Kid;
+ * its third field is prev where Kid declares flags. */
+typedef struct _ListEl
+{
+	/* Must overlay kid. */
+	Tree *value;
+	struct _ListEl *next;
+	struct _ListEl *prev;
+} ListEl;
+
+/*
+ * Maps
+ *
+ * GenericInfo is a record of six long values. List and Parser each hold a
+ * pointer to one in their genericInfo field.
+ */
+typedef struct _GenericInfo
+{
+	long type;
+	long typeArg;
+	long keyOffset;
+	long keyType;
+	long langElId;
+	long parserId;
+} GenericInfo;
+/* List: repeats id, flags and refs from Tree; the fourth field is
+ * ListEl *head where Tree declares Kid *child. Also declares tail, listLen
+ * and a GenericInfo pointer. */
+typedef struct _List
+{
+	/* Must overlay Tree. */
+	short id;
+	unsigned short flags;
+	long refs;
+	ListEl *head;
+
+	ListEl *tail;
+	long listLen;
+	GenericInfo *genericInfo;
+
+} List;
+/* Stream: Tree overlay with a FILE pointer and a SourceStream pointer. */
+typedef struct _Stream
+{
+	/* Must overlay Tree. */
+	short id;
+	unsigned short flags;
+	long refs;
+	Kid *child;
+
+	FILE *file;
+	SourceStream *in;
+} Stream;
+/* Input: Tree overlay with an InputStream pointer. */
+typedef struct _Input
+{
+	/* Must overlay Tree. */
+	short id;
+	unsigned short flags;
+	long refs;
+	Kid *child;
+
+	InputStream *in;
+} Input;
+/* Parser: Tree overlay with a GenericInfo pointer, pointers to struct
+ * _PdaRun and struct _FsmRun (both referenced only by tag in this header),
+ * a struct _Input pointer and a result Tree pointer. */
+typedef struct _Parser
+{
+	/* Must overlay Tree. */
+	short id;
+	unsigned short flags;
+	long refs;
+	Kid *child;
+
+	GenericInfo *genericInfo;
+
+	struct _PdaRun *pdaRun;
+	struct _FsmRun *fsmRun;
+	struct _Input *input;
+	Tree *result;
+} Parser;
+/* TreeIter: two Ref values (rootRef and ref), a searchId, and a stackRoot
+ * pointer with a stackSize. */
+typedef struct _TreeIter
+{
+	Ref rootRef;
+	Ref ref;
+	long searchId;
+	Tree **stackRoot;
+	long stackSize;
+} TreeIter;
+
+/* This must overlay tree iter because some of the same bytecodes are used.
+ * The first five fields repeat TreeIter exactly, in the same order; the
+ * three fields after the blank line exist only in this struct. */
+typedef struct _RevTreeIter
+{
+	Ref rootRef;
+	Ref ref;
+	long searchId;
+	Tree **stackRoot;
+	long stackSize;
+
+	/* For detecting a split at the leaf. */
+	Kid *kidAtYield;
+	long children;
+	Kid **cur;
+} RevTreeIter;
+
+/* UserIter: a Ref, the stackRoot and frame pointers, argSize and stackSize,
+ * a Code pointer named resume, and a searchId. */
+typedef struct _UserIter
+{
+	/* The current item. */
+	Ref ref;
+	Tree **stackRoot;
+	long argSize;
+	long stackSize;
+	Code *resume;
+	Tree **frame;
+	long searchId;
+} UserIter;
+
+/* Function declarations follow; no definitions live in this header.
+ * NOTE(review): most of them take struct ColmProgram *prg as the first
+ * parameter. constructReplacementTree and matchPattern are exceptions: prg
+ * comes after the bindings argument there. */
+void treeUpref( Tree *tree );
+void treeDownref( struct ColmProgram *prg, Tree **sp, Tree *tree );
+long cmpTree( struct ColmProgram *prg, const Tree *tree1, const Tree *tree2 );
+
+Tree *pushRightIgnore( struct ColmProgram *prg, Tree *pushTo, Tree *rightIgnore );
+Tree *pushLeftIgnore( struct ColmProgram *prg, Tree *pushTo, Tree *leftIgnore );
+Tree *popRightIgnore( struct ColmProgram *prg, Tree **sp, Tree *popFrom, Tree **rightIgnore );
+Tree *popLeftIgnore( struct ColmProgram *prg, Tree **sp, Tree *popFrom, Tree **leftIgnore );
+Tree *treeLeftIgnore( struct ColmProgram *prg, Tree *tree );
+Tree *treeRightIgnore( struct ColmProgram *prg, Tree *tree );
+Kid *treeLeftIgnoreKid( struct ColmProgram *prg, Tree *tree );
+Kid *treeRightIgnoreKid( struct ColmProgram *prg, Tree *tree );
+Kid *treeChild( struct ColmProgram *prg, const Tree *tree );
+Kid *treeAttr( struct ColmProgram *prg, const Tree *tree );
+Kid *kidListConcat( Kid *list1, Kid *list2 );
+Kid *treeExtractChild( struct ColmProgram *prg, Tree *tree );
+Kid *reverseKidList( Kid *kid );
+
+Tree *constructInteger( struct ColmProgram *prg, long i );
+Tree *constructPointer( struct ColmProgram *prg, Tree *tree );
+Tree *constructTerm( struct ColmProgram *prg, Word id, Head *tokdata );
+Tree *constructReplacementTree( Kid *kid, Tree **bindings, struct ColmProgram *prg, long pat );
+Tree *createGeneric( struct ColmProgram *prg, long genericId );
+Tree *constructToken( struct ColmProgram *prg, Tree **root, long nargs );
+Tree *constructInput( struct ColmProgram *prg );
+
+
+int testFalse( struct ColmProgram *prg, Tree *tree );
+Tree *makeTree( struct ColmProgram *prg, Tree **root, long nargs );
+Stream *openFile( struct ColmProgram *prg, Tree *name, Tree *mode );
+Stream *openStreamFd( struct ColmProgram *prg, long fd );
+Kid *copyIgnoreList( struct ColmProgram *prg, Kid *ignoreHeader );
+Kid *copyKidList( struct ColmProgram *prg, Kid *kidList );
+void streamFree( struct ColmProgram *prg, Stream *s );
+Tree *copyTree( struct ColmProgram *prg, Tree *tree, Kid *oldNextDown, Kid **newNextDown );
+/* NOTE(review): getField and getFieldSplit take the field as Word, while
+ * setField takes it as long. */
+Tree *getPtrVal( Pointer *ptr );
+Tree *getPtrValSplit( struct ColmProgram *prg, Pointer *ptr );
+Tree *getField( Tree *tree, Word field );
+Tree *getFieldSplit( struct ColmProgram *prg, Tree *tree, Word field );
+Tree *getRhsEl( struct ColmProgram *prg, Tree *lhs, long position );
+void setField( struct ColmProgram *prg, Tree *tree, long field, Tree *value );
+
+void setTriterCur( struct ColmProgram *prg, TreeIter *iter, Tree *tree );
+void setUiterCur( struct ColmProgram *prg, UserIter *uiter, Tree *tree );
+void refSetValue( Ref *ref, Tree *v );
+Tree *treeSearch( struct ColmProgram *prg, Kid *kid, long id );
+Tree *treeSearch2( struct ColmProgram *prg, Tree *tree, long id );
+
+int matchPattern( Tree **bindings, struct ColmProgram *prg, long pat, Kid *kid, int checkNext );
+Tree *treeIterDerefCur( TreeIter *iter );
+
+/* For making references of attributes. Returns a Kid pointer, where
+ * getField with the same parameters returns a Tree pointer. */
+Kid *getFieldKid( Tree *tree, Word field );
+/* copyRealTree has the same parameter list and return type as copyTree.
+ * NOTE(review): setListMem takes the field as Half, while getListMem and
+ * getListMemSplit take it as Word. */
+Tree *copyRealTree( struct ColmProgram *prg, Tree *tree, Kid *oldNextDown, Kid **newNextDown );
+void splitIterCur( struct ColmProgram *prg, Tree ***psp, TreeIter *iter );
+Tree *setListMem( List *list, Half field, Tree *value );
+
+void listAppend2( struct ColmProgram *prg, List *list, Tree *val );
+Tree *listRemoveEnd( struct ColmProgram *prg, List *list );
+Tree *getListMem( List *list, Word field );
+Tree *getListMemSplit( struct ColmProgram *prg, List *list, Word field );
+/* NOTE(review): the iterator functions below (and splitIterCur above) take
+ * Tree ***psp where the other functions take Tree **sp; presumably psp lets
+ * the callee update the stack pointer of the caller -- TODO confirm. */
+Tree *treeIterAdvance( struct ColmProgram *prg, Tree ***psp, TreeIter *iter );
+Tree *treeIterNextChild( struct ColmProgram *prg, Tree ***psp, TreeIter *iter );
+Tree *treeRevIterPrevChild( struct ColmProgram *prg, Tree ***psp, RevTreeIter *iter );
+Tree *treeIterNextRepeat( struct ColmProgram *prg, Tree ***psp, TreeIter *iter );
+Tree *treeIterPrevRepeat( struct ColmProgram *prg, Tree ***psp, TreeIter *iter );
+
+/* An automatically grown buffer for collecting tokens. Always reuses space;
+ * never shrinks its allocation.
+ * NOTE(review): allocated and length are int, while strCollectAppend takes
+ * its len argument as long -- confirm that lengths beyond the int range
+ * cannot occur. */
+typedef struct _StrCollect
+{
+	char *data;
+	int allocated;
+	int length;
+} StrCollect;
+
+void initStrCollect( StrCollect *collect );
+void strCollectDestroy( StrCollect *collect );
+void strCollectAppend( StrCollect *collect, const char *data, long len );
+void strCollectClear( StrCollect *collect );
+Tree *treeTrim( struct ColmProgram *prg, Tree **sp, Tree *tree );
+/* Printing entry points: one takes a StrCollect, one takes a FILE pointer,
+ * and one takes neither (printXmlStdout). */
+void printTreeCollect( struct ColmProgram *prg, Tree **sp, StrCollect *collect, Tree *tree, int trim );
+void printTreeFile( struct ColmProgram *prg, Tree **sp, FILE *out, Tree *tree, int trim );
+void printXmlStdout( struct ColmProgram *prg, Tree **sp, Tree *tree, int commAttr, int trim );
+
+#if defined(__cplusplus)
+}
+#endif
+
+#endif
+
|